1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will peek into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
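/*
 * Illustrative usage (added for exposition): the modes described above can
 * be selected either on the kernel command line or at run time through the
 * sysctl file that mirrors this variable:
 *
 *	ftrace_dump_on_oops                              (boot: all CPU buffers)
 *	ftrace_dump_on_oops=orig_cpu                     (boot: only the oops CPU)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    (run time: all CPUs)
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops    (run time: oops CPU only)
 */
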
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155
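/*
 * Illustrative layout (added for exposition) of one saved array holding N
 * maps, following the description above; the map fields shown are those of
 * struct trace_enum_map as defined elsewhere in the tree:
 *
 *	[0]      head { .mod = owning module (or NULL), .length = N }
 *	[1..N]   map  { .system, .enum_string, .enum_value }
 *	[N+1]    tail { .next = next saved array (or NULL), .end = NULL }
 */
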
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
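/*
 * Worked example (added for exposition): the +500 rounds to the nearest
 * microsecond, so ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */
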
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, and if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork, and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
470
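/*
 * Illustrative sketch (added for exposition; not part of the original file):
 * how the helpers above could back the seq_file operations of a hypothetical
 * pid-list file. The example_* names and the use of m->private to carry the
 * trace_pid_list are assumptions for illustration only.
 *
 *	static void *example_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void example_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pids_seq_ops = {
 *		.start	= example_pids_start,
 *		.next	= example_pids_next,
 *		.stop	= example_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */
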
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always recreate a new array. The write is an all-or-nothing
492          * operation. A new array is created whenever the user adds new
493          * pids. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
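	/*
	 * Sizing note (added for exposition): one bit per possible pid,
	 * rounded up to whole bytes; with the default pid_max of 32768
	 * this is a 4 KiB allocation.
	 */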
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so that it can be used in fast paths,
592  * such as by the irqsoff tracer. But it may be inaccurate due to races.
593  * If you need to know the accurate state, use tracing_is_on(), which is
594  * a little slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384 entries.
613  * If a dump on oops happens, it is much appreciated
614  * not to have to wait for all that output. Anyway, this can be
615  * configured at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
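/*
 * Illustrative usage pattern (added for exposition; not part of the original
 * file): a reader consuming events from one cpu buffer brackets the work
 * with the helpers defined below; process_event() is only a placeholder.
 *
 *	trace_access_lock(cpu);
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)))
 *		process_event(event);
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock exclusively,
 * blocking all per-cpu readers for the duration.
 */
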
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
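/*
 * Illustrative usage (added for exposition): this function is normally not
 * called directly; kernel code uses the trace_puts() wrapper, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * which resolves to __trace_puts() or __trace_bputs() (below) depending on
 * whether the string is a compile-time constant.
 */
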
856 /**
857  * __trace_bputs - write the pointer to a constant string into the trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string whose address is written into the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot either with
935  * tracing_snapshot_alloc(), or manually with:
936  *   echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer; instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
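/*
 * Illustrative sketch (added for exposition; not part of the original file):
 * following the kernel-doc above, a user of the snapshot API allocates the
 * spare buffer once from sleepable context and then takes snapshots from the
 * fast path; tracing_alloc_snapshot() may sleep, tracing_snapshot() does not.
 * The example_* names are assumptions.
 *
 *	static int __init example_init(void)
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void example_fast_path(void)
 *	{
 *		if (unlikely(example_bad_condition()))
 *			tracing_snapshot();
 *	}
 */
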
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races of where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
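/*
 * Illustrative usage (added for exposition): a common debugging pattern,
 * in line with the kernel-doc above, is to stop the buffers as soon as a
 * problem is detected so that the events leading up to it are preserved;
 * example_detected_corruption() is only a placeholder.
 *
 *	if (example_detected_corruption(obj)) {
 *		tracing_off();
 *		WARN_ON(1);
 *	}
 *
 * Recording can later be resumed with tracing_on().
 */
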
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr: the trace array to check whether its ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the enums were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found, the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * The parser is not finished with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this task's comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr - tracer
1380  * @tsk - task with the latency
1381  * @cpu - the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run them a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 early_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type - the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* Allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* Disable other selftests, since this tracer will break them. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
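
/*
 * Usage sketch (illustrative only; the tracer name and init callback below
 * are hypothetical, assuming the usual struct tracer layout). Since
 * register_tracer() is __init here, this must run from boot-time code:
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *	};
 *
 *	register_tracer(&example_tracer);
 */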
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1715 struct saved_cmdlines_buffer {
1716         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717         unsigned *map_cmdline_to_pid;
1718         unsigned cmdline_num;
1719         int cmdline_idx;
1720         char *saved_cmdlines;
1721 };
1722 static struct saved_cmdlines_buffer *savedcmd;
1723
1724 /* temporarily disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1726
1727 static inline char *get_saved_cmdlines(int idx)
1728 {
1729         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1730 }
1731
1732 static inline void set_cmdline(int idx, const char *cmdline)
1733 {
1734         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1735 }
1736
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738                                     struct saved_cmdlines_buffer *s)
1739 {
1740         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1741                                         GFP_KERNEL);
1742         if (!s->map_cmdline_to_pid)
1743                 return -ENOMEM;
1744
1745         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746         if (!s->saved_cmdlines) {
1747                 kfree(s->map_cmdline_to_pid);
1748                 return -ENOMEM;
1749         }
1750
1751         s->cmdline_idx = 0;
1752         s->cmdline_num = val;
1753         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754                sizeof(s->map_pid_to_cmdline));
1755         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756                val * sizeof(*s->map_cmdline_to_pid));
1757
1758         return 0;
1759 }
1760
1761 static int trace_create_savedcmd(void)
1762 {
1763         int ret;
1764
1765         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1766         if (!savedcmd)
1767                 return -ENOMEM;
1768
1769         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1770         if (ret < 0) {
1771                 kfree(savedcmd);
1772                 savedcmd = NULL;
1773                 return -ENOMEM;
1774         }
1775
1776         return 0;
1777 }
1778
1779 int is_tracing_stopped(void)
1780 {
1781         return global_trace.stop_count;
1782 }
1783
1784 /**
1785  * tracing_start - quick start of the tracer
1786  *
1787  * If tracing is enabled but was stopped by tracing_stop,
1788  * this will start the tracer back up.
1789  */
1790 void tracing_start(void)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         if (tracing_disabled)
1796                 return;
1797
1798         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799         if (--global_trace.stop_count) {
1800                 if (global_trace.stop_count < 0) {
1801                         /* Someone screwed up their debugging */
1802                         WARN_ON_ONCE(1);
1803                         global_trace.stop_count = 0;
1804                 }
1805                 goto out;
1806         }
1807
1808         /* Prevent the buffers from switching */
1809         arch_spin_lock(&global_trace.max_lock);
1810
1811         buffer = global_trace.trace_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_enable(buffer);
1814
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816         buffer = global_trace.max_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819 #endif
1820
1821         arch_spin_unlock(&global_trace.max_lock);
1822
1823  out:
1824         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1825 }
1826
1827 static void tracing_start_tr(struct trace_array *tr)
1828 {
1829         struct ring_buffer *buffer;
1830         unsigned long flags;
1831
1832         if (tracing_disabled)
1833                 return;
1834
1835         /* If global, we need to also start the max tracer */
1836         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837                 return tracing_start();
1838
1839         raw_spin_lock_irqsave(&tr->start_lock, flags);
1840
1841         if (--tr->stop_count) {
1842                 if (tr->stop_count < 0) {
1843                         /* Someone screwed up their debugging */
1844                         WARN_ON_ONCE(1);
1845                         tr->stop_count = 0;
1846                 }
1847                 goto out;
1848         }
1849
1850         buffer = tr->trace_buffer.buffer;
1851         if (buffer)
1852                 ring_buffer_record_enable(buffer);
1853
1854  out:
1855         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1856 }
1857
1858 /**
1859  * tracing_stop - quick stop of the tracer
1860  *
1861  * Lightweight way to stop tracing. Use in conjunction with
1862  * tracing_start.
1863  */
1864 void tracing_stop(void)
1865 {
1866         struct ring_buffer *buffer;
1867         unsigned long flags;
1868
1869         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870         if (global_trace.stop_count++)
1871                 goto out;
1872
1873         /* Prevent the buffers from switching */
1874         arch_spin_lock(&global_trace.max_lock);
1875
1876         buffer = global_trace.trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_disable(buffer);
1879
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881         buffer = global_trace.max_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884 #endif
1885
1886         arch_spin_unlock(&global_trace.max_lock);
1887
1888  out:
1889         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1890 }
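
/*
 * Usage sketch (illustrative): tracing_stop()/tracing_start() are meant to
 * be used as a lightweight pair around a region while recording should be
 * paused, e.g.:
 *
 *	tracing_stop();
 *	... inspect or dump the buffers ...
 *	tracing_start();
 *
 * The stop_count handling above keeps nested stop/start pairs balanced.
 */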
1891
1892 static void tracing_stop_tr(struct trace_array *tr)
1893 {
1894         struct ring_buffer *buffer;
1895         unsigned long flags;
1896
1897         /* If global, we need to also stop the max tracer */
1898         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899                 return tracing_stop();
1900
1901         raw_spin_lock_irqsave(&tr->start_lock, flags);
1902         if (tr->stop_count++)
1903                 goto out;
1904
1905         buffer = tr->trace_buffer.buffer;
1906         if (buffer)
1907                 ring_buffer_record_disable(buffer);
1908
1909  out:
1910         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1911 }
1912
1913 void trace_stop_cmdline_recording(void);
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917         unsigned pid, idx;
1918
1919         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920                 return 0;
1921
1922         /*
1923          * It's not the end of the world if we don't get
1924          * the lock, but we also don't want to spin
1925          * nor do we want to disable interrupts,
1926          * so if we miss here, then better luck next time.
1927          */
1928         if (!arch_spin_trylock(&trace_cmdline_lock))
1929                 return 0;
1930
1931         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932         if (idx == NO_CMDLINE_MAP) {
1933                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935                 /*
1936                  * Check whether the cmdline buffer at idx has a pid
1937                  * mapped. We are going to overwrite that entry so we
1938                  * need to clear the map_pid_to_cmdline. Otherwise we
1939                  * would read the new comm for the old pid.
1940                  */
1941                 pid = savedcmd->map_cmdline_to_pid[idx];
1942                 if (pid != NO_CMDLINE_MAP)
1943                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948                 savedcmd->cmdline_idx = idx;
1949         }
1950
1951         set_cmdline(idx, tsk->comm);
1952
1953         arch_spin_unlock(&trace_cmdline_lock);
1954
1955         return 1;
1956 }
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960         unsigned map;
1961
1962         if (!pid) {
1963                 strcpy(comm, "<idle>");
1964                 return;
1965         }
1966
1967         if (WARN_ON_ONCE(pid < 0)) {
1968                 strcpy(comm, "<XXX>");
1969                 return;
1970         }
1971
1972         if (pid > PID_MAX_DEFAULT) {
1973                 strcpy(comm, "<...>");
1974                 return;
1975         }
1976
1977         map = savedcmd->map_pid_to_cmdline[pid];
1978         if (map != NO_CMDLINE_MAP)
1979                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980         else
1981                 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986         preempt_disable();
1987         arch_spin_lock(&trace_cmdline_lock);
1988
1989         __trace_find_cmdline(pid, comm);
1990
1991         arch_spin_unlock(&trace_cmdline_lock);
1992         preempt_enable();
1993 }
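
/*
 * Usage sketch (illustrative; "entry" and "s" are hypothetical): callers
 * pass a TASK_COMM_LEN sized buffer, typically from an output routine:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%s-%d", comm, entry->pid);
 */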
1994
1995 void tracing_record_cmdline(struct task_struct *tsk)
1996 {
1997         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1998                 return;
1999
2000         if (!__this_cpu_read(trace_cmdline_save))
2001                 return;
2002
2003         if (trace_save_cmdline(tsk))
2004                 __this_cpu_write(trace_cmdline_save, false);
2005 }
2006
2007 /*
2008  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2009  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2010  * simplifies those functions and keeps them in sync.
2011  */
2012 enum print_line_t trace_handle_return(struct trace_seq *s)
2013 {
2014         return trace_seq_has_overflowed(s) ?
2015                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2016 }
2017 EXPORT_SYMBOL_GPL(trace_handle_return);
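
/*
 * Usage sketch (illustrative, hypothetical output callback): rather than
 * checking trace_seq_has_overflowed() by hand, a print handler can simply
 * end with trace_handle_return():
 *
 *	static enum print_line_t
 *	example_trace_output(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */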
2018
2019 void
2020 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2021                              int pc)
2022 {
2023         struct task_struct *tsk = current;
2024
2025         entry->preempt_count            = pc & 0xff;
2026         entry->pid                      = (tsk) ? tsk->pid : 0;
2027         entry->flags =
2028 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2029                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2030 #else
2031                 TRACE_FLAG_IRQS_NOSUPPORT |
2032 #endif
2033                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2034                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2035                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2036                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2037                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2038 }
2039 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2040
2041 struct ring_buffer_event *
2042 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2043                           int type,
2044                           unsigned long len,
2045                           unsigned long flags, int pc)
2046 {
2047         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2048 }
2049
2050 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2051 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2052 static int trace_buffered_event_ref;
2053
2054 /**
2055  * trace_buffered_event_enable - enable buffering events
2056  *
2057  * When events are being filtered, it is quicker to use a temporary
2058  * buffer to write the event data into if there's a likely chance
2059  * that it will not be committed. Discarding an event from the ring
2060  * buffer is not as fast as committing one, and is much slower than
2061  * copying the data and then committing it.
2062  *
2063  * When an event is to be filtered, per-cpu buffers are allocated to
2064  * write the event data into. If the event is then filtered and discarded
2065  * it is simply dropped; otherwise, the entire data is committed
2066  * in one shot.
2067  */
2068 void trace_buffered_event_enable(void)
2069 {
2070         struct ring_buffer_event *event;
2071         struct page *page;
2072         int cpu;
2073
2074         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2075
2076         if (trace_buffered_event_ref++)
2077                 return;
2078
2079         for_each_tracing_cpu(cpu) {
2080                 page = alloc_pages_node(cpu_to_node(cpu),
2081                                         GFP_KERNEL | __GFP_NORETRY, 0);
2082                 if (!page)
2083                         goto failed;
2084
2085                 event = page_address(page);
2086                 memset(event, 0, sizeof(*event));
2087
2088                 per_cpu(trace_buffered_event, cpu) = event;
2089
2090                 preempt_disable();
2091                 if (cpu == smp_processor_id() &&
2092                     this_cpu_read(trace_buffered_event) !=
2093                     per_cpu(trace_buffered_event, cpu))
2094                         WARN_ON_ONCE(1);
2095                 preempt_enable();
2096         }
2097
2098         return;
2099  failed:
2100         trace_buffered_event_disable();
2101 }
2102
2103 static void enable_trace_buffered_event(void *data)
2104 {
2105         /* Probably not needed, but do it anyway */
2106         smp_rmb();
2107         this_cpu_dec(trace_buffered_event_cnt);
2108 }
2109
2110 static void disable_trace_buffered_event(void *data)
2111 {
2112         this_cpu_inc(trace_buffered_event_cnt);
2113 }
2114
2115 /**
2116  * trace_buffered_event_disable - disable buffering events
2117  *
2118  * When a filter is removed, it is faster to not use the buffered
2119  * events, and to commit directly into the ring buffer. Free up
2120  * the temp buffers when there are no more users. This requires
2121  * special synchronization with current events.
2122  */
2123 void trace_buffered_event_disable(void)
2124 {
2125         int cpu;
2126
2127         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2128
2129         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2130                 return;
2131
2132         if (--trace_buffered_event_ref)
2133                 return;
2134
2135         preempt_disable();
2136         /* For each CPU, set the buffer as used. */
2137         smp_call_function_many(tracing_buffer_mask,
2138                                disable_trace_buffered_event, NULL, 1);
2139         preempt_enable();
2140
2141         /* Wait for all current users to finish */
2142         synchronize_sched();
2143
2144         for_each_tracing_cpu(cpu) {
2145                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2146                 per_cpu(trace_buffered_event, cpu) = NULL;
2147         }
2148         /*
2149          * Make sure trace_buffered_event is NULL before clearing
2150          * trace_buffered_event_cnt.
2151          */
2152         smp_wmb();
2153
2154         preempt_disable();
2155         /* Do the work on each cpu */
2156         smp_call_function_many(tracing_buffer_mask,
2157                                enable_trace_buffered_event, NULL, 1);
2158         preempt_enable();
2159 }
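
/*
 * Usage sketch (illustrative): both functions above expect event_mutex to
 * be held and are used as a balanced pair per filter user:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * with a matching trace_buffered_event_disable(), under the same mutex,
 * when the filter is removed.
 */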
2160
2161 static struct ring_buffer *temp_buffer;
2162
2163 struct ring_buffer_event *
2164 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2165                           struct trace_event_file *trace_file,
2166                           int type, unsigned long len,
2167                           unsigned long flags, int pc)
2168 {
2169         struct ring_buffer_event *entry;
2170         int val;
2171
2172         *current_rb = trace_file->tr->trace_buffer.buffer;
2173
2174         if ((trace_file->flags &
2175              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2176             (entry = this_cpu_read(trace_buffered_event))) {
2177                 /* Try to use the per cpu buffer first */
2178                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2179                 if (val == 1) {
2180                         trace_event_setup(entry, type, flags, pc);
2181                         entry->array[0] = len;
2182                         return entry;
2183                 }
2184                 this_cpu_dec(trace_buffered_event_cnt);
2185         }
2186
2187         entry = __trace_buffer_lock_reserve(*current_rb,
2188                                             type, len, flags, pc);
2189         /*
2190          * If tracing is off, but we have triggers enabled
2191          * we still need to look at the event data. Use the temp_buffer
2192          * to store the trace event for the trigger to use. It's recursion
2193          * safe and will not be recorded anywhere.
2194          */
2195         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2196                 *current_rb = temp_buffer;
2197                 entry = __trace_buffer_lock_reserve(*current_rb,
2198                                                     type, len, flags, pc);
2199         }
2200         return entry;
2201 }
2202 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2203
2204 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2205 static DEFINE_MUTEX(tracepoint_printk_mutex);
2206
2207 static void output_printk(struct trace_event_buffer *fbuffer)
2208 {
2209         struct trace_event_call *event_call;
2210         struct trace_event *event;
2211         unsigned long flags;
2212         struct trace_iterator *iter = tracepoint_print_iter;
2213
2214         /* We should never get here if iter is NULL */
2215         if (WARN_ON_ONCE(!iter))
2216                 return;
2217
2218         event_call = fbuffer->trace_file->event_call;
2219         if (!event_call || !event_call->event.funcs ||
2220             !event_call->event.funcs->trace)
2221                 return;
2222
2223         event = &fbuffer->trace_file->event_call->event;
2224
2225         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2226         trace_seq_init(&iter->seq);
2227         iter->ent = fbuffer->entry;
2228         event_call->event.funcs->trace(iter, 0, event);
2229         trace_seq_putc(&iter->seq, 0);
2230         printk("%s", iter->seq.buffer);
2231
2232         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2233 }
2234
2235 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2236                              void __user *buffer, size_t *lenp,
2237                              loff_t *ppos)
2238 {
2239         int save_tracepoint_printk;
2240         int ret;
2241
2242         mutex_lock(&tracepoint_printk_mutex);
2243         save_tracepoint_printk = tracepoint_printk;
2244
2245         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2246
2247         /*
2248          * This will force exiting early, as tracepoint_printk
2249          * is always zero when tracepoint_print_iter is not allocated.
2250          */
2251         if (!tracepoint_print_iter)
2252                 tracepoint_printk = 0;
2253
2254         if (save_tracepoint_printk == tracepoint_printk)
2255                 goto out;
2256
2257         if (tracepoint_printk)
2258                 static_key_enable(&tracepoint_printk_key.key);
2259         else
2260                 static_key_disable(&tracepoint_printk_key.key);
2261
2262  out:
2263         mutex_unlock(&tracepoint_printk_mutex);
2264
2265         return ret;
2266 }
2267
2268 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2269 {
2270         if (static_key_false(&tracepoint_printk_key.key))
2271                 output_printk(fbuffer);
2272
2273         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2274                                     fbuffer->event, fbuffer->entry,
2275                                     fbuffer->flags, fbuffer->pc);
2276 }
2277 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2278
2279 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2280                                      struct ring_buffer *buffer,
2281                                      struct ring_buffer_event *event,
2282                                      unsigned long flags, int pc,
2283                                      struct pt_regs *regs)
2284 {
2285         __buffer_unlock_commit(buffer, event);
2286
2287         /*
2288          * If regs is not set, then skip the following callers:
2289          *   trace_buffer_unlock_commit_regs
2290          *   event_trigger_unlock_commit
2291          *   trace_event_buffer_commit
2292          *   trace_event_raw_event_sched_switch
2293          * Note, we can still get here via blktrace, wakeup tracer
2294          * and mmiotrace, but that's ok if they lose a function or
2295          * two. They are that meaningful.
2296          * two. They are not that meaningful.
2297         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2298         ftrace_trace_userstack(buffer, flags, pc);
2299 }
2300
2301 /*
2302  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2303  */
2304 void
2305 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2306                                    struct ring_buffer_event *event)
2307 {
2308         __buffer_unlock_commit(buffer, event);
2309 }
2310
2311 static void
2312 trace_process_export(struct trace_export *export,
2313                struct ring_buffer_event *event)
2314 {
2315         struct trace_entry *entry;
2316         unsigned int size = 0;
2317
2318         entry = ring_buffer_event_data(event);
2319         size = ring_buffer_event_length(event);
2320         export->write(entry, size);
2321 }
2322
2323 static DEFINE_MUTEX(ftrace_export_lock);
2324
2325 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2326
2327 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2328
2329 static inline void ftrace_exports_enable(void)
2330 {
2331         static_branch_enable(&ftrace_exports_enabled);
2332 }
2333
2334 static inline void ftrace_exports_disable(void)
2335 {
2336         static_branch_disable(&ftrace_exports_enabled);
2337 }
2338
2339 void ftrace_exports(struct ring_buffer_event *event)
2340 {
2341         struct trace_export *export;
2342
2343         preempt_disable_notrace();
2344
2345         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2346         while (export) {
2347                 trace_process_export(export, event);
2348                 export = rcu_dereference_raw_notrace(export->next);
2349         }
2350
2351         preempt_enable_notrace();
2352 }
2353
2354 static inline void
2355 add_trace_export(struct trace_export **list, struct trace_export *export)
2356 {
2357         rcu_assign_pointer(export->next, *list);
2358         /*
2359          * We are entering export into the list but another
2360          * CPU might be walking that list. We need to make sure
2361          * the export->next pointer is valid before another CPU sees
2362          * the export pointer inserted into the list.
2363          */
2364         rcu_assign_pointer(*list, export);
2365 }
2366
2367 static inline int
2368 rm_trace_export(struct trace_export **list, struct trace_export *export)
2369 {
2370         struct trace_export **p;
2371
2372         for (p = list; *p != NULL; p = &(*p)->next)
2373                 if (*p == export)
2374                         break;
2375
2376         if (*p != export)
2377                 return -1;
2378
2379         rcu_assign_pointer(*p, (*p)->next);
2380
2381         return 0;
2382 }
2383
2384 static inline void
2385 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2386 {
2387         if (*list == NULL)
2388                 ftrace_exports_enable();
2389
2390         add_trace_export(list, export);
2391 }
2392
2393 static inline int
2394 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2395 {
2396         int ret;
2397
2398         ret = rm_trace_export(list, export);
2399         if (*list == NULL)
2400                 ftrace_exports_disable();
2401
2402         return ret;
2403 }
2404
2405 int register_ftrace_export(struct trace_export *export)
2406 {
2407         if (WARN_ON_ONCE(!export->write))
2408                 return -1;
2409
2410         mutex_lock(&ftrace_export_lock);
2411
2412         add_ftrace_export(&ftrace_exports_list, export);
2413
2414         mutex_unlock(&ftrace_export_lock);
2415
2416         return 0;
2417 }
2418 EXPORT_SYMBOL_GPL(register_ftrace_export);
2419
2420 int unregister_ftrace_export(struct trace_export *export)
2421 {
2422         int ret;
2423
2424         mutex_lock(&ftrace_export_lock);
2425
2426         ret = rm_ftrace_export(&ftrace_exports_list, export);
2427
2428         mutex_unlock(&ftrace_export_lock);
2429
2430         return ret;
2431 }
2432 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
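
/*
 * Usage sketch (illustrative; the export and its ->write() callback are
 * hypothetical): a subsystem that wants a copy of every exported event
 * provides a struct trace_export (see trace_process_export() above for
 * how ->write() is invoked) and registers it:
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */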
2433
2434 void
2435 trace_function(struct trace_array *tr,
2436                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2437                int pc)
2438 {
2439         struct trace_event_call *call = &event_function;
2440         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2441         struct ring_buffer_event *event;
2442         struct ftrace_entry *entry;
2443
2444         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2445                                             flags, pc);
2446         if (!event)
2447                 return;
2448         entry   = ring_buffer_event_data(event);
2449         entry->ip                       = ip;
2450         entry->parent_ip                = parent_ip;
2451
2452         if (!call_filter_check_discard(call, entry, buffer, event)) {
2453                 if (static_branch_unlikely(&ftrace_exports_enabled))
2454                         ftrace_exports(event);
2455                 __buffer_unlock_commit(buffer, event);
2456         }
2457 }
2458
2459 #ifdef CONFIG_STACKTRACE
2460
2461 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2462 struct ftrace_stack {
2463         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2464 };
2465
2466 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2467 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2468
2469 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2470                                  unsigned long flags,
2471                                  int skip, int pc, struct pt_regs *regs)
2472 {
2473         struct trace_event_call *call = &event_kernel_stack;
2474         struct ring_buffer_event *event;
2475         struct stack_entry *entry;
2476         struct stack_trace trace;
2477         int use_stack;
2478         int size = FTRACE_STACK_ENTRIES;
2479
2480         trace.nr_entries        = 0;
2481         trace.skip              = skip;
2482
2483         /*
2484          * Add two, for this function and the call to save_stack_trace().
2485          * If regs is set, then these functions will not be in the way.
2486          */
2487         if (!regs)
2488                 trace.skip += 2;
2489
2490         /*
2491          * Since events can happen in NMIs, there's no safe way to
2492          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
2493          * or NMI comes in, it will just have to use the default
2494          * FTRACE_STACK_ENTRIES.
2495          */
2496         preempt_disable_notrace();
2497
2498         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2499         /*
2500          * We don't need any atomic variables, just a barrier.
2501          * If an interrupt comes in, we don't care, because it would
2502          * have exited and put the counter back to what we want.
2503          * We just need a barrier to keep gcc from moving things
2504          * around.
2505          */
2506         barrier();
2507         if (use_stack == 1) {
2508                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2509                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2510
2511                 if (regs)
2512                         save_stack_trace_regs(regs, &trace);
2513                 else
2514                         save_stack_trace(&trace);
2515
2516                 if (trace.nr_entries > size)
2517                         size = trace.nr_entries;
2518         } else
2519                 /* From now on, use_stack is a boolean */
2520                 use_stack = 0;
2521
2522         size *= sizeof(unsigned long);
2523
2524         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2525                                             sizeof(*entry) + size, flags, pc);
2526         if (!event)
2527                 goto out;
2528         entry = ring_buffer_event_data(event);
2529
2530         memset(&entry->caller, 0, size);
2531
2532         if (use_stack)
2533                 memcpy(&entry->caller, trace.entries,
2534                        trace.nr_entries * sizeof(unsigned long));
2535         else {
2536                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2537                 trace.entries           = entry->caller;
2538                 if (regs)
2539                         save_stack_trace_regs(regs, &trace);
2540                 else
2541                         save_stack_trace(&trace);
2542         }
2543
2544         entry->size = trace.nr_entries;
2545
2546         if (!call_filter_check_discard(call, entry, buffer, event))
2547                 __buffer_unlock_commit(buffer, event);
2548
2549  out:
2550         /* Again, don't let gcc optimize things here */
2551         barrier();
2552         __this_cpu_dec(ftrace_stack_reserve);
2553         preempt_enable_notrace();
2554
2555 }
2556
2557 static inline void ftrace_trace_stack(struct trace_array *tr,
2558                                       struct ring_buffer *buffer,
2559                                       unsigned long flags,
2560                                       int skip, int pc, struct pt_regs *regs)
2561 {
2562         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2563                 return;
2564
2565         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2566 }
2567
2568 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2569                    int pc)
2570 {
2571         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2572 }
2573
2574 /**
2575  * trace_dump_stack - record a stack back trace in the trace buffer
2576  * @skip: Number of functions to skip (helper handlers)
2577  */
2578 void trace_dump_stack(int skip)
2579 {
2580         unsigned long flags;
2581
2582         if (tracing_disabled || tracing_selftest_running)
2583                 return;
2584
2585         local_save_flags(flags);
2586
2587         /*
2588          * Skip 3 more; this seems to get us to the caller of
2589          * this function.
2590          */
2591         skip += 3;
2592         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2593                              flags, skip, preempt_count(), NULL);
2594 }
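
/*
 * Usage sketch (illustrative): code debugging an unexpected path can drop
 * a backtrace into the trace buffer instead of the console:
 *
 *	trace_dump_stack(0);
 *
 * A non-zero skip value omits that many additional callers from the top
 * of the recorded stack.
 */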
2595
2596 static DEFINE_PER_CPU(int, user_stack_count);
2597
2598 void
2599 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2600 {
2601         struct trace_event_call *call = &event_user_stack;
2602         struct ring_buffer_event *event;
2603         struct userstack_entry *entry;
2604         struct stack_trace trace;
2605
2606         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2607                 return;
2608
2609         /*
2610          * NMIs cannot handle page faults, even with fixups.
2611          * Saving the user stack can (and often does) fault.
2612          */
2613         if (unlikely(in_nmi()))
2614                 return;
2615
2616         /*
2617          * prevent recursion, since the user stack tracing may
2618          * trigger other kernel events.
2619          */
2620         preempt_disable();
2621         if (__this_cpu_read(user_stack_count))
2622                 goto out;
2623
2624         __this_cpu_inc(user_stack_count);
2625
2626         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2627                                             sizeof(*entry), flags, pc);
2628         if (!event)
2629                 goto out_drop_count;
2630         entry   = ring_buffer_event_data(event);
2631
2632         entry->tgid             = current->tgid;
2633         memset(&entry->caller, 0, sizeof(entry->caller));
2634
2635         trace.nr_entries        = 0;
2636         trace.max_entries       = FTRACE_STACK_ENTRIES;
2637         trace.skip              = 0;
2638         trace.entries           = entry->caller;
2639
2640         save_stack_trace_user(&trace);
2641         if (!call_filter_check_discard(call, entry, buffer, event))
2642                 __buffer_unlock_commit(buffer, event);
2643
2644  out_drop_count:
2645         __this_cpu_dec(user_stack_count);
2646  out:
2647         preempt_enable();
2648 }
2649
2650 #ifdef UNUSED
2651 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2652 {
2653         ftrace_trace_userstack(tr, flags, preempt_count());
2654 }
2655 #endif /* UNUSED */
2656
2657 #endif /* CONFIG_STACKTRACE */
2658
2659 /* created for use with alloc_percpu */
2660 struct trace_buffer_struct {
2661         int nesting;
2662         char buffer[4][TRACE_BUF_SIZE];
2663 };
2664
2665 static struct trace_buffer_struct *trace_percpu_buffer;
2666
2667 /*
2668  * This allows for lockless recording. If we're nested too deeply, then
2669  * this returns NULL.
2670  */
2671 static char *get_trace_buf(void)
2672 {
2673         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2674
2675         if (!buffer || buffer->nesting >= 4)
2676                 return NULL;
2677
2678         return &buffer->buffer[buffer->nesting++][0];
2679 }
2680
2681 static void put_trace_buf(void)
2682 {
2683         this_cpu_dec(trace_percpu_buffer->nesting);
2684 }
2685
2686 static int alloc_percpu_trace_buffer(void)
2687 {
2688         struct trace_buffer_struct *buffers;
2689
2690         buffers = alloc_percpu(struct trace_buffer_struct);
2691         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2692                 return -ENOMEM;
2693
2694         trace_percpu_buffer = buffers;
2695         return 0;
2696 }
2697
2698 static int buffers_allocated;
2699
2700 void trace_printk_init_buffers(void)
2701 {
2702         if (buffers_allocated)
2703                 return;
2704
2705         if (alloc_percpu_trace_buffer())
2706                 return;
2707
2708         /* trace_printk() is for debug use only. Don't use it in production. */
2709
2710         pr_warn("\n");
2711         pr_warn("**********************************************************\n");
2712         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2713         pr_warn("**                                                      **\n");
2714         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2715         pr_warn("**                                                      **\n");
2716         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2717         pr_warn("** unsafe for production use.                           **\n");
2718         pr_warn("**                                                      **\n");
2719         pr_warn("** If you see this message and you are not debugging    **\n");
2720         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2721         pr_warn("**                                                      **\n");
2722         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2723         pr_warn("**********************************************************\n");
2724
2725         /* Expand the buffers to set size */
2726         tracing_update_buffers();
2727
2728         buffers_allocated = 1;
2729
2730         /*
2731          * trace_printk_init_buffers() can be called by modules.
2732          * If that happens, then we need to start cmdline recording
2733          * directly here. If the global_trace.trace_buffer.buffer is already
2734          * allocated here, then this was called by module code.
2735          */
2736         if (global_trace.trace_buffer.buffer)
2737                 tracing_start_cmdline_record();
2738 }
2739
2740 void trace_printk_start_comm(void)
2741 {
2742         /* Start tracing comms if trace printk is set */
2743         if (!buffers_allocated)
2744                 return;
2745         tracing_start_cmdline_record();
2746 }
2747
2748 static void trace_printk_start_stop_comm(int enabled)
2749 {
2750         if (!buffers_allocated)
2751                 return;
2752
2753         if (enabled)
2754                 tracing_start_cmdline_record();
2755         else
2756                 tracing_stop_cmdline_record();
2757 }
2758
2759 /**
2760  * trace_vbprintk - write a binary message to the tracing buffer
2761  *
2762  */
2763 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2764 {
2765         struct trace_event_call *call = &event_bprint;
2766         struct ring_buffer_event *event;
2767         struct ring_buffer *buffer;
2768         struct trace_array *tr = &global_trace;
2769         struct bprint_entry *entry;
2770         unsigned long flags;
2771         char *tbuffer;
2772         int len = 0, size, pc;
2773
2774         if (unlikely(tracing_selftest_running || tracing_disabled))
2775                 return 0;
2776
2777         /* Don't pollute graph traces with trace_vprintk internals */
2778         pause_graph_tracing();
2779
2780         pc = preempt_count();
2781         preempt_disable_notrace();
2782
2783         tbuffer = get_trace_buf();
2784         if (!tbuffer) {
2785                 len = 0;
2786                 goto out_nobuffer;
2787         }
2788
2789         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2790
2791         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2792                 goto out;
2793
2794         local_save_flags(flags);
2795         size = sizeof(*entry) + sizeof(u32) * len;
2796         buffer = tr->trace_buffer.buffer;
2797         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2798                                             flags, pc);
2799         if (!event)
2800                 goto out;
2801         entry = ring_buffer_event_data(event);
2802         entry->ip                       = ip;
2803         entry->fmt                      = fmt;
2804
2805         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2806         if (!call_filter_check_discard(call, entry, buffer, event)) {
2807                 __buffer_unlock_commit(buffer, event);
2808                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2809         }
2810
2811 out:
2812         put_trace_buf();
2813
2814 out_nobuffer:
2815         preempt_enable_notrace();
2816         unpause_graph_tracing();
2817
2818         return len;
2819 }
2820 EXPORT_SYMBOL_GPL(trace_vbprintk);
2821
2822 static int
2823 __trace_array_vprintk(struct ring_buffer *buffer,
2824                       unsigned long ip, const char *fmt, va_list args)
2825 {
2826         struct trace_event_call *call = &event_print;
2827         struct ring_buffer_event *event;
2828         int len = 0, size, pc;
2829         struct print_entry *entry;
2830         unsigned long flags;
2831         char *tbuffer;
2832
2833         if (tracing_disabled || tracing_selftest_running)
2834                 return 0;
2835
2836         /* Don't pollute graph traces with trace_vprintk internals */
2837         pause_graph_tracing();
2838
2839         pc = preempt_count();
2840         preempt_disable_notrace();
2841
2842
2843         tbuffer = get_trace_buf();
2844         if (!tbuffer) {
2845                 len = 0;
2846                 goto out_nobuffer;
2847         }
2848
2849         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2850
2851         local_save_flags(flags);
2852         size = sizeof(*entry) + len + 1;
2853         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2854                                             flags, pc);
2855         if (!event)
2856                 goto out;
2857         entry = ring_buffer_event_data(event);
2858         entry->ip = ip;
2859
2860         memcpy(&entry->buf, tbuffer, len + 1);
2861         if (!call_filter_check_discard(call, entry, buffer, event)) {
2862                 __buffer_unlock_commit(buffer, event);
2863                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2864         }
2865
2866 out:
2867         put_trace_buf();
2868
2869 out_nobuffer:
2870         preempt_enable_notrace();
2871         unpause_graph_tracing();
2872
2873         return len;
2874 }
2875
2876 int trace_array_vprintk(struct trace_array *tr,
2877                         unsigned long ip, const char *fmt, va_list args)
2878 {
2879         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2880 }
2881
2882 int trace_array_printk(struct trace_array *tr,
2883                        unsigned long ip, const char *fmt, ...)
2884 {
2885         int ret;
2886         va_list ap;
2887
2888         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2889                 return 0;
2890
2891         va_start(ap, fmt);
2892         ret = trace_array_vprintk(tr, ip, fmt, ap);
2893         va_end(ap);
2894         return ret;
2895 }
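
/*
 * Usage sketch (illustrative; "tr" and "id" are hypothetical): code that
 * owns a trace instance can write into that instance's buffer:
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 *
 * Note that the TRACE_ITER_PRINTK check above is made against
 * global_trace's flags.
 */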
2896
2897 int trace_array_printk_buf(struct ring_buffer *buffer,
2898                            unsigned long ip, const char *fmt, ...)
2899 {
2900         int ret;
2901         va_list ap;
2902
2903         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2904                 return 0;
2905
2906         va_start(ap, fmt);
2907         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2908         va_end(ap);
2909         return ret;
2910 }
2911
2912 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2913 {
2914         return trace_array_vprintk(&global_trace, ip, fmt, args);
2915 }
2916 EXPORT_SYMBOL_GPL(trace_vprintk);
2917
2918 static void trace_iterator_increment(struct trace_iterator *iter)
2919 {
2920         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2921
2922         iter->idx++;
2923         if (buf_iter)
2924                 ring_buffer_read(buf_iter, NULL);
2925 }
2926
2927 static struct trace_entry *
2928 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2929                 unsigned long *lost_events)
2930 {
2931         struct ring_buffer_event *event;
2932         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2933
2934         if (buf_iter)
2935                 event = ring_buffer_iter_peek(buf_iter, ts);
2936         else
2937                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2938                                          lost_events);
2939
2940         if (event) {
2941                 iter->ent_size = ring_buffer_event_length(event);
2942                 return ring_buffer_event_data(event);
2943         }
2944         iter->ent_size = 0;
2945         return NULL;
2946 }
2947
2948 static struct trace_entry *
2949 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2950                   unsigned long *missing_events, u64 *ent_ts)
2951 {
2952         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2953         struct trace_entry *ent, *next = NULL;
2954         unsigned long lost_events = 0, next_lost = 0;
2955         int cpu_file = iter->cpu_file;
2956         u64 next_ts = 0, ts;
2957         int next_cpu = -1;
2958         int next_size = 0;
2959         int cpu;
2960
2961         /*
2962          * If we are in a per_cpu trace file, don't bother iterating over
2963          * all cpus; just peek at that cpu directly.
2964          */
2965         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2966                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2967                         return NULL;
2968                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2969                 if (ent_cpu)
2970                         *ent_cpu = cpu_file;
2971
2972                 return ent;
2973         }
2974
2975         for_each_tracing_cpu(cpu) {
2976
2977                 if (ring_buffer_empty_cpu(buffer, cpu))
2978                         continue;
2979
2980                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2981
2982                 /*
2983                  * Pick the entry with the smallest timestamp:
2984                  */
2985                 if (ent && (!next || ts < next_ts)) {
2986                         next = ent;
2987                         next_cpu = cpu;
2988                         next_ts = ts;
2989                         next_lost = lost_events;
2990                         next_size = iter->ent_size;
2991                 }
2992         }
2993
2994         iter->ent_size = next_size;
2995
2996         if (ent_cpu)
2997                 *ent_cpu = next_cpu;
2998
2999         if (ent_ts)
3000                 *ent_ts = next_ts;
3001
3002         if (missing_events)
3003                 *missing_events = next_lost;
3004
3005         return next;
3006 }
3007
3008 /* Find the next real entry, without updating the iterator itself */
3009 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3010                                           int *ent_cpu, u64 *ent_ts)
3011 {
3012         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3013 }
3014
3015 /* Find the next real entry, and increment the iterator to the next entry */
3016 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3017 {
3018         iter->ent = __find_next_entry(iter, &iter->cpu,
3019                                       &iter->lost_events, &iter->ts);
3020
3021         if (iter->ent)
3022                 trace_iterator_increment(iter);
3023
3024         return iter->ent ? iter : NULL;
3025 }
3026
3027 static void trace_consume(struct trace_iterator *iter)
3028 {
3029         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3030                             &iter->lost_events);
3031 }
3032
3033 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3034 {
3035         struct trace_iterator *iter = m->private;
3036         int i = (int)*pos;
3037         void *ent;
3038
3039         WARN_ON_ONCE(iter->leftover);
3040
3041         (*pos)++;
3042
3043         /* can't go backwards */
3044         if (iter->idx > i)
3045                 return NULL;
3046
3047         if (iter->idx < 0)
3048                 ent = trace_find_next_entry_inc(iter);
3049         else
3050                 ent = iter;
3051
3052         while (ent && iter->idx < i)
3053                 ent = trace_find_next_entry_inc(iter);
3054
3055         iter->pos = *pos;
3056
3057         return ent;
3058 }
3059
3060 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3061 {
3062         struct ring_buffer_event *event;
3063         struct ring_buffer_iter *buf_iter;
3064         unsigned long entries = 0;
3065         u64 ts;
3066
3067         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3068
3069         buf_iter = trace_buffer_iter(iter, cpu);
3070         if (!buf_iter)
3071                 return;
3072
3073         ring_buffer_iter_reset(buf_iter);
3074
3075         /*
3076          * With the max latency tracers, it is possible that a reset
3077          * never took place on a cpu. This is evident from the
3078          * timestamp being before the start of the buffer.
3079          */
3080         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3081                 if (ts >= iter->trace_buffer->time_start)
3082                         break;
3083                 entries++;
3084                 ring_buffer_read(buf_iter, NULL);
3085         }
3086
3087         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3088 }
3089
3090 /*
3091  * The current tracer is copied to avoid global locking
3092  * all around.
3093  */
3094 static void *s_start(struct seq_file *m, loff_t *pos)
3095 {
3096         struct trace_iterator *iter = m->private;
3097         struct trace_array *tr = iter->tr;
3098         int cpu_file = iter->cpu_file;
3099         void *p = NULL;
3100         loff_t l = 0;
3101         int cpu;
3102
3103         /*
3104          * Copy the tracer to avoid using a global lock all around.
3105          * iter->trace is a copy of current_trace; the name pointer
3106          * may be compared instead of using strcmp(), as iter->trace->name
3107          * will point to the same string as current_trace->name.
3108          */
3109         mutex_lock(&trace_types_lock);
3110         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3111                 *iter->trace = *tr->current_trace;
3112         mutex_unlock(&trace_types_lock);
3113
3114 #ifdef CONFIG_TRACER_MAX_TRACE
3115         if (iter->snapshot && iter->trace->use_max_tr)
3116                 return ERR_PTR(-EBUSY);
3117 #endif
3118
3119         if (!iter->snapshot)
3120                 atomic_inc(&trace_record_cmdline_disabled);
3121
3122         if (*pos != iter->pos) {
3123                 iter->ent = NULL;
3124                 iter->cpu = 0;
3125                 iter->idx = -1;
3126
3127                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3128                         for_each_tracing_cpu(cpu)
3129                                 tracing_iter_reset(iter, cpu);
3130                 } else
3131                         tracing_iter_reset(iter, cpu_file);
3132
3133                 iter->leftover = 0;
3134                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3135                         ;
3136
3137         } else {
3138                 /*
3139                  * If we overflowed the seq_file before, then we want
3140                  * to just reuse the trace_seq buffer again.
3141                  */
3142                 if (iter->leftover)
3143                         p = iter;
3144                 else {
3145                         l = *pos - 1;
3146                         p = s_next(m, p, &l);
3147                 }
3148         }
3149
3150         trace_event_read_lock();
3151         trace_access_lock(cpu_file);
3152         return p;
3153 }
3154
3155 static void s_stop(struct seq_file *m, void *p)
3156 {
3157         struct trace_iterator *iter = m->private;
3158
3159 #ifdef CONFIG_TRACER_MAX_TRACE
3160         if (iter->snapshot && iter->trace->use_max_tr)
3161                 return;
3162 #endif
3163
3164         if (!iter->snapshot)
3165                 atomic_dec(&trace_record_cmdline_disabled);
3166
3167         trace_access_unlock(iter->cpu_file);
3168         trace_event_read_unlock();
3169 }
3170
3171 static void
3172 get_total_entries(struct trace_buffer *buf,
3173                   unsigned long *total, unsigned long *entries)
3174 {
3175         unsigned long count;
3176         int cpu;
3177
3178         *total = 0;
3179         *entries = 0;
3180
3181         for_each_tracing_cpu(cpu) {
3182                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3183                 /*
3184                  * If this buffer has skipped entries, then we hold all
3185                  * entries for the trace and we need to ignore the
3186                  * ones before the time stamp.
3187                  */
3188                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3189                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3190                         /* total is the same as the entries */
3191                         *total += count;
3192                 } else
3193                         *total += count +
3194                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3195                 *entries += count;
3196         }
3197 }
3198
3199 static void print_lat_help_header(struct seq_file *m)
3200 {
3201         seq_puts(m, "#                  _------=> CPU#            \n"
3202                     "#                 / _-----=> irqs-off        \n"
3203                     "#                | / _----=> need-resched    \n"
3204                     "#                || / _---=> hardirq/softirq \n"
3205                     "#                ||| / _--=> preempt-depth   \n"
3206                     "#                |||| /     delay            \n"
3207                     "#  cmd     pid   ||||| time  |   caller      \n"
3208                     "#     \\   /      |||||  \\    |   /         \n");
3209 }
3210
3211 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3212 {
3213         unsigned long total;
3214         unsigned long entries;
3215
3216         get_total_entries(buf, &total, &entries);
3217         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3218                    entries, total, num_online_cpus());
3219         seq_puts(m, "#\n");
3220 }
3221
3222 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3223 {
3224         print_event_info(buf, m);
3225         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3226                     "#              | |       |          |         |\n");
3227 }
3228
3229 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3230 {
3231         print_event_info(buf, m);
3232         seq_puts(m, "#                              _-----=> irqs-off\n"
3233                     "#                             / _----=> need-resched\n"
3234                     "#                            | / _---=> hardirq/softirq\n"
3235                     "#                            || / _--=> preempt-depth\n"
3236                     "#                            ||| /     delay\n"
3237                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3238                     "#              | |       |   ||||       |         |\n");
3239 }
3240
3241 void
3242 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3243 {
3244         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3245         struct trace_buffer *buf = iter->trace_buffer;
3246         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3247         struct tracer *type = iter->trace;
3248         unsigned long entries;
3249         unsigned long total;
3250         const char *name = "preemption";
3251
3252         name = type->name;
3253
3254         get_total_entries(buf, &total, &entries);
3255
3256         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3257                    name, UTS_RELEASE);
3258         seq_puts(m, "# -----------------------------------"
3259                  "---------------------------------\n");
3260         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3261                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3262                    nsecs_to_usecs(data->saved_latency),
3263                    entries,
3264                    total,
3265                    buf->cpu,
3266 #if defined(CONFIG_PREEMPT_NONE)
3267                    "server",
3268 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3269                    "desktop",
3270 #elif defined(CONFIG_PREEMPT)
3271                    "preempt",
3272 #else
3273                    "unknown",
3274 #endif
3275                    /* These are reserved for later use */
3276                    0, 0, 0, 0);
3277 #ifdef CONFIG_SMP
3278         seq_printf(m, " #P:%d)\n", num_online_cpus());
3279 #else
3280         seq_puts(m, ")\n");
3281 #endif
3282         seq_puts(m, "#    -----------------\n");
3283         seq_printf(m, "#    | task: %.16s-%d "
3284                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3285                    data->comm, data->pid,
3286                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3287                    data->policy, data->rt_priority);
3288         seq_puts(m, "#    -----------------\n");
3289
3290         if (data->critical_start) {
3291                 seq_puts(m, "#  => started at: ");
3292                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3293                 trace_print_seq(m, &iter->seq);
3294                 seq_puts(m, "\n#  => ended at:   ");
3295                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3296                 trace_print_seq(m, &iter->seq);
3297                 seq_puts(m, "\n#\n");
3298         }
3299
3300         seq_puts(m, "#\n");
3301 }
3302
3303 static void test_cpu_buff_start(struct trace_iterator *iter)
3304 {
3305         struct trace_seq *s = &iter->seq;
3306         struct trace_array *tr = iter->tr;
3307
3308         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3309                 return;
3310
3311         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3312                 return;
3313
3314         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3315                 return;
3316
3317         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3318                 return;
3319
3320         if (iter->started)
3321                 cpumask_set_cpu(iter->cpu, iter->started);
3322
3323         /* Don't print started cpu buffer for the first entry of the trace */
3324         if (iter->idx > 1)
3325                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3326                                 iter->cpu);
3327 }
3328
3329 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3330 {
3331         struct trace_array *tr = iter->tr;
3332         struct trace_seq *s = &iter->seq;
3333         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3334         struct trace_entry *entry;
3335         struct trace_event *event;
3336
3337         entry = iter->ent;
3338
3339         test_cpu_buff_start(iter);
3340
3341         event = ftrace_find_event(entry->type);
3342
3343         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3344                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3345                         trace_print_lat_context(iter);
3346                 else
3347                         trace_print_context(iter);
3348         }
3349
3350         if (trace_seq_has_overflowed(s))
3351                 return TRACE_TYPE_PARTIAL_LINE;
3352
3353         if (event)
3354                 return event->funcs->trace(iter, sym_flags, event);
3355
3356         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3357
3358         return trace_handle_return(s);
3359 }
3360
3361 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3362 {
3363         struct trace_array *tr = iter->tr;
3364         struct trace_seq *s = &iter->seq;
3365         struct trace_entry *entry;
3366         struct trace_event *event;
3367
3368         entry = iter->ent;
3369
3370         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3371                 trace_seq_printf(s, "%d %d %llu ",
3372                                  entry->pid, iter->cpu, iter->ts);
3373
3374         if (trace_seq_has_overflowed(s))
3375                 return TRACE_TYPE_PARTIAL_LINE;
3376
3377         event = ftrace_find_event(entry->type);
3378         if (event)
3379                 return event->funcs->raw(iter, 0, event);
3380
3381         trace_seq_printf(s, "%d ?\n", entry->type);
3382
3383         return trace_handle_return(s);
3384 }
3385
3386 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3387 {
3388         struct trace_array *tr = iter->tr;
3389         struct trace_seq *s = &iter->seq;
3390         unsigned char newline = '\n';
3391         struct trace_entry *entry;
3392         struct trace_event *event;
3393
3394         entry = iter->ent;
3395
3396         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3397                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3398                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3399                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3400                 if (trace_seq_has_overflowed(s))
3401                         return TRACE_TYPE_PARTIAL_LINE;
3402         }
3403
3404         event = ftrace_find_event(entry->type);
3405         if (event) {
3406                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3407                 if (ret != TRACE_TYPE_HANDLED)
3408                         return ret;
3409         }
3410
3411         SEQ_PUT_FIELD(s, newline);
3412
3413         return trace_handle_return(s);
3414 }
3415
3416 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3417 {
3418         struct trace_array *tr = iter->tr;
3419         struct trace_seq *s = &iter->seq;
3420         struct trace_entry *entry;
3421         struct trace_event *event;
3422
3423         entry = iter->ent;
3424
3425         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3426                 SEQ_PUT_FIELD(s, entry->pid);
3427                 SEQ_PUT_FIELD(s, iter->cpu);
3428                 SEQ_PUT_FIELD(s, iter->ts);
3429                 if (trace_seq_has_overflowed(s))
3430                         return TRACE_TYPE_PARTIAL_LINE;
3431         }
3432
3433         event = ftrace_find_event(entry->type);
3434         return event ? event->funcs->binary(iter, 0, event) :
3435                 TRACE_TYPE_HANDLED;
3436 }
3437
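/*
 * trace_empty - return 1 if there is nothing left to read, 0 otherwise.
 *
 * When the iterator is bound to a single per-CPU file, only that CPU's
 * buffer (or buffer iterator) is checked; otherwise every tracing CPU
 * must be empty for the whole trace to count as empty.
 */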
3438 int trace_empty(struct trace_iterator *iter)
3439 {
3440         struct ring_buffer_iter *buf_iter;
3441         int cpu;
3442
3443         /* If we are looking at one CPU buffer, only check that one */
3444         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3445                 cpu = iter->cpu_file;
3446                 buf_iter = trace_buffer_iter(iter, cpu);
3447                 if (buf_iter) {
3448                         if (!ring_buffer_iter_empty(buf_iter))
3449                                 return 0;
3450                 } else {
3451                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3452                                 return 0;
3453                 }
3454                 return 1;
3455         }
3456
3457         for_each_tracing_cpu(cpu) {
3458                 buf_iter = trace_buffer_iter(iter, cpu);
3459                 if (buf_iter) {
3460                         if (!ring_buffer_iter_empty(buf_iter))
3461                                 return 0;
3462                 } else {
3463                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3464                                 return 0;
3465                 }
3466         }
3467
3468         return 1;
3469 }
3470
3471 /*  Called with trace_event_read_lock() held. */
3472 enum print_line_t print_trace_line(struct trace_iterator *iter)
3473 {
3474         struct trace_array *tr = iter->tr;
3475         unsigned long trace_flags = tr->trace_flags;
3476         enum print_line_t ret;
3477
3478         if (iter->lost_events) {
3479                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3480                                  iter->cpu, iter->lost_events);
3481                 if (trace_seq_has_overflowed(&iter->seq))
3482                         return TRACE_TYPE_PARTIAL_LINE;
3483         }
3484
3485         if (iter->trace && iter->trace->print_line) {
3486                 ret = iter->trace->print_line(iter);
3487                 if (ret != TRACE_TYPE_UNHANDLED)
3488                         return ret;
3489         }
3490
3491         if (iter->ent->type == TRACE_BPUTS &&
3492                         trace_flags & TRACE_ITER_PRINTK &&
3493                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3494                 return trace_print_bputs_msg_only(iter);
3495
3496         if (iter->ent->type == TRACE_BPRINT &&
3497                         trace_flags & TRACE_ITER_PRINTK &&
3498                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3499                 return trace_print_bprintk_msg_only(iter);
3500
3501         if (iter->ent->type == TRACE_PRINT &&
3502                         trace_flags & TRACE_ITER_PRINTK &&
3503                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3504                 return trace_print_printk_msg_only(iter);
3505
3506         if (trace_flags & TRACE_ITER_BIN)
3507                 return print_bin_fmt(iter);
3508
3509         if (trace_flags & TRACE_ITER_HEX)
3510                 return print_hex_fmt(iter);
3511
3512         if (trace_flags & TRACE_ITER_RAW)
3513                 return print_raw_fmt(iter);
3514
3515         return print_trace_fmt(iter);
3516 }
3517
3518 void trace_latency_header(struct seq_file *m)
3519 {
3520         struct trace_iterator *iter = m->private;
3521         struct trace_array *tr = iter->tr;
3522
3523         /* print nothing if the buffers are empty */
3524         if (trace_empty(iter))
3525                 return;
3526
3527         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3528                 print_trace_header(m, iter);
3529
3530         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3531                 print_lat_help_header(m);
3532 }
3533
3534 void trace_default_header(struct seq_file *m)
3535 {
3536         struct trace_iterator *iter = m->private;
3537         struct trace_array *tr = iter->tr;
3538         unsigned long trace_flags = tr->trace_flags;
3539
3540         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3541                 return;
3542
3543         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3544                 /* print nothing if the buffers are empty */
3545                 if (trace_empty(iter))
3546                         return;
3547                 print_trace_header(m, iter);
3548                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3549                         print_lat_help_header(m);
3550         } else {
3551                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3552                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3553                                 print_func_help_header_irq(iter->trace_buffer, m);
3554                         else
3555                                 print_func_help_header(iter->trace_buffer, m);
3556                 }
3557         }
3558 }
3559
3560 static void test_ftrace_alive(struct seq_file *m)
3561 {
3562         if (!ftrace_is_dead())
3563                 return;
3564         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3565                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3566 }
3567
3568 #ifdef CONFIG_TRACER_MAX_TRACE
3569 static void show_snapshot_main_help(struct seq_file *m)
3570 {
3571         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3572                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3573                     "#                      Takes a snapshot of the main buffer.\n"
3574                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3575                     "#                      (Doesn't have to be '2', works with any number that\n"
3576                     "#                       is not a '0' or '1')\n");
3577 }
3578
3579 static void show_snapshot_percpu_help(struct seq_file *m)
3580 {
3581         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3582 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3583         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3584                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3585 #else
3586         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3587                     "#                     Must use main snapshot file to allocate.\n");
3588 #endif
3589         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3590                     "#                      (Doesn't have to be '2', works with any number that\n"
3591                     "#                       is not a '0' or '1')\n");
3592 }
3593
3594 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3595 {
3596         if (iter->tr->allocated_snapshot)
3597                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3598         else
3599                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3600
3601         seq_puts(m, "# Snapshot commands:\n");
3602         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3603                 show_snapshot_main_help(m);
3604         else
3605                 show_snapshot_percpu_help(m);
3606 }
3607 #else
3608 /* Should never be called */
3609 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3610 #endif
3611
3612 static int s_show(struct seq_file *m, void *v)
3613 {
3614         struct trace_iterator *iter = v;
3615         int ret;
3616
3617         if (iter->ent == NULL) {
3618                 if (iter->tr) {
3619                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3620                         seq_puts(m, "#\n");
3621                         test_ftrace_alive(m);
3622                 }
3623                 if (iter->snapshot && trace_empty(iter))
3624                         print_snapshot_help(m, iter);
3625                 else if (iter->trace && iter->trace->print_header)
3626                         iter->trace->print_header(m);
3627                 else
3628                         trace_default_header(m);
3629
3630         } else if (iter->leftover) {
3631                 /*
3632                  * If we filled the seq_file buffer earlier, we
3633                  * want to just show it now.
3634                  */
3635                 ret = trace_print_seq(m, &iter->seq);
3636
3637                 /* ret should this time be zero, but you never know */
3638                 iter->leftover = ret;
3639
3640         } else {
3641                 print_trace_line(iter);
3642                 ret = trace_print_seq(m, &iter->seq);
3643                 /*
3644                  * If we overflow the seq_file buffer, then it will
3645                  * ask us for this data again at start up.
3646                  * Use that instead.
3647                  *  ret is 0 if seq_file write succeeded.
3648                  *        -1 otherwise.
3649                  */
3650                 iter->leftover = ret;
3651         }
3652
3653         return 0;
3654 }
3655
3656 /*
3657  * Should be used after trace_array_get(), trace_types_lock
3658  * ensures that i_cdev was already initialized.
3659  */
3660 static inline int tracing_get_cpu(struct inode *inode)
3661 {
3662         if (inode->i_cdev) /* See trace_create_cpu_file() */
3663                 return (long)inode->i_cdev - 1;
3664         return RING_BUFFER_ALL_CPUS;
3665 }
3666
3667 static const struct seq_operations tracer_seq_ops = {
3668         .start          = s_start,
3669         .next           = s_next,
3670         .stop           = s_stop,
3671         .show           = s_show,
3672 };
3673
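/*
 * __tracing_open - set up a trace_iterator for reading the "trace" file.
 *
 * A private copy of the current tracer is made so concurrent tracer
 * switches do not affect the reader, per-CPU ring buffer iterators are
 * prepared, and (unless "snapshot" is being opened) tracing is stopped
 * for the duration of the dump and restarted in tracing_release().
 */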
3674 static struct trace_iterator *
3675 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3676 {
3677         struct trace_array *tr = inode->i_private;
3678         struct trace_iterator *iter;
3679         int cpu;
3680
3681         if (tracing_disabled)
3682                 return ERR_PTR(-ENODEV);
3683
3684         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3685         if (!iter)
3686                 return ERR_PTR(-ENOMEM);
3687
3688         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3689                                     GFP_KERNEL);
3690         if (!iter->buffer_iter)
3691                 goto release;
3692
3693         /*
3694          * We make a copy of the current tracer to avoid concurrent
3695          * changes on it while we are reading.
3696          */
3697         mutex_lock(&trace_types_lock);
3698         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3699         if (!iter->trace)
3700                 goto fail;
3701
3702         *iter->trace = *tr->current_trace;
3703
3704         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3705                 goto fail;
3706
3707         iter->tr = tr;
3708
3709 #ifdef CONFIG_TRACER_MAX_TRACE
3710         /* Currently only the top directory has a snapshot */
3711         if (tr->current_trace->print_max || snapshot)
3712                 iter->trace_buffer = &tr->max_buffer;
3713         else
3714 #endif
3715                 iter->trace_buffer = &tr->trace_buffer;
3716         iter->snapshot = snapshot;
3717         iter->pos = -1;
3718         iter->cpu_file = tracing_get_cpu(inode);
3719         mutex_init(&iter->mutex);
3720
3721         /* Notify the tracer early; before we stop tracing. */
3722         if (iter->trace && iter->trace->open)
3723                 iter->trace->open(iter);
3724
3725         /* Annotate start of buffers if we had overruns */
3726         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3727                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3728
3729         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3730         if (trace_clocks[tr->clock_id].in_ns)
3731                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3732
3733         /* stop the trace while dumping if we are not opening "snapshot" */
3734         if (!iter->snapshot)
3735                 tracing_stop_tr(tr);
3736
3737         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3738                 for_each_tracing_cpu(cpu) {
3739                         iter->buffer_iter[cpu] =
3740                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3741                 }
3742                 ring_buffer_read_prepare_sync();
3743                 for_each_tracing_cpu(cpu) {
3744                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3745                         tracing_iter_reset(iter, cpu);
3746                 }
3747         } else {
3748                 cpu = iter->cpu_file;
3749                 iter->buffer_iter[cpu] =
3750                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3751                 ring_buffer_read_prepare_sync();
3752                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3753                 tracing_iter_reset(iter, cpu);
3754         }
3755
3756         mutex_unlock(&trace_types_lock);
3757
3758         return iter;
3759
3760  fail:
3761         mutex_unlock(&trace_types_lock);
3762         kfree(iter->trace);
3763         kfree(iter->buffer_iter);
3764 release:
3765         seq_release_private(inode, file);
3766         return ERR_PTR(-ENOMEM);
3767 }
3768
3769 int tracing_open_generic(struct inode *inode, struct file *filp)
3770 {
3771         if (tracing_disabled)
3772                 return -ENODEV;
3773
3774         filp->private_data = inode->i_private;
3775         return 0;
3776 }
3777
3778 bool tracing_is_disabled(void)
3779 {
3780         return tracing_disabled ? true : false;
3781 }
3782
3783 /*
3784  * Open and update trace_array ref count.
3785  * Must have the current trace_array passed to it.
3786  */
3787 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3788 {
3789         struct trace_array *tr = inode->i_private;
3790
3791         if (tracing_disabled)
3792                 return -ENODEV;
3793
3794         if (trace_array_get(tr) < 0)
3795                 return -ENODEV;
3796
3797         filp->private_data = inode->i_private;
3798
3799         return 0;
3800 }
3801
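/*
 * tracing_release - tear down what __tracing_open() set up: finish the
 * per-CPU ring buffer reads, let the tracer run its ->close() callback,
 * restart tracing if this was not a snapshot read, and drop the
 * trace_array reference taken in tracing_open().  Write-only opens never
 * created an iterator, so they only drop the reference.
 */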
3802 static int tracing_release(struct inode *inode, struct file *file)
3803 {
3804         struct trace_array *tr = inode->i_private;
3805         struct seq_file *m = file->private_data;
3806         struct trace_iterator *iter;
3807         int cpu;
3808
3809         if (!(file->f_mode & FMODE_READ)) {
3810                 trace_array_put(tr);
3811                 return 0;
3812         }
3813
3814         /* Writes do not use seq_file */
3815         iter = m->private;
3816         mutex_lock(&trace_types_lock);
3817
3818         for_each_tracing_cpu(cpu) {
3819                 if (iter->buffer_iter[cpu])
3820                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3821         }
3822
3823         if (iter->trace && iter->trace->close)
3824                 iter->trace->close(iter);
3825
3826         if (!iter->snapshot)
3827                 /* reenable tracing if it was previously enabled */
3828                 tracing_start_tr(tr);
3829
3830         __trace_array_put(tr);
3831
3832         mutex_unlock(&trace_types_lock);
3833
3834         mutex_destroy(&iter->mutex);
3835         free_cpumask_var(iter->started);
3836         kfree(iter->trace);
3837         kfree(iter->buffer_iter);
3838         seq_release_private(inode, file);
3839
3840         return 0;
3841 }
3842
3843 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3844 {
3845         struct trace_array *tr = inode->i_private;
3846
3847         trace_array_put(tr);
3848         return 0;
3849 }
3850
3851 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3852 {
3853         struct trace_array *tr = inode->i_private;
3854
3855         trace_array_put(tr);
3856
3857         return single_release(inode, file);
3858 }
3859
3860 static int tracing_open(struct inode *inode, struct file *file)
3861 {
3862         struct trace_array *tr = inode->i_private;
3863         struct trace_iterator *iter;
3864         int ret = 0;
3865
3866         if (trace_array_get(tr) < 0)
3867                 return -ENODEV;
3868
3869         /* If this file was open for write, then erase contents */
3870         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3871                 int cpu = tracing_get_cpu(inode);
3872
3873                 if (cpu == RING_BUFFER_ALL_CPUS)
3874                         tracing_reset_online_cpus(&tr->trace_buffer);
3875                 else
3876                         tracing_reset(&tr->trace_buffer, cpu);
3877         }
3878
3879         if (file->f_mode & FMODE_READ) {
3880                 iter = __tracing_open(inode, file, false);
3881                 if (IS_ERR(iter))
3882                         ret = PTR_ERR(iter);
3883                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3884                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3885         }
3886
3887         if (ret < 0)
3888                 trace_array_put(tr);
3889
3890         return ret;
3891 }
3892
3893 /*
3894  * Some tracers are not suitable for instance buffers.
3895  * A tracer is always available for the global array (toplevel)
3896  * or if it explicitly states that it is.
3897  */
3898 static bool
3899 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3900 {
3901         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3902 }
3903
3904 /* Find the next tracer that this trace array may use */
3905 static struct tracer *
3906 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3907 {
3908         while (t && !trace_ok_for_array(t, tr))
3909                 t = t->next;
3910
3911         return t;
3912 }
3913
3914 static void *
3915 t_next(struct seq_file *m, void *v, loff_t *pos)
3916 {
3917         struct trace_array *tr = m->private;
3918         struct tracer *t = v;
3919
3920         (*pos)++;
3921
3922         if (t)
3923                 t = get_tracer_for_array(tr, t->next);
3924
3925         return t;
3926 }
3927
3928 static void *t_start(struct seq_file *m, loff_t *pos)
3929 {
3930         struct trace_array *tr = m->private;
3931         struct tracer *t;
3932         loff_t l = 0;
3933
3934         mutex_lock(&trace_types_lock);
3935
3936         t = get_tracer_for_array(tr, trace_types);
3937         for (; t && l < *pos; t = t_next(m, t, &l))
3938                         ;
3939
3940         return t;
3941 }
3942
3943 static void t_stop(struct seq_file *m, void *p)
3944 {
3945         mutex_unlock(&trace_types_lock);
3946 }
3947
3948 static int t_show(struct seq_file *m, void *v)
3949 {
3950         struct tracer *t = v;
3951
3952         if (!t)
3953                 return 0;
3954
3955         seq_puts(m, t->name);
3956         if (t->next)
3957                 seq_putc(m, ' ');
3958         else
3959                 seq_putc(m, '\n');
3960
3961         return 0;
3962 }
3963
3964 static const struct seq_operations show_traces_seq_ops = {
3965         .start          = t_start,
3966         .next           = t_next,
3967         .stop           = t_stop,
3968         .show           = t_show,
3969 };
3970
3971 static int show_traces_open(struct inode *inode, struct file *file)
3972 {
3973         struct trace_array *tr = inode->i_private;
3974         struct seq_file *m;
3975         int ret;
3976
3977         if (tracing_disabled)
3978                 return -ENODEV;
3979
3980         ret = seq_open(file, &show_traces_seq_ops);
3981         if (ret)
3982                 return ret;
3983
3984         m = file->private_data;
3985         m->private = tr;
3986
3987         return 0;
3988 }
3989
3990 static ssize_t
3991 tracing_write_stub(struct file *filp, const char __user *ubuf,
3992                    size_t count, loff_t *ppos)
3993 {
3994         return count;
3995 }
3996
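/*
 * tracing_lseek - seek handler shared by trace files.  Readers go through
 * seq_lseek(); writers have nothing to seek over, so the position is
 * simply pinned at zero.
 */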
3997 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3998 {
3999         int ret;
4000
4001         if (file->f_mode & FMODE_READ)
4002                 ret = seq_lseek(file, offset, whence);
4003         else
4004                 file->f_pos = ret = 0;
4005
4006         return ret;
4007 }
4008
4009 static const struct file_operations tracing_fops = {
4010         .open           = tracing_open,
4011         .read           = seq_read,
4012         .write          = tracing_write_stub,
4013         .llseek         = tracing_lseek,
4014         .release        = tracing_release,
4015 };
4016
4017 static const struct file_operations show_traces_fops = {
4018         .open           = show_traces_open,
4019         .read           = seq_read,
4020         .release        = seq_release,
4021         .llseek         = seq_lseek,
4022 };
4023
4024 /*
4025  * The tracer itself will not take this lock, but still we want
4026  * to provide a consistent cpumask to user-space:
4027  */
4028 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4029
4030 /*
4031  * Temporary storage for the character representation of the
4032  * CPU bitmask (and one more byte for the newline):
4033  */
4034 static char mask_str[NR_CPUS + 1];
4035
4036 static ssize_t
4037 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4038                      size_t count, loff_t *ppos)
4039 {
4040         struct trace_array *tr = file_inode(filp)->i_private;
4041         int len;
4042
4043         mutex_lock(&tracing_cpumask_update_lock);
4044
4045         len = snprintf(mask_str, count, "%*pb\n",
4046                        cpumask_pr_args(tr->tracing_cpumask));
4047         if (len >= count) {
4048                 count = -EINVAL;
4049                 goto out_err;
4050         }
4051         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4052
4053 out_err:
4054         mutex_unlock(&tracing_cpumask_update_lock);
4055
4056         return count;
4057 }
4058
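/*
 * tracing_cpumask_write - parse a cpumask from user space and bring the
 * per-CPU "disabled" counters and ring buffer recording in line with it,
 * so that only the requested CPUs keep tracing.
 *
 * For example (typical tracefs usage, assuming tracefs is mounted at
 * /sys/kernel/debug/tracing):
 *
 *	echo 3 > tracing_cpumask	# hex mask: trace only CPUs 0 and 1
 */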
4059 static ssize_t
4060 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4061                       size_t count, loff_t *ppos)
4062 {
4063         struct trace_array *tr = file_inode(filp)->i_private;
4064         cpumask_var_t tracing_cpumask_new;
4065         int err, cpu;
4066
4067         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4068                 return -ENOMEM;
4069
4070         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4071         if (err)
4072                 goto err_unlock;
4073
4074         mutex_lock(&tracing_cpumask_update_lock);
4075
4076         local_irq_disable();
4077         arch_spin_lock(&tr->max_lock);
4078         for_each_tracing_cpu(cpu) {
4079                 /*
4080                  * Increase/decrease the disabled counter if we are
4081                  * about to flip a bit in the cpumask:
4082                  */
4083                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4084                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4085                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4086                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4087                 }
4088                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4089                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4090                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4091                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4092                 }
4093         }
4094         arch_spin_unlock(&tr->max_lock);
4095         local_irq_enable();
4096
4097         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4098
4099         mutex_unlock(&tracing_cpumask_update_lock);
4100         free_cpumask_var(tracing_cpumask_new);
4101
4102         return count;
4103
4104 err_unlock:
4105         free_cpumask_var(tracing_cpumask_new);
4106
4107         return err;
4108 }
4109
4110 static const struct file_operations tracing_cpumask_fops = {
4111         .open           = tracing_open_generic_tr,
4112         .read           = tracing_cpumask_read,
4113         .write          = tracing_cpumask_write,
4114         .release        = tracing_release_generic_tr,
4115         .llseek         = generic_file_llseek,
4116 };
4117
4118 static int tracing_trace_options_show(struct seq_file *m, void *v)
4119 {
4120         struct tracer_opt *trace_opts;
4121         struct trace_array *tr = m->private;
4122         u32 tracer_flags;
4123         int i;
4124
4125         mutex_lock(&trace_types_lock);
4126         tracer_flags = tr->current_trace->flags->val;
4127         trace_opts = tr->current_trace->flags->opts;
4128
4129         for (i = 0; trace_options[i]; i++) {
4130                 if (tr->trace_flags & (1 << i))
4131                         seq_printf(m, "%s\n", trace_options[i]);
4132                 else
4133                         seq_printf(m, "no%s\n", trace_options[i]);
4134         }
4135
4136         for (i = 0; trace_opts[i].name; i++) {
4137                 if (tracer_flags & trace_opts[i].bit)
4138                         seq_printf(m, "%s\n", trace_opts[i].name);
4139                 else
4140                         seq_printf(m, "no%s\n", trace_opts[i].name);
4141         }
4142         mutex_unlock(&trace_types_lock);
4143
4144         return 0;
4145 }
4146
4147 static int __set_tracer_option(struct trace_array *tr,
4148                                struct tracer_flags *tracer_flags,
4149                                struct tracer_opt *opts, int neg)
4150 {
4151         struct tracer *trace = tracer_flags->trace;
4152         int ret;
4153
4154         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4155         if (ret)
4156                 return ret;
4157
4158         if (neg)
4159                 tracer_flags->val &= ~opts->bit;
4160         else
4161                 tracer_flags->val |= opts->bit;
4162         return 0;
4163 }
4164
4165 /* Try to assign a tracer specific option */
4166 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4167 {
4168         struct tracer *trace = tr->current_trace;
4169         struct tracer_flags *tracer_flags = trace->flags;
4170         struct tracer_opt *opts = NULL;
4171         int i;
4172
4173         for (i = 0; tracer_flags->opts[i].name; i++) {
4174                 opts = &tracer_flags->opts[i];
4175
4176                 if (strcmp(cmp, opts->name) == 0)
4177                         return __set_tracer_option(tr, trace->flags, opts, neg);
4178         }
4179
4180         return -EINVAL;
4181 }
4182
4183 /* Some tracers require overwrite to stay enabled */
4184 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4185 {
4186         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4187                 return -1;
4188
4189         return 0;
4190 }
4191
4192 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4193 {
4194         /* do nothing if flag is already set */
4195         if (!!(tr->trace_flags & mask) == !!enabled)
4196                 return 0;
4197
4198         /* Give the tracer a chance to approve the change */
4199         if (tr->current_trace->flag_changed)
4200                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4201                         return -EINVAL;
4202
4203         if (enabled)
4204                 tr->trace_flags |= mask;
4205         else
4206                 tr->trace_flags &= ~mask;
4207
4208         if (mask == TRACE_ITER_RECORD_CMD)
4209                 trace_event_enable_cmd_record(enabled);
4210
4211         if (mask == TRACE_ITER_EVENT_FORK)
4212                 trace_event_follow_fork(tr, enabled);
4213
4214         if (mask == TRACE_ITER_FUNC_FORK)
4215                 ftrace_pid_follow_fork(tr, enabled);
4216
4217         if (mask == TRACE_ITER_OVERWRITE) {
4218                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4219 #ifdef CONFIG_TRACER_MAX_TRACE
4220                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4221 #endif
4222         }
4223
4224         if (mask == TRACE_ITER_PRINTK) {
4225                 trace_printk_start_stop_comm(enabled);
4226                 trace_printk_control(enabled);
4227         }
4228
4229         return 0;
4230 }
4231
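/*
 * trace_set_options - apply a single option string such as "overwrite" or
 * "nooverwrite": the generic trace_options[] table is searched first and,
 * if the name is not found there, the current tracer's private options
 * are tried via set_tracer_option().
 */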
4232 static int trace_set_options(struct trace_array *tr, char *option)
4233 {
4234         char *cmp;
4235         int neg = 0;
4236         int ret = -ENODEV;
4237         int i;
4238         size_t orig_len = strlen(option);
4239
4240         cmp = strstrip(option);
4241
4242         if (strncmp(cmp, "no", 2) == 0) {
4243                 neg = 1;
4244                 cmp += 2;
4245         }
4246
4247         mutex_lock(&trace_types_lock);
4248
4249         for (i = 0; trace_options[i]; i++) {
4250                 if (strcmp(cmp, trace_options[i]) == 0) {
4251                         ret = set_tracer_flag(tr, 1 << i, !neg);
4252                         break;
4253                 }
4254         }
4255
4256         /* If no option could be set, test the specific tracer options */
4257         if (!trace_options[i])
4258                 ret = set_tracer_option(tr, cmp, neg);
4259
4260         mutex_unlock(&trace_types_lock);
4261
4262         /*
4263          * If the first trailing whitespace is replaced with '\0' by strstrip,
4264          * turn it back into a space.
4265          */
4266         if (orig_len > strlen(option))
4267                 option[strlen(option)] = ' ';
4268
4269         return ret;
4270 }
4271
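/*
 * apply_trace_boot_options - walk the comma-separated list passed on the
 * kernel command line via trace_options= and feed each entry to
 * trace_set_options(), restoring the commas so the buffer can be parsed
 * again later.
 */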
4272 static void __init apply_trace_boot_options(void)
4273 {
4274         char *buf = trace_boot_options_buf;
4275         char *option;
4276
4277         while (true) {
4278                 option = strsep(&buf, ",");
4279
4280                 if (!option)
4281                         break;
4282
4283                 if (*option)
4284                         trace_set_options(&global_trace, option);
4285
4286                 /* Put back the comma to allow this to be called again */
4287                 if (buf)
4288                         *(buf - 1) = ',';
4289         }
4290 }
4291
4292 static ssize_t
4293 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4294                         size_t cnt, loff_t *ppos)
4295 {
4296         struct seq_file *m = filp->private_data;
4297         struct trace_array *tr = m->private;
4298         char buf[64];
4299         int ret;
4300
4301         if (cnt >= sizeof(buf))
4302                 return -EINVAL;
4303
4304         if (copy_from_user(buf, ubuf, cnt))
4305                 return -EFAULT;
4306
4307         buf[cnt] = 0;
4308
4309         ret = trace_set_options(tr, buf);
4310         if (ret < 0)
4311                 return ret;
4312
4313         *ppos += cnt;
4314
4315         return cnt;
4316 }
4317
4318 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4319 {
4320         struct trace_array *tr = inode->i_private;
4321         int ret;
4322
4323         if (tracing_disabled)
4324                 return -ENODEV;
4325
4326         if (trace_array_get(tr) < 0)
4327                 return -ENODEV;
4328
4329         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4330         if (ret < 0)
4331                 trace_array_put(tr);
4332
4333         return ret;
4334 }
4335
4336 static const struct file_operations tracing_iter_fops = {
4337         .open           = tracing_trace_options_open,
4338         .read           = seq_read,
4339         .llseek         = seq_lseek,
4340         .release        = tracing_single_release_tr,
4341         .write          = tracing_trace_options_write,
4342 };
4343
4344 static const char readme_msg[] =
4345         "tracing mini-HOWTO:\n\n"
4346         "# echo 0 > tracing_on : quick way to disable tracing\n"
4347         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4348         " Important files:\n"
4349         "  trace\t\t\t- The static contents of the buffer\n"
4350         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4351         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4352         "  current_tracer\t- function and latency tracers\n"
4353         "  available_tracers\t- list of configured tracers for current_tracer\n"
4354         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4355         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4356         "  trace_clock\t\t- change the clock used to order events\n"
4357         "       local:   Per cpu clock but may not be synced across CPUs\n"
4358         "      global:   Synced across CPUs but slows tracing down.\n"
4359         "     counter:   Not a clock, but just an increment\n"
4360         "      uptime:   Jiffy counter from time of boot\n"
4361         "        perf:   Same clock that perf events use\n"
4362 #ifdef CONFIG_X86_64
4363         "     x86-tsc:   TSC cycle counter\n"
4364 #endif
4365         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4366         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4367         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4368         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4369         "\t\t\t  Remove sub-buffer with rmdir\n"
4370         "  trace_options\t\t- Set format or modify how tracing happens\n"
4371         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4372         "\t\t\t  option name\n"
4373         "  saved_cmdlines_size\t- echo a number in here to set how many comm-pid pairs are saved\n"
4374 #ifdef CONFIG_DYNAMIC_FTRACE
4375         "\n  available_filter_functions - list of functions that can be filtered on\n"
4376         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4377         "\t\t\t  functions\n"
4378         "\t     accepts: func_full_name or glob-matching-pattern\n"
4379         "\t     modules: Can select a group via module\n"
4380         "\t      Format: :mod:<module-name>\n"
4381         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4382         "\t    triggers: a command to perform when function is hit\n"
4383         "\t      Format: <function>:<trigger>[:count]\n"
4384         "\t     trigger: traceon, traceoff\n"
4385         "\t\t      enable_event:<system>:<event>\n"
4386         "\t\t      disable_event:<system>:<event>\n"
4387 #ifdef CONFIG_STACKTRACE
4388         "\t\t      stacktrace\n"
4389 #endif
4390 #ifdef CONFIG_TRACER_SNAPSHOT
4391         "\t\t      snapshot\n"
4392 #endif
4393         "\t\t      dump\n"
4394         "\t\t      cpudump\n"
4395         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4396         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4397         "\t     The first one will disable tracing every time do_fault is hit\n"
4398         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4399         "\t       The first time do_trap is hit and it disables tracing, the\n"
4400         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4401         "\t       the counter will not decrement. It only decrements when the\n"
4402         "\t       trigger did work\n"
4403         "\t     To remove trigger without count:\n"
4404         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4405         "\t     To remove trigger with a count:\n"
4406         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4407         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4408         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4409         "\t    modules: Can select a group via module command :mod:\n"
4410         "\t    Does not accept triggers\n"
4411 #endif /* CONFIG_DYNAMIC_FTRACE */
4412 #ifdef CONFIG_FUNCTION_TRACER
4413         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4414         "\t\t    (function)\n"
4415 #endif
4416 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4417         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4418         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4419         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4420 #endif
4421 #ifdef CONFIG_TRACER_SNAPSHOT
4422         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4423         "\t\t\t  snapshot buffer. Read the contents for more\n"
4424         "\t\t\t  information\n"
4425 #endif
4426 #ifdef CONFIG_STACK_TRACER
4427         "  stack_trace\t\t- Shows the max stack trace when active\n"
4428         "  stack_max_size\t- Shows current max stack size that was traced\n"
4429         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4430         "\t\t\t  new trace)\n"
4431 #ifdef CONFIG_DYNAMIC_FTRACE
4432         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4433         "\t\t\t  traces\n"
4434 #endif
4435 #endif /* CONFIG_STACK_TRACER */
4436 #ifdef CONFIG_KPROBE_EVENTS
4437         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4438         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4439 #endif
4440 #ifdef CONFIG_UPROBE_EVENTS
4441         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4442         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4443 #endif
4444 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4445         "\t  accepts: event-definitions (one definition per line)\n"
4446         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4447         "\t           -:[<group>/]<event>\n"
4448 #ifdef CONFIG_KPROBE_EVENTS
4449         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4450         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4451 #endif
4452 #ifdef CONFIG_UPROBE_EVENTS
4453         "\t    place: <path>:<offset>\n"
4454 #endif
4455         "\t     args: <name>=fetcharg[:type]\n"
4456         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4457         "\t           $stack<index>, $stack, $retval, $comm\n"
4458         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4459         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4460 #endif
4461         "  events/\t\t- Directory containing all trace event subsystems:\n"
4462         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4463         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4464         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4465         "\t\t\t  events\n"
4466         "      filter\t\t- If set, only events passing filter are traced\n"
4467         "  events/<system>/<event>/\t- Directory containing control files for\n"
4468         "\t\t\t  <event>:\n"
4469         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4470         "      filter\t\t- If set, only events passing filter are traced\n"
4471         "      trigger\t\t- If set, a command to perform when event is hit\n"
4472         "\t    Format: <trigger>[:count][if <filter>]\n"
4473         "\t   trigger: traceon, traceoff\n"
4474         "\t            enable_event:<system>:<event>\n"
4475         "\t            disable_event:<system>:<event>\n"
4476 #ifdef CONFIG_HIST_TRIGGERS
4477         "\t            enable_hist:<system>:<event>\n"
4478         "\t            disable_hist:<system>:<event>\n"
4479 #endif
4480 #ifdef CONFIG_STACKTRACE
4481         "\t\t    stacktrace\n"
4482 #endif
4483 #ifdef CONFIG_TRACER_SNAPSHOT
4484         "\t\t    snapshot\n"
4485 #endif
4486 #ifdef CONFIG_HIST_TRIGGERS
4487         "\t\t    hist (see below)\n"
4488 #endif
4489         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4490         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4491         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4492         "\t                  events/block/block_unplug/trigger\n"
4493         "\t   The first disables tracing every time block_unplug is hit.\n"
4494         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4495         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4496         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4497         "\t   Like function triggers, the counter is only decremented if it\n"
4498         "\t    enabled or disabled tracing.\n"
4499         "\t   To remove a trigger without a count:\n"
4500         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4501         "\t   To remove a trigger with a count:\n"
4502         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4503         "\t   Filters can be ignored when removing a trigger.\n"
4504 #ifdef CONFIG_HIST_TRIGGERS
4505         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4506         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4507         "\t            [:values=<field1[,field2,...]>]\n"
4508         "\t            [:sort=<field1[,field2,...]>]\n"
4509         "\t            [:size=#entries]\n"
4510         "\t            [:pause][:continue][:clear]\n"
4511         "\t            [:name=histname1]\n"
4512         "\t            [if <filter>]\n\n"
4513         "\t    When a matching event is hit, an entry is added to a hash\n"
4514         "\t    table using the key(s) and value(s) named, and the value of a\n"
4515         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4516         "\t    correspond to fields in the event's format description.  Keys\n"
4517         "\t    can be any field, or the special string 'stacktrace'.\n"
4518         "\t    Compound keys consisting of up to two fields can be specified\n"
4519         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4520         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4521         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4522         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4523         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4524         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4525         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4526         "\t    its histogram data will be shared with other triggers of the\n"
4527         "\t    same name, and trigger hits will update this common data.\n\n"
4528         "\t    Reading the 'hist' file for the event will dump the hash\n"
4529         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4530         "\t    triggers attached to an event, there will be a table for each\n"
4531         "\t    trigger in the output.  The table displayed for a named\n"
4532         "\t    trigger will be the same as any other instance having the\n"
4533         "\t    same name.  The default format used to display a given field\n"
4534         "\t    can be modified by appending any of the following modifiers\n"
4535         "\t    to the field name, as applicable:\n\n"
4536         "\t            .hex        display a number as a hex value\n"
4537         "\t            .sym        display an address as a symbol\n"
4538         "\t            .sym-offset display an address as a symbol and offset\n"
4539         "\t            .execname   display a common_pid as a program name\n"
4540         "\t            .syscall    display a syscall id as a syscall name\n"
4541         "\t            .log2       display log2 value rather than raw number\n\n"
4542         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4543         "\t    trigger or to start a hist trigger but not log any events\n"
4544         "\t    until told to do so.  'continue' can be used to start or\n"
4545         "\t    restart a paused hist trigger.\n\n"
4546         "\t    The 'clear' parameter will clear the contents of a running\n"
4547         "\t    hist trigger and leave its current paused/active state\n"
4548         "\t    unchanged.\n\n"
4549         "\t    The enable_hist and disable_hist triggers can be used to\n"
4550         "\t    have one event conditionally start and stop another event's\n"
4551         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4552         "\t    the enable_event and disable_event triggers.\n"
4553 #endif
4554 ;
4555
4556 static ssize_t
4557 tracing_readme_read(struct file *filp, char __user *ubuf,
4558                        size_t cnt, loff_t *ppos)
4559 {
4560         return simple_read_from_buffer(ubuf, cnt, ppos,
4561                                         readme_msg, strlen(readme_msg));
4562 }
4563
4564 static const struct file_operations tracing_readme_fops = {
4565         .open           = tracing_open_generic,
4566         .read           = tracing_readme_read,
4567         .llseek         = generic_file_llseek,
4568 };
4569
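/*
 * seq_file iterator over the saved pid <-> comm map exposed through
 * "saved_cmdlines": slots that never recorded a command (NO_CMDLINE_MAP)
 * are skipped, and the map is walked under trace_cmdline_lock with
 * preemption disabled.
 */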
4570 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4571 {
4572         unsigned int *ptr = v;
4573
4574         if (*pos || m->count)
4575                 ptr++;
4576
4577         (*pos)++;
4578
4579         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4580              ptr++) {
4581                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4582                         continue;
4583
4584                 return ptr;
4585         }
4586
4587         return NULL;
4588 }
4589
4590 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4591 {
4592         void *v;
4593         loff_t l = 0;
4594
4595         preempt_disable();
4596         arch_spin_lock(&trace_cmdline_lock);
4597
4598         v = &savedcmd->map_cmdline_to_pid[0];
4599         while (l <= *pos) {
4600                 v = saved_cmdlines_next(m, v, &l);
4601                 if (!v)
4602                         return NULL;
4603         }
4604
4605         return v;
4606 }
4607
4608 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4609 {
4610         arch_spin_unlock(&trace_cmdline_lock);
4611         preempt_enable();
4612 }
4613
4614 static int saved_cmdlines_show(struct seq_file *m, void *v)
4615 {
4616         char buf[TASK_COMM_LEN];
4617         unsigned int *pid = v;
4618
4619         __trace_find_cmdline(*pid, buf);
4620         seq_printf(m, "%d %s\n", *pid, buf);
4621         return 0;
4622 }
4623
4624 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4625         .start          = saved_cmdlines_start,
4626         .next           = saved_cmdlines_next,
4627         .stop           = saved_cmdlines_stop,
4628         .show           = saved_cmdlines_show,
4629 };
4630
4631 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4632 {
4633         if (tracing_disabled)
4634                 return -ENODEV;
4635
4636         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4637 }
4638
4639 static const struct file_operations tracing_saved_cmdlines_fops = {
4640         .open           = tracing_saved_cmdlines_open,
4641         .read           = seq_read,
4642         .llseek         = seq_lseek,
4643         .release        = seq_release,
4644 };
4645
4646 static ssize_t
4647 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4648                                  size_t cnt, loff_t *ppos)
4649 {
4650         char buf[64];
4651         int r;
4652
4653         arch_spin_lock(&trace_cmdline_lock);
4654         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4655         arch_spin_unlock(&trace_cmdline_lock);
4656
4657         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4658 }
4659
4660 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4661 {
4662         kfree(s->saved_cmdlines);
4663         kfree(s->map_cmdline_to_pid);
4664         kfree(s);
4665 }
4666
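/*
 * tracing_resize_saved_cmdlines - allocate a saved_cmdlines buffer with
 * room for @val entries and swap it in under trace_cmdline_lock, freeing
 * the old buffer only after the swap.
 */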
4667 static int tracing_resize_saved_cmdlines(unsigned int val)
4668 {
4669         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4670
4671         s = kmalloc(sizeof(*s), GFP_KERNEL);
4672         if (!s)
4673                 return -ENOMEM;
4674
4675         if (allocate_cmdlines_buffer(val, s) < 0) {
4676                 kfree(s);
4677                 return -ENOMEM;
4678         }
4679
4680         arch_spin_lock(&trace_cmdline_lock);
4681         savedcmd_temp = savedcmd;
4682         savedcmd = s;
4683         arch_spin_unlock(&trace_cmdline_lock);
4684         free_saved_cmdlines_buffer(savedcmd_temp);
4685
4686         return 0;
4687 }
4688
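/*
 * Writing a number between 1 and PID_MAX_DEFAULT into
 * "saved_cmdlines_size" resizes the comm-pid cache, e.g. (typical
 * tracefs usage):
 *
 *	echo 1024 > saved_cmdlines_size
 */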
4689 static ssize_t
4690 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4691                                   size_t cnt, loff_t *ppos)
4692 {
4693         unsigned long val;
4694         int ret;
4695
4696         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4697         if (ret)
4698                 return ret;
4699
4700         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4701         if (!val || val > PID_MAX_DEFAULT)
4702                 return -EINVAL;
4703
4704         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4705         if (ret < 0)
4706                 return ret;
4707
4708         *ppos += cnt;
4709
4710         return cnt;
4711 }
4712
4713 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4714         .open           = tracing_open_generic,
4715         .read           = tracing_saved_cmdlines_size_read,
4716         .write          = tracing_saved_cmdlines_size_write,
4717 };
4718
4719 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
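/*
 * The enum map is stored as blocks of trace_enum_map_item: a head item
 * (module + length), the map entries themselves, and a tail item whose
 * ->next points at the following block.  update_enum_map() recognises a
 * tail (enum_string == NULL) and jumps over the next block's head so the
 * seq_file iterator only ever sees real map entries.
 */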
4720 static union trace_enum_map_item *
4721 update_enum_map(union trace_enum_map_item *ptr)
4722 {
4723         if (!ptr->map.enum_string) {
4724                 if (ptr->tail.next) {
4725                         ptr = ptr->tail.next;
4726                         /* Set ptr to the next real item (skip head) */
4727                         ptr++;
4728                 } else
4729                         return NULL;
4730         }
4731         return ptr;
4732 }
4733
4734 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4735 {
4736         union trace_enum_map_item *ptr = v;
4737
4738         /*
4739          * Paranoid! If ptr points to end, we don't want to increment past it.
4740          * This really should never happen.
4741          */
4742         ptr = update_enum_map(ptr);
4743         if (WARN_ON_ONCE(!ptr))
4744                 return NULL;
4745
4746         ptr++;
4747
4748         (*pos)++;
4749
4750         ptr = update_enum_map(ptr);
4751
4752         return ptr;
4753 }
4754
4755 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4756 {
4757         union trace_enum_map_item *v;
4758         loff_t l = 0;
4759
4760         mutex_lock(&trace_enum_mutex);
4761
4762         v = trace_enum_maps;
4763         if (v)
4764                 v++;
4765
4766         while (v && l < *pos) {
4767                 v = enum_map_next(m, v, &l);
4768         }
4769
4770         return v;
4771 }
4772
4773 static void enum_map_stop(struct seq_file *m, void *v)
4774 {
4775         mutex_unlock(&trace_enum_mutex);
4776 }
4777
4778 static int enum_map_show(struct seq_file *m, void *v)
4779 {
4780         union trace_enum_map_item *ptr = v;
4781
4782         seq_printf(m, "%s %ld (%s)\n",
4783                    ptr->map.enum_string, ptr->map.enum_value,
4784                    ptr->map.system);
4785
4786         return 0;
4787 }
4788
4789 static const struct seq_operations tracing_enum_map_seq_ops = {
4790         .start          = enum_map_start,
4791         .next           = enum_map_next,
4792         .stop           = enum_map_stop,
4793         .show           = enum_map_show,
4794 };
4795
4796 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4797 {
4798         if (tracing_disabled)
4799                 return -ENODEV;
4800
4801         return seq_open(filp, &tracing_enum_map_seq_ops);
4802 }
4803
4804 static const struct file_operations tracing_enum_map_fops = {
4805         .open           = tracing_enum_map_open,
4806         .read           = seq_read,
4807         .llseek         = seq_lseek,
4808         .release        = seq_release,
4809 };
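/*
 * Illustrative output sketch: enum_map_show() prints one line per map as
 * "<enum_string> <value> (<system>)", so reading the "enum_map" file
 * created below looks roughly like:
 *
 *	# cat /sys/kernel/tracing/enum_map
 *	HI_SOFTIRQ 0 (irq)
 *
 * (the example entry is made up; the path assumes tracefs is mounted
 * at /sys/kernel/tracing)
 */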
4810
4811 static inline union trace_enum_map_item *
4812 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4813 {
4814         /* Return tail of array given the head */
4815         return ptr + ptr->head.length + 1;
4816 }
4817
4818 static void
4819 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4820                            int len)
4821 {
4822         struct trace_enum_map **stop;
4823         struct trace_enum_map **map;
4824         union trace_enum_map_item *map_array;
4825         union trace_enum_map_item *ptr;
4826
4827         stop = start + len;
4828
4829         /*
4830          * The trace_enum_maps array contains the maps plus a head and tail
4831          * item, where the head holds the module and the length of the array,
4832          * and the tail holds a pointer to the next list.
4833          */
4834         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4835         if (!map_array) {
4836                 pr_warn("Unable to allocate trace enum mapping\n");
4837                 return;
4838         }
4839
4840         mutex_lock(&trace_enum_mutex);
4841
4842         if (!trace_enum_maps)
4843                 trace_enum_maps = map_array;
4844         else {
4845                 ptr = trace_enum_maps;
4846                 for (;;) {
4847                         ptr = trace_enum_jmp_to_tail(ptr);
4848                         if (!ptr->tail.next)
4849                                 break;
4850                         ptr = ptr->tail.next;
4851
4852                 }
4853                 ptr->tail.next = map_array;
4854         }
4855         map_array->head.mod = mod;
4856         map_array->head.length = len;
4857         map_array++;
4858
4859         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4860                 map_array->map = **map;
4861                 map_array++;
4862         }
4863         memset(map_array, 0, sizeof(*map_array));
4864
4865         mutex_unlock(&trace_enum_mutex);
4866 }
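/*
 * Layout sketch of the array built above (illustrative): for len == 3
 * the allocation holds len + 2 items:
 *
 *	[ head: mod, length ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_enum_jmp_to_tail() returns head + length + 1, i.e. the tail
 * slot, whose ->tail.next chains to the next module's array (the
 * memset() above leaves it NULL for the newest array).
 */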
4867
4868 static void trace_create_enum_file(struct dentry *d_tracer)
4869 {
4870         trace_create_file("enum_map", 0444, d_tracer,
4871                           NULL, &tracing_enum_map_fops);
4872 }
4873
4874 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4875 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4876 static inline void trace_insert_enum_map_file(struct module *mod,
4877                               struct trace_enum_map **start, int len) { }
4878 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4879
4880 static void trace_insert_enum_map(struct module *mod,
4881                                   struct trace_enum_map **start, int len)
4882 {
4883         struct trace_enum_map **map;
4884
4885         if (len <= 0)
4886                 return;
4887
4888         map = start;
4889
4890         trace_event_enum_update(map, len);
4891
4892         trace_insert_enum_map_file(mod, start, len);
4893 }
4894
4895 static ssize_t
4896 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4897                        size_t cnt, loff_t *ppos)
4898 {
4899         struct trace_array *tr = filp->private_data;
4900         char buf[MAX_TRACER_SIZE+2];
4901         int r;
4902
4903         mutex_lock(&trace_types_lock);
4904         r = sprintf(buf, "%s\n", tr->current_trace->name);
4905         mutex_unlock(&trace_types_lock);
4906
4907         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4908 }
4909
4910 int tracer_init(struct tracer *t, struct trace_array *tr)
4911 {
4912         tracing_reset_online_cpus(&tr->trace_buffer);
4913         return t->init(tr);
4914 }
4915
4916 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4917 {
4918         int cpu;
4919
4920         for_each_tracing_cpu(cpu)
4921                 per_cpu_ptr(buf->data, cpu)->entries = val;
4922 }
4923
4924 #ifdef CONFIG_TRACER_MAX_TRACE
4925 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4926 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4927                                         struct trace_buffer *size_buf, int cpu_id)
4928 {
4929         int cpu, ret = 0;
4930
4931         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4932                 for_each_tracing_cpu(cpu) {
4933                         ret = ring_buffer_resize(trace_buf->buffer,
4934                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4935                         if (ret < 0)
4936                                 break;
4937                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4938                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4939                 }
4940         } else {
4941                 ret = ring_buffer_resize(trace_buf->buffer,
4942                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4943                 if (ret == 0)
4944                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4945                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4946         }
4947
4948         return ret;
4949 }
4950 #endif /* CONFIG_TRACER_MAX_TRACE */
4951
4952 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4953                                         unsigned long size, int cpu)
4954 {
4955         int ret;
4956
4957         /*
4958          * If kernel or user changes the size of the ring buffer
4959          * we use the size that was given, and we can forget about
4960          * expanding it later.
4961          */
4962         ring_buffer_expanded = true;
4963
4964         /* May be called before buffers are initialized */
4965         if (!tr->trace_buffer.buffer)
4966                 return 0;
4967
4968         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4969         if (ret < 0)
4970                 return ret;
4971
4972 #ifdef CONFIG_TRACER_MAX_TRACE
4973         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4974             !tr->current_trace->use_max_tr)
4975                 goto out;
4976
4977         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4978         if (ret < 0) {
4979                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4980                                                      &tr->trace_buffer, cpu);
4981                 if (r < 0) {
4982                         /*
4983                          * AARGH! We are left with a max buffer of a
4984                          * different size!
4985                          * The max buffer is our "snapshot" buffer.
4986                          * When a tracer needs a snapshot (one of the
4987                          * latency tracers), it swaps the max buffer
4988                          * with the saved snapshot. We succeeded in
4989                          * updating the size of the main buffer, but
4990                          * failed to update the size of the max buffer.
4991                          * Then, when we tried to reset the main buffer
4992                          * to its original size, we failed there too.
4993                          * This is very unlikely to happen, but if it
4994                          * does, warn and kill all tracing.
4995                          */
4996                         WARN_ON(1);
4997                         tracing_disabled = 1;
4998                 }
4999                 return ret;
5000         }
5001
5002         if (cpu == RING_BUFFER_ALL_CPUS)
5003                 set_buffer_entries(&tr->max_buffer, size);
5004         else
5005                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5006
5007  out:
5008 #endif /* CONFIG_TRACER_MAX_TRACE */
5009
5010         if (cpu == RING_BUFFER_ALL_CPUS)
5011                 set_buffer_entries(&tr->trace_buffer, size);
5012         else
5013                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5014
5015         return ret;
5016 }
5017
5018 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5019                                           unsigned long size, int cpu_id)
5020 {
5021         int ret = size;
5022
5023         mutex_lock(&trace_types_lock);
5024
5025         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5026                 /* make sure this CPU is enabled in the mask */
5027                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5028                         ret = -EINVAL;
5029                         goto out;
5030                 }
5031         }
5032
5033         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5034         if (ret < 0)
5035                 ret = -ENOMEM;
5036
5037 out:
5038         mutex_unlock(&trace_types_lock);
5039
5040         return ret;
5041 }
5042
5043
5044 /**
5045  * tracing_update_buffers - used by tracing facility to expand ring buffers
5046  *
5047  * To save memory when tracing is never used on a system that has it
5048  * configured in, the ring buffers are set to a minimum size. But once
5049  * a user starts to use the tracing facility, they need to grow
5050  * to their default size.
5051  *
5052  * This function is to be called when a tracer is about to be used.
5053  */
5054 int tracing_update_buffers(void)
5055 {
5056         int ret = 0;
5057
5058         mutex_lock(&trace_types_lock);
5059         if (!ring_buffer_expanded)
5060                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5061                                                 RING_BUFFER_ALL_CPUS);
5062         mutex_unlock(&trace_types_lock);
5063
5064         return ret;
5065 }
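/*
 * Illustrative call-site sketch (an assumption, not code from this file):
 * a facility that is about to start tracing expands the buffers first and
 * only proceeds on success:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	/* safe to enable the tracer or event now */
 */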
5066
5067 struct trace_option_dentry;
5068
5069 static void
5070 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5071
5072 /*
5073  * Used to clear out the tracer before deletion of an instance.
5074  * Must have trace_types_lock held.
5075  */
5076 static void tracing_set_nop(struct trace_array *tr)
5077 {
5078         if (tr->current_trace == &nop_trace)
5079                 return;
5080
5081         tr->current_trace->enabled--;
5082
5083         if (tr->current_trace->reset)
5084                 tr->current_trace->reset(tr);
5085
5086         tr->current_trace = &nop_trace;
5087 }
5088
5089 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5090 {
5091         /* Only enable if the directory has been created already. */
5092         if (!tr->dir)
5093                 return;
5094
5095         create_trace_option_files(tr, t);
5096 }
5097
5098 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5099 {
5100         struct tracer *t;
5101 #ifdef CONFIG_TRACER_MAX_TRACE
5102         bool had_max_tr;
5103 #endif
5104         int ret = 0;
5105
5106         mutex_lock(&trace_types_lock);
5107
5108         if (!ring_buffer_expanded) {
5109                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5110                                                 RING_BUFFER_ALL_CPUS);
5111                 if (ret < 0)
5112                         goto out;
5113                 ret = 0;
5114         }
5115
5116         for (t = trace_types; t; t = t->next) {
5117                 if (strcmp(t->name, buf) == 0)
5118                         break;
5119         }
5120         if (!t) {
5121                 ret = -EINVAL;
5122                 goto out;
5123         }
5124         if (t == tr->current_trace)
5125                 goto out;
5126
5127         /* Some tracers are only allowed for the top level buffer */
5128         if (!trace_ok_for_array(t, tr)) {
5129                 ret = -EINVAL;
5130                 goto out;
5131         }
5132
5133         /* If trace pipe files are being read, we can't change the tracer */
5134         if (tr->current_trace->ref) {
5135                 ret = -EBUSY;
5136                 goto out;
5137         }
5138
5139         trace_branch_disable();
5140
5141         tr->current_trace->enabled--;
5142
5143         if (tr->current_trace->reset)
5144                 tr->current_trace->reset(tr);
5145
5146         /* Current trace needs to be nop_trace before synchronize_sched */
5147         tr->current_trace = &nop_trace;
5148
5149 #ifdef CONFIG_TRACER_MAX_TRACE
5150         had_max_tr = tr->allocated_snapshot;
5151
5152         if (had_max_tr && !t->use_max_tr) {
5153                 /*
5154                  * We need to make sure that the update_max_tr sees that
5155                  * current_trace changed to nop_trace to keep it from
5156                  * swapping the buffers after we resize it.
5157                  * update_max_tr() is called with interrupts disabled,
5158                  * so a synchronize_sched() is sufficient.
5159                  */
5160                 synchronize_sched();
5161                 free_snapshot(tr);
5162         }
5163 #endif
5164
5165 #ifdef CONFIG_TRACER_MAX_TRACE
5166         if (t->use_max_tr && !had_max_tr) {
5167                 ret = alloc_snapshot(tr);
5168                 if (ret < 0)
5169                         goto out;
5170         }
5171 #endif
5172
5173         if (t->init) {
5174                 ret = tracer_init(t, tr);
5175                 if (ret)
5176                         goto out;
5177         }
5178
5179         tr->current_trace = t;
5180         tr->current_trace->enabled++;
5181         trace_branch_enable(tr);
5182  out:
5183         mutex_unlock(&trace_types_lock);
5184
5185         return ret;
5186 }
5187
5188 static ssize_t
5189 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5190                         size_t cnt, loff_t *ppos)
5191 {
5192         struct trace_array *tr = filp->private_data;
5193         char buf[MAX_TRACER_SIZE+1];
5194         int i;
5195         size_t ret;
5196         int err;
5197
5198         ret = cnt;
5199
5200         if (cnt > MAX_TRACER_SIZE)
5201                 cnt = MAX_TRACER_SIZE;
5202
5203         if (copy_from_user(buf, ubuf, cnt))
5204                 return -EFAULT;
5205
5206         buf[cnt] = 0;
5207
5208         /* strip trailing whitespace */
5209         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5210                 buf[i] = 0;
5211
5212         err = tracing_set_tracer(tr, buf);
5213         if (err)
5214                 return err;
5215
5216         *ppos += ret;
5217
5218         return ret;
5219 }
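/*
 * Illustrative usage sketch (assumes tracefs at /sys/kernel/tracing):
 * the read and write handlers above typically back the "current_tracer"
 * file:
 *
 *	# cat /sys/kernel/tracing/current_tracer
 *	nop
 *	# echo function > /sys/kernel/tracing/current_tracer
 *
 * Unknown tracer names make tracing_set_tracer() return -EINVAL, and the
 * write fails with -EBUSY while trace_pipe readers hold the tracer.
 */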
5220
5221 static ssize_t
5222 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5223                    size_t cnt, loff_t *ppos)
5224 {
5225         char buf[64];
5226         int r;
5227
5228         r = snprintf(buf, sizeof(buf), "%ld\n",
5229                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5230         if (r > sizeof(buf))
5231                 r = sizeof(buf);
5232         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5233 }
5234
5235 static ssize_t
5236 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5237                     size_t cnt, loff_t *ppos)
5238 {
5239         unsigned long val;
5240         int ret;
5241
5242         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5243         if (ret)
5244                 return ret;
5245
5246         *ptr = val * 1000;
5247
5248         return cnt;
5249 }
5250
5251 static ssize_t
5252 tracing_thresh_read(struct file *filp, char __user *ubuf,
5253                     size_t cnt, loff_t *ppos)
5254 {
5255         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5256 }
5257
5258 static ssize_t
5259 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5260                      size_t cnt, loff_t *ppos)
5261 {
5262         struct trace_array *tr = filp->private_data;
5263         int ret;
5264
5265         mutex_lock(&trace_types_lock);
5266         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5267         if (ret < 0)
5268                 goto out;
5269
5270         if (tr->current_trace->update_thresh) {
5271                 ret = tr->current_trace->update_thresh(tr);
5272                 if (ret < 0)
5273                         goto out;
5274         }
5275
5276         ret = cnt;
5277 out:
5278         mutex_unlock(&trace_types_lock);
5279
5280         return ret;
5281 }
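/*
 * Units note and usage sketch (illustrative): tracing_nsecs_write() takes
 * a value in microseconds from userspace and stores it in nanoseconds
 * (val * 1000), so for the "tracing_thresh" file:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * sets a 100 usec threshold (the path assumes tracefs is mounted there).
 */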
5282
5283 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5284
5285 static ssize_t
5286 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5287                      size_t cnt, loff_t *ppos)
5288 {
5289         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5290 }
5291
5292 static ssize_t
5293 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5294                       size_t cnt, loff_t *ppos)
5295 {
5296         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5297 }
5298
5299 #endif
5300
5301 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5302 {
5303         struct trace_array *tr = inode->i_private;
5304         struct trace_iterator *iter;
5305         int ret = 0;
5306
5307         if (tracing_disabled)
5308                 return -ENODEV;
5309
5310         if (trace_array_get(tr) < 0)
5311                 return -ENODEV;
5312
5313         mutex_lock(&trace_types_lock);
5314
5315         /* create a buffer to store the information to pass to userspace */
5316         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5317         if (!iter) {
5318                 ret = -ENOMEM;
5319                 __trace_array_put(tr);
5320                 goto out;
5321         }
5322
5323         trace_seq_init(&iter->seq);
5324         iter->trace = tr->current_trace;
5325
5326         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5327                 ret = -ENOMEM;
5328                 goto fail;
5329         }
5330
5331         /* trace pipe does not show start of buffer */
5332         cpumask_setall(iter->started);
5333
5334         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5335                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5336
5337         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5338         if (trace_clocks[tr->clock_id].in_ns)
5339                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5340
5341         iter->tr = tr;
5342         iter->trace_buffer = &tr->trace_buffer;
5343         iter->cpu_file = tracing_get_cpu(inode);
5344         mutex_init(&iter->mutex);
5345         filp->private_data = iter;
5346
5347         if (iter->trace->pipe_open)
5348                 iter->trace->pipe_open(iter);
5349
5350         nonseekable_open(inode, filp);
5351
5352         tr->current_trace->ref++;
5353 out:
5354         mutex_unlock(&trace_types_lock);
5355         return ret;
5356
5357 fail:
5358         /* iter->trace points at the live tracer itself; do not free it */
5359         kfree(iter);
5360         __trace_array_put(tr);
5361         mutex_unlock(&trace_types_lock);
5362         return ret;
5363 }
5364
5365 static int tracing_release_pipe(struct inode *inode, struct file *file)
5366 {
5367         struct trace_iterator *iter = file->private_data;
5368         struct trace_array *tr = inode->i_private;
5369
5370         mutex_lock(&trace_types_lock);
5371
5372         tr->current_trace->ref--;
5373
5374         if (iter->trace->pipe_close)
5375                 iter->trace->pipe_close(iter);
5376
5377         mutex_unlock(&trace_types_lock);
5378
5379         free_cpumask_var(iter->started);
5380         mutex_destroy(&iter->mutex);
5381         kfree(iter);
5382
5383         trace_array_put(tr);
5384
5385         return 0;
5386 }
5387
5388 static unsigned int
5389 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5390 {
5391         struct trace_array *tr = iter->tr;
5392
5393         /* Iterators are static; they should be filled or empty */
5394         if (trace_buffer_iter(iter, iter->cpu_file))
5395                 return POLLIN | POLLRDNORM;
5396
5397         if (tr->trace_flags & TRACE_ITER_BLOCK)
5398                 /*
5399                  * Always select as readable when in blocking mode
5400                  */
5401                 return POLLIN | POLLRDNORM;
5402         else
5403                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5404                                              filp, poll_table);
5405 }
5406
5407 static unsigned int
5408 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5409 {
5410         struct trace_iterator *iter = filp->private_data;
5411
5412         return trace_poll(iter, filp, poll_table);
5413 }
5414
5415 /* Must be called with iter->mutex held. */
5416 static int tracing_wait_pipe(struct file *filp)
5417 {
5418         struct trace_iterator *iter = filp->private_data;
5419         int ret;
5420
5421         while (trace_empty(iter)) {
5422
5423                 if ((filp->f_flags & O_NONBLOCK)) {
5424                         return -EAGAIN;
5425                 }
5426
5427                 /*
5428                  * We block until we read something and tracing is disabled.
5429                  * We still block if tracing is disabled but we have never
5430                  * read anything. This allows a user to cat this file and
5431                  * then enable tracing. But after we have read something,
5432                  * we give an EOF when tracing is disabled again.
5433                  *
5434                  * iter->pos will be 0 if we haven't read anything.
5435                  */
5436                 if (!tracing_is_on() && iter->pos)
5437                         break;
5438
5439                 mutex_unlock(&iter->mutex);
5440
5441                 ret = wait_on_pipe(iter, false);
5442
5443                 mutex_lock(&iter->mutex);
5444
5445                 if (ret)
5446                         return ret;
5447         }
5448
5449         return 1;
5450 }
5451
5452 /*
5453  * Consumer reader.
5454  */
5455 static ssize_t
5456 tracing_read_pipe(struct file *filp, char __user *ubuf,
5457                   size_t cnt, loff_t *ppos)
5458 {
5459         struct trace_iterator *iter = filp->private_data;
5460         ssize_t sret;
5461
5462         /*
5463          * Avoid more than one consumer on a single file descriptor.
5464          * This is just a matter of trace coherency; the ring buffer itself
5465          * is protected.
5466          */
5467         mutex_lock(&iter->mutex);
5468
5469         /* return any leftover data */
5470         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5471         if (sret != -EBUSY)
5472                 goto out;
5473
5474         trace_seq_init(&iter->seq);
5475
5476         if (iter->trace->read) {
5477                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5478                 if (sret)
5479                         goto out;
5480         }
5481
5482 waitagain:
5483         sret = tracing_wait_pipe(filp);
5484         if (sret <= 0)
5485                 goto out;
5486
5487         /* stop when tracing is finished */
5488         if (trace_empty(iter)) {
5489                 sret = 0;
5490                 goto out;
5491         }
5492
5493         if (cnt >= PAGE_SIZE)
5494                 cnt = PAGE_SIZE - 1;
5495
5496         /* reset all but tr, trace, and overruns */
5497         memset(&iter->seq, 0,
5498                sizeof(struct trace_iterator) -
5499                offsetof(struct trace_iterator, seq));
5500         cpumask_clear(iter->started);
5501         iter->pos = -1;
5502
5503         trace_event_read_lock();
5504         trace_access_lock(iter->cpu_file);
5505         while (trace_find_next_entry_inc(iter) != NULL) {
5506                 enum print_line_t ret;
5507                 int save_len = iter->seq.seq.len;
5508
5509                 ret = print_trace_line(iter);
5510                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5511                         /* don't print partial lines */
5512                         iter->seq.seq.len = save_len;
5513                         break;
5514                 }
5515                 if (ret != TRACE_TYPE_NO_CONSUME)
5516                         trace_consume(iter);
5517
5518                 if (trace_seq_used(&iter->seq) >= cnt)
5519                         break;
5520
5521                 /*
5522                  * The full flag being set means we reached the trace_seq buffer
5523                  * size and should have left via the partial-line check above.
5524                  * One of the trace_seq_* functions is not being used properly.
5525                  */
5526                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5527                           iter->ent->type);
5528         }
5529         trace_access_unlock(iter->cpu_file);
5530         trace_event_read_unlock();
5531
5532         /* Now copy what we have to the user */
5533         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5534         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5535                 trace_seq_init(&iter->seq);
5536
5537         /*
5538          * If there was nothing to send to user, in spite of consuming trace
5539          * entries, go back to wait for more entries.
5540          */
5541         if (sret == -EBUSY)
5542                 goto waitagain;
5543
5544 out:
5545         mutex_unlock(&iter->mutex);
5546
5547         return sret;
5548 }
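/*
 * Usage sketch (illustrative): these handlers typically back the
 * "trace_pipe" file, a consuming reader. Unlike "trace", entries read
 * here are removed from the ring buffer, and an empty buffer blocks
 * unless the file was opened O_NONBLOCK:
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * (blocks waiting for new entries; the mount point is an assumption)
 */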
5549
5550 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5551                                      unsigned int idx)
5552 {
5553         __free_page(spd->pages[idx]);
5554 }
5555
5556 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5557         .can_merge              = 0,
5558         .confirm                = generic_pipe_buf_confirm,
5559         .release                = generic_pipe_buf_release,
5560         .steal                  = generic_pipe_buf_steal,
5561         .get                    = generic_pipe_buf_get,
5562 };
5563
5564 static size_t
5565 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5566 {
5567         size_t count;
5568         int save_len;
5569         int ret;
5570
5571         /* Seq buffer is page-sized, exactly what we need. */
5572         for (;;) {
5573                 save_len = iter->seq.seq.len;
5574                 ret = print_trace_line(iter);
5575
5576                 if (trace_seq_has_overflowed(&iter->seq)) {
5577                         iter->seq.seq.len = save_len;
5578                         break;
5579                 }
5580
5581                 /*
5582                  * This should not be hit, because a partial line should
5583                  * only be returned if the iter->seq overflowed. But check
5584                  * it anyway to be safe.
5585                  */
5586                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5587                         iter->seq.seq.len = save_len;
5588                         break;
5589                 }
5590
5591                 count = trace_seq_used(&iter->seq) - save_len;
5592                 if (rem < count) {
5593                         rem = 0;
5594                         iter->seq.seq.len = save_len;
5595                         break;
5596                 }
5597
5598                 if (ret != TRACE_TYPE_NO_CONSUME)
5599                         trace_consume(iter);
5600                 rem -= count;
5601                 if (!trace_find_next_entry_inc(iter))   {
5602                         rem = 0;
5603                         iter->ent = NULL;
5604                         break;
5605                 }
5606         }
5607
5608         return rem;
5609 }
5610
5611 static ssize_t tracing_splice_read_pipe(struct file *filp,
5612                                         loff_t *ppos,
5613                                         struct pipe_inode_info *pipe,
5614                                         size_t len,
5615                                         unsigned int flags)
5616 {
5617         struct page *pages_def[PIPE_DEF_BUFFERS];
5618         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5619         struct trace_iterator *iter = filp->private_data;
5620         struct splice_pipe_desc spd = {
5621                 .pages          = pages_def,
5622                 .partial        = partial_def,
5623                 .nr_pages       = 0, /* This gets updated below. */
5624                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5625                 .ops            = &tracing_pipe_buf_ops,
5626                 .spd_release    = tracing_spd_release_pipe,
5627         };
5628         ssize_t ret;
5629         size_t rem;
5630         unsigned int i;
5631
5632         if (splice_grow_spd(pipe, &spd))
5633                 return -ENOMEM;
5634
5635         mutex_lock(&iter->mutex);
5636
5637         if (iter->trace->splice_read) {
5638                 ret = iter->trace->splice_read(iter, filp,
5639                                                ppos, pipe, len, flags);
5640                 if (ret)
5641                         goto out_err;
5642         }
5643
5644         ret = tracing_wait_pipe(filp);
5645         if (ret <= 0)
5646                 goto out_err;
5647
5648         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5649                 ret = -EFAULT;
5650                 goto out_err;
5651         }
5652
5653         trace_event_read_lock();
5654         trace_access_lock(iter->cpu_file);
5655
5656         /* Fill as many pages as possible. */
5657         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5658                 spd.pages[i] = alloc_page(GFP_KERNEL);
5659                 if (!spd.pages[i])
5660                         break;
5661
5662                 rem = tracing_fill_pipe_page(rem, iter);
5663
5664                 /* Copy the data into the page, so we can start over. */
5665                 ret = trace_seq_to_buffer(&iter->seq,
5666                                           page_address(spd.pages[i]),
5667                                           trace_seq_used(&iter->seq));
5668                 if (ret < 0) {
5669                         __free_page(spd.pages[i]);
5670                         break;
5671                 }
5672                 spd.partial[i].offset = 0;
5673                 spd.partial[i].len = trace_seq_used(&iter->seq);
5674
5675                 trace_seq_init(&iter->seq);
5676         }
5677
5678         trace_access_unlock(iter->cpu_file);
5679         trace_event_read_unlock();
5680         mutex_unlock(&iter->mutex);
5681
5682         spd.nr_pages = i;
5683
5684         if (i)
5685                 ret = splice_to_pipe(pipe, &spd);
5686         else
5687                 ret = 0;
5688 out:
5689         splice_shrink_spd(&spd);
5690         return ret;
5691
5692 out_err:
5693         mutex_unlock(&iter->mutex);
5694         goto out;
5695 }
5696
5697 static ssize_t
5698 tracing_entries_read(struct file *filp, char __user *ubuf,
5699                      size_t cnt, loff_t *ppos)
5700 {
5701         struct inode *inode = file_inode(filp);
5702         struct trace_array *tr = inode->i_private;
5703         int cpu = tracing_get_cpu(inode);
5704         char buf[64];
5705         int r = 0;
5706         ssize_t ret;
5707
5708         mutex_lock(&trace_types_lock);
5709
5710         if (cpu == RING_BUFFER_ALL_CPUS) {
5711                 int cpu, buf_size_same;
5712                 unsigned long size;
5713
5714                 size = 0;
5715                 buf_size_same = 1;
5716                 /* check if all cpu sizes are the same */
5717                 for_each_tracing_cpu(cpu) {
5718                         /* fill in the size from first enabled cpu */
5719                         if (size == 0)
5720                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5721                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5722                                 buf_size_same = 0;
5723                                 break;
5724                         }
5725                 }
5726
5727                 if (buf_size_same) {
5728                         if (!ring_buffer_expanded)
5729                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5730                                             size >> 10,
5731                                             trace_buf_size >> 10);
5732                         else
5733                                 r = sprintf(buf, "%lu\n", size >> 10);
5734                 } else
5735                         r = sprintf(buf, "X\n");
5736         } else
5737                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5738
5739         mutex_unlock(&trace_types_lock);
5740
5741         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5742         return ret;
5743 }
5744
5745 static ssize_t
5746 tracing_entries_write(struct file *filp, const char __user *ubuf,
5747                       size_t cnt, loff_t *ppos)
5748 {
5749         struct inode *inode = file_inode(filp);
5750         struct trace_array *tr = inode->i_private;
5751         unsigned long val;
5752         int ret;
5753
5754         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5755         if (ret)
5756                 return ret;
5757
5758         /* must have at least 1 entry */
5759         if (!val)
5760                 return -EINVAL;
5761
5762         /* value is in KB */
5763         val <<= 10;
5764         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5765         if (ret < 0)
5766                 return ret;
5767
5768         *ppos += cnt;
5769
5770         return cnt;
5771 }
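/*
 * Usage sketch (illustrative): these handlers typically back the
 * "buffer_size_kb" file, both per instance and under per_cpu/cpuN/.
 * Values are in KB (val <<= 10 above):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *	4096
 *
 * Before the buffers are expanded, reads show "N (expanded: M)".
 */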
5772
5773 static ssize_t
5774 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5775                                 size_t cnt, loff_t *ppos)
5776 {
5777         struct trace_array *tr = filp->private_data;
5778         char buf[64];
5779         int r, cpu;
5780         unsigned long size = 0, expanded_size = 0;
5781
5782         mutex_lock(&trace_types_lock);
5783         for_each_tracing_cpu(cpu) {
5784                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5785                 if (!ring_buffer_expanded)
5786                         expanded_size += trace_buf_size >> 10;
5787         }
5788         if (ring_buffer_expanded)
5789                 r = sprintf(buf, "%lu\n", size);
5790         else
5791                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5792         mutex_unlock(&trace_types_lock);
5793
5794         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5795 }
5796
5797 static ssize_t
5798 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5799                           size_t cnt, loff_t *ppos)
5800 {
5801         /*
5802          * There is no need to read what the user has written; this function
5803          * only exists so that there is no error when "echo" is used.
5804          */
5805
5806         *ppos += cnt;
5807
5808         return cnt;
5809 }
5810
5811 static int
5812 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5813 {
5814         struct trace_array *tr = inode->i_private;
5815
5816         /* disable tracing? */
5817         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5818                 tracer_tracing_off(tr);
5819         /* resize the ring buffer to 0 */
5820         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5821
5822         trace_array_put(tr);
5823
5824         return 0;
5825 }
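/*
 * Usage sketch (illustrative): these handlers typically back the
 * "free_buffer" file. The write is a no-op so "echo" succeeds; the real
 * work happens on release, which shrinks the ring buffer to zero and,
 * with the stop-on-free trace option set, turns tracing off:
 *
 *	# echo > /sys/kernel/tracing/free_buffer
 */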
5826
5827 static ssize_t
5828 tracing_mark_write(struct file *filp, const char __user *ubuf,
5829                                         size_t cnt, loff_t *fpos)
5830 {
5831         struct trace_array *tr = filp->private_data;
5832         struct ring_buffer_event *event;
5833         struct ring_buffer *buffer;
5834         struct print_entry *entry;
5835         unsigned long irq_flags;
5836         const char faulted[] = "<faulted>";
5837         ssize_t written;
5838         int size;
5839         int len;
5840
5841 /* Used in tracing_mark_raw_write() as well */
5842 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5843
5844         if (tracing_disabled)
5845                 return -EINVAL;
5846
5847         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5848                 return -EINVAL;
5849
5850         if (cnt > TRACE_BUF_SIZE)
5851                 cnt = TRACE_BUF_SIZE;
5852
5853         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5854
5855         local_save_flags(irq_flags);
5856         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5857
5858         /* If the write is smaller than "<faulted>", make sure we can still add that */
5859         if (cnt < FAULTED_SIZE)
5860                 size += FAULTED_SIZE - cnt;
5861
5862         buffer = tr->trace_buffer.buffer;
5863         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5864                                             irq_flags, preempt_count());
5865         if (unlikely(!event))
5866                 /* Ring buffer disabled, return as if not open for write */
5867                 return -EBADF;
5868
5869         entry = ring_buffer_event_data(event);
5870         entry->ip = _THIS_IP_;
5871
5872         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5873         if (len) {
5874                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5875                 cnt = FAULTED_SIZE;
5876                 written = -EFAULT;
5877         } else
5878                 written = cnt;
5879         len = cnt;
5880
5881         if (entry->buf[cnt - 1] != '\n') {
5882                 entry->buf[cnt] = '\n';
5883                 entry->buf[cnt + 1] = '\0';
5884         } else
5885                 entry->buf[cnt] = '\0';
5886
5887         __buffer_unlock_commit(buffer, event);
5888
5889         if (written > 0)
5890                 *fpos += written;
5891
5892         return written;
5893 }
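/*
 * Usage sketch (illustrative): this handler typically backs the
 * "trace_marker" file, letting userspace inject a TRACE_PRINT entry:
 *
 *	# echo "hello from userspace" > /sys/kernel/tracing/trace_marker
 *
 * The string then shows up inline in the trace output; if the copy from
 * userspace faults, "<faulted>" is recorded instead.
 */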
5894
5895 /* Limit it for now to 3K (including tag) */
5896 #define RAW_DATA_MAX_SIZE (1024*3)
5897
5898 static ssize_t
5899 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5900                                         size_t cnt, loff_t *fpos)
5901 {
5902         struct trace_array *tr = filp->private_data;
5903         struct ring_buffer_event *event;
5904         struct ring_buffer *buffer;
5905         struct raw_data_entry *entry;
5906         const char faulted[] = "<faulted>";
5907         unsigned long irq_flags;
5908         ssize_t written;
5909         int size;
5910         int len;
5911
5912 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5913
5914         if (tracing_disabled)
5915                 return -EINVAL;
5916
5917         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5918                 return -EINVAL;
5919
5920         /* The marker must at least have a tag id */
5921         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5922                 return -EINVAL;
5923
5924         if (cnt > TRACE_BUF_SIZE)
5925                 cnt = TRACE_BUF_SIZE;
5926
5927         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5928
5929         local_save_flags(irq_flags);
5930         size = sizeof(*entry) + cnt;
5931         if (cnt < FAULT_SIZE_ID)
5932                 size += FAULT_SIZE_ID - cnt;
5933
5934         buffer = tr->trace_buffer.buffer;
5935         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5936                                             irq_flags, preempt_count());
5937         if (!event)
5938                 /* Ring buffer disabled, return as if not open for write */
5939                 return -EBADF;
5940
5941         entry = ring_buffer_event_data(event);
5942
5943         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5944         if (len) {
5945                 entry->id = -1;
5946                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5947                 written = -EFAULT;
5948         } else
5949                 written = cnt;
5950
5951         __buffer_unlock_commit(buffer, event);
5952
5953         if (written > 0)
5954                 *fpos += written;
5955
5956         return written;
5957 }
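/*
 * Usage sketch (illustrative, a minimal userspace snippet under assumed
 * paths): this handler typically backs "trace_marker_raw", which expects
 * a binary blob that starts with an int tag id followed by payload bytes:
 *
 *	struct { int id; char data[8]; } rec = { 42, "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &rec, sizeof(rec));
 *
 * (headers and error handling omitted; per the checks above the write
 * must be at least sizeof(int) and at most RAW_DATA_MAX_SIZE bytes)
 */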
5958
5959 static int tracing_clock_show(struct seq_file *m, void *v)
5960 {
5961         struct trace_array *tr = m->private;
5962         int i;
5963
5964         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5965                 seq_printf(m,
5966                         "%s%s%s%s", i ? " " : "",
5967                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5968                         i == tr->clock_id ? "]" : "");
5969         seq_putc(m, '\n');
5970
5971         return 0;
5972 }
5973
5974 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5975 {
5976         int i;
5977
5978         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5979                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5980                         break;
5981         }
5982         if (i == ARRAY_SIZE(trace_clocks))
5983                 return -EINVAL;
5984
5985         mutex_lock(&trace_types_lock);
5986
5987         tr->clock_id = i;
5988
5989         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5990
5991         /*
5992          * New clock may not be consistent with the previous clock.
5993          * Reset the buffer so that it doesn't have incomparable timestamps.
5994          */
5995         tracing_reset_online_cpus(&tr->trace_buffer);
5996
5997 #ifdef CONFIG_TRACER_MAX_TRACE
5998         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5999                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6000         tracing_reset_online_cpus(&tr->max_buffer);
6001 #endif
6002
6003         mutex_unlock(&trace_types_lock);
6004
6005         return 0;
6006 }
6007
6008 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6009                                    size_t cnt, loff_t *fpos)
6010 {
6011         struct seq_file *m = filp->private_data;
6012         struct trace_array *tr = m->private;
6013         char buf[64];
6014         const char *clockstr;
6015         int ret;
6016
6017         if (cnt >= sizeof(buf))
6018                 return -EINVAL;
6019
6020         if (copy_from_user(buf, ubuf, cnt))
6021                 return -EFAULT;
6022
6023         buf[cnt] = 0;
6024
6025         clockstr = strstrip(buf);
6026
6027         ret = tracing_set_clock(tr, clockstr);
6028         if (ret)
6029                 return ret;
6030
6031         *fpos += cnt;
6032
6033         return cnt;
6034 }
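/*
 * Usage sketch (illustrative): these handlers typically back the
 * "trace_clock" file. Reading lists the available clocks with the active
 * one in brackets; writing a name switches clocks and resets the buffers:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono ...
 *	# echo global > /sys/kernel/tracing/trace_clock
 *
 * (the exact clock list depends on the build; path is an assumption)
 */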
6035
6036 static int tracing_clock_open(struct inode *inode, struct file *file)
6037 {
6038         struct trace_array *tr = inode->i_private;
6039         int ret;
6040
6041         if (tracing_disabled)
6042                 return -ENODEV;
6043
6044         if (trace_array_get(tr))
6045                 return -ENODEV;
6046
6047         ret = single_open(file, tracing_clock_show, inode->i_private);
6048         if (ret < 0)
6049                 trace_array_put(tr);
6050
6051         return ret;
6052 }
6053
6054 struct ftrace_buffer_info {
6055         struct trace_iterator   iter;
6056         void                    *spare;
6057         unsigned int            spare_cpu;
6058         unsigned int            read;
6059 };
6060
6061 #ifdef CONFIG_TRACER_SNAPSHOT
6062 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6063 {
6064         struct trace_array *tr = inode->i_private;
6065         struct trace_iterator *iter;
6066         struct seq_file *m;
6067         int ret = 0;
6068
6069         if (trace_array_get(tr) < 0)
6070                 return -ENODEV;
6071
6072         if (file->f_mode & FMODE_READ) {
6073                 iter = __tracing_open(inode, file, true);
6074                 if (IS_ERR(iter))
6075                         ret = PTR_ERR(iter);
6076         } else {
6077                 /* Writes still need the seq_file to hold the private data */
6078                 ret = -ENOMEM;
6079                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6080                 if (!m)
6081                         goto out;
6082                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6083                 if (!iter) {
6084                         kfree(m);
6085                         goto out;
6086                 }
6087                 ret = 0;
6088
6089                 iter->tr = tr;
6090                 iter->trace_buffer = &tr->max_buffer;
6091                 iter->cpu_file = tracing_get_cpu(inode);
6092                 m->private = iter;
6093                 file->private_data = m;
6094         }
6095 out:
6096         if (ret < 0)
6097                 trace_array_put(tr);
6098
6099         return ret;
6100 }
6101
6102 static ssize_t
6103 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6104                        loff_t *ppos)
6105 {
6106         struct seq_file *m = filp->private_data;
6107         struct trace_iterator *iter = m->private;
6108         struct trace_array *tr = iter->tr;
6109         unsigned long val;
6110         int ret;
6111
6112         ret = tracing_update_buffers();
6113         if (ret < 0)
6114                 return ret;
6115
6116         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6117         if (ret)
6118                 return ret;
6119
6120         mutex_lock(&trace_types_lock);
6121
6122         if (tr->current_trace->use_max_tr) {
6123                 ret = -EBUSY;
6124                 goto out;
6125         }
6126
6127         switch (val) {
6128         case 0:
6129                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6130                         ret = -EINVAL;
6131                         break;
6132                 }
6133                 if (tr->allocated_snapshot)
6134                         free_snapshot(tr);
6135                 break;
6136         case 1:
6137 /* Only allow per-cpu swap if the ring buffer supports it */
6138 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6139                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6140                         ret = -EINVAL;
6141                         break;
6142                 }
6143 #endif
6144                 if (!tr->allocated_snapshot) {
6145                         ret = alloc_snapshot(tr);
6146                         if (ret < 0)
6147                                 break;
6148                 }
6149                 local_irq_disable();
6150                 /* Now, we're going to swap */
6151                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6152                         update_max_tr(tr, current, smp_processor_id());
6153                 else
6154                         update_max_tr_single(tr, current, iter->cpu_file);
6155                 local_irq_enable();
6156                 break;
6157         default:
6158                 if (tr->allocated_snapshot) {
6159                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6160                                 tracing_reset_online_cpus(&tr->max_buffer);
6161                         else
6162                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6163                 }
6164                 break;
6165         }
6166
6167         if (ret >= 0) {
6168                 *ppos += cnt;
6169                 ret = cnt;
6170         }
6171 out:
6172         mutex_unlock(&trace_types_lock);
6173         return ret;
6174 }
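/*
 * Usage sketch (illustrative): the write handler above typically backs
 * the "snapshot" file. Per the switch statement, with tracefs mounted at
 * /sys/kernel/tracing:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot   (allocate and take a snapshot)
 *	# cat /sys/kernel/tracing/snapshot        (read the snapshot)
 *	# echo 0 > /sys/kernel/tracing/snapshot   (free the snapshot buffer)
 *
 * Any other value just clears the snapshot buffer contents.
 */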
6175
6176 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6177 {
6178         struct seq_file *m = file->private_data;
6179         int ret;
6180
6181         ret = tracing_release(inode, file);
6182
6183         if (file->f_mode & FMODE_READ)
6184                 return ret;
6185
6186         /* If write only, the seq_file is just a stub */
6187         if (m)
6188                 kfree(m->private);
6189         kfree(m);
6190
6191         return 0;
6192 }
6193
6194 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6195 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6196                                     size_t count, loff_t *ppos);
6197 static int tracing_buffers_release(struct inode *inode, struct file *file);
6198 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6199                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6200
6201 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6202 {
6203         struct ftrace_buffer_info *info;
6204         int ret;
6205
6206         ret = tracing_buffers_open(inode, filp);
6207         if (ret < 0)
6208                 return ret;
6209
6210         info = filp->private_data;
6211
6212         if (info->iter.trace->use_max_tr) {
6213                 tracing_buffers_release(inode, filp);
6214                 return -EBUSY;
6215         }
6216
6217         info->iter.snapshot = true;
6218         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6219
6220         return ret;
6221 }
6222
6223 #endif /* CONFIG_TRACER_SNAPSHOT */
6224
6225
6226 static const struct file_operations tracing_thresh_fops = {
6227         .open           = tracing_open_generic,
6228         .read           = tracing_thresh_read,
6229         .write          = tracing_thresh_write,
6230         .llseek         = generic_file_llseek,
6231 };
6232
6233 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6234 static const struct file_operations tracing_max_lat_fops = {
6235         .open           = tracing_open_generic,
6236         .read           = tracing_max_lat_read,
6237         .write          = tracing_max_lat_write,
6238         .llseek         = generic_file_llseek,
6239 };
6240 #endif
6241
6242 static const struct file_operations set_tracer_fops = {
6243         .open           = tracing_open_generic,
6244         .read           = tracing_set_trace_read,
6245         .write          = tracing_set_trace_write,
6246         .llseek         = generic_file_llseek,
6247 };
6248
6249 static const struct file_operations tracing_pipe_fops = {
6250         .open           = tracing_open_pipe,
6251         .poll           = tracing_poll_pipe,
6252         .read           = tracing_read_pipe,
6253         .splice_read    = tracing_splice_read_pipe,
6254         .release        = tracing_release_pipe,
6255         .llseek         = no_llseek,
6256 };
6257
6258 static const struct file_operations tracing_entries_fops = {
6259         .open           = tracing_open_generic_tr,
6260         .read           = tracing_entries_read,
6261         .write          = tracing_entries_write,
6262         .llseek         = generic_file_llseek,
6263         .release        = tracing_release_generic_tr,
6264 };
6265
6266 static const struct file_operations tracing_total_entries_fops = {
6267         .open           = tracing_open_generic_tr,
6268         .read           = tracing_total_entries_read,
6269         .llseek         = generic_file_llseek,
6270         .release        = tracing_release_generic_tr,
6271 };
6272
6273 static const struct file_operations tracing_free_buffer_fops = {
6274         .open           = tracing_open_generic_tr,
6275         .write          = tracing_free_buffer_write,
6276         .release        = tracing_free_buffer_release,
6277 };
6278
6279 static const struct file_operations tracing_mark_fops = {
6280         .open           = tracing_open_generic_tr,
6281         .write          = tracing_mark_write,
6282         .llseek         = generic_file_llseek,
6283         .release        = tracing_release_generic_tr,
6284 };
6285
6286 static const struct file_operations tracing_mark_raw_fops = {
6287         .open           = tracing_open_generic_tr,
6288         .write          = tracing_mark_raw_write,
6289         .llseek         = generic_file_llseek,
6290         .release        = tracing_release_generic_tr,
6291 };
6292
6293 static const struct file_operations trace_clock_fops = {
6294         .open           = tracing_clock_open,
6295         .read           = seq_read,
6296         .llseek         = seq_lseek,
6297         .release        = tracing_single_release_tr,
6298         .write          = tracing_clock_write,
6299 };
6300
6301 #ifdef CONFIG_TRACER_SNAPSHOT
6302 static const struct file_operations snapshot_fops = {
6303         .open           = tracing_snapshot_open,
6304         .read           = seq_read,
6305         .write          = tracing_snapshot_write,
6306         .llseek         = tracing_lseek,
6307         .release        = tracing_snapshot_release,
6308 };
6309
6310 static const struct file_operations snapshot_raw_fops = {
6311         .open           = snapshot_raw_open,
6312         .read           = tracing_buffers_read,
6313         .release        = tracing_buffers_release,
6314         .splice_read    = tracing_buffers_splice_read,
6315         .llseek         = no_llseek,
6316 };
6317
6318 #endif /* CONFIG_TRACER_SNAPSHOT */
6319
6320 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6321 {
6322         struct trace_array *tr = inode->i_private;
6323         struct ftrace_buffer_info *info;
6324         int ret;
6325
6326         if (tracing_disabled)
6327                 return -ENODEV;
6328
6329         if (trace_array_get(tr) < 0)
6330                 return -ENODEV;
6331
6332         info = kzalloc(sizeof(*info), GFP_KERNEL);
6333         if (!info) {
6334                 trace_array_put(tr);
6335                 return -ENOMEM;
6336         }
6337
6338         mutex_lock(&trace_types_lock);
6339
6340         info->iter.tr           = tr;
6341         info->iter.cpu_file     = tracing_get_cpu(inode);
6342         info->iter.trace        = tr->current_trace;
6343         info->iter.trace_buffer = &tr->trace_buffer;
6344         info->spare             = NULL;
6345         /* Force reading ring buffer for first read */
6346         info->read              = (unsigned int)-1;
6347
6348         filp->private_data = info;
6349
6350         tr->current_trace->ref++;
6351
6352         mutex_unlock(&trace_types_lock);
6353
6354         ret = nonseekable_open(inode, filp);
6355         if (ret < 0)
6356                 trace_array_put(tr);
6357
6358         return ret;
6359 }
6360
6361 static unsigned int
6362 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6363 {
6364         struct ftrace_buffer_info *info = filp->private_data;
6365         struct trace_iterator *iter = &info->iter;
6366
6367         return trace_poll(iter, filp, poll_table);
6368 }
6369
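/*
 * read() side of tracing_buffers_fops (the per-cpu "trace_pipe_raw" file,
 * also reused by snapshot_raw_fops): data is copied out a page at a time
 * through a "spare" page obtained from ring_buffer_alloc_read_page() and
 * filled by ring_buffer_read_page().
 */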
6370 static ssize_t
6371 tracing_buffers_read(struct file *filp, char __user *ubuf,
6372                      size_t count, loff_t *ppos)
6373 {
6374         struct ftrace_buffer_info *info = filp->private_data;
6375         struct trace_iterator *iter = &info->iter;
6376         ssize_t ret;
6377         ssize_t size;
6378
6379         if (!count)
6380                 return 0;
6381
6382 #ifdef CONFIG_TRACER_MAX_TRACE
6383         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6384                 return -EBUSY;
6385 #endif
6386
6387         if (!info->spare) {
6388                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6389                                                           iter->cpu_file);
6390                 info->spare_cpu = iter->cpu_file;
6391         }
6392         if (!info->spare)
6393                 return -ENOMEM;
6394
6395         /* Do we have previous read data to read? */
6396         if (info->read < PAGE_SIZE)
6397                 goto read;
6398
6399  again:
6400         trace_access_lock(iter->cpu_file);
6401         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6402                                     &info->spare,
6403                                     count,
6404                                     iter->cpu_file, 0);
6405         trace_access_unlock(iter->cpu_file);
6406
6407         if (ret < 0) {
6408                 if (trace_empty(iter)) {
6409                         if ((filp->f_flags & O_NONBLOCK))
6410                                 return -EAGAIN;
6411
6412                         ret = wait_on_pipe(iter, false);
6413                         if (ret)
6414                                 return ret;
6415
6416                         goto again;
6417                 }
6418                 return 0;
6419         }
6420
6421         info->read = 0;
6422  read:
6423         size = PAGE_SIZE - info->read;
6424         if (size > count)
6425                 size = count;
6426
6427         ret = copy_to_user(ubuf, info->spare + info->read, size);
6428         if (ret == size)
6429                 return -EFAULT;
6430
6431         size -= ret;
6432
6433         *ppos += size;
6434         info->read += size;
6435
6436         return size;
6437 }
6438
6439 static int tracing_buffers_release(struct inode *inode, struct file *file)
6440 {
6441         struct ftrace_buffer_info *info = file->private_data;
6442         struct trace_iterator *iter = &info->iter;
6443
6444         mutex_lock(&trace_types_lock);
6445
6446         iter->tr->current_trace->ref--;
6447
6448         __trace_array_put(iter->tr);
6449
6450         if (info->spare)
6451                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6452                                            info->spare_cpu, info->spare);
6453         kfree(info);
6454
6455         mutex_unlock(&trace_types_lock);
6456
6457         return 0;
6458 }
6459
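/*
 * A reference-counted wrapper around a ring buffer page handed out to a
 * pipe by splice: the page is only returned to the ring buffer once the
 * last holder drops its reference (see buffer_pipe_buf_release() and
 * buffer_spd_release() below).
 */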
6460 struct buffer_ref {
6461         struct ring_buffer      *buffer;
6462         void                    *page;
6463         int                     cpu;
6464         int                     ref;
6465 };
6466
6467 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6468                                     struct pipe_buffer *buf)
6469 {
6470         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6471
6472         if (--ref->ref)
6473                 return;
6474
6475         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6476         kfree(ref);
6477         buf->private = 0;
6478 }
6479
6480 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6481                                 struct pipe_buffer *buf)
6482 {
6483         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6484
6485         ref->ref++;
6486 }
6487
6488 /* Pipe buffer operations for a buffer. */
6489 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6490         .can_merge              = 0,
6491         .confirm                = generic_pipe_buf_confirm,
6492         .release                = buffer_pipe_buf_release,
6493         .steal                  = generic_pipe_buf_steal,
6494         .get                    = buffer_pipe_buf_get,
6495 };
6496
6497 /*
6498  * Callback from splice_to_pipe(); releases any pages left over in the
6499  * spd in case we errored out while filling the pipe.
6500  */
6501 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6502 {
6503         struct buffer_ref *ref =
6504                 (struct buffer_ref *)spd->partial[i].private;
6505
6506         if (--ref->ref)
6507                 return;
6508
6509         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6510         kfree(ref);
6511         spd->partial[i].private = 0;
6512 }
6513
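/*
 * Splice whole ring buffer pages into a pipe without copying them through
 * user space: each page is wrapped in a buffer_ref and handed back to the
 * ring buffer by the pipe buffer release callbacks above once the consumer
 * is done with it.
 */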
6514 static ssize_t
6515 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6516                             struct pipe_inode_info *pipe, size_t len,
6517                             unsigned int flags)
6518 {
6519         struct ftrace_buffer_info *info = file->private_data;
6520         struct trace_iterator *iter = &info->iter;
6521         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6522         struct page *pages_def[PIPE_DEF_BUFFERS];
6523         struct splice_pipe_desc spd = {
6524                 .pages          = pages_def,
6525                 .partial        = partial_def,
6526                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6527                 .ops            = &buffer_pipe_buf_ops,
6528                 .spd_release    = buffer_spd_release,
6529         };
6530         struct buffer_ref *ref;
6531         int entries, size, i;
6532         ssize_t ret = 0;
6533
6534 #ifdef CONFIG_TRACER_MAX_TRACE
6535         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6536                 return -EBUSY;
6537 #endif
6538
6539         if (*ppos & (PAGE_SIZE - 1))
6540                 return -EINVAL;
6541
6542         if (len & (PAGE_SIZE - 1)) {
6543                 if (len < PAGE_SIZE)
6544                         return -EINVAL;
6545                 len &= PAGE_MASK;
6546         }
6547
6548         if (splice_grow_spd(pipe, &spd))
6549                 return -ENOMEM;
6550
6551  again:
6552         trace_access_lock(iter->cpu_file);
6553         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6554
6555         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6556                 struct page *page;
6557                 int r;
6558
6559                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6560                 if (!ref) {
6561                         ret = -ENOMEM;
6562                         break;
6563                 }
6564
6565                 ref->ref = 1;
6566                 ref->buffer = iter->trace_buffer->buffer;
6567                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6568                 if (!ref->page) {
6569                         ret = -ENOMEM;
6570                         kfree(ref);
6571                         break;
6572                 }
6573                 ref->cpu = iter->cpu_file;
6574
6575                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6576                                           len, iter->cpu_file, 1);
6577                 if (r < 0) {
6578                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6579                                                    ref->page);
6580                         kfree(ref);
6581                         break;
6582                 }
6583
6584                 /*
6585                  * Zero out any leftover data; this is going to
6586                  * user land.
6587                  */
6588                 size = ring_buffer_page_len(ref->page);
6589                 if (size < PAGE_SIZE)
6590                         memset(ref->page + size, 0, PAGE_SIZE - size);
6591
6592                 page = virt_to_page(ref->page);
6593
6594                 spd.pages[i] = page;
6595                 spd.partial[i].len = PAGE_SIZE;
6596                 spd.partial[i].offset = 0;
6597                 spd.partial[i].private = (unsigned long)ref;
6598                 spd.nr_pages++;
6599                 *ppos += PAGE_SIZE;
6600
6601                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6602         }
6603
6604         trace_access_unlock(iter->cpu_file);
6605         spd.nr_pages = i;
6606
6607         /* did we read anything? */
6608         if (!spd.nr_pages) {
6609                 if (ret)
6610                         goto out;
6611
6612                 ret = -EAGAIN;
6613                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6614                         goto out;
6615
6616                 ret = wait_on_pipe(iter, true);
6617                 if (ret)
6618                         goto out;
6619
6620                 goto again;
6621         }
6622
6623         ret = splice_to_pipe(pipe, &spd);
6624 out:
6625         splice_shrink_spd(&spd);
6626
6627         return ret;
6628 }
6629
6630 static const struct file_operations tracing_buffers_fops = {
6631         .open           = tracing_buffers_open,
6632         .read           = tracing_buffers_read,
6633         .poll           = tracing_buffers_poll,
6634         .release        = tracing_buffers_release,
6635         .splice_read    = tracing_buffers_splice_read,
6636         .llseek         = no_llseek,
6637 };
6638
6639 static ssize_t
6640 tracing_stats_read(struct file *filp, char __user *ubuf,
6641                    size_t count, loff_t *ppos)
6642 {
6643         struct inode *inode = file_inode(filp);
6644         struct trace_array *tr = inode->i_private;
6645         struct trace_buffer *trace_buf = &tr->trace_buffer;
6646         int cpu = tracing_get_cpu(inode);
6647         struct trace_seq *s;
6648         unsigned long cnt;
6649         unsigned long long t;
6650         unsigned long usec_rem;
6651
6652         s = kmalloc(sizeof(*s), GFP_KERNEL);
6653         if (!s)
6654                 return -ENOMEM;
6655
6656         trace_seq_init(s);
6657
6658         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6659         trace_seq_printf(s, "entries: %ld\n", cnt);
6660
6661         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6662         trace_seq_printf(s, "overrun: %ld\n", cnt);
6663
6664         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6665         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6666
6667         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6668         trace_seq_printf(s, "bytes: %ld\n", cnt);
6669
6670         if (trace_clocks[tr->clock_id].in_ns) {
6671                 /* local or global for trace_clock */
6672                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6673                 usec_rem = do_div(t, USEC_PER_SEC);
6674                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6675                                                                 t, usec_rem);
6676
6677                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6678                 usec_rem = do_div(t, USEC_PER_SEC);
6679                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6680         } else {
6681                 /* counter or tsc mode for trace_clock */
6682                 trace_seq_printf(s, "oldest event ts: %llu\n",
6683                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6684
6685                 trace_seq_printf(s, "now ts: %llu\n",
6686                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6687         }
6688
6689         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6690         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6691
6692         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6693         trace_seq_printf(s, "read events: %ld\n", cnt);
6694
6695         count = simple_read_from_buffer(ubuf, count, ppos,
6696                                         s->buffer, trace_seq_used(s));
6697
6698         kfree(s);
6699
6700         return count;
6701 }
6702
6703 static const struct file_operations tracing_stats_fops = {
6704         .open           = tracing_open_generic_tr,
6705         .read           = tracing_stats_read,
6706         .llseek         = generic_file_llseek,
6707         .release        = tracing_release_generic_tr,
6708 };
6709
6710 #ifdef CONFIG_DYNAMIC_FTRACE
6711
6712 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6713 {
6714         return 0;
6715 }
6716
6717 static ssize_t
6718 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6719                   size_t cnt, loff_t *ppos)
6720 {
6721         static char ftrace_dyn_info_buffer[1024];
6722         static DEFINE_MUTEX(dyn_info_mutex);
6723         unsigned long *p = filp->private_data;
6724         char *buf = ftrace_dyn_info_buffer;
6725         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6726         int r;
6727
6728         mutex_lock(&dyn_info_mutex);
6729         r = sprintf(buf, "%ld ", *p);
6730
6731         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6732         buf[r++] = '\n';
6733
6734         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6735
6736         mutex_unlock(&dyn_info_mutex);
6737
6738         return r;
6739 }
6740
6741 static const struct file_operations tracing_dyn_info_fops = {
6742         .open           = tracing_open_generic,
6743         .read           = tracing_read_dyn_info,
6744         .llseek         = generic_file_llseek,
6745 };
6746 #endif /* CONFIG_DYNAMIC_FTRACE */
6747
6748 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6749 static void
6750 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6751                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6752                 void *data)
6753 {
6754         tracing_snapshot_instance(tr);
6755 }
6756
6757 static void
6758 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6759                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6760                       void *data)
6761 {
6762         struct ftrace_func_mapper *mapper = data;
6763         long *count = NULL;
6764
6765         if (mapper)
6766                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6767
6768         if (count) {
6769
6770                 if (*count <= 0)
6771                         return;
6772
6773                 (*count)--;
6774         }
6775
6776         tracing_snapshot_instance(tr);
6777 }
6778
6779 static int
6780 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6781                       struct ftrace_probe_ops *ops, void *data)
6782 {
6783         struct ftrace_func_mapper *mapper = data;
6784         long *count = NULL;
6785
6786         seq_printf(m, "%ps:", (void *)ip);
6787
6788         seq_puts(m, "snapshot");
6789
6790         if (mapper)
6791                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6792
6793         if (count)
6794                 seq_printf(m, ":count=%ld\n", *count);
6795         else
6796                 seq_puts(m, ":unlimited\n");
6797
6798         return 0;
6799 }
6800
6801 static int
6802 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6803                      unsigned long ip, void *init_data, void **data)
6804 {
6805         struct ftrace_func_mapper *mapper = *data;
6806
6807         if (!mapper) {
6808                 mapper = allocate_ftrace_func_mapper();
6809                 if (!mapper)
6810                         return -ENOMEM;
6811                 *data = mapper;
6812         }
6813
6814         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6815 }
6816
6817 static void
6818 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6819                      unsigned long ip, void *data)
6820 {
6821         struct ftrace_func_mapper *mapper = data;
6822
6823         if (!ip) {
6824                 if (!mapper)
6825                         return;
6826                 free_ftrace_func_mapper(mapper, NULL);
6827                 return;
6828         }
6829
6830         ftrace_func_mapper_remove_ip(mapper, ip);
6831 }
6832
6833 static struct ftrace_probe_ops snapshot_probe_ops = {
6834         .func                   = ftrace_snapshot,
6835         .print                  = ftrace_snapshot_print,
6836 };
6837
6838 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6839         .func                   = ftrace_count_snapshot,
6840         .print                  = ftrace_snapshot_print,
6841         .init                   = ftrace_snapshot_init,
6842         .free                   = ftrace_snapshot_free,
6843 };
6844
6845 static int
6846 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6847                                char *glob, char *cmd, char *param, int enable)
6848 {
6849         struct ftrace_probe_ops *ops;
6850         void *count = (void *)-1;
6851         char *number;
6852         int ret;
6853
6854         /* hash funcs only work with set_ftrace_filter */
6855         if (!enable)
6856                 return -EINVAL;
6857
6858         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6859
6860         if (glob[0] == '!')
6861                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6862
6863         if (!param)
6864                 goto out_reg;
6865
6866         number = strsep(&param, ":");
6867
6868         if (!strlen(number))
6869                 goto out_reg;
6870
6871         /*
6872          * We use the callback data field (which is a pointer)
6873          * as our counter.
6874          */
6875         ret = kstrtoul(number, 0, (unsigned long *)&count);
6876         if (ret)
6877                 return ret;
6878
6879  out_reg:
6880         ret = alloc_snapshot(tr);
6881         if (ret < 0)
6882                 goto out;
6883
6884         ret = register_ftrace_function_probe(glob, tr, ops, count);
6885
6886  out:
6887         return ret < 0 ? ret : 0;
6888 }
6889
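/*
 * The "snapshot" function command registered below for set_ftrace_filter.
 * Illustrative use from user space (the function name is only an example):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter     # snapshot on every hit
 *   echo 'schedule:snapshot:3' > set_ftrace_filter   # only for the first 3 hits
 *   echo '!schedule:snapshot' > set_ftrace_filter    # remove the probe
 */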
6890 static struct ftrace_func_command ftrace_snapshot_cmd = {
6891         .name                   = "snapshot",
6892         .func                   = ftrace_trace_snapshot_callback,
6893 };
6894
6895 static __init int register_snapshot_cmd(void)
6896 {
6897         return register_ftrace_command(&ftrace_snapshot_cmd);
6898 }
6899 #else
6900 static inline __init int register_snapshot_cmd(void) { return 0; }
6901 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6902
6903 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6904 {
6905         if (WARN_ON(!tr->dir))
6906                 return ERR_PTR(-ENODEV);
6907
6908         /* Top directory uses NULL as the parent */
6909         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6910                 return NULL;
6911
6912         /* All sub buffers have a descriptor */
6913         return tr->dir;
6914 }
6915
6916 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6917 {
6918         struct dentry *d_tracer;
6919
6920         if (tr->percpu_dir)
6921                 return tr->percpu_dir;
6922
6923         d_tracer = tracing_get_dentry(tr);
6924         if (IS_ERR(d_tracer))
6925                 return NULL;
6926
6927         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6928
6929         WARN_ONCE(!tr->percpu_dir,
6930                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6931
6932         return tr->percpu_dir;
6933 }
6934
6935 static struct dentry *
6936 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6937                       void *data, long cpu, const struct file_operations *fops)
6938 {
6939         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6940
6941         if (ret) /* See tracing_get_cpu() */
6942                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6943         return ret;
6944 }
6945
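/*
 * Per-CPU directory layout created below (sketch, e.g. for CPU 0):
 *
 *   per_cpu/cpu0/trace_pipe
 *   per_cpu/cpu0/trace
 *   per_cpu/cpu0/trace_pipe_raw
 *   per_cpu/cpu0/stats
 *   per_cpu/cpu0/buffer_size_kb
 *   per_cpu/cpu0/snapshot        (CONFIG_TRACER_SNAPSHOT only)
 *   per_cpu/cpu0/snapshot_raw    (CONFIG_TRACER_SNAPSHOT only)
 */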
6946 static void
6947 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6948 {
6949         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6950         struct dentry *d_cpu;
6951         char cpu_dir[30]; /* 30 characters should be more than enough */
6952
6953         if (!d_percpu)
6954                 return;
6955
6956         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6957         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6958         if (!d_cpu) {
6959                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6960                 return;
6961         }
6962
6963         /* per cpu trace_pipe */
6964         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6965                                 tr, cpu, &tracing_pipe_fops);
6966
6967         /* per cpu trace */
6968         trace_create_cpu_file("trace", 0644, d_cpu,
6969                                 tr, cpu, &tracing_fops);
6970
6971         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6972                                 tr, cpu, &tracing_buffers_fops);
6973
6974         trace_create_cpu_file("stats", 0444, d_cpu,
6975                                 tr, cpu, &tracing_stats_fops);
6976
6977         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6978                                 tr, cpu, &tracing_entries_fops);
6979
6980 #ifdef CONFIG_TRACER_SNAPSHOT
6981         trace_create_cpu_file("snapshot", 0644, d_cpu,
6982                                 tr, cpu, &snapshot_fops);
6983
6984         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6985                                 tr, cpu, &snapshot_raw_fops);
6986 #endif
6987 }
6988
6989 #ifdef CONFIG_FTRACE_SELFTEST
6990 /* Let selftest have access to static functions in this file */
6991 #include "trace_selftest.c"
6992 #endif
6993
6994 static ssize_t
6995 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6996                         loff_t *ppos)
6997 {
6998         struct trace_option_dentry *topt = filp->private_data;
6999         char *buf;
7000
7001         if (topt->flags->val & topt->opt->bit)
7002                 buf = "1\n";
7003         else
7004                 buf = "0\n";
7005
7006         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7007 }
7008
7009 static ssize_t
7010 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7011                          loff_t *ppos)
7012 {
7013         struct trace_option_dentry *topt = filp->private_data;
7014         unsigned long val;
7015         int ret;
7016
7017         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7018         if (ret)
7019                 return ret;
7020
7021         if (val != 0 && val != 1)
7022                 return -EINVAL;
7023
7024         if (!!(topt->flags->val & topt->opt->bit) != val) {
7025                 mutex_lock(&trace_types_lock);
7026                 ret = __set_tracer_option(topt->tr, topt->flags,
7027                                           topt->opt, !val);
7028                 mutex_unlock(&trace_types_lock);
7029                 if (ret)
7030                         return ret;
7031         }
7032
7033         *ppos += cnt;
7034
7035         return cnt;
7036 }
7037
7038
7039 static const struct file_operations trace_options_fops = {
7040         .open = tracing_open_generic,
7041         .read = trace_options_read,
7042         .write = trace_options_write,
7043         .llseek = generic_file_llseek,
7044 };
7045
7046 /*
7047  * In order to pass in both the trace_array descriptor as well as the index
7048  * to the flag that the trace option file represents, the trace_array
7049  * has a character array of trace_flags_index[], which holds the index
7050  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7051  * The address of the corresponding array element is passed to the flag
7052  * option file read/write callbacks.
7053  *
7054  * In order to extract both the index and the trace_array descriptor,
7055  * get_tr_index() uses the following algorithm.
7056  *
7057  *   idx = *ptr;
7058  *
7059  * This works because the pointer points at an index entry, and each
7060  * entry holds its own position in the array (remember index[1] == 1).
7061  *
7062  * Then, to get the trace_array descriptor, subtracting that index
7063  * from the ptr gets us to the start of the index array itself.
7064  *
7065  *   ptr - idx == &index[0]
7066  *
7067  * Then a simple container_of() from that pointer gets us to the
7068  * trace_array descriptor.
7069  */
7070 static void get_tr_index(void *data, struct trace_array **ptr,
7071                          unsigned int *pindex)
7072 {
7073         *pindex = *(unsigned char *)data;
7074
7075         *ptr = container_of(data - *pindex, struct trace_array,
7076                             trace_flags_index);
7077 }
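/*
 * A worked example (purely illustrative, for flag bit 3): data points at
 * tr->trace_flags_index[3], which holds the value 3, so *pindex == 3 and
 * (data - 3) == &tr->trace_flags_index[0]; container_of() then steps back
 * from that member to the enclosing trace_array.
 */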
7078
7079 static ssize_t
7080 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7081                         loff_t *ppos)
7082 {
7083         void *tr_index = filp->private_data;
7084         struct trace_array *tr;
7085         unsigned int index;
7086         char *buf;
7087
7088         get_tr_index(tr_index, &tr, &index);
7089
7090         if (tr->trace_flags & (1 << index))
7091                 buf = "1\n";
7092         else
7093                 buf = "0\n";
7094
7095         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7096 }
7097
7098 static ssize_t
7099 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7100                          loff_t *ppos)
7101 {
7102         void *tr_index = filp->private_data;
7103         struct trace_array *tr;
7104         unsigned int index;
7105         unsigned long val;
7106         int ret;
7107
7108         get_tr_index(tr_index, &tr, &index);
7109
7110         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7111         if (ret)
7112                 return ret;
7113
7114         if (val != 0 && val != 1)
7115                 return -EINVAL;
7116
7117         mutex_lock(&trace_types_lock);
7118         ret = set_tracer_flag(tr, 1 << index, val);
7119         mutex_unlock(&trace_types_lock);
7120
7121         if (ret < 0)
7122                 return ret;
7123
7124         *ppos += cnt;
7125
7126         return cnt;
7127 }
7128
7129 static const struct file_operations trace_options_core_fops = {
7130         .open = tracing_open_generic,
7131         .read = trace_options_core_read,
7132         .write = trace_options_core_write,
7133         .llseek = generic_file_llseek,
7134 };
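/*
 * Illustrative use of the core option files from user space (assuming the
 * default tracefs mount point):
 *
 *   cat /sys/kernel/tracing/options/sym-offset
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *
 * Tracer-specific options created by create_trace_option_file() below live
 * in the same "options" directory but use trace_options_fops instead.
 */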
7135
7136 struct dentry *trace_create_file(const char *name,
7137                                  umode_t mode,
7138                                  struct dentry *parent,
7139                                  void *data,
7140                                  const struct file_operations *fops)
7141 {
7142         struct dentry *ret;
7143
7144         ret = tracefs_create_file(name, mode, parent, data, fops);
7145         if (!ret)
7146                 pr_warn("Could not create tracefs '%s' entry\n", name);
7147
7148         return ret;
7149 }
7150
7151
7152 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7153 {
7154         struct dentry *d_tracer;
7155
7156         if (tr->options)
7157                 return tr->options;
7158
7159         d_tracer = tracing_get_dentry(tr);
7160         if (IS_ERR(d_tracer))
7161                 return NULL;
7162
7163         tr->options = tracefs_create_dir("options", d_tracer);
7164         if (!tr->options) {
7165                 pr_warn("Could not create tracefs directory 'options'\n");
7166                 return NULL;
7167         }
7168
7169         return tr->options;
7170 }
7171
7172 static void
7173 create_trace_option_file(struct trace_array *tr,
7174                          struct trace_option_dentry *topt,
7175                          struct tracer_flags *flags,
7176                          struct tracer_opt *opt)
7177 {
7178         struct dentry *t_options;
7179
7180         t_options = trace_options_init_dentry(tr);
7181         if (!t_options)
7182                 return;
7183
7184         topt->flags = flags;
7185         topt->opt = opt;
7186         topt->tr = tr;
7187
7188         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7189                                     &trace_options_fops);
7190
7191 }
7192
7193 static void
7194 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7195 {
7196         struct trace_option_dentry *topts;
7197         struct trace_options *tr_topts;
7198         struct tracer_flags *flags;
7199         struct tracer_opt *opts;
7200         int cnt;
7201         int i;
7202
7203         if (!tracer)
7204                 return;
7205
7206         flags = tracer->flags;
7207
7208         if (!flags || !flags->opts)
7209                 return;
7210
7211         /*
7212          * If this is an instance, only create flags for tracers
7213          * the instance may have.
7214          */
7215         if (!trace_ok_for_array(tracer, tr))
7216                 return;
7217
7218         for (i = 0; i < tr->nr_topts; i++) {
7219                 /* Make sure there are no duplicate flags. */
7220                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7221                         return;
7222         }
7223
7224         opts = flags->opts;
7225
7226         for (cnt = 0; opts[cnt].name; cnt++)
7227                 ;
7228
7229         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7230         if (!topts)
7231                 return;
7232
7233         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7234                             GFP_KERNEL);
7235         if (!tr_topts) {
7236                 kfree(topts);
7237                 return;
7238         }
7239
7240         tr->topts = tr_topts;
7241         tr->topts[tr->nr_topts].tracer = tracer;
7242         tr->topts[tr->nr_topts].topts = topts;
7243         tr->nr_topts++;
7244
7245         for (cnt = 0; opts[cnt].name; cnt++) {
7246                 create_trace_option_file(tr, &topts[cnt], flags,
7247                                          &opts[cnt]);
7248                 WARN_ONCE(topts[cnt].entry == NULL,
7249                           "Failed to create trace option: %s",
7250                           opts[cnt].name);
7251         }
7252 }
7253
7254 static struct dentry *
7255 create_trace_option_core_file(struct trace_array *tr,
7256                               const char *option, long index)
7257 {
7258         struct dentry *t_options;
7259
7260         t_options = trace_options_init_dentry(tr);
7261         if (!t_options)
7262                 return NULL;
7263
7264         return trace_create_file(option, 0644, t_options,
7265                                  (void *)&tr->trace_flags_index[index],
7266                                  &trace_options_core_fops);
7267 }
7268
7269 static void create_trace_options_dir(struct trace_array *tr)
7270 {
7271         struct dentry *t_options;
7272         bool top_level = tr == &global_trace;
7273         int i;
7274
7275         t_options = trace_options_init_dentry(tr);
7276         if (!t_options)
7277                 return;
7278
7279         for (i = 0; trace_options[i]; i++) {
7280                 if (top_level ||
7281                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7282                         create_trace_option_core_file(tr, trace_options[i], i);
7283         }
7284 }
7285
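/*
 * Backing for the "tracing_on" file: reading reports whether the ring
 * buffer is currently recording, writing 0 or 1 turns recording off/on,
 * e.g. (illustrative path):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 *   echo 1 > /sys/kernel/tracing/tracing_on
 */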
7286 static ssize_t
7287 rb_simple_read(struct file *filp, char __user *ubuf,
7288                size_t cnt, loff_t *ppos)
7289 {
7290         struct trace_array *tr = filp->private_data;
7291         char buf[64];
7292         int r;
7293
7294         r = tracer_tracing_is_on(tr);
7295         r = sprintf(buf, "%d\n", r);
7296
7297         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7298 }
7299
7300 static ssize_t
7301 rb_simple_write(struct file *filp, const char __user *ubuf,
7302                 size_t cnt, loff_t *ppos)
7303 {
7304         struct trace_array *tr = filp->private_data;
7305         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7306         unsigned long val;
7307         int ret;
7308
7309         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7310         if (ret)
7311                 return ret;
7312
7313         if (buffer) {
7314                 mutex_lock(&trace_types_lock);
7315                 if (val) {
7316                         tracer_tracing_on(tr);
7317                         if (tr->current_trace->start)
7318                                 tr->current_trace->start(tr);
7319                 } else {
7320                         tracer_tracing_off(tr);
7321                         if (tr->current_trace->stop)
7322                                 tr->current_trace->stop(tr);
7323                 }
7324                 mutex_unlock(&trace_types_lock);
7325         }
7326
7327         (*ppos)++;
7328
7329         return cnt;
7330 }
7331
7332 static const struct file_operations rb_simple_fops = {
7333         .open           = tracing_open_generic_tr,
7334         .read           = rb_simple_read,
7335         .write          = rb_simple_write,
7336         .release        = tracing_release_generic_tr,
7337         .llseek         = default_llseek,
7338 };
7339
7340 struct dentry *trace_instance_dir;
7341
7342 static void
7343 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7344
7345 static int
7346 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7347 {
7348         enum ring_buffer_flags rb_flags;
7349
7350         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7351
7352         buf->tr = tr;
7353
7354         buf->buffer = ring_buffer_alloc(size, rb_flags);
7355         if (!buf->buffer)
7356                 return -ENOMEM;
7357
7358         buf->data = alloc_percpu(struct trace_array_cpu);
7359         if (!buf->data) {
7360                 ring_buffer_free(buf->buffer);
7361                 return -ENOMEM;
7362         }
7363
7364         /* Allocate the first page for all buffers */
7365         set_buffer_entries(&tr->trace_buffer,
7366                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7367
7368         return 0;
7369 }
7370
7371 static int allocate_trace_buffers(struct trace_array *tr, int size)
7372 {
7373         int ret;
7374
7375         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7376         if (ret)
7377                 return ret;
7378
7379 #ifdef CONFIG_TRACER_MAX_TRACE
7380         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7381                                     allocate_snapshot ? size : 1);
7382         if (WARN_ON(ret)) {
7383                 ring_buffer_free(tr->trace_buffer.buffer);
7384                 free_percpu(tr->trace_buffer.data);
7385                 return -ENOMEM;
7386         }
7387         tr->allocated_snapshot = allocate_snapshot;
7388
7389         /*
7390          * Only the top level trace array gets its snapshot allocated
7391          * from the kernel command line.
7392          */
7393         allocate_snapshot = false;
7394 #endif
7395         return 0;
7396 }
7397
7398 static void free_trace_buffer(struct trace_buffer *buf)
7399 {
7400         if (buf->buffer) {
7401                 ring_buffer_free(buf->buffer);
7402                 buf->buffer = NULL;
7403                 free_percpu(buf->data);
7404                 buf->data = NULL;
7405         }
7406 }
7407
7408 static void free_trace_buffers(struct trace_array *tr)
7409 {
7410         if (!tr)
7411                 return;
7412
7413         free_trace_buffer(&tr->trace_buffer);
7414
7415 #ifdef CONFIG_TRACER_MAX_TRACE
7416         free_trace_buffer(&tr->max_buffer);
7417 #endif
7418 }
7419
7420 static void init_trace_flags_index(struct trace_array *tr)
7421 {
7422         int i;
7423
7424         /* Used by the trace options files */
7425         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7426                 tr->trace_flags_index[i] = i;
7427 }
7428
7429 static void __update_tracer_options(struct trace_array *tr)
7430 {
7431         struct tracer *t;
7432
7433         for (t = trace_types; t; t = t->next)
7434                 add_tracer_options(tr, t);
7435 }
7436
7437 static void update_tracer_options(struct trace_array *tr)
7438 {
7439         mutex_lock(&trace_types_lock);
7440         __update_tracer_options(tr);
7441         mutex_unlock(&trace_types_lock);
7442 }
7443
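/*
 * Trace instances are created and removed from user space with mkdir/rmdir
 * in the "instances" directory, e.g. (illustrative, assuming the default
 * tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 */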
7444 static int instance_mkdir(const char *name)
7445 {
7446         struct trace_array *tr;
7447         int ret;
7448
7449         mutex_lock(&trace_types_lock);
7450
7451         ret = -EEXIST;
7452         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7453                 if (tr->name && strcmp(tr->name, name) == 0)
7454                         goto out_unlock;
7455         }
7456
7457         ret = -ENOMEM;
7458         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7459         if (!tr)
7460                 goto out_unlock;
7461
7462         tr->name = kstrdup(name, GFP_KERNEL);
7463         if (!tr->name)
7464                 goto out_free_tr;
7465
7466         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7467                 goto out_free_tr;
7468
7469         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7470
7471         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7472
7473         raw_spin_lock_init(&tr->start_lock);
7474
7475         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7476
7477         tr->current_trace = &nop_trace;
7478
7479         INIT_LIST_HEAD(&tr->systems);
7480         INIT_LIST_HEAD(&tr->events);
7481
7482         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7483                 goto out_free_tr;
7484
7485         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7486         if (!tr->dir)
7487                 goto out_free_tr;
7488
7489         ret = event_trace_add_tracer(tr->dir, tr);
7490         if (ret) {
7491                 tracefs_remove_recursive(tr->dir);
7492                 goto out_free_tr;
7493         }
7494
7495         ftrace_init_trace_array(tr);
7496
7497         init_tracer_tracefs(tr, tr->dir);
7498         init_trace_flags_index(tr);
7499         __update_tracer_options(tr);
7500
7501         list_add(&tr->list, &ftrace_trace_arrays);
7502
7503         mutex_unlock(&trace_types_lock);
7504
7505         return 0;
7506
7507  out_free_tr:
7508         free_trace_buffers(tr);
7509         free_cpumask_var(tr->tracing_cpumask);
7510         kfree(tr->name);
7511         kfree(tr);
7512
7513  out_unlock:
7514         mutex_unlock(&trace_types_lock);
7515
7516         return ret;
7517
7518 }
7519
7520 static int instance_rmdir(const char *name)
7521 {
7522         struct trace_array *tr;
7523         int found = 0;
7524         int ret;
7525         int i;
7526
7527         mutex_lock(&trace_types_lock);
7528
7529         ret = -ENODEV;
7530         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7531                 if (tr->name && strcmp(tr->name, name) == 0) {
7532                         found = 1;
7533                         break;
7534                 }
7535         }
7536         if (!found)
7537                 goto out_unlock;
7538
7539         ret = -EBUSY;
7540         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7541                 goto out_unlock;
7542
7543         list_del(&tr->list);
7544
7545         /* Disable all the flags that were enabled coming in */
7546         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7547                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7548                         set_tracer_flag(tr, 1 << i, 0);
7549         }
7550
7551         tracing_set_nop(tr);
7552         event_trace_del_tracer(tr);
7553         ftrace_clear_pids(tr);
7554         ftrace_destroy_function_files(tr);
7555         tracefs_remove_recursive(tr->dir);
7556         free_trace_buffers(tr);
7557
7558         for (i = 0; i < tr->nr_topts; i++) {
7559                 kfree(tr->topts[i].topts);
7560         }
7561         kfree(tr->topts);
7562
7563         kfree(tr->name);
7564         kfree(tr);
7565
7566         ret = 0;
7567
7568  out_unlock:
7569         mutex_unlock(&trace_types_lock);
7570
7571         return ret;
7572 }
7573
7574 static __init void create_trace_instances(struct dentry *d_tracer)
7575 {
7576         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7577                                                          instance_mkdir,
7578                                                          instance_rmdir);
7579         if (WARN_ON(!trace_instance_dir))
7580                 return;
7581 }
7582
7583 static void
7584 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7585 {
7586         int cpu;
7587
7588         trace_create_file("available_tracers", 0444, d_tracer,
7589                         tr, &show_traces_fops);
7590
7591         trace_create_file("current_tracer", 0644, d_tracer,
7592                         tr, &set_tracer_fops);
7593
7594         trace_create_file("tracing_cpumask", 0644, d_tracer,
7595                           tr, &tracing_cpumask_fops);
7596
7597         trace_create_file("trace_options", 0644, d_tracer,
7598                           tr, &tracing_iter_fops);
7599
7600         trace_create_file("trace", 0644, d_tracer,
7601                           tr, &tracing_fops);
7602
7603         trace_create_file("trace_pipe", 0444, d_tracer,
7604                           tr, &tracing_pipe_fops);
7605
7606         trace_create_file("buffer_size_kb", 0644, d_tracer,
7607                           tr, &tracing_entries_fops);
7608
7609         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7610                           tr, &tracing_total_entries_fops);
7611
7612         trace_create_file("free_buffer", 0200, d_tracer,
7613                           tr, &tracing_free_buffer_fops);
7614
7615         trace_create_file("trace_marker", 0220, d_tracer,
7616                           tr, &tracing_mark_fops);
7617
7618         trace_create_file("trace_marker_raw", 0220, d_tracer,
7619                           tr, &tracing_mark_raw_fops);
7620
7621         trace_create_file("trace_clock", 0644, d_tracer, tr,
7622                           &trace_clock_fops);
7623
7624         trace_create_file("tracing_on", 0644, d_tracer,
7625                           tr, &rb_simple_fops);
7626
7627         create_trace_options_dir(tr);
7628
7629 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7630         trace_create_file("tracing_max_latency", 0644, d_tracer,
7631                         &tr->max_latency, &tracing_max_lat_fops);
7632 #endif
7633
7634         if (ftrace_create_function_files(tr, d_tracer))
7635                 WARN(1, "Could not allocate function filter files");
7636
7637 #ifdef CONFIG_TRACER_SNAPSHOT
7638         trace_create_file("snapshot", 0644, d_tracer,
7639                           tr, &snapshot_fops);
7640 #endif
7641
7642         for_each_tracing_cpu(cpu)
7643                 tracing_init_tracefs_percpu(tr, cpu);
7644
7645         ftrace_init_tracefs(tr, d_tracer);
7646 }
7647
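/*
 * With the automount below, the same tracing files are typically visible
 * both at /sys/kernel/tracing (tracefs) and, for older tools, at
 * /sys/kernel/debug/tracing (paths are the usual defaults, not mandated
 * here).
 */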
7648 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7649 {
7650         struct vfsmount *mnt;
7651         struct file_system_type *type;
7652
7653         /*
7654          * To maintain backward compatibility for tools that mount
7655          * debugfs to get to the tracing facility, tracefs is automatically
7656          * mounted to the debugfs/tracing directory.
7657          */
7658         type = get_fs_type("tracefs");
7659         if (!type)
7660                 return NULL;
7661         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7662         put_filesystem(type);
7663         if (IS_ERR(mnt))
7664                 return NULL;
7665         mntget(mnt);
7666
7667         return mnt;
7668 }
7669
7670 /**
7671  * tracing_init_dentry - initialize top level trace array
7672  *
7673  * This is called when creating files or directories in the tracing
7674  * directory. It is called via fs_initcall() by any of the boot up code
7675  * and expects to return the dentry of the top level tracing directory.
7676  */
7677 struct dentry *tracing_init_dentry(void)
7678 {
7679         struct trace_array *tr = &global_trace;
7680
7681         /* The top level trace array uses NULL as parent */
7682         if (tr->dir)
7683                 return NULL;
7684
7685         if (WARN_ON(!tracefs_initialized()) ||
7686                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7687                  WARN_ON(!debugfs_initialized())))
7688                 return ERR_PTR(-ENODEV);
7689
7690         /*
7691          * As there may still be users that expect the tracing
7692          * files to exist in debugfs/tracing, we must automount
7693          * the tracefs file system there, so older tools still
7694          * work with the newer kernel.
7695          */
7696         tr->dir = debugfs_create_automount("tracing", NULL,
7697                                            trace_automount, NULL);
7698         if (!tr->dir) {
7699                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7700                 return ERR_PTR(-ENOMEM);
7701         }
7702
7703         return NULL;
7704 }
7705
7706 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7707 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7708
7709 static void __init trace_enum_init(void)
7710 {
7711         int len;
7712
7713         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7714         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7715 }
7716
7717 #ifdef CONFIG_MODULES
7718 static void trace_module_add_enums(struct module *mod)
7719 {
7720         if (!mod->num_trace_enums)
7721                 return;
7722
7723         /*
7724          * Modules with bad taint do not have events created; do
7725          * not bother with enums either.
7726          */
7727         if (trace_module_has_bad_taint(mod))
7728                 return;
7729
7730         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7731 }
7732
7733 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7734 static void trace_module_remove_enums(struct module *mod)
7735 {
7736         union trace_enum_map_item *map;
7737         union trace_enum_map_item **last = &trace_enum_maps;
7738
7739         if (!mod->num_trace_enums)
7740                 return;
7741
7742         mutex_lock(&trace_enum_mutex);
7743
7744         map = trace_enum_maps;
7745
7746         while (map) {
7747                 if (map->head.mod == mod)
7748                         break;
7749                 map = trace_enum_jmp_to_tail(map);
7750                 last = &map->tail.next;
7751                 map = map->tail.next;
7752         }
7753         if (!map)
7754                 goto out;
7755
7756         *last = trace_enum_jmp_to_tail(map)->tail.next;
7757         kfree(map);
7758  out:
7759         mutex_unlock(&trace_enum_mutex);
7760 }
7761 #else
7762 static inline void trace_module_remove_enums(struct module *mod) { }
7763 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7764
7765 static int trace_module_notify(struct notifier_block *self,
7766                                unsigned long val, void *data)
7767 {
7768         struct module *mod = data;
7769
7770         switch (val) {
7771         case MODULE_STATE_COMING:
7772                 trace_module_add_enums(mod);
7773                 break;
7774         case MODULE_STATE_GOING:
7775                 trace_module_remove_enums(mod);
7776                 break;
7777         }
7778
7779         return 0;
7780 }
7781
7782 static struct notifier_block trace_module_nb = {
7783         .notifier_call = trace_module_notify,
7784         .priority = 0,
7785 };
7786 #endif /* CONFIG_MODULES */
7787
7788 static __init int tracer_init_tracefs(void)
7789 {
7790         struct dentry *d_tracer;
7791
7792         trace_access_lock_init();
7793
7794         d_tracer = tracing_init_dentry();
7795         if (IS_ERR(d_tracer))
7796                 return 0;
7797
7798         init_tracer_tracefs(&global_trace, d_tracer);
7799         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7800
7801         trace_create_file("tracing_thresh", 0644, d_tracer,
7802                         &global_trace, &tracing_thresh_fops);
7803
7804         trace_create_file("README", 0444, d_tracer,
7805                         NULL, &tracing_readme_fops);
7806
7807         trace_create_file("saved_cmdlines", 0444, d_tracer,
7808                         NULL, &tracing_saved_cmdlines_fops);
7809
7810         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7811                           NULL, &tracing_saved_cmdlines_size_fops);
7812
7813         trace_enum_init();
7814
7815         trace_create_enum_file(d_tracer);
7816
7817 #ifdef CONFIG_MODULES
7818         register_module_notifier(&trace_module_nb);
7819 #endif
7820
7821 #ifdef CONFIG_DYNAMIC_FTRACE
7822         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7823                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7824 #endif
7825
7826         create_trace_instances(d_tracer);
7827
7828         update_tracer_options(&global_trace);
7829
7830         return 0;
7831 }
7832
7833 static int trace_panic_handler(struct notifier_block *this,
7834                                unsigned long event, void *unused)
7835 {
7836         if (ftrace_dump_on_oops)
7837                 ftrace_dump(ftrace_dump_on_oops);
7838         return NOTIFY_OK;
7839 }
7840
7841 static struct notifier_block trace_panic_notifier = {
7842         .notifier_call  = trace_panic_handler,
7843         .next           = NULL,
7844         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7845 };
7846
7847 static int trace_die_handler(struct notifier_block *self,
7848                              unsigned long val,
7849                              void *data)
7850 {
7851         switch (val) {
7852         case DIE_OOPS:
7853                 if (ftrace_dump_on_oops)
7854                         ftrace_dump(ftrace_dump_on_oops);
7855                 break;
7856         default:
7857                 break;
7858         }
7859         return NOTIFY_OK;
7860 }
7861
7862 static struct notifier_block trace_die_notifier = {
7863         .notifier_call = trace_die_handler,
7864         .priority = 200
7865 };
7866
7867 /*
7868  * printk is set to a max of 1024; we really don't need it that big.
7869  * Nothing should be printing 1000 characters anyway.
7870  */
7871 #define TRACE_MAX_PRINT         1000
7872
7873 /*
7874  * Define here KERN_TRACE so that we have one place to modify
7875  * it if we decide to change what log level the ftrace dump
7876  * should be at.
7877  */
7878 #define KERN_TRACE              KERN_EMERG
7879
7880 void
7881 trace_printk_seq(struct trace_seq *s)
7882 {
7883         /* Probably should print a warning here. */
7884         if (s->seq.len >= TRACE_MAX_PRINT)
7885                 s->seq.len = TRACE_MAX_PRINT;
7886
7887         /*
7888          * More paranoid code. Although the buffer size is set to
7889          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7890          * an extra layer of protection.
7891          */
7892         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7893                 s->seq.len = s->seq.size - 1;
7894
7895         /* Should be zero-terminated, but we are paranoid. */
7896         s->buffer[s->seq.len] = 0;
7897
7898         printk(KERN_TRACE "%s", s->buffer);
7899
7900         trace_seq_init(s);
7901 }
7902
7903 void trace_init_global_iter(struct trace_iterator *iter)
7904 {
7905         iter->tr = &global_trace;
7906         iter->trace = iter->tr->current_trace;
7907         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7908         iter->trace_buffer = &global_trace.trace_buffer;
7909
7910         if (iter->trace && iter->trace->open)
7911                 iter->trace->open(iter);
7912
7913         /* Annotate start of buffers if we had overruns */
7914         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7915                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7916
7917         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7918         if (trace_clocks[iter->tr->clock_id].in_ns)
7919                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7920 }
7921
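/*
 * ftrace_dump - dump the ring buffer(s) to the console
 * @oops_dump_mode: DUMP_ALL dumps every CPU's buffer, DUMP_ORIG only the
 *                  buffer of the CPU that triggered the dump.
 */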
7922 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7923 {
7924         /* use static because iter can be a bit big for the stack */
7925         static struct trace_iterator iter;
7926         static atomic_t dump_running;
7927         struct trace_array *tr = &global_trace;
7928         unsigned int old_userobj;
7929         unsigned long flags;
7930         int cnt = 0, cpu;
7931
7932         /* Only allow one dump user at a time. */
7933         if (atomic_inc_return(&dump_running) != 1) {
7934                 atomic_dec(&dump_running);
7935                 return;
7936         }
7937
7938         /*
7939          * Always turn off tracing when we dump.
7940          * We don't need to show trace output of what happens
7941          * between multiple crashes.
7942          *
7943          * If the user does a sysrq-z, then they can re-enable
7944          * tracing with echo 1 > tracing_on.
7945          */
7946         tracing_off();
7947
7948         local_irq_save(flags);
7949
7950         /* Simulate the iterator */
7951         trace_init_global_iter(&iter);
7952
7953         for_each_tracing_cpu(cpu) {
7954                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7955         }
7956
7957         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7958
7959         /* don't look at user memory in panic mode */
7960         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7961
7962         switch (oops_dump_mode) {
7963         case DUMP_ALL:
7964                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7965                 break;
7966         case DUMP_ORIG:
7967                 iter.cpu_file = raw_smp_processor_id();
7968                 break;
7969         case DUMP_NONE:
7970                 goto out_enable;
7971         default:
7972                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7973                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7974         }
7975
7976         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7977
7978         /* Did function tracer already get disabled? */
7979         if (ftrace_is_dead()) {
7980                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7981                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7982         }
7983
7984         /*
7985          * We need to stop all tracing on all CPUs to read
7986          * the next buffer. This is a bit expensive, but is
7987          * not done often. We fill in all that we can read,
7988          * and then release the locks again.
7989          */
7990
7991         while (!trace_empty(&iter)) {
7992
7993                 if (!cnt)
7994                         printk(KERN_TRACE "---------------------------------\n");
7995
7996                 cnt++;
7997
7998                 /* reset all but tr, trace, and overruns */
7999                 memset(&iter.seq, 0,
8000                        sizeof(struct trace_iterator) -
8001                        offsetof(struct trace_iterator, seq));
8002                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8003                 iter.pos = -1;
8004
8005                 if (trace_find_next_entry_inc(&iter) != NULL) {
8006                         int ret;
8007
8008                         ret = print_trace_line(&iter);
8009                         if (ret != TRACE_TYPE_NO_CONSUME)
8010                                 trace_consume(&iter);
8011                 }
8012                 touch_nmi_watchdog();
8013
8014                 trace_printk_seq(&iter.seq);
8015         }
8016
8017         if (!cnt)
8018                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8019         else
8020                 printk(KERN_TRACE "---------------------------------\n");
8021
8022  out_enable:
8023         tr->trace_flags |= old_userobj;
8024
8025         for_each_tracing_cpu(cpu) {
8026                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8027         }
8028         atomic_dec(&dump_running);
8029         local_irq_restore(flags);
8030 }
8031 EXPORT_SYMBOL_GPL(ftrace_dump);
8032
8033 __init static int tracer_alloc_buffers(void)
8034 {
8035         int ring_buf_size;
8036         int ret = -ENOMEM;
8037
8038         /*
8039          * Make sure we don't accidentally add more trace options
8040          * than we have bits for.
8041          */
8042         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8043
8044         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8045                 goto out;
8046
8047         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8048                 goto out_free_buffer_mask;
8049
8050         /* Only allocate trace_printk buffers if a trace_printk exists */
8051         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8052                 /* Must be called before global_trace.buffer is allocated */
8053                 trace_printk_init_buffers();
8054
8055         /* To save memory, keep the ring buffer size to its minimum */
8056         if (ring_buffer_expanded)
8057                 ring_buf_size = trace_buf_size;
8058         else
8059                 ring_buf_size = 1;
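        /*
         * A request of 1 is only a token size here; the ring buffer code
         * rounds it up to its own minimum, and the buffers are grown to
         * the full trace_buf_size (setting ring_buffer_expanded) the
         * first time tracing is actually used.
         */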
8060
8061         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8062         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8063
8064         raw_spin_lock_init(&global_trace.start_lock);
8065
8066         /*
8067          * The prepare callback allocates some memory for the ring buffer. We
8068          * don't free the buffer if the CPU goes down. If we were to free
8069          * the buffer, then the user would lose any trace that was in the
8070          * buffer. The memory will be removed once the "instance" is removed.
8071          */
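        /*
         * This only registers the multi-instance hotplug state; each ring
         * buffer attaches itself as an instance when it is allocated, so
         * trace_rb_cpu_prepare() can set up its per-CPU pages whenever a
         * CPU is brought online.
         */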
8072         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8073                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8074                                       NULL);
8075         if (ret < 0)
8076                 goto out_free_cpumask;
8077         /* Used for event triggers */
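        /*
         * (temp_buffer is scratch space: when the real buffer refuses an
         * event, e.g. because tracing is off, but a trigger still needs
         * to inspect the event data, the event is staged here instead and
         * never reaches user space.)
         */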
8078         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8079         if (!temp_buffer)
8080                 goto out_rm_hp_state;
8081
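        /*
         * savedcmd caches the comm of tasks that hit trace events, so that
         * trace output can later map a PID back to a task name.
         */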
8082         if (trace_create_savedcmd() < 0)
8083                 goto out_free_temp_buffer;
8084
8085         /* TODO: make the number of buffers hot pluggable with CPUs */
8086         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8087                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8088                 WARN_ON(1);
8089                 goto out_free_savedcmd;
8090         }
8091
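        /*
         * If tracing was switched off before the buffers existed, only the
         * buffer_disabled flag could be recorded; now that the ring buffer
         * is allocated, propagate that state into the buffer itself.
         */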
8092         if (global_trace.buffer_disabled)
8093                 tracing_off();
8094
8095         if (trace_boot_clock) {
8096                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8097                 if (ret < 0)
8098                         pr_warn("Trace clock %s not defined, going back to default\n",
8099                                 trace_boot_clock);
8100         }
8101
8102         /*
8103          * register_tracer() might reference current_trace, so it
8104          * needs to be set before we register anything. This is
8105          * just a bootstrap of current_trace anyway.
8106          */
8107         global_trace.current_trace = &nop_trace;
8108
8109         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8110
8111         ftrace_init_global_array_ops(&global_trace);
8112
8113         init_trace_flags_index(&global_trace);
8114
8115         register_tracer(&nop_trace);
8116
8117         /* Function tracing may start here (via kernel command line) */
8118         init_function_trace();
8119
8120         /* All seems OK, enable tracing */
8121         tracing_disabled = 0;
8122
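        /*
         * Register the panic and die notifiers that dump the ftrace
         * buffers to the console (when ftrace_dump_on_oops is set) as the
         * kernel goes down.
         */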
8123         atomic_notifier_chain_register(&panic_notifier_list,
8124                                        &trace_panic_notifier);
8125
8126         register_die_notifier(&trace_die_notifier);
8127
8128         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8129
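        /*
         * Hook the top-level trace_array into the list of all trace
         * arrays; instances created later through tracefs instances/ are
         * added to the same list.
         */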
8130         INIT_LIST_HEAD(&global_trace.systems);
8131         INIT_LIST_HEAD(&global_trace.events);
8132         list_add(&global_trace.list, &ftrace_trace_arrays);
8133
8134         apply_trace_boot_options();
8135
8136         register_snapshot_cmd();
8137
8138         return 0;
8139
8140 out_free_savedcmd:
8141         free_saved_cmdlines_buffer(savedcmd);
8142 out_free_temp_buffer:
8143         ring_buffer_free(temp_buffer);
8144 out_rm_hp_state:
8145         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8146 out_free_cpumask:
8147         free_cpumask_var(global_trace.tracing_cpumask);
8148 out_free_buffer_mask:
8149         free_cpumask_var(tracing_buffer_mask);
8150 out:
8151         return ret;
8152 }
8153
8154 void __init early_trace_init(void)
8155 {
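        /*
         * tracepoint_printk is set by the "tp_printk" boot parameter; when
         * it is on, trace events are echoed to printk through
         * tracepoint_print_iter, so the iterator is allocated up front and
         * the static key that gates that path is enabled.
         */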
8156         if (tracepoint_printk) {
8157                 tracepoint_print_iter =
8158                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8159                 if (WARN_ON(!tracepoint_print_iter))
8160                         tracepoint_printk = 0;
8161                 else
8162                         static_key_enable(&tracepoint_printk_key.key);
8163         }
8164         tracer_alloc_buffers();
8165 }
8166
8167 void __init trace_init(void)
8168 {
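        /*
         * Second stage of boot-time tracing setup: early_trace_init() has
         * already allocated the buffers, so all that is left here is to
         * bring up the trace event infrastructure.
         */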
8169         trace_event_init();
8170 }
8171
8172 __init static int clear_boot_tracer(void)
8173 {
8174         /*
8175          * The buffer holding the default bootup tracer name is in an
8176          * init section and is freed after boot. This function runs at
8177          * late_initcall time; if the boot tracer was never registered,
8178          * clear the pointer out, to prevent a later registration from
8179          * accessing the buffer that is about to be freed.
8180          */
8181         if (!default_bootup_tracer)
8182                 return 0;
8183
8184         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8185                default_bootup_tracer);
8186         default_bootup_tracer = NULL;
8187
8188         return 0;
8189 }
8190
8191 fs_initcall(tracer_init_tracefs);
8192 late_initcall(clear_boot_tracer);