kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  * occurs.
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will be set to zero if the initialization
95  * of the tracer is successful. That is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set it to 1 if you want to dump the buffers of all CPUs
115  * Set it to 2 if you want to dump the buffer of the CPU that triggered the oops
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
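
/*
 * Usage sketch, based on the comment above and on set_ftrace_dump_on_oops()
 * below (the /proc value "1" selects the same DUMP_ALL mode):
 *
 *	ftrace_dump_on_oops			<- kernel command line, DUMP_ALL
 *	ftrace_dump_on_oops=orig_cpu		<- DUMP_ORIG, only the oopsing CPU
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */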
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * from "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
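
/*
 * Worked example: ns2usecs(1499) == 1 and ns2usecs(1500) == 2; the +500
 * rounds to the nearest microsecond before the integer divide.
 */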
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
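
/*
 * Reference-counting sketch (illustrative): code that hands a trace_array
 * out, e.g. a file open handler, pins it first and drops it on release:
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 */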
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * When adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
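
/*
 * Hook sketch (illustrative): a fork handler passes the parent as @self so
 * children of filtered tasks inherit the filter; an exit handler passes NULL
 * so the dying task is dropped:
 *
 *	trace_filter_add_remove_task(pid_list, parent, child);	// on fork
 *	trace_filter_add_remove_task(pid_list, NULL, task);	// on exit
 */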
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid already is +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
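
/*
 * These three helpers are meant to back a seq_file interface. A minimal,
 * hypothetical wiring (the example_pid_* wrappers would fetch the pid_list
 * from the file's private data before calling trace_pid_start() and
 * trace_pid_next()):
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,
 *		.next	= example_pid_next,
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */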
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always create a new array. The write is an all-or-nothing
492          * operation: when the user adds new pids, they go into a fresh
493          * array. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
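
/*
 * Rough usage sketch for tracefs files backed by trace_pid_write() (the
 * event pid filter is one example; exact file names depend on the caller):
 *
 *	echo 123 456 > set_event_pid	# filter on pids 123 and 456
 *	cat set_event_pid		# printed via trace_pid_show()
 *
 * The write always builds a new list and only publishes it on success, so a
 * failed write leaves the previous filter untouched.
 */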
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so it can be used in fast paths such as
592  * by the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, it will be much appreciated
614  * to not have to wait for all that output. Anyway, this is
615  * configurable at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the events producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
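
/*
 * Reader-side sketch: a consumer of a single cpu buffer takes the per-cpu
 * lock, while a whole-buffer reader passes RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(cpu);
 *	... read or consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 */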
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
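
/*
 * The usual event write pattern built on this helper (a sketch; see
 * __trace_puts() below for a concrete in-file user):
 *
 *	event = __trace_buffer_lock_reserve(buffer, type, len, irq_flags, pc);
 *	if (!event)
 *		return 0;
 *	entry = ring_buffer_event_data(event);
 *	... fill in *entry ...
 *	__buffer_unlock_commit(buffer, event);
 */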
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
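
/*
 * Typical use, per the kernel-doc above (a sketch; the condition shown is
 * hypothetical): allocate the snapshot buffer once from a context that may
 * sleep, then snapshot wherever the interesting condition is detected:
 *
 *	tracing_snapshot_alloc();	// or: echo 1 > /sys/kernel/debug/tracing/snapshot
 *	...
 *	if (condition_hit)
 *		tracing_snapshot();
 */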
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer; instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races of where it gets disabled but we still do a record.
1057          * races where it gets disabled but we still do a record.
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr: the trace array to check whether the ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
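
/*
 * A common in-kernel debugging sketch: let the buffers run, then freeze them
 * the moment a suspect condition is seen, so the events leading up to it are
 * preserved (the condition name is hypothetical):
 *
 *	if (something_went_wrong)
 *		tracing_off();
 *
 * tracing_on() can later re-enable recording, and tracing_is_on() reports
 * the current state.
 */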
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
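
/*
 * Boot-time examples for the two parameters above (values illustrative):
 *
 *	trace_buf_size=10m	# memparse() accepts k, m and g suffixes
 *	tracing_thresh=100	# threshold in microseconds, stored as ns
 */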
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the enums were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
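
/*
 * For illustration: with TRACE_FLAGS containing entries of the form
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"), ...
 * the "#define C(a, b) b" above expands the array to
 *	{ "print-parent", "sym-offset", ..., NULL }
 * while trace.h redefines C() to build the matching TRACE_ITER_* bits.
 */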
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
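
/*
 * The active clock is chosen by name, either with the trace_clock= boot
 * parameter handled above or at run time through the tracefs "trace_clock"
 * file, e.g.:
 *
 *	echo mono > trace_clock
 *	cat trace_clock			# current clock is shown in brackets
 */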
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * The parser is not finished with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
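
/*
 * Parsing sketch: a user write of "sched_switch sched_wakeup\n" is consumed
 * by two trace_get_user() calls. Each call fills parser->buffer with one
 * whitespace-separated token and returns how many bytes it consumed; a token
 * cut off by the end of the write is kept with parser->cont set and finished
 * on the next call.
 */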
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this task's comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 early_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since this will break them. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
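
/*
 * Example (illustrative sketch, not built here): a minimal built-in tracer
 * registration. The tracer name, callbacks and initcall level below are
 * hypothetical; only .name is strictly required by register_tracer(), and
 * the call must come from __init code since register_tracer() is __init.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */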
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1715 struct saved_cmdlines_buffer {
1716         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717         unsigned *map_cmdline_to_pid;
1718         unsigned cmdline_num;
1719         int cmdline_idx;
1720         char *saved_cmdlines;
1721 };
1722 static struct saved_cmdlines_buffer *savedcmd;
1723
1724 /* temporarily disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1726
1727 static inline char *get_saved_cmdlines(int idx)
1728 {
1729         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1730 }
1731
1732 static inline void set_cmdline(int idx, const char *cmdline)
1733 {
1734         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1735 }
1736
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738                                     struct saved_cmdlines_buffer *s)
1739 {
1740         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1741                                         GFP_KERNEL);
1742         if (!s->map_cmdline_to_pid)
1743                 return -ENOMEM;
1744
1745         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746         if (!s->saved_cmdlines) {
1747                 kfree(s->map_cmdline_to_pid);
1748                 return -ENOMEM;
1749         }
1750
1751         s->cmdline_idx = 0;
1752         s->cmdline_num = val;
1753         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754                sizeof(s->map_pid_to_cmdline));
1755         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756                val * sizeof(*s->map_cmdline_to_pid));
1757
1758         return 0;
1759 }
1760
1761 static int trace_create_savedcmd(void)
1762 {
1763         int ret;
1764
1765         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1766         if (!savedcmd)
1767                 return -ENOMEM;
1768
1769         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1770         if (ret < 0) {
1771                 kfree(savedcmd);
1772                 savedcmd = NULL;
1773                 return -ENOMEM;
1774         }
1775
1776         return 0;
1777 }
1778
1779 int is_tracing_stopped(void)
1780 {
1781         return global_trace.stop_count;
1782 }
1783
1784 /**
1785  * tracing_start - quick start of the tracer
1786  *
1787  * If tracing is enabled but was stopped by tracing_stop,
1788  * this will start the tracer back up.
1789  */
1790 void tracing_start(void)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         if (tracing_disabled)
1796                 return;
1797
1798         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799         if (--global_trace.stop_count) {
1800                 if (global_trace.stop_count < 0) {
1801                         /* Someone screwed up their debugging */
1802                         WARN_ON_ONCE(1);
1803                         global_trace.stop_count = 0;
1804                 }
1805                 goto out;
1806         }
1807
1808         /* Prevent the buffers from switching */
1809         arch_spin_lock(&global_trace.max_lock);
1810
1811         buffer = global_trace.trace_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_enable(buffer);
1814
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816         buffer = global_trace.max_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819 #endif
1820
1821         arch_spin_unlock(&global_trace.max_lock);
1822
1823  out:
1824         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1825 }
1826
1827 static void tracing_start_tr(struct trace_array *tr)
1828 {
1829         struct ring_buffer *buffer;
1830         unsigned long flags;
1831
1832         if (tracing_disabled)
1833                 return;
1834
1835         /* If global, we need to also start the max tracer */
1836         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837                 return tracing_start();
1838
1839         raw_spin_lock_irqsave(&tr->start_lock, flags);
1840
1841         if (--tr->stop_count) {
1842                 if (tr->stop_count < 0) {
1843                         /* Someone screwed up their debugging */
1844                         WARN_ON_ONCE(1);
1845                         tr->stop_count = 0;
1846                 }
1847                 goto out;
1848         }
1849
1850         buffer = tr->trace_buffer.buffer;
1851         if (buffer)
1852                 ring_buffer_record_enable(buffer);
1853
1854  out:
1855         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1856 }
1857
1858 /**
1859  * tracing_stop - quick stop of the tracer
1860  *
1861  * Light weight way to stop tracing. Use in conjunction with
1862  * tracing_start.
1863  */
1864 void tracing_stop(void)
1865 {
1866         struct ring_buffer *buffer;
1867         unsigned long flags;
1868
1869         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870         if (global_trace.stop_count++)
1871                 goto out;
1872
1873         /* Prevent the buffers from switching */
1874         arch_spin_lock(&global_trace.max_lock);
1875
1876         buffer = global_trace.trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_disable(buffer);
1879
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881         buffer = global_trace.max_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884 #endif
1885
1886         arch_spin_unlock(&global_trace.max_lock);
1887
1888  out:
1889         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1890 }
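
/*
 * Usage sketch (hypothetical caller): tracing_stop()/tracing_start() bracket
 * a region where the buffers should not be written to, e.g. while the
 * buffers are being examined or dumped. The stop_count handling above makes
 * the pair nest correctly; examine_or_dump_buffers() is a made-up helper.
 *
 *	tracing_stop();
 *	if (is_tracing_stopped())
 *		examine_or_dump_buffers();
 *	tracing_start();
 */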
1891
1892 static void tracing_stop_tr(struct trace_array *tr)
1893 {
1894         struct ring_buffer *buffer;
1895         unsigned long flags;
1896
1897         /* If global, we need to also stop the max tracer */
1898         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899                 return tracing_stop();
1900
1901         raw_spin_lock_irqsave(&tr->start_lock, flags);
1902         if (tr->stop_count++)
1903                 goto out;
1904
1905         buffer = tr->trace_buffer.buffer;
1906         if (buffer)
1907                 ring_buffer_record_disable(buffer);
1908
1909  out:
1910         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1911 }
1912
1913 void trace_stop_cmdline_recording(void);
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917         unsigned pid, idx;
1918
1919         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920                 return 0;
1921
1922         /*
1923          * It's not the end of the world if we don't get
1924          * the lock, but we also don't want to spin
1925          * nor do we want to disable interrupts,
1926          * so if we miss here, then better luck next time.
1927          */
1928         if (!arch_spin_trylock(&trace_cmdline_lock))
1929                 return 0;
1930
1931         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932         if (idx == NO_CMDLINE_MAP) {
1933                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935                 /*
1936                  * Check whether the cmdline buffer at idx has a pid
1937                  * mapped. We are going to overwrite that entry so we
1938                  * need to clear the map_pid_to_cmdline. Otherwise we
1939                  * would read the new comm for the old pid.
1940                  */
1941                 pid = savedcmd->map_cmdline_to_pid[idx];
1942                 if (pid != NO_CMDLINE_MAP)
1943                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948                 savedcmd->cmdline_idx = idx;
1949         }
1950
1951         set_cmdline(idx, tsk->comm);
1952
1953         arch_spin_unlock(&trace_cmdline_lock);
1954
1955         return 1;
1956 }
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960         unsigned map;
1961
1962         if (!pid) {
1963                 strcpy(comm, "<idle>");
1964                 return;
1965         }
1966
1967         if (WARN_ON_ONCE(pid < 0)) {
1968                 strcpy(comm, "<XXX>");
1969                 return;
1970         }
1971
1972         if (pid > PID_MAX_DEFAULT) {
1973                 strcpy(comm, "<...>");
1974                 return;
1975         }
1976
1977         map = savedcmd->map_pid_to_cmdline[pid];
1978         if (map != NO_CMDLINE_MAP)
1979                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980         else
1981                 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986         preempt_disable();
1987         arch_spin_lock(&trace_cmdline_lock);
1988
1989         __trace_find_cmdline(pid, comm);
1990
1991         arch_spin_unlock(&trace_cmdline_lock);
1992         preempt_enable();
1993 }
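
/*
 * Usage sketch: output code resolves a recorded pid back to a comm via the
 * saved_cmdlines map above. The "entry" and "s" names are assumptions
 * standing in for a real trace output handler's locals.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 */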
1994
1995 void tracing_record_cmdline(struct task_struct *tsk)
1996 {
1997         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1998                 return;
1999
2000         if (!__this_cpu_read(trace_cmdline_save))
2001                 return;
2002
2003         if (trace_save_cmdline(tsk))
2004                 __this_cpu_write(trace_cmdline_save, false);
2005 }
2006
2007 /*
2008  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2009  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2010  * simplifies those functions and keeps them in sync.
2011  */
2012 enum print_line_t trace_handle_return(struct trace_seq *s)
2013 {
2014         return trace_seq_has_overflowed(s) ?
2015                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2016 }
2017 EXPORT_SYMBOL_GPL(trace_handle_return);
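
/*
 * Example (sketch of a hypothetical event output callback): rather than
 * checking trace_seq_has_overflowed() by hand, a trace method can simply
 * end with trace_handle_return().
 *
 *	static enum print_line_t
 *	example_trace_output(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event on cpu %d\n",
 *				 iter->cpu);
 *		return trace_handle_return(&iter->seq);
 *	}
 */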
2018
2019 void
2020 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2021                              int pc)
2022 {
2023         struct task_struct *tsk = current;
2024
2025         entry->preempt_count            = pc & 0xff;
2026         entry->pid                      = (tsk) ? tsk->pid : 0;
2027         entry->flags =
2028 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2029                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2030 #else
2031                 TRACE_FLAG_IRQS_NOSUPPORT |
2032 #endif
2033                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2034                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2035                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2036                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2037                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2038 }
2039 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
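
/*
 * Usage sketch: callers that build an entry by hand fill in the common
 * fields with the current irq/preempt state. In this file that normally
 * happens via the reserve helpers below; the explicit form, assuming a
 * reserved print_entry whose embedded common header is its "ent" field,
 * would look like:
 *
 *	struct print_entry *entry;	// from a buffer reservation
 *	unsigned long irq_flags;
 *
 *	local_save_flags(irq_flags);
 *	tracing_generic_entry_update(&entry->ent, irq_flags, preempt_count());
 */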
2040
2041 struct ring_buffer_event *
2042 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2043                           int type,
2044                           unsigned long len,
2045                           unsigned long flags, int pc)
2046 {
2047         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2048 }
2049
2050 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2051 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2052 static int trace_buffered_event_ref;
2053
2054 /**
2055  * trace_buffered_event_enable - enable buffering events
2056  *
2057  * When events are being filtered, it is quicker to write the event
2058  * data into a temporary per-CPU buffer if there is a good chance the
2059  * event will not be committed: discarding an event from the ring
2060  * buffer is not as fast as committing it, and is much slower than
2061  * simply copying the data into a commit.
2062  *
2063  * When an event is to be filtered, allocate per-CPU buffers to write
2064  * the event data into. If the event is filtered out and discarded, the
2065  * data is simply dropped; otherwise, the entire event is committed
2066  * in one shot.
2067  */
2068 void trace_buffered_event_enable(void)
2069 {
2070         struct ring_buffer_event *event;
2071         struct page *page;
2072         int cpu;
2073
2074         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2075
2076         if (trace_buffered_event_ref++)
2077                 return;
2078
2079         for_each_tracing_cpu(cpu) {
2080                 page = alloc_pages_node(cpu_to_node(cpu),
2081                                         GFP_KERNEL | __GFP_NORETRY, 0);
2082                 if (!page)
2083                         goto failed;
2084
2085                 event = page_address(page);
2086                 memset(event, 0, sizeof(*event));
2087
2088                 per_cpu(trace_buffered_event, cpu) = event;
2089
2090                 preempt_disable();
2091                 if (cpu == smp_processor_id() &&
2092                     this_cpu_read(trace_buffered_event) !=
2093                     per_cpu(trace_buffered_event, cpu))
2094                         WARN_ON_ONCE(1);
2095                 preempt_enable();
2096         }
2097
2098         return;
2099  failed:
2100         trace_buffered_event_disable();
2101 }
2102
2103 static void enable_trace_buffered_event(void *data)
2104 {
2105         /* Probably not needed, but do it anyway */
2106         smp_rmb();
2107         this_cpu_dec(trace_buffered_event_cnt);
2108 }
2109
2110 static void disable_trace_buffered_event(void *data)
2111 {
2112         this_cpu_inc(trace_buffered_event_cnt);
2113 }
2114
2115 /**
2116  * trace_buffered_event_disable - disable buffering events
2117  *
2118  * When a filter is removed, it is faster to not use the buffered
2119  * events, and to commit directly into the ring buffer. Free up
2120  * the temp buffers when there are no more users. This requires
2121  * special synchronization with current events.
2122  */
2123 void trace_buffered_event_disable(void)
2124 {
2125         int cpu;
2126
2127         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2128
2129         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2130                 return;
2131
2132         if (--trace_buffered_event_ref)
2133                 return;
2134
2135         preempt_disable();
2136         /* For each CPU, set the buffer as used. */
2137         smp_call_function_many(tracing_buffer_mask,
2138                                disable_trace_buffered_event, NULL, 1);
2139         preempt_enable();
2140
2141         /* Wait for all current users to finish */
2142         synchronize_sched();
2143
2144         for_each_tracing_cpu(cpu) {
2145                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2146                 per_cpu(trace_buffered_event, cpu) = NULL;
2147         }
2148         /*
2149          * Make sure trace_buffered_event is NULL before clearing
2150          * trace_buffered_event_cnt.
2151          */
2152         smp_wmb();
2153
2154         preempt_disable();
2155         /* Do the work on each cpu */
2156         smp_call_function_many(tracing_buffer_mask,
2157                                enable_trace_buffered_event, NULL, 1);
2158         preempt_enable();
2159 }
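
/*
 * Usage sketch (the real callers live in the event filter code): the
 * enable/disable pair is refcounted and must be called under event_mutex,
 * as the WARN_ON_ONCE() checks above enforce.
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the event filter ...
 *	mutex_unlock(&event_mutex);
 *
 *	// and when the filter is removed again:
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */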
2160
2161 static struct ring_buffer *temp_buffer;
2162
2163 struct ring_buffer_event *
2164 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2165                           struct trace_event_file *trace_file,
2166                           int type, unsigned long len,
2167                           unsigned long flags, int pc)
2168 {
2169         struct ring_buffer_event *entry;
2170         int val;
2171
2172         *current_rb = trace_file->tr->trace_buffer.buffer;
2173
2174         if ((trace_file->flags &
2175              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2176             (entry = this_cpu_read(trace_buffered_event))) {
2177                 /* Try to use the per cpu buffer first */
2178                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2179                 if (val == 1) {
2180                         trace_event_setup(entry, type, flags, pc);
2181                         entry->array[0] = len;
2182                         return entry;
2183                 }
2184                 this_cpu_dec(trace_buffered_event_cnt);
2185         }
2186
2187         entry = __trace_buffer_lock_reserve(*current_rb,
2188                                             type, len, flags, pc);
2189         /*
2190          * If tracing is off, but we have triggers enabled,
2191          * we still need to look at the event data. Use the temp_buffer
2192          * to store the trace event for the trigger to use. It's recursion
2193          * safe and will not be recorded anywhere.
2194          */
2195         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2196                 *current_rb = temp_buffer;
2197                 entry = __trace_buffer_lock_reserve(*current_rb,
2198                                                     type, len, flags, pc);
2199         }
2200         return entry;
2201 }
2202 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2203
2204 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2205 static DEFINE_MUTEX(tracepoint_printk_mutex);
2206
2207 static void output_printk(struct trace_event_buffer *fbuffer)
2208 {
2209         struct trace_event_call *event_call;
2210         struct trace_event *event;
2211         unsigned long flags;
2212         struct trace_iterator *iter = tracepoint_print_iter;
2213
2214         /* We should never get here if iter is NULL */
2215         if (WARN_ON_ONCE(!iter))
2216                 return;
2217
2218         event_call = fbuffer->trace_file->event_call;
2219         if (!event_call || !event_call->event.funcs ||
2220             !event_call->event.funcs->trace)
2221                 return;
2222
2223         event = &fbuffer->trace_file->event_call->event;
2224
2225         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2226         trace_seq_init(&iter->seq);
2227         iter->ent = fbuffer->entry;
2228         event_call->event.funcs->trace(iter, 0, event);
2229         trace_seq_putc(&iter->seq, 0);
2230         printk("%s", iter->seq.buffer);
2231
2232         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2233 }
2234
2235 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2236                              void __user *buffer, size_t *lenp,
2237                              loff_t *ppos)
2238 {
2239         int save_tracepoint_printk;
2240         int ret;
2241
2242         mutex_lock(&tracepoint_printk_mutex);
2243         save_tracepoint_printk = tracepoint_printk;
2244
2245         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2246
2247         /*
2248          * This will force an early exit, as tracepoint_printk
2249          * is always zero when tracepoint_print_iter is not allocated.
2250          */
2251         if (!tracepoint_print_iter)
2252                 tracepoint_printk = 0;
2253
2254         if (save_tracepoint_printk == tracepoint_printk)
2255                 goto out;
2256
2257         if (tracepoint_printk)
2258                 static_key_enable(&tracepoint_printk_key.key);
2259         else
2260                 static_key_disable(&tracepoint_printk_key.key);
2261
2262  out:
2263         mutex_unlock(&tracepoint_printk_mutex);
2264
2265         return ret;
2266 }
2267
2268 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2269 {
2270         if (static_key_false(&tracepoint_printk_key.key))
2271                 output_printk(fbuffer);
2272
2273         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2274                                     fbuffer->event, fbuffer->entry,
2275                                     fbuffer->flags, fbuffer->pc);
2276 }
2277 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2278
2279 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2280                                      struct ring_buffer *buffer,
2281                                      struct ring_buffer_event *event,
2282                                      unsigned long flags, int pc,
2283                                      struct pt_regs *regs)
2284 {
2285         __buffer_unlock_commit(buffer, event);
2286
2287         /*
2288          * If regs is not set, then skip the following callers:
2289          *   trace_buffer_unlock_commit_regs
2290          *   event_trigger_unlock_commit
2291          *   trace_event_buffer_commit
2292          *   trace_event_raw_event_sched_switch
2293          * Note, we can still get here via blktrace, wakeup tracer
2294          * and mmiotrace, but that's ok if they lose a function or
2295          * two. They are not that meaningful.
2296          */
2297         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2298         ftrace_trace_userstack(buffer, flags, pc);
2299 }
2300
2301 /*
2302  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2303  */
2304 void
2305 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2306                                    struct ring_buffer_event *event)
2307 {
2308         __buffer_unlock_commit(buffer, event);
2309 }
2310
2311 static void
2312 trace_process_export(struct trace_export *export,
2313                struct ring_buffer_event *event)
2314 {
2315         struct trace_entry *entry;
2316         unsigned int size = 0;
2317
2318         entry = ring_buffer_event_data(event);
2319         size = ring_buffer_event_length(event);
2320         export->write(entry, size);
2321 }
2322
2323 static DEFINE_MUTEX(ftrace_export_lock);
2324
2325 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2326
2327 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2328
2329 static inline void ftrace_exports_enable(void)
2330 {
2331         static_branch_enable(&ftrace_exports_enabled);
2332 }
2333
2334 static inline void ftrace_exports_disable(void)
2335 {
2336         static_branch_disable(&ftrace_exports_enabled);
2337 }
2338
2339 void ftrace_exports(struct ring_buffer_event *event)
2340 {
2341         struct trace_export *export;
2342
2343         preempt_disable_notrace();
2344
2345         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2346         while (export) {
2347                 trace_process_export(export, event);
2348                 export = rcu_dereference_raw_notrace(export->next);
2349         }
2350
2351         preempt_enable_notrace();
2352 }
2353
2354 static inline void
2355 add_trace_export(struct trace_export **list, struct trace_export *export)
2356 {
2357         rcu_assign_pointer(export->next, *list);
2358         /*
2359          * We are adding export to the list, but another
2360          * CPU might be walking that list. We need to make sure
2361          * the export->next pointer is valid before another CPU sees
2362          * the export pointer included in the list.
2363          */
2364         rcu_assign_pointer(*list, export);
2365 }
2366
2367 static inline int
2368 rm_trace_export(struct trace_export **list, struct trace_export *export)
2369 {
2370         struct trace_export **p;
2371
2372         for (p = list; *p != NULL; p = &(*p)->next)
2373                 if (*p == export)
2374                         break;
2375
2376         if (*p != export)
2377                 return -1;
2378
2379         rcu_assign_pointer(*p, (*p)->next);
2380
2381         return 0;
2382 }
2383
2384 static inline void
2385 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2386 {
2387         if (*list == NULL)
2388                 ftrace_exports_enable();
2389
2390         add_trace_export(list, export);
2391 }
2392
2393 static inline int
2394 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2395 {
2396         int ret;
2397
2398         ret = rm_trace_export(list, export);
2399         if (*list == NULL)
2400                 ftrace_exports_disable();
2401
2402         return ret;
2403 }
2404
2405 int register_ftrace_export(struct trace_export *export)
2406 {
2407         if (WARN_ON_ONCE(!export->write))
2408                 return -1;
2409
2410         mutex_lock(&ftrace_export_lock);
2411
2412         add_ftrace_export(&ftrace_exports_list, export);
2413
2414         mutex_unlock(&ftrace_export_lock);
2415
2416         return 0;
2417 }
2418 EXPORT_SYMBOL_GPL(register_ftrace_export);
2419
2420 int unregister_ftrace_export(struct trace_export *export)
2421 {
2422         int ret;
2423
2424         mutex_lock(&ftrace_export_lock);
2425
2426         ret = rm_ftrace_export(&ftrace_exports_list, export);
2427
2428         mutex_unlock(&ftrace_export_lock);
2429
2430         return ret;
2431 }
2432 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
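
/*
 * Example (sketch of a hypothetical exporter): an exporter supplies a
 * ->write() callback, which trace_process_export() above invokes with the
 * raw trace entry and its length. The callback signature and the field
 * layout beyond ->write are assumed from the code in this file.
 *
 *	static void example_export_write(const void *entry, unsigned int size)
 *	{
 *		// e.g. push the raw entry out over a side channel
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	// in module init / exit:
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */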
2433
2434 void
2435 trace_function(struct trace_array *tr,
2436                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2437                int pc)
2438 {
2439         struct trace_event_call *call = &event_function;
2440         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2441         struct ring_buffer_event *event;
2442         struct ftrace_entry *entry;
2443
2444         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2445                                             flags, pc);
2446         if (!event)
2447                 return;
2448         entry   = ring_buffer_event_data(event);
2449         entry->ip                       = ip;
2450         entry->parent_ip                = parent_ip;
2451
2452         if (!call_filter_check_discard(call, entry, buffer, event)) {
2453                 if (static_branch_unlikely(&ftrace_exports_enabled))
2454                         ftrace_exports(event);
2455                 __buffer_unlock_commit(buffer, event);
2456         }
2457 }
2458
2459 #ifdef CONFIG_STACKTRACE
2460
2461 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2462 struct ftrace_stack {
2463         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2464 };
2465
2466 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2467 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2468
2469 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2470                                  unsigned long flags,
2471                                  int skip, int pc, struct pt_regs *regs)
2472 {
2473         struct trace_event_call *call = &event_kernel_stack;
2474         struct ring_buffer_event *event;
2475         struct stack_entry *entry;
2476         struct stack_trace trace;
2477         int use_stack;
2478         int size = FTRACE_STACK_ENTRIES;
2479
2480         trace.nr_entries        = 0;
2481         trace.skip              = skip;
2482
2483         /*
2484          * Add two, for this function and the call to save_stack_trace().
2485          * If regs is set, then these functions will not be in the way.
2486          */
2487         if (!regs)
2488                 trace.skip += 2;
2489
2490         /*
2491          * Since events can happen in NMIs, there's no safe way to
2492          * use the per-cpu ftrace_stacks. We reserve it, and if an interrupt
2493          * or NMI comes in, it will just have to use the default
2494          * FTRACE_STACK_SIZE.
2495          */
2496         preempt_disable_notrace();
2497
2498         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2499         /*
2500          * We don't need any atomic variables, just a barrier.
2501          * If an interrupt comes in, we don't care, because it would
2502          * have exited and put the counter back to what we want.
2503          * We just need a barrier to keep gcc from moving things
2504          * around.
2505          */
2506         barrier();
2507         if (use_stack == 1) {
2508                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2509                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2510
2511                 if (regs)
2512                         save_stack_trace_regs(regs, &trace);
2513                 else
2514                         save_stack_trace(&trace);
2515
2516                 if (trace.nr_entries > size)
2517                         size = trace.nr_entries;
2518         } else
2519                 /* From now on, use_stack is a boolean */
2520                 use_stack = 0;
2521
2522         size *= sizeof(unsigned long);
2523
2524         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2525                                             sizeof(*entry) + size, flags, pc);
2526         if (!event)
2527                 goto out;
2528         entry = ring_buffer_event_data(event);
2529
2530         memset(&entry->caller, 0, size);
2531
2532         if (use_stack)
2533                 memcpy(&entry->caller, trace.entries,
2534                        trace.nr_entries * sizeof(unsigned long));
2535         else {
2536                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2537                 trace.entries           = entry->caller;
2538                 if (regs)
2539                         save_stack_trace_regs(regs, &trace);
2540                 else
2541                         save_stack_trace(&trace);
2542         }
2543
2544         entry->size = trace.nr_entries;
2545
2546         if (!call_filter_check_discard(call, entry, buffer, event))
2547                 __buffer_unlock_commit(buffer, event);
2548
2549  out:
2550         /* Again, don't let gcc optimize things here */
2551         barrier();
2552         __this_cpu_dec(ftrace_stack_reserve);
2553         preempt_enable_notrace();
2554
2555 }
2556
2557 static inline void ftrace_trace_stack(struct trace_array *tr,
2558                                       struct ring_buffer *buffer,
2559                                       unsigned long flags,
2560                                       int skip, int pc, struct pt_regs *regs)
2561 {
2562         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2563                 return;
2564
2565         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2566 }
2567
2568 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2569                    int pc)
2570 {
2571         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2572 }
2573
2574 /**
2575  * trace_dump_stack - record a stack back trace in the trace buffer
2576  * @skip: Number of functions to skip (helper handlers)
2577  */
2578 void trace_dump_stack(int skip)
2579 {
2580         unsigned long flags;
2581
2582         if (tracing_disabled || tracing_selftest_running)
2583                 return;
2584
2585         local_save_flags(flags);
2586
2587         /*
2588          * Skip three more; that seems to get us to the caller of
2589          * this function.
2590          */
2591         skip += 3;
2592         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2593                              flags, skip, preempt_count(), NULL);
2594 }
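
/*
 * Usage sketch: drop a kernel stack back trace into the ring buffer from a
 * suspicious code path, without skipping any extra caller frames:
 *
 *	trace_dump_stack(0);
 */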
2595
2596 static DEFINE_PER_CPU(int, user_stack_count);
2597
2598 void
2599 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2600 {
2601         struct trace_event_call *call = &event_user_stack;
2602         struct ring_buffer_event *event;
2603         struct userstack_entry *entry;
2604         struct stack_trace trace;
2605
2606         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2607                 return;
2608
2609         /*
2610          * NMIs can not handle page faults, even with fixups.
2611          * Saving the user stack can (and often does) fault.
2612          */
2613         if (unlikely(in_nmi()))
2614                 return;
2615
2616         /*
2617          * prevent recursion, since the user stack tracing may
2618          * trigger other kernel events.
2619          */
2620         preempt_disable();
2621         if (__this_cpu_read(user_stack_count))
2622                 goto out;
2623
2624         __this_cpu_inc(user_stack_count);
2625
2626         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2627                                             sizeof(*entry), flags, pc);
2628         if (!event)
2629                 goto out_drop_count;
2630         entry   = ring_buffer_event_data(event);
2631
2632         entry->tgid             = current->tgid;
2633         memset(&entry->caller, 0, sizeof(entry->caller));
2634
2635         trace.nr_entries        = 0;
2636         trace.max_entries       = FTRACE_STACK_ENTRIES;
2637         trace.skip              = 0;
2638         trace.entries           = entry->caller;
2639
2640         save_stack_trace_user(&trace);
2641         if (!call_filter_check_discard(call, entry, buffer, event))
2642                 __buffer_unlock_commit(buffer, event);
2643
2644  out_drop_count:
2645         __this_cpu_dec(user_stack_count);
2646  out:
2647         preempt_enable();
2648 }
2649
2650 #ifdef UNUSED
2651 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2652 {
2653         ftrace_trace_userstack(tr, flags, preempt_count());
2654 }
2655 #endif /* UNUSED */
2656
2657 #endif /* CONFIG_STACKTRACE */
2658
2659 /* created for use with alloc_percpu */
2660 struct trace_buffer_struct {
2661         int nesting;
2662         char buffer[4][TRACE_BUF_SIZE];
2663 };
2664
2665 static struct trace_buffer_struct *trace_percpu_buffer;
2666
2667 /*
2668  * This allows for lockless recording.  If we're nested too deeply, then
2669  * this returns NULL.
2670  */
2671 static char *get_trace_buf(void)
2672 {
2673         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2674
2675         if (!buffer || buffer->nesting >= 4)
2676                 return NULL;
2677
2678         return &buffer->buffer[buffer->nesting++][0];
2679 }
2680
2681 static void put_trace_buf(void)
2682 {
2683         this_cpu_dec(trace_percpu_buffer->nesting);
2684 }
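
/*
 * Usage sketch (the trace_vbprintk()/trace_vprintk() paths below follow
 * this pattern): the per-CPU buffer must be claimed and released with
 * preemption disabled so the nesting counter stays consistent.
 *
 *	char *tbuffer;
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		// format into tbuffer (up to TRACE_BUF_SIZE bytes) ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */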
2685
2686 static int alloc_percpu_trace_buffer(void)
2687 {
2688         struct trace_buffer_struct *buffers;
2689
2690         buffers = alloc_percpu(struct trace_buffer_struct);
2691         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2692                 return -ENOMEM;
2693
2694         trace_percpu_buffer = buffers;
2695         return 0;
2696 }
2697
2698 static int buffers_allocated;
2699
2700 void trace_printk_init_buffers(void)
2701 {
2702         if (buffers_allocated)
2703                 return;
2704
2705         if (alloc_percpu_trace_buffer())
2706                 return;
2707
2708         /* trace_printk() is for debug use only. Don't use it in production. */
2709
2710         pr_warn("\n");
2711         pr_warn("**********************************************************\n");
2712         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2713         pr_warn("**                                                      **\n");
2714         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2715         pr_warn("**                                                      **\n");
2716         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2717         pr_warn("** unsafe for production use.                           **\n");
2718         pr_warn("**                                                      **\n");
2719         pr_warn("** If you see this message and you are not debugging    **\n");
2720         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2721         pr_warn("**                                                      **\n");
2722         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2723         pr_warn("**********************************************************\n");
2724
2725         /* Expand the buffers to the set size */
2726         tracing_update_buffers();
2727
2728         buffers_allocated = 1;
2729
2730         /*
2731          * trace_printk_init_buffers() can be called by modules.
2732          * If that happens, then we need to start cmdline recording
2733          * directly here. If the global_trace.buffer is already
2734          * allocated here, then this was called by module code.
2735          */
2736         if (global_trace.trace_buffer.buffer)
2737                 tracing_start_cmdline_record();
2738 }
2739
2740 void trace_printk_start_comm(void)
2741 {
2742         /* Start tracing comms if trace printk is set */
2743         if (!buffers_allocated)
2744                 return;
2745         tracing_start_cmdline_record();
2746 }
2747
2748 static void trace_printk_start_stop_comm(int enabled)
2749 {
2750         if (!buffers_allocated)
2751                 return;
2752
2753         if (enabled)
2754                 tracing_start_cmdline_record();
2755         else
2756                 tracing_stop_cmdline_record();
2757 }
2758
2759 /**
2760  * trace_vbprintk - write binary msg to tracing buffer
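 * @ip: the address of the caller
 * @fmt: the binary (bprintf) format string
 * @args: va_list of the arguments for @fmt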
2761  *
2762  */
2763 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2764 {
2765         struct trace_event_call *call = &event_bprint;
2766         struct ring_buffer_event *event;
2767         struct ring_buffer *buffer;
2768         struct trace_array *tr = &global_trace;
2769         struct bprint_entry *entry;
2770         unsigned long flags;
2771         char *tbuffer;
2772         int len = 0, size, pc;
2773
2774         if (unlikely(tracing_selftest_running || tracing_disabled))
2775                 return 0;
2776
2777         /* Don't pollute graph traces with trace_vprintk internals */
2778         pause_graph_tracing();
2779
2780         pc = preempt_count();
2781         preempt_disable_notrace();
2782
2783         tbuffer = get_trace_buf();
2784         if (!tbuffer) {
2785                 len = 0;
2786                 goto out_nobuffer;
2787         }
2788
2789         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2790
2791         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2792                 goto out;
2793
2794         local_save_flags(flags);
2795         size = sizeof(*entry) + sizeof(u32) * len;
2796         buffer = tr->trace_buffer.buffer;
2797         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2798                                             flags, pc);
2799         if (!event)
2800                 goto out;
2801         entry = ring_buffer_event_data(event);
2802         entry->ip                       = ip;
2803         entry->fmt                      = fmt;
2804
2805         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2806         if (!call_filter_check_discard(call, entry, buffer, event)) {
2807                 __buffer_unlock_commit(buffer, event);
2808                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2809         }
2810
2811 out:
2812         put_trace_buf();
2813
2814 out_nobuffer:
2815         preempt_enable_notrace();
2816         unpause_graph_tracing();
2817
2818         return len;
2819 }
2820 EXPORT_SYMBOL_GPL(trace_vbprintk);
2821
2822 static int
2823 __trace_array_vprintk(struct ring_buffer *buffer,
2824                       unsigned long ip, const char *fmt, va_list args)
2825 {
2826         struct trace_event_call *call = &event_print;
2827         struct ring_buffer_event *event;
2828         int len = 0, size, pc;
2829         struct print_entry *entry;
2830         unsigned long flags;
2831         char *tbuffer;
2832
2833         if (tracing_disabled || tracing_selftest_running)
2834                 return 0;
2835
2836         /* Don't pollute graph traces with trace_vprintk internals */
2837         pause_graph_tracing();
2838
2839         pc = preempt_count();
2840         preempt_disable_notrace();
2841
2842
2843         tbuffer = get_trace_buf();
2844         if (!tbuffer) {
2845                 len = 0;
2846                 goto out_nobuffer;
2847         }
2848
2849         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2850
2851         local_save_flags(flags);
2852         size = sizeof(*entry) + len + 1;
2853         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2854                                             flags, pc);
2855         if (!event)
2856                 goto out;
2857         entry = ring_buffer_event_data(event);
2858         entry->ip = ip;
2859
2860         memcpy(&entry->buf, tbuffer, len + 1);
2861         if (!call_filter_check_discard(call, entry, buffer, event)) {
2862                 __buffer_unlock_commit(buffer, event);
2863                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2864         }
2865
2866 out:
2867         put_trace_buf();
2868
2869 out_nobuffer:
2870         preempt_enable_notrace();
2871         unpause_graph_tracing();
2872
2873         return len;
2874 }
2875
2876 int trace_array_vprintk(struct trace_array *tr,
2877                         unsigned long ip, const char *fmt, va_list args)
2878 {
2879         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2880 }
2881
2882 int trace_array_printk(struct trace_array *tr,
2883                        unsigned long ip, const char *fmt, ...)
2884 {
2885         int ret;
2886         va_list ap;
2887
2888         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2889                 return 0;
2890
2891         va_start(ap, fmt);
2892         ret = trace_array_vprintk(tr, ip, fmt, ap);
2893         va_end(ap);
2894         return ret;
2895 }
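
/*
 * Usage sketch: write a formatted message into a specific trace instance
 * rather than the global buffer. The trace_array pointer is assumed to come
 * from wherever the instance was created; the message and its argument are
 * made up.
 *
 *	trace_array_printk(tr, _THIS_IP_, "queue depth now %d\n", depth);
 */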
2896
2897 int trace_array_printk_buf(struct ring_buffer *buffer,
2898                            unsigned long ip, const char *fmt, ...)
2899 {
2900         int ret;
2901         va_list ap;
2902
2903         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2904                 return 0;
2905
2906         va_start(ap, fmt);
2907         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2908         va_end(ap);
2909         return ret;
2910 }
2911
2912 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2913 {
2914         return trace_array_vprintk(&global_trace, ip, fmt, args);
2915 }
2916 EXPORT_SYMBOL_GPL(trace_vprintk);
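
/*
 * Example (sketch): trace_vprintk() is the va_list back end that the
 * trace_printk() machinery funnels into; a hypothetical varargs wrapper
 * would look like:
 *
 *	static int example_trace_printk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vprintk(ip, fmt, ap);
 *		va_end(ap);
 *
 *		return ret;
 *	}
 */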
2917
2918 static void trace_iterator_increment(struct trace_iterator *iter)
2919 {
2920         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2921
2922         iter->idx++;
2923         if (buf_iter)
2924                 ring_buffer_read(buf_iter, NULL);
2925 }
2926
2927 static struct trace_entry *
2928 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2929                 unsigned long *lost_events)
2930 {
2931         struct ring_buffer_event *event;
2932         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2933
2934         if (buf_iter)
2935                 event = ring_buffer_iter_peek(buf_iter, ts);
2936         else
2937                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2938                                          lost_events);
2939
2940         if (event) {
2941                 iter->ent_size = ring_buffer_event_length(event);
2942                 return ring_buffer_event_data(event);
2943         }
2944         iter->ent_size = 0;
2945         return NULL;
2946 }
2947
2948 static struct trace_entry *
2949 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2950                   unsigned long *missing_events, u64 *ent_ts)
2951 {
2952         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2953         struct trace_entry *ent, *next = NULL;
2954         unsigned long lost_events = 0, next_lost = 0;
2955         int cpu_file = iter->cpu_file;
2956         u64 next_ts = 0, ts;
2957         int next_cpu = -1;
2958         int next_size = 0;
2959         int cpu;
2960
2961         /*
2962          * If we are in a per_cpu trace file, don't bother iterating over
2963          * all the CPUs; peek at that CPU directly.
2964          */
2965         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2966                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2967                         return NULL;
2968                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2969                 if (ent_cpu)
2970                         *ent_cpu = cpu_file;
2971
2972                 return ent;
2973         }
2974
2975         for_each_tracing_cpu(cpu) {
2976
2977                 if (ring_buffer_empty_cpu(buffer, cpu))
2978                         continue;
2979
2980                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2981
2982                 /*
2983                  * Pick the entry with the smallest timestamp:
2984                  */
2985                 if (ent && (!next || ts < next_ts)) {
2986                         next = ent;
2987                         next_cpu = cpu;
2988                         next_ts = ts;
2989                         next_lost = lost_events;
2990                         next_size = iter->ent_size;
2991                 }
2992         }
2993
2994         iter->ent_size = next_size;
2995
2996         if (ent_cpu)
2997                 *ent_cpu = next_cpu;
2998
2999         if (ent_ts)
3000                 *ent_ts = next_ts;
3001
3002         if (missing_events)
3003                 *missing_events = next_lost;
3004
3005         return next;
3006 }
3007
3008 /* Find the next real entry, without updating the iterator itself */
3009 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3010                                           int *ent_cpu, u64 *ent_ts)
3011 {
3012         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3013 }
3014
3015 /* Find the next real entry, and increment the iterator to the next entry */
3016 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3017 {
3018         iter->ent = __find_next_entry(iter, &iter->cpu,
3019                                       &iter->lost_events, &iter->ts);
3020
3021         if (iter->ent)
3022                 trace_iterator_increment(iter);
3023
3024         return iter->ent ? iter : NULL;
3025 }
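
/*
 * Usage sketch: readers walk the merged per-CPU streams by repeatedly
 * advancing the iterator; each successful call leaves the current entry in
 * iter->ent, with iter->cpu and iter->ts describing where it came from.
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		// consume iter->ent here ...
 *	}
 */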
3026
3027 static void trace_consume(struct trace_iterator *iter)
3028 {
3029         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3030                             &iter->lost_events);
3031 }
3032
3033 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3034 {
3035         struct trace_iterator *iter = m->private;
3036         int i = (int)*pos;
3037         void *ent;
3038
3039         WARN_ON_ONCE(iter->leftover);
3040
3041         (*pos)++;
3042
3043         /* can't go backwards */
3044         if (iter->idx > i)
3045                 return NULL;
3046
3047         if (iter->idx < 0)
3048                 ent = trace_find_next_entry_inc(iter);
3049         else
3050                 ent = iter;
3051
3052         while (ent && iter->idx < i)
3053                 ent = trace_find_next_entry_inc(iter);
3054
3055         iter->pos = *pos;
3056
3057         return ent;
3058 }
3059
3060 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3061 {
3062         struct ring_buffer_event *event;
3063         struct ring_buffer_iter *buf_iter;
3064         unsigned long entries = 0;
3065         u64 ts;
3066
3067         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3068
3069         buf_iter = trace_buffer_iter(iter, cpu);
3070         if (!buf_iter)
3071                 return;
3072
3073         ring_buffer_iter_reset(buf_iter);
3074
3075         /*
3076          * We could have the case with the max latency tracers
3077          * that a reset never took place on a cpu. This is evidenced
3078          * by the timestamp being before the start of the buffer.
3079          */
3080         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3081                 if (ts >= iter->trace_buffer->time_start)
3082                         break;
3083                 entries++;
3084                 ring_buffer_read(buf_iter, NULL);
3085         }
3086
3087         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3088 }
3089
3090 /*
3091  * The current tracer is copied to avoid using a global lock
3092  * all around.
3093  */
3094 static void *s_start(struct seq_file *m, loff_t *pos)
3095 {
3096         struct trace_iterator *iter = m->private;
3097         struct trace_array *tr = iter->tr;
3098         int cpu_file = iter->cpu_file;
3099         void *p = NULL;
3100         loff_t l = 0;
3101         int cpu;
3102
3103         /*
3104          * copy the tracer to avoid using a global lock all around.
3105          * iter->trace is a copy of current_trace, the pointer to the
3106          * name may be used instead of a strcmp(), as iter->trace->name
3107          * will point to the same string as current_trace->name.
3108          */
3109         mutex_lock(&trace_types_lock);
3110         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3111                 *iter->trace = *tr->current_trace;
3112         mutex_unlock(&trace_types_lock);
3113
3114 #ifdef CONFIG_TRACER_MAX_TRACE
3115         if (iter->snapshot && iter->trace->use_max_tr)
3116                 return ERR_PTR(-EBUSY);
3117 #endif
3118
3119         if (!iter->snapshot)
3120                 atomic_inc(&trace_record_cmdline_disabled);
3121
3122         if (*pos != iter->pos) {
3123                 iter->ent = NULL;
3124                 iter->cpu = 0;
3125                 iter->idx = -1;
3126
3127                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3128                         for_each_tracing_cpu(cpu)
3129                                 tracing_iter_reset(iter, cpu);
3130                 } else
3131                         tracing_iter_reset(iter, cpu_file);
3132
3133                 iter->leftover = 0;
3134                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3135                         ;
3136
3137         } else {
3138                 /*
3139                  * If we overflowed the seq_file before, then we want
3140                  * to just reuse the trace_seq buffer again.
3141                  */
3142                 if (iter->leftover)
3143                         p = iter;
3144                 else {
3145                         l = *pos - 1;
3146                         p = s_next(m, p, &l);
3147                 }
3148         }
3149
3150         trace_event_read_lock();
3151         trace_access_lock(cpu_file);
3152         return p;
3153 }
3154
3155 static void s_stop(struct seq_file *m, void *p)
3156 {
3157         struct trace_iterator *iter = m->private;
3158
3159 #ifdef CONFIG_TRACER_MAX_TRACE
3160         if (iter->snapshot && iter->trace->use_max_tr)
3161                 return;
3162 #endif
3163
3164         if (!iter->snapshot)
3165                 atomic_dec(&trace_record_cmdline_disabled);
3166
3167         trace_access_unlock(iter->cpu_file);
3168         trace_event_read_unlock();
3169 }
3170
3171 static void
3172 get_total_entries(struct trace_buffer *buf,
3173                   unsigned long *total, unsigned long *entries)
3174 {
3175         unsigned long count;
3176         int cpu;
3177
3178         *total = 0;
3179         *entries = 0;
3180
3181         for_each_tracing_cpu(cpu) {
3182                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3183                 /*
3184                  * If this buffer has skipped entries, then we hold all
3185                  * entries for the trace and we need to ignore the
3186                  * ones before the time stamp.
3187                  */
3188                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3189                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3190                         /* total is the same as the entries */
3191                         *total += count;
3192                 } else
3193                         *total += count +
3194                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3195                 *entries += count;
3196         }
3197 }
3198
3199 static void print_lat_help_header(struct seq_file *m)
3200 {
3201         seq_puts(m, "#                  _------=> CPU#            \n"
3202                     "#                 / _-----=> irqs-off        \n"
3203                     "#                | / _----=> need-resched    \n"
3204                     "#                || / _---=> hardirq/softirq \n"
3205                     "#                ||| / _--=> preempt-depth   \n"
3206                     "#                |||| /     delay            \n"
3207                     "#  cmd     pid   ||||| time  |   caller      \n"
3208                     "#     \\   /      |||||  \\    |   /         \n");
3209 }
3210
3211 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3212 {
3213         unsigned long total;
3214         unsigned long entries;
3215
3216         get_total_entries(buf, &total, &entries);
3217         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3218                    entries, total, num_online_cpus());
3219         seq_puts(m, "#\n");
3220 }
3221
3222 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3223 {
3224         print_event_info(buf, m);
3225         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3226                     "#              | |       |          |         |\n");
3227 }
3228
3229 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3230 {
3231         print_event_info(buf, m);
3232         seq_puts(m, "#                              _-----=> irqs-off\n"
3233                     "#                             / _----=> need-resched\n"
3234                     "#                            | / _---=> hardirq/softirq\n"
3235                     "#                            || / _--=> preempt-depth\n"
3236                     "#                            ||| /     delay\n"
3237                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3238                     "#              | |       |   ||||       |         |\n");
3239 }
3240
3241 void
3242 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3243 {
3244         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3245         struct trace_buffer *buf = iter->trace_buffer;
3246         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3247         struct tracer *type = iter->trace;
3248         unsigned long entries;
3249         unsigned long total;
3250         const char *name = "preemption";
3251
3252         name = type->name;
3253
3254         get_total_entries(buf, &total, &entries);
3255
3256         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3257                    name, UTS_RELEASE);
3258         seq_puts(m, "# -----------------------------------"
3259                  "---------------------------------\n");
3260         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3261                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3262                    nsecs_to_usecs(data->saved_latency),
3263                    entries,
3264                    total,
3265                    buf->cpu,
3266 #if defined(CONFIG_PREEMPT_NONE)
3267                    "server",
3268 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3269                    "desktop",
3270 #elif defined(CONFIG_PREEMPT)
3271                    "preempt",
3272 #else
3273                    "unknown",
3274 #endif
3275                    /* These are reserved for later use */
3276                    0, 0, 0, 0);
3277 #ifdef CONFIG_SMP
3278         seq_printf(m, " #P:%d)\n", num_online_cpus());
3279 #else
3280         seq_puts(m, ")\n");
3281 #endif
3282         seq_puts(m, "#    -----------------\n");
3283         seq_printf(m, "#    | task: %.16s-%d "
3284                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3285                    data->comm, data->pid,
3286                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3287                    data->policy, data->rt_priority);
3288         seq_puts(m, "#    -----------------\n");
3289
3290         if (data->critical_start) {
3291                 seq_puts(m, "#  => started at: ");
3292                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3293                 trace_print_seq(m, &iter->seq);
3294                 seq_puts(m, "\n#  => ended at:   ");
3295                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3296                 trace_print_seq(m, &iter->seq);
3297                 seq_puts(m, "\n#\n");
3298         }
3299
3300         seq_puts(m, "#\n");
3301 }
3302
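/*
 * If this buffer had overruns (TRACE_FILE_ANNOTATE), emit a
 * "##### CPU %u buffer started ####" marker the first time an entry
 * from a given CPU is printed, so the reader knows that earlier
 * events from that CPU were dropped.
 */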
3303 static void test_cpu_buff_start(struct trace_iterator *iter)
3304 {
3305         struct trace_seq *s = &iter->seq;
3306         struct trace_array *tr = iter->tr;
3307
3308         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3309                 return;
3310
3311         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3312                 return;
3313
3314         if (cpumask_available(iter->started) &&
3315             cpumask_test_cpu(iter->cpu, iter->started))
3316                 return;
3317
3318         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3319                 return;
3320
3321         if (cpumask_available(iter->started))
3322                 cpumask_set_cpu(iter->cpu, iter->started);
3323
3324         /* Don't print started cpu buffer for the first entry of the trace */
3325         if (iter->idx > 1)
3326                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3327                                 iter->cpu);
3328 }
3329
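/*
 * Default human-readable output for one entry: print the context
 * columns, then hand the entry to the registered event's trace()
 * callback, or report an unknown entry type.
 */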
3330 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3331 {
3332         struct trace_array *tr = iter->tr;
3333         struct trace_seq *s = &iter->seq;
3334         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3335         struct trace_entry *entry;
3336         struct trace_event *event;
3337
3338         entry = iter->ent;
3339
3340         test_cpu_buff_start(iter);
3341
3342         event = ftrace_find_event(entry->type);
3343
3344         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3345                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3346                         trace_print_lat_context(iter);
3347                 else
3348                         trace_print_context(iter);
3349         }
3350
3351         if (trace_seq_has_overflowed(s))
3352                 return TRACE_TYPE_PARTIAL_LINE;
3353
3354         if (event)
3355                 return event->funcs->trace(iter, sym_flags, event);
3356
3357         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3358
3359         return trace_handle_return(s);
3360 }
3361
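/*
 * Raw output format (TRACE_ITER_RAW): pid, cpu and timestamp followed
 * by the event's raw() callback output.
 */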
3362 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3363 {
3364         struct trace_array *tr = iter->tr;
3365         struct trace_seq *s = &iter->seq;
3366         struct trace_entry *entry;
3367         struct trace_event *event;
3368
3369         entry = iter->ent;
3370
3371         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3372                 trace_seq_printf(s, "%d %d %llu ",
3373                                  entry->pid, iter->cpu, iter->ts);
3374
3375         if (trace_seq_has_overflowed(s))
3376                 return TRACE_TYPE_PARTIAL_LINE;
3377
3378         event = ftrace_find_event(entry->type);
3379         if (event)
3380                 return event->funcs->raw(iter, 0, event);
3381
3382         trace_seq_printf(s, "%d ?\n", entry->type);
3383
3384         return trace_handle_return(s);
3385 }
3386
3387 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3388 {
3389         struct trace_array *tr = iter->tr;
3390         struct trace_seq *s = &iter->seq;
3391         unsigned char newline = '\n';
3392         struct trace_entry *entry;
3393         struct trace_event *event;
3394
3395         entry = iter->ent;
3396
3397         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3398                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3399                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3400                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3401                 if (trace_seq_has_overflowed(s))
3402                         return TRACE_TYPE_PARTIAL_LINE;
3403         }
3404
3405         event = ftrace_find_event(entry->type);
3406         if (event) {
3407                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3408                 if (ret != TRACE_TYPE_HANDLED)
3409                         return ret;
3410         }
3411
3412         SEQ_PUT_FIELD(s, newline);
3413
3414         return trace_handle_return(s);
3415 }
3416
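/*
 * Binary output format (TRACE_ITER_BIN): emit pid, cpu and timestamp
 * as raw fields, then the event's binary() callback output.
 */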
3417 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3418 {
3419         struct trace_array *tr = iter->tr;
3420         struct trace_seq *s = &iter->seq;
3421         struct trace_entry *entry;
3422         struct trace_event *event;
3423
3424         entry = iter->ent;
3425
3426         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3427                 SEQ_PUT_FIELD(s, entry->pid);
3428                 SEQ_PUT_FIELD(s, iter->cpu);
3429                 SEQ_PUT_FIELD(s, iter->ts);
3430                 if (trace_seq_has_overflowed(s))
3431                         return TRACE_TYPE_PARTIAL_LINE;
3432         }
3433
3434         event = ftrace_find_event(entry->type);
3435         return event ? event->funcs->binary(iter, 0, event) :
3436                 TRACE_TYPE_HANDLED;
3437 }
3438
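/*
 * Return 1 if there is nothing left to read in the buffer(s) this
 * iterator covers, 0 otherwise.  Honours iter->cpu_file so a per-cpu
 * trace file only checks its own CPU buffer.
 */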
3439 int trace_empty(struct trace_iterator *iter)
3440 {
3441         struct ring_buffer_iter *buf_iter;
3442         int cpu;
3443
3444         /* If we are looking at one CPU buffer, only check that one */
3445         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3446                 cpu = iter->cpu_file;
3447                 buf_iter = trace_buffer_iter(iter, cpu);
3448                 if (buf_iter) {
3449                         if (!ring_buffer_iter_empty(buf_iter))
3450                                 return 0;
3451                 } else {
3452                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3453                                 return 0;
3454                 }
3455                 return 1;
3456         }
3457
3458         for_each_tracing_cpu(cpu) {
3459                 buf_iter = trace_buffer_iter(iter, cpu);
3460                 if (buf_iter) {
3461                         if (!ring_buffer_iter_empty(buf_iter))
3462                                 return 0;
3463                 } else {
3464                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3465                                 return 0;
3466                 }
3467         }
3468
3469         return 1;
3470 }
3471
3472 /*  Called with trace_event_read_lock() held. */
3473 enum print_line_t print_trace_line(struct trace_iterator *iter)
3474 {
3475         struct trace_array *tr = iter->tr;
3476         unsigned long trace_flags = tr->trace_flags;
3477         enum print_line_t ret;
3478
3479         if (iter->lost_events) {
3480                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3481                                  iter->cpu, iter->lost_events);
3482                 if (trace_seq_has_overflowed(&iter->seq))
3483                         return TRACE_TYPE_PARTIAL_LINE;
3484         }
3485
3486         if (iter->trace && iter->trace->print_line) {
3487                 ret = iter->trace->print_line(iter);
3488                 if (ret != TRACE_TYPE_UNHANDLED)
3489                         return ret;
3490         }
3491
3492         if (iter->ent->type == TRACE_BPUTS &&
3493                         trace_flags & TRACE_ITER_PRINTK &&
3494                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3495                 return trace_print_bputs_msg_only(iter);
3496
3497         if (iter->ent->type == TRACE_BPRINT &&
3498                         trace_flags & TRACE_ITER_PRINTK &&
3499                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3500                 return trace_print_bprintk_msg_only(iter);
3501
3502         if (iter->ent->type == TRACE_PRINT &&
3503                         trace_flags & TRACE_ITER_PRINTK &&
3504                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3505                 return trace_print_printk_msg_only(iter);
3506
3507         if (trace_flags & TRACE_ITER_BIN)
3508                 return print_bin_fmt(iter);
3509
3510         if (trace_flags & TRACE_ITER_HEX)
3511                 return print_hex_fmt(iter);
3512
3513         if (trace_flags & TRACE_ITER_RAW)
3514                 return print_raw_fmt(iter);
3515
3516         return print_trace_fmt(iter);
3517 }
3518
3519 void trace_latency_header(struct seq_file *m)
3520 {
3521         struct trace_iterator *iter = m->private;
3522         struct trace_array *tr = iter->tr;
3523
3524         /* print nothing if the buffers are empty */
3525         if (trace_empty(iter))
3526                 return;
3527
3528         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3529                 print_trace_header(m, iter);
3530
3531         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3532                 print_lat_help_header(m);
3533 }
3534
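/*
 * Print the header of the "trace" file: the latency-style header when
 * TRACE_FILE_LAT_FMT is set, otherwise one of the function-style
 * column legends (with or without the irq-info columns).
 */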
3535 void trace_default_header(struct seq_file *m)
3536 {
3537         struct trace_iterator *iter = m->private;
3538         struct trace_array *tr = iter->tr;
3539         unsigned long trace_flags = tr->trace_flags;
3540
3541         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3542                 return;
3543
3544         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3545                 /* print nothing if the buffers are empty */
3546                 if (trace_empty(iter))
3547                         return;
3548                 print_trace_header(m, iter);
3549                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3550                         print_lat_help_header(m);
3551         } else {
3552                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3553                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3554                                 print_func_help_header_irq(iter->trace_buffer, m);
3555                         else
3556                                 print_func_help_header(iter->trace_buffer, m);
3557                 }
3558         }
3559 }
3560
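/*
 * Warn in the output if function tracing was permanently disabled
 * (ftrace_is_dead()), since function events may then be missing.
 */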
3561 static void test_ftrace_alive(struct seq_file *m)
3562 {
3563         if (!ftrace_is_dead())
3564                 return;
3565         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3566                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3567 }
3568
3569 #ifdef CONFIG_TRACER_MAX_TRACE
3570 static void show_snapshot_main_help(struct seq_file *m)
3571 {
3572         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3573                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3574                     "#                      Takes a snapshot of the main buffer.\n"
3575                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3576                     "#                      (Doesn't have to be '2'; works with any number that\n"
3577                     "#                       is not a '0' or '1')\n");
3578 }
3579
3580 static void show_snapshot_percpu_help(struct seq_file *m)
3581 {
3582         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3583 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3584         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3585                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3586 #else
3587         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3588                     "#                     Must use main snapshot file to allocate.\n");
3589 #endif
3590         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3591                     "#                      (Doesn't have to be '2'; works with any number that\n"
3592                     "#                       is not a '0' or '1')\n");
3593 }
3594
3595 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3596 {
3597         if (iter->tr->allocated_snapshot)
3598                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3599         else
3600                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3601
3602         seq_puts(m, "# Snapshot commands:\n");
3603         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3604                 show_snapshot_main_help(m);
3605         else
3606                 show_snapshot_percpu_help(m);
3607 }
3608 #else
3609 /* Should never be called */
3610 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3611 #endif
3612
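/*
 * seq_file ->show() for the "trace" file.  The first call (no current
 * entry) prints the headers; otherwise print the next trace line and
 * remember in iter->leftover whether the seq_file buffer overflowed,
 * so the same line can be re-emitted on the next call.
 */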
3613 static int s_show(struct seq_file *m, void *v)
3614 {
3615         struct trace_iterator *iter = v;
3616         int ret;
3617
3618         if (iter->ent == NULL) {
3619                 if (iter->tr) {
3620                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3621                         seq_puts(m, "#\n");
3622                         test_ftrace_alive(m);
3623                 }
3624                 if (iter->snapshot && trace_empty(iter))
3625                         print_snapshot_help(m, iter);
3626                 else if (iter->trace && iter->trace->print_header)
3627                         iter->trace->print_header(m);
3628                 else
3629                         trace_default_header(m);
3630
3631         } else if (iter->leftover) {
3632                 /*
3633                  * If we filled the seq_file buffer earlier, we
3634                  * want to just show it now.
3635                  */
3636                 ret = trace_print_seq(m, &iter->seq);
3637
3638                 /* ret should this time be zero, but you never know */
3639                 iter->leftover = ret;
3640
3641         } else {
3642                 print_trace_line(iter);
3643                 ret = trace_print_seq(m, &iter->seq);
3644                 /*
3645                  * If we overflow the seq_file buffer, then it will
3646                  * ask us for this data again at start up.
3647                  * Use that instead.
3648                  *  ret is 0 if seq_file write succeeded.
3649                  *        -1 otherwise.
3650                  */
3651                 iter->leftover = ret;
3652         }
3653
3654         return 0;
3655 }
3656
3657 /*
3658  * Should be used after trace_array_get(), trace_types_lock
3659  * ensures that i_cdev was already initialized.
3660  */
3661 static inline int tracing_get_cpu(struct inode *inode)
3662 {
3663         if (inode->i_cdev) /* See trace_create_cpu_file() */
3664                 return (long)inode->i_cdev - 1;
3665         return RING_BUFFER_ALL_CPUS;
3666 }
3667
3668 static const struct seq_operations tracer_seq_ops = {
3669         .start          = s_start,
3670         .next           = s_next,
3671         .stop           = s_stop,
3672         .show           = s_show,
3673 };
3674
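/*
 * Set up a trace_iterator for reading the "trace" (or "snapshot")
 * file: copy the current tracer, pick the buffer to read, stop
 * tracing unless this is the snapshot file, and prepare a ring buffer
 * iterator for each CPU (or only the CPU of a per-cpu file).
 */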
3675 static struct trace_iterator *
3676 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3677 {
3678         struct trace_array *tr = inode->i_private;
3679         struct trace_iterator *iter;
3680         int cpu;
3681
3682         if (tracing_disabled)
3683                 return ERR_PTR(-ENODEV);
3684
3685         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3686         if (!iter)
3687                 return ERR_PTR(-ENOMEM);
3688
3689         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3690                                     GFP_KERNEL);
3691         if (!iter->buffer_iter)
3692                 goto release;
3693
3694         /*
3695          * We make a copy of the current tracer to avoid concurrent
3696          * changes on it while we are reading.
3697          */
3698         mutex_lock(&trace_types_lock);
3699         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3700         if (!iter->trace)
3701                 goto fail;
3702
3703         *iter->trace = *tr->current_trace;
3704
3705         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3706                 goto fail;
3707
3708         iter->tr = tr;
3709
3710 #ifdef CONFIG_TRACER_MAX_TRACE
3711         /* Currently only the top directory has a snapshot */
3712         if (tr->current_trace->print_max || snapshot)
3713                 iter->trace_buffer = &tr->max_buffer;
3714         else
3715 #endif
3716                 iter->trace_buffer = &tr->trace_buffer;
3717         iter->snapshot = snapshot;
3718         iter->pos = -1;
3719         iter->cpu_file = tracing_get_cpu(inode);
3720         mutex_init(&iter->mutex);
3721
3722         /* Notify the tracer early; before we stop tracing. */
3723         if (iter->trace && iter->trace->open)
3724                 iter->trace->open(iter);
3725
3726         /* Annotate start of buffers if we had overruns */
3727         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3728                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3729
3730         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3731         if (trace_clocks[tr->clock_id].in_ns)
3732                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3733
3734         /* stop the trace while dumping if we are not opening "snapshot" */
3735         if (!iter->snapshot)
3736                 tracing_stop_tr(tr);
3737
3738         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3739                 for_each_tracing_cpu(cpu) {
3740                         iter->buffer_iter[cpu] =
3741                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3742                 }
3743                 ring_buffer_read_prepare_sync();
3744                 for_each_tracing_cpu(cpu) {
3745                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3746                         tracing_iter_reset(iter, cpu);
3747                 }
3748         } else {
3749                 cpu = iter->cpu_file;
3750                 iter->buffer_iter[cpu] =
3751                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3752                 ring_buffer_read_prepare_sync();
3753                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3754                 tracing_iter_reset(iter, cpu);
3755         }
3756
3757         mutex_unlock(&trace_types_lock);
3758
3759         return iter;
3760
3761  fail:
3762         mutex_unlock(&trace_types_lock);
3763         kfree(iter->trace);
3764         kfree(iter->buffer_iter);
3765 release:
3766         seq_release_private(inode, file);
3767         return ERR_PTR(-ENOMEM);
3768 }
3769
3770 int tracing_open_generic(struct inode *inode, struct file *filp)
3771 {
3772         if (tracing_disabled)
3773                 return -ENODEV;
3774
3775         filp->private_data = inode->i_private;
3776         return 0;
3777 }
3778
3779 bool tracing_is_disabled(void)
3780 {
3781         return tracing_disabled;
3782 }
3783
3784 /*
3785  * Open and update trace_array ref count.
3786  * Must have the current trace_array passed to it.
3787  */
3788 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3789 {
3790         struct trace_array *tr = inode->i_private;
3791
3792         if (tracing_disabled)
3793                 return -ENODEV;
3794
3795         if (trace_array_get(tr) < 0)
3796                 return -ENODEV;
3797
3798         filp->private_data = inode->i_private;
3799
3800         return 0;
3801 }
3802
3803 static int tracing_release(struct inode *inode, struct file *file)
3804 {
3805         struct trace_array *tr = inode->i_private;
3806         struct seq_file *m = file->private_data;
3807         struct trace_iterator *iter;
3808         int cpu;
3809
3810         if (!(file->f_mode & FMODE_READ)) {
3811                 trace_array_put(tr);
3812                 return 0;
3813         }
3814
3815         /* Writes do not use seq_file */
3816         iter = m->private;
3817         mutex_lock(&trace_types_lock);
3818
3819         for_each_tracing_cpu(cpu) {
3820                 if (iter->buffer_iter[cpu])
3821                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3822         }
3823
3824         if (iter->trace && iter->trace->close)
3825                 iter->trace->close(iter);
3826
3827         if (!iter->snapshot)
3828                 /* reenable tracing if it was previously enabled */
3829                 tracing_start_tr(tr);
3830
3831         __trace_array_put(tr);
3832
3833         mutex_unlock(&trace_types_lock);
3834
3835         mutex_destroy(&iter->mutex);
3836         free_cpumask_var(iter->started);
3837         kfree(iter->trace);
3838         kfree(iter->buffer_iter);
3839         seq_release_private(inode, file);
3840
3841         return 0;
3842 }
3843
3844 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3845 {
3846         struct trace_array *tr = inode->i_private;
3847
3848         trace_array_put(tr);
3849         return 0;
3850 }
3851
3852 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3853 {
3854         struct trace_array *tr = inode->i_private;
3855
3856         trace_array_put(tr);
3857
3858         return single_release(inode, file);
3859 }
3860
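/*
 * Open the "trace" file.  Opening for write with O_TRUNC erases the
 * buffer (all CPUs, or just the per-cpu file's CPU); opening for read
 * builds the iterator via __tracing_open().
 */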
3861 static int tracing_open(struct inode *inode, struct file *file)
3862 {
3863         struct trace_array *tr = inode->i_private;
3864         struct trace_iterator *iter;
3865         int ret = 0;
3866
3867         if (trace_array_get(tr) < 0)
3868                 return -ENODEV;
3869
3870         /* If this file was open for write, then erase contents */
3871         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3872                 int cpu = tracing_get_cpu(inode);
3873
3874                 if (cpu == RING_BUFFER_ALL_CPUS)
3875                         tracing_reset_online_cpus(&tr->trace_buffer);
3876                 else
3877                         tracing_reset(&tr->trace_buffer, cpu);
3878         }
3879
3880         if (file->f_mode & FMODE_READ) {
3881                 iter = __tracing_open(inode, file, false);
3882                 if (IS_ERR(iter))
3883                         ret = PTR_ERR(iter);
3884                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3885                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3886         }
3887
3888         if (ret < 0)
3889                 trace_array_put(tr);
3890
3891         return ret;
3892 }
3893
3894 /*
3895  * Some tracers are not suitable for instance buffers.
3896  * A tracer is always available for the global array (toplevel)
3897  * or if it explicitly states that it is.
3898  */
3899 static bool
3900 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3901 {
3902         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3903 }
3904
3905 /* Find the next tracer that this trace array may use */
3906 static struct tracer *
3907 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3908 {
3909         while (t && !trace_ok_for_array(t, tr))
3910                 t = t->next;
3911
3912         return t;
3913 }
3914
3915 static void *
3916 t_next(struct seq_file *m, void *v, loff_t *pos)
3917 {
3918         struct trace_array *tr = m->private;
3919         struct tracer *t = v;
3920
3921         (*pos)++;
3922
3923         if (t)
3924                 t = get_tracer_for_array(tr, t->next);
3925
3926         return t;
3927 }
3928
3929 static void *t_start(struct seq_file *m, loff_t *pos)
3930 {
3931         struct trace_array *tr = m->private;
3932         struct tracer *t;
3933         loff_t l = 0;
3934
3935         mutex_lock(&trace_types_lock);
3936
3937         t = get_tracer_for_array(tr, trace_types);
3938         for (; t && l < *pos; t = t_next(m, t, &l))
3939                 ;
3940
3941         return t;
3942 }
3943
3944 static void t_stop(struct seq_file *m, void *p)
3945 {
3946         mutex_unlock(&trace_types_lock);
3947 }
3948
3949 static int t_show(struct seq_file *m, void *v)
3950 {
3951         struct tracer *t = v;
3952
3953         if (!t)
3954                 return 0;
3955
3956         seq_puts(m, t->name);
3957         if (t->next)
3958                 seq_putc(m, ' ');
3959         else
3960                 seq_putc(m, '\n');
3961
3962         return 0;
3963 }
3964
3965 static const struct seq_operations show_traces_seq_ops = {
3966         .start          = t_start,
3967         .next           = t_next,
3968         .stop           = t_stop,
3969         .show           = t_show,
3970 };
3971
3972 static int show_traces_open(struct inode *inode, struct file *file)
3973 {
3974         struct trace_array *tr = inode->i_private;
3975         struct seq_file *m;
3976         int ret;
3977
3978         if (tracing_disabled)
3979                 return -ENODEV;
3980
3981         ret = seq_open(file, &show_traces_seq_ops);
3982         if (ret)
3983                 return ret;
3984
3985         m = file->private_data;
3986         m->private = tr;
3987
3988         return 0;
3989 }
3990
3991 static ssize_t
3992 tracing_write_stub(struct file *filp, const char __user *ubuf,
3993                    size_t count, loff_t *ppos)
3994 {
3995         return count;
3996 }
3997
3998 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3999 {
4000         int ret;
4001
4002         if (file->f_mode & FMODE_READ)
4003                 ret = seq_lseek(file, offset, whence);
4004         else
4005                 file->f_pos = ret = 0;
4006
4007         return ret;
4008 }
4009
4010 static const struct file_operations tracing_fops = {
4011         .open           = tracing_open,
4012         .read           = seq_read,
4013         .write          = tracing_write_stub,
4014         .llseek         = tracing_lseek,
4015         .release        = tracing_release,
4016 };
4017
4018 static const struct file_operations show_traces_fops = {
4019         .open           = show_traces_open,
4020         .read           = seq_read,
4021         .release        = seq_release,
4022         .llseek         = seq_lseek,
4023 };
4024
4025 /*
4026  * The tracer itself will not take this lock, but still we want
4027  * to provide a consistent cpumask to user-space:
4028  */
4029 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4030
4031 /*
4032  * Temporary storage for the character representation of the
4033  * CPU bitmask (and one more byte for the newline):
4034  */
4035 static char mask_str[NR_CPUS + 1];
4036
4037 static ssize_t
4038 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4039                      size_t count, loff_t *ppos)
4040 {
4041         struct trace_array *tr = file_inode(filp)->i_private;
4042         int len;
4043
4044         mutex_lock(&tracing_cpumask_update_lock);
4045
4046         len = snprintf(mask_str, sizeof(mask_str), "%*pb\n",
4047                        cpumask_pr_args(tr->tracing_cpumask));
4048         if (len >= count) {
4049                 count = -EINVAL;
4050                 goto out_err;
4051         }
4052         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4053
4054 out_err:
4055         mutex_unlock(&tracing_cpumask_update_lock);
4056
4057         return count;
4058 }
4059
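/*
 * Update which CPUs are traced.  The file takes a hex CPU mask
 * (cpumask_parse_user()), e.g.
 *
 *	echo 3 > tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1.  CPUs removed from the mask have
 * their per-cpu buffers disabled before the new mask is installed.
 */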
4060 static ssize_t
4061 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4062                       size_t count, loff_t *ppos)
4063 {
4064         struct trace_array *tr = file_inode(filp)->i_private;
4065         cpumask_var_t tracing_cpumask_new;
4066         int err, cpu;
4067
4068         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4069                 return -ENOMEM;
4070
4071         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4072         if (err)
4073                 goto err_unlock;
4074
4075         mutex_lock(&tracing_cpumask_update_lock);
4076
4077         local_irq_disable();
4078         arch_spin_lock(&tr->max_lock);
4079         for_each_tracing_cpu(cpu) {
4080                 /*
4081                  * Increase/decrease the disabled counter if we are
4082                  * about to flip a bit in the cpumask:
4083                  */
4084                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4085                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4086                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4087                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4088                 }
4089                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4090                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4091                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4092                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4093                 }
4094         }
4095         arch_spin_unlock(&tr->max_lock);
4096         local_irq_enable();
4097
4098         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4099
4100         mutex_unlock(&tracing_cpumask_update_lock);
4101         free_cpumask_var(tracing_cpumask_new);
4102
4103         return count;
4104
4105 err_unlock:
4106         free_cpumask_var(tracing_cpumask_new);
4107
4108         return err;
4109 }
4110
4111 static const struct file_operations tracing_cpumask_fops = {
4112         .open           = tracing_open_generic_tr,
4113         .read           = tracing_cpumask_read,
4114         .write          = tracing_cpumask_write,
4115         .release        = tracing_release_generic_tr,
4116         .llseek         = generic_file_llseek,
4117 };
4118
4119 static int tracing_trace_options_show(struct seq_file *m, void *v)
4120 {
4121         struct tracer_opt *trace_opts;
4122         struct trace_array *tr = m->private;
4123         u32 tracer_flags;
4124         int i;
4125
4126         mutex_lock(&trace_types_lock);
4127         tracer_flags = tr->current_trace->flags->val;
4128         trace_opts = tr->current_trace->flags->opts;
4129
4130         for (i = 0; trace_options[i]; i++) {
4131                 if (tr->trace_flags & (1 << i))
4132                         seq_printf(m, "%s\n", trace_options[i]);
4133                 else
4134                         seq_printf(m, "no%s\n", trace_options[i]);
4135         }
4136
4137         for (i = 0; trace_opts[i].name; i++) {
4138                 if (tracer_flags & trace_opts[i].bit)
4139                         seq_printf(m, "%s\n", trace_opts[i].name);
4140                 else
4141                         seq_printf(m, "no%s\n", trace_opts[i].name);
4142         }
4143         mutex_unlock(&trace_types_lock);
4144
4145         return 0;
4146 }
4147
4148 static int __set_tracer_option(struct trace_array *tr,
4149                                struct tracer_flags *tracer_flags,
4150                                struct tracer_opt *opts, int neg)
4151 {
4152         struct tracer *trace = tracer_flags->trace;
4153         int ret;
4154
4155         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4156         if (ret)
4157                 return ret;
4158
4159         if (neg)
4160                 tracer_flags->val &= ~opts->bit;
4161         else
4162                 tracer_flags->val |= opts->bit;
4163         return 0;
4164 }
4165
4166 /* Try to assign a tracer specific option */
4167 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4168 {
4169         struct tracer *trace = tr->current_trace;
4170         struct tracer_flags *tracer_flags = trace->flags;
4171         struct tracer_opt *opts = NULL;
4172         int i;
4173
4174         for (i = 0; tracer_flags->opts[i].name; i++) {
4175                 opts = &tracer_flags->opts[i];
4176
4177                 if (strcmp(cmp, opts->name) == 0)
4178                         return __set_tracer_option(tr, trace->flags, opts, neg);
4179         }
4180
4181         return -EINVAL;
4182 }
4183
4184 /* Some tracers require overwrite to stay enabled */
4185 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4186 {
4187         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4188                 return -1;
4189
4190         return 0;
4191 }
4192
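/*
 * Set or clear one of the generic trace option flags, after giving the
 * current tracer a chance to veto the change, and propagate the side
 * effects (cmdline recording, fork following, buffer overwrite mode,
 * trace_printk) for the flags that need them.
 */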
4193 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4194 {
4195         /* do nothing if flag is already set */
4196         if (!!(tr->trace_flags & mask) == !!enabled)
4197                 return 0;
4198
4199         /* Give the tracer a chance to approve the change */
4200         if (tr->current_trace->flag_changed)
4201                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4202                         return -EINVAL;
4203
4204         if (enabled)
4205                 tr->trace_flags |= mask;
4206         else
4207                 tr->trace_flags &= ~mask;
4208
4209         if (mask == TRACE_ITER_RECORD_CMD)
4210                 trace_event_enable_cmd_record(enabled);
4211
4212         if (mask == TRACE_ITER_EVENT_FORK)
4213                 trace_event_follow_fork(tr, enabled);
4214
4215         if (mask == TRACE_ITER_FUNC_FORK)
4216                 ftrace_pid_follow_fork(tr, enabled);
4217
4218         if (mask == TRACE_ITER_OVERWRITE) {
4219                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4220 #ifdef CONFIG_TRACER_MAX_TRACE
4221                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4222 #endif
4223         }
4224
4225         if (mask == TRACE_ITER_PRINTK) {
4226                 trace_printk_start_stop_comm(enabled);
4227                 trace_printk_control(enabled);
4228         }
4229
4230         return 0;
4231 }
4232
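/*
 * Parse one option string written to "trace_options".  A "no" prefix
 * clears the option, e.g. (option names depend on the kernel's option
 * table):
 *
 *	echo nooverwrite > trace_options
 *
 * Options not found in the generic list are passed to the current
 * tracer's own flags via set_tracer_option().
 */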
4233 static int trace_set_options(struct trace_array *tr, char *option)
4234 {
4235         char *cmp;
4236         int neg = 0;
4237         int ret = -ENODEV;
4238         int i;
4239         size_t orig_len = strlen(option);
4240
4241         cmp = strstrip(option);
4242
4243         if (strncmp(cmp, "no", 2) == 0) {
4244                 neg = 1;
4245                 cmp += 2;
4246         }
4247
4248         mutex_lock(&trace_types_lock);
4249
4250         for (i = 0; trace_options[i]; i++) {
4251                 if (strcmp(cmp, trace_options[i]) == 0) {
4252                         ret = set_tracer_flag(tr, 1 << i, !neg);
4253                         break;
4254                 }
4255         }
4256
4257         /* If no option could be set, test the specific tracer options */
4258         if (!trace_options[i])
4259                 ret = set_tracer_option(tr, cmp, neg);
4260
4261         mutex_unlock(&trace_types_lock);
4262
4263         /*
4264          * If the first trailing whitespace is replaced with '\0' by strstrip,
4265          * turn it back into a space.
4266          */
4267         if (orig_len > strlen(option))
4268                 option[strlen(option)] = ' ';
4269
4270         return ret;
4271 }
4272
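/*
 * Apply the comma-separated list from the trace_options boot parameter
 * to the global trace array.  The commas are restored after strsep()
 * so the buffer can be parsed again later.
 */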
4273 static void __init apply_trace_boot_options(void)
4274 {
4275         char *buf = trace_boot_options_buf;
4276         char *option;
4277
4278         while (true) {
4279                 option = strsep(&buf, ",");
4280
4281                 if (!option)
4282                         break;
4283
4284                 if (*option)
4285                         trace_set_options(&global_trace, option);
4286
4287                 /* Put back the comma to allow this to be called again */
4288                 if (buf)
4289                         *(buf - 1) = ',';
4290         }
4291 }
4292
4293 static ssize_t
4294 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4295                         size_t cnt, loff_t *ppos)
4296 {
4297         struct seq_file *m = filp->private_data;
4298         struct trace_array *tr = m->private;
4299         char buf[64];
4300         int ret;
4301
4302         if (cnt >= sizeof(buf))
4303                 return -EINVAL;
4304
4305         if (copy_from_user(buf, ubuf, cnt))
4306                 return -EFAULT;
4307
4308         buf[cnt] = 0;
4309
4310         ret = trace_set_options(tr, buf);
4311         if (ret < 0)
4312                 return ret;
4313
4314         *ppos += cnt;
4315
4316         return cnt;
4317 }
4318
4319 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4320 {
4321         struct trace_array *tr = inode->i_private;
4322         int ret;
4323
4324         if (tracing_disabled)
4325                 return -ENODEV;
4326
4327         if (trace_array_get(tr) < 0)
4328                 return -ENODEV;
4329
4330         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4331         if (ret < 0)
4332                 trace_array_put(tr);
4333
4334         return ret;
4335 }
4336
4337 static const struct file_operations tracing_iter_fops = {
4338         .open           = tracing_trace_options_open,
4339         .read           = seq_read,
4340         .llseek         = seq_lseek,
4341         .release        = tracing_single_release_tr,
4342         .write          = tracing_trace_options_write,
4343 };
4344
4345 static const char readme_msg[] =
4346         "tracing mini-HOWTO:\n\n"
4347         "# echo 0 > tracing_on : quick way to disable tracing\n"
4348         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4349         " Important files:\n"
4350         "  trace\t\t\t- The static contents of the buffer\n"
4351         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4352         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4353         "  current_tracer\t- function and latency tracers\n"
4354         "  available_tracers\t- list of configured tracers for current_tracer\n"
4355         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4356         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4357         "  trace_clock\t\t- change the clock used to order events\n"
4358         "       local:   Per cpu clock but may not be synced across CPUs\n"
4359         "      global:   Synced across CPUs but slows tracing down.\n"
4360         "     counter:   Not a clock, but just an increment\n"
4361         "      uptime:   Jiffy counter from time of boot\n"
4362         "        perf:   Same clock that perf events use\n"
4363 #ifdef CONFIG_X86_64
4364         "     x86-tsc:   TSC cycle counter\n"
4365 #endif
4366         "\n  trace_marker\t\t- Writes to this file are placed into the kernel buffer\n"
4367         "\n  trace_marker_raw\t\t- Writes to this file are placed as binary data into the kernel buffer\n"
4368         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4369         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4370         "\t\t\t  Remove sub-buffer with rmdir\n"
4371         "  trace_options\t\t- Set format or modify how tracing happens\n"
4372         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4373         "\t\t\t  option name\n"
4374         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4375 #ifdef CONFIG_DYNAMIC_FTRACE
4376         "\n  available_filter_functions - list of functions that can be filtered on\n"
4377         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4378         "\t\t\t  functions\n"
4379         "\t     accepts: func_full_name or glob-matching-pattern\n"
4380         "\t     modules: Can select a group via module\n"
4381         "\t      Format: :mod:<module-name>\n"
4382         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4383         "\t    triggers: a command to perform when function is hit\n"
4384         "\t      Format: <function>:<trigger>[:count]\n"
4385         "\t     trigger: traceon, traceoff\n"
4386         "\t\t      enable_event:<system>:<event>\n"
4387         "\t\t      disable_event:<system>:<event>\n"
4388 #ifdef CONFIG_STACKTRACE
4389         "\t\t      stacktrace\n"
4390 #endif
4391 #ifdef CONFIG_TRACER_SNAPSHOT
4392         "\t\t      snapshot\n"
4393 #endif
4394         "\t\t      dump\n"
4395         "\t\t      cpudump\n"
4396         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4397         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4398         "\t     The first one will disable tracing every time do_fault is hit\n"
4399         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4400         "\t       The first time do_trap is hit and it disables tracing, the\n"
4401         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4402         "\t       the counter will not decrement. It only decrements when the\n"
4403         "\t       trigger did work\n"
4404         "\t     To remove trigger without count:\n"
4405         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4406         "\t     To remove trigger with a count:\n"
4407         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4408         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4409         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4410         "\t    modules: Can select a group via module command :mod:\n"
4411         "\t    Does not accept triggers\n"
4412 #endif /* CONFIG_DYNAMIC_FTRACE */
4413 #ifdef CONFIG_FUNCTION_TRACER
4414         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4415         "\t\t    (function)\n"
4416 #endif
4417 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4418         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4419         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4420         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4421 #endif
4422 #ifdef CONFIG_TRACER_SNAPSHOT
4423         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4424         "\t\t\t  snapshot buffer. Read the contents for more\n"
4425         "\t\t\t  information\n"
4426 #endif
4427 #ifdef CONFIG_STACK_TRACER
4428         "  stack_trace\t\t- Shows the max stack trace when active\n"
4429         "  stack_max_size\t- Shows current max stack size that was traced\n"
4430         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4431         "\t\t\t  new trace)\n"
4432 #ifdef CONFIG_DYNAMIC_FTRACE
4433         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4434         "\t\t\t  traces\n"
4435 #endif
4436 #endif /* CONFIG_STACK_TRACER */
4437 #ifdef CONFIG_KPROBE_EVENTS
4438         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4439         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4440 #endif
4441 #ifdef CONFIG_UPROBE_EVENTS
4442         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4443         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4444 #endif
4445 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4446         "\t  accepts: event-definitions (one definition per line)\n"
4447         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4448         "\t           -:[<group>/]<event>\n"
4449 #ifdef CONFIG_KPROBE_EVENTS
4450         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4451   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4452 #endif
4453 #ifdef CONFIG_UPROBE_EVENTS
4454         "\t    place: <path>:<offset>\n"
4455 #endif
4456         "\t     args: <name>=fetcharg[:type]\n"
4457         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4458         "\t           $stack<index>, $stack, $retval, $comm\n"
4459         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4460         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4461 #endif
4462         "  events/\t\t- Directory containing all trace event subsystems:\n"
4463         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4464         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4465         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4466         "\t\t\t  events\n"
4467         "      filter\t\t- If set, only events passing filter are traced\n"
4468         "  events/<system>/<event>/\t- Directory containing control files for\n"
4469         "\t\t\t  <event>:\n"
4470         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4471         "      filter\t\t- If set, only events passing filter are traced\n"
4472         "      trigger\t\t- If set, a command to perform when event is hit\n"
4473         "\t    Format: <trigger>[:count][if <filter>]\n"
4474         "\t   trigger: traceon, traceoff\n"
4475         "\t            enable_event:<system>:<event>\n"
4476         "\t            disable_event:<system>:<event>\n"
4477 #ifdef CONFIG_HIST_TRIGGERS
4478         "\t            enable_hist:<system>:<event>\n"
4479         "\t            disable_hist:<system>:<event>\n"
4480 #endif
4481 #ifdef CONFIG_STACKTRACE
4482         "\t\t    stacktrace\n"
4483 #endif
4484 #ifdef CONFIG_TRACER_SNAPSHOT
4485         "\t\t    snapshot\n"
4486 #endif
4487 #ifdef CONFIG_HIST_TRIGGERS
4488         "\t\t    hist (see below)\n"
4489 #endif
4490         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4491         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4492         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4493         "\t                  events/block/block_unplug/trigger\n"
4494         "\t   The first disables tracing every time block_unplug is hit.\n"
4495         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4496         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4497         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4498         "\t   Like function triggers, the counter is only decremented if it\n"
4499         "\t    enabled or disabled tracing.\n"
4500         "\t   To remove a trigger without a count:\n"
4501         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4502         "\t   To remove a trigger with a count:\n"
4503         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4504         "\t   Filters can be ignored when removing a trigger.\n"
4505 #ifdef CONFIG_HIST_TRIGGERS
4506         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4507         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4508         "\t            [:values=<field1[,field2,...]>]\n"
4509         "\t            [:sort=<field1[,field2,...]>]\n"
4510         "\t            [:size=#entries]\n"
4511         "\t            [:pause][:continue][:clear]\n"
4512         "\t            [:name=histname1]\n"
4513         "\t            [if <filter>]\n\n"
4514         "\t    When a matching event is hit, an entry is added to a hash\n"
4515         "\t    table using the key(s) and value(s) named, and the value of a\n"
4516         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4517         "\t    correspond to fields in the event's format description.  Keys\n"
4518         "\t    can be any field, or the special string 'stacktrace'.\n"
4519         "\t    Compound keys consisting of up to two fields can be specified\n"
4520         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4521         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4522         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4523         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4524         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4525         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4526         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4527         "\t    its histogram data will be shared with other triggers of the\n"
4528         "\t    same name, and trigger hits will update this common data.\n\n"
4529         "\t    Reading the 'hist' file for the event will dump the hash\n"
4530         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4531         "\t    triggers attached to an event, there will be a table for each\n"
4532         "\t    trigger in the output.  The table displayed for a named\n"
4533         "\t    trigger will be the same as any other instance having the\n"
4534         "\t    same name.  The default format used to display a given field\n"
4535         "\t    can be modified by appending any of the following modifiers\n"
4536         "\t    to the field name, as applicable:\n\n"
4537         "\t            .hex        display a number as a hex value\n"
4538         "\t            .sym        display an address as a symbol\n"
4539         "\t            .sym-offset display an address as a symbol and offset\n"
4540         "\t            .execname   display a common_pid as a program name\n"
4541         "\t            .syscall    display a syscall id as a syscall name\n"
4542         "\t            .log2       display log2 value rather than raw number\n\n"
4543         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4544         "\t    trigger or to start a hist trigger but not log any events\n"
4545         "\t    until told to do so.  'continue' can be used to start or\n"
4546         "\t    restart a paused hist trigger.\n\n"
4547         "\t    The 'clear' parameter will clear the contents of a running\n"
4548         "\t    hist trigger and leave its current paused/active state\n"
4549         "\t    unchanged.\n\n"
4550         "\t    The enable_hist and disable_hist triggers can be used to\n"
4551         "\t    have one event conditionally start and stop another event's\n"
4552         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4553         "\t    the enable_event and disable_event triggers.\n"
4554 #endif
4555 ;
4556
4557 static ssize_t
4558 tracing_readme_read(struct file *filp, char __user *ubuf,
4559                        size_t cnt, loff_t *ppos)
4560 {
4561         return simple_read_from_buffer(ubuf, cnt, ppos,
4562                                         readme_msg, strlen(readme_msg));
4563 }
4564
4565 static const struct file_operations tracing_readme_fops = {
4566         .open           = tracing_open_generic,
4567         .read           = tracing_readme_read,
4568         .llseek         = generic_file_llseek,
4569 };
4570
4571 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4572 {
4573         unsigned int *ptr = v;
4574
4575         if (*pos || m->count)
4576                 ptr++;
4577
4578         (*pos)++;
4579
4580         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4581              ptr++) {
4582                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4583                         continue;
4584
4585                 return ptr;
4586         }
4587
4588         return NULL;
4589 }
4590
4591 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4592 {
4593         void *v;
4594         loff_t l = 0;
4595
4596         preempt_disable();
4597         arch_spin_lock(&trace_cmdline_lock);
4598
4599         v = &savedcmd->map_cmdline_to_pid[0];
4600         while (l <= *pos) {
4601                 v = saved_cmdlines_next(m, v, &l);
4602                 if (!v)
4603                         return NULL;
4604         }
4605
4606         return v;
4607 }
4608
4609 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4610 {
4611         arch_spin_unlock(&trace_cmdline_lock);
4612         preempt_enable();
4613 }
4614
4615 static int saved_cmdlines_show(struct seq_file *m, void *v)
4616 {
4617         char buf[TASK_COMM_LEN];
4618         unsigned int *pid = v;
4619
4620         __trace_find_cmdline(*pid, buf);
4621         seq_printf(m, "%d %s\n", *pid, buf);
4622         return 0;
4623 }
4624
4625 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4626         .start          = saved_cmdlines_start,
4627         .next           = saved_cmdlines_next,
4628         .stop           = saved_cmdlines_stop,
4629         .show           = saved_cmdlines_show,
4630 };
4631
4632 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4633 {
4634         if (tracing_disabled)
4635                 return -ENODEV;
4636
4637         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4638 }
4639
4640 static const struct file_operations tracing_saved_cmdlines_fops = {
4641         .open           = tracing_saved_cmdlines_open,
4642         .read           = seq_read,
4643         .llseek         = seq_lseek,
4644         .release        = seq_release,
4645 };
4646
4647 static ssize_t
4648 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4649                                  size_t cnt, loff_t *ppos)
4650 {
4651         char buf[64];
4652         int r;
4653
4654         arch_spin_lock(&trace_cmdline_lock);
4655         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4656         arch_spin_unlock(&trace_cmdline_lock);
4657
4658         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4659 }
4660
4661 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4662 {
4663         kfree(s->saved_cmdlines);
4664         kfree(s->map_cmdline_to_pid);
4665         kfree(s);
4666 }
4667
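/*
 * Replace the saved_cmdlines buffer with one holding 'val' entries,
 * e.g. after:
 *
 *	echo 1024 > saved_cmdlines_size
 *
 * The old buffer is freed once the new one is published under
 * trace_cmdline_lock.
 */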
4668 static int tracing_resize_saved_cmdlines(unsigned int val)
4669 {
4670         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4671
4672         s = kmalloc(sizeof(*s), GFP_KERNEL);
4673         if (!s)
4674                 return -ENOMEM;
4675
4676         if (allocate_cmdlines_buffer(val, s) < 0) {
4677                 kfree(s);
4678                 return -ENOMEM;
4679         }
4680
4681         arch_spin_lock(&trace_cmdline_lock);
4682         savedcmd_temp = savedcmd;
4683         savedcmd = s;
4684         arch_spin_unlock(&trace_cmdline_lock);
4685         free_saved_cmdlines_buffer(savedcmd_temp);
4686
4687         return 0;
4688 }
4689
4690 static ssize_t
4691 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4692                                   size_t cnt, loff_t *ppos)
4693 {
4694         unsigned long val;
4695         int ret;
4696
4697         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4698         if (ret)
4699                 return ret;
4700
4701         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4702         if (!val || val > PID_MAX_DEFAULT)
4703                 return -EINVAL;
4704
4705         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4706         if (ret < 0)
4707                 return ret;
4708
4709         *ppos += cnt;
4710
4711         return cnt;
4712 }
4713
4714 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4715         .open           = tracing_open_generic,
4716         .read           = tracing_saved_cmdlines_size_read,
4717         .write          = tracing_saved_cmdlines_size_write,
4718 };
4719
4720 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4721 static union trace_enum_map_item *
4722 update_enum_map(union trace_enum_map_item *ptr)
4723 {
4724         if (!ptr->map.enum_string) {
4725                 if (ptr->tail.next) {
4726                         ptr = ptr->tail.next;
4727                         /* Set ptr to the next real item (skip head) */
4728                         ptr++;
4729                 } else
4730                         return NULL;
4731         }
4732         return ptr;
4733 }
4734
4735 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4736 {
4737         union trace_enum_map_item *ptr = v;
4738
4739         /*
4740          * Paranoid! If ptr points to end, we don't want to increment past it.
4741          * This really should never happen.
4742          */
4743         ptr = update_enum_map(ptr);
4744         if (WARN_ON_ONCE(!ptr))
4745                 return NULL;
4746
4747         ptr++;
4748
4749         (*pos)++;
4750
4751         ptr = update_enum_map(ptr);
4752
4753         return ptr;
4754 }
4755
4756 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4757 {
4758         union trace_enum_map_item *v;
4759         loff_t l = 0;
4760
4761         mutex_lock(&trace_enum_mutex);
4762
4763         v = trace_enum_maps;
4764         if (v)
4765                 v++;
4766
4767         while (v && l < *pos) {
4768                 v = enum_map_next(m, v, &l);
4769         }
4770
4771         return v;
4772 }
4773
4774 static void enum_map_stop(struct seq_file *m, void *v)
4775 {
4776         mutex_unlock(&trace_enum_mutex);
4777 }
4778
4779 static int enum_map_show(struct seq_file *m, void *v)
4780 {
4781         union trace_enum_map_item *ptr = v;
4782
4783         seq_printf(m, "%s %ld (%s)\n",
4784                    ptr->map.enum_string, ptr->map.enum_value,
4785                    ptr->map.system);
4786
4787         return 0;
4788 }
4789
4790 static const struct seq_operations tracing_enum_map_seq_ops = {
4791         .start          = enum_map_start,
4792         .next           = enum_map_next,
4793         .stop           = enum_map_stop,
4794         .show           = enum_map_show,
4795 };
4796
4797 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4798 {
4799         if (tracing_disabled)
4800                 return -ENODEV;
4801
4802         return seq_open(filp, &tracing_enum_map_seq_ops);
4803 }
4804
4805 static const struct file_operations tracing_enum_map_fops = {
4806         .open           = tracing_enum_map_open,
4807         .read           = seq_read,
4808         .llseek         = seq_lseek,
4809         .release        = seq_release,
4810 };
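/*
 * Usage sketch (assuming CONFIG_TRACE_ENUM_MAP_FILE=y and tracefs mounted
 * at /sys/kernel/tracing): the seq_file above backs the read-only
 * "enum_map" file, one "<enum name> <value> (<system>)" line per entry,
 * matching the format string in enum_map_show(). The actual output depends
 * on which enums the kernel registers; softirq names are one common case:
 *
 *	# cat /sys/kernel/tracing/enum_map
 *	HI_SOFTIRQ 0 (irq)
 *	TIMER_SOFTIRQ 1 (irq)
 */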
4811
4812 static inline union trace_enum_map_item *
4813 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4814 {
4815         /* Return tail of array given the head */
4816         return ptr + ptr->head.length + 1;
4817 }
4818
4819 static void
4820 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4821                            int len)
4822 {
4823         struct trace_enum_map **stop;
4824         struct trace_enum_map **map;
4825         union trace_enum_map_item *map_array;
4826         union trace_enum_map_item *ptr;
4827
4828         stop = start + len;
4829
4830         /*
4831          * The trace_enum_maps array contains the maps plus a head and a tail
4832          * item, where the head holds the module and the length of the array,
4833          * and the tail holds a pointer to the next list.
4834          */
4835         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4836         if (!map_array) {
4837                 pr_warn("Unable to allocate trace enum mapping\n");
4838                 return;
4839         }
4840
4841         mutex_lock(&trace_enum_mutex);
4842
4843         if (!trace_enum_maps)
4844                 trace_enum_maps = map_array;
4845         else {
4846                 ptr = trace_enum_maps;
4847                 for (;;) {
4848                         ptr = trace_enum_jmp_to_tail(ptr);
4849                         if (!ptr->tail.next)
4850                                 break;
4851                         ptr = ptr->tail.next;
4852
4853                 }
4854                 ptr->tail.next = map_array;
4855         }
4856         map_array->head.mod = mod;
4857         map_array->head.length = len;
4858         map_array++;
4859
4860         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4861                 map_array->map = **map;
4862                 map_array++;
4863         }
4864         memset(map_array, 0, sizeof(*map_array));
4865
4866         mutex_unlock(&trace_enum_mutex);
4867 }
4868
4869 static void trace_create_enum_file(struct dentry *d_tracer)
4870 {
4871         trace_create_file("enum_map", 0444, d_tracer,
4872                           NULL, &tracing_enum_map_fops);
4873 }
4874
4875 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4876 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4877 static inline void trace_insert_enum_map_file(struct module *mod,
4878                               struct trace_enum_map **start, int len) { }
4879 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4880
4881 static void trace_insert_enum_map(struct module *mod,
4882                                   struct trace_enum_map **start, int len)
4883 {
4884         struct trace_enum_map **map;
4885
4886         if (len <= 0)
4887                 return;
4888
4889         map = start;
4890
4891         trace_event_enum_update(map, len);
4892
4893         trace_insert_enum_map_file(mod, start, len);
4894 }
4895
4896 static ssize_t
4897 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4898                        size_t cnt, loff_t *ppos)
4899 {
4900         struct trace_array *tr = filp->private_data;
4901         char buf[MAX_TRACER_SIZE+2];
4902         int r;
4903
4904         mutex_lock(&trace_types_lock);
4905         r = sprintf(buf, "%s\n", tr->current_trace->name);
4906         mutex_unlock(&trace_types_lock);
4907
4908         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4909 }
4910
4911 int tracer_init(struct tracer *t, struct trace_array *tr)
4912 {
4913         tracing_reset_online_cpus(&tr->trace_buffer);
4914         return t->init(tr);
4915 }
4916
4917 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4918 {
4919         int cpu;
4920
4921         for_each_tracing_cpu(cpu)
4922                 per_cpu_ptr(buf->data, cpu)->entries = val;
4923 }
4924
4925 #ifdef CONFIG_TRACER_MAX_TRACE
4926 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4927 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4928                                         struct trace_buffer *size_buf, int cpu_id)
4929 {
4930         int cpu, ret = 0;
4931
4932         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4933                 for_each_tracing_cpu(cpu) {
4934                         ret = ring_buffer_resize(trace_buf->buffer,
4935                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4936                         if (ret < 0)
4937                                 break;
4938                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4939                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4940                 }
4941         } else {
4942                 ret = ring_buffer_resize(trace_buf->buffer,
4943                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4944                 if (ret == 0)
4945                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4946                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4947         }
4948
4949         return ret;
4950 }
4951 #endif /* CONFIG_TRACER_MAX_TRACE */
4952
4953 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4954                                         unsigned long size, int cpu)
4955 {
4956         int ret;
4957
4958         /*
4959          * If the kernel or the user changes the size of the ring buffer,
4960          * we use the size that was given, and we can forget about
4961          * expanding it later.
4962          */
4963         ring_buffer_expanded = true;
4964
4965         /* May be called before buffers are initialized */
4966         if (!tr->trace_buffer.buffer)
4967                 return 0;
4968
4969         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4970         if (ret < 0)
4971                 return ret;
4972
4973 #ifdef CONFIG_TRACER_MAX_TRACE
4974         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4975             !tr->current_trace->use_max_tr)
4976                 goto out;
4977
4978         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4979         if (ret < 0) {
4980                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4981                                                      &tr->trace_buffer, cpu);
4982                 if (r < 0) {
4983                         /*
4984                          * AARGH! We are left with a max buffer of a
4985                          * different size!
4986                          * The max buffer is our "snapshot" buffer.
4987                          * When a tracer needs a snapshot (one of the
4988                          * latency tracers), it swaps the max buffer
4989                          * with the saved snapshot. We succeeded in
4990                          * updating the size of the main buffer, but failed
4991                          * to update the size of the max buffer. When we then
4992                          * tried to reset the main buffer to its original size,
4993                          * we failed there too. This is very unlikely to
4994                          * happen, but if it does, warn and kill all
4995                          * tracing.
4996                          */
4997                         WARN_ON(1);
4998                         tracing_disabled = 1;
4999                 }
5000                 return ret;
5001         }
5002
5003         if (cpu == RING_BUFFER_ALL_CPUS)
5004                 set_buffer_entries(&tr->max_buffer, size);
5005         else
5006                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5007
5008  out:
5009 #endif /* CONFIG_TRACER_MAX_TRACE */
5010
5011         if (cpu == RING_BUFFER_ALL_CPUS)
5012                 set_buffer_entries(&tr->trace_buffer, size);
5013         else
5014                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5015
5016         return ret;
5017 }
5018
5019 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5020                                           unsigned long size, int cpu_id)
5021 {
5022         int ret = size;
5023
5024         mutex_lock(&trace_types_lock);
5025
5026         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5027                 /* make sure this cpu is enabled in the mask */
5028                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5029                         ret = -EINVAL;
5030                         goto out;
5031                 }
5032         }
5033
5034         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5035         if (ret < 0)
5036                 ret = -ENOMEM;
5037
5038 out:
5039         mutex_unlock(&trace_types_lock);
5040
5041         return ret;
5042 }
5043
5044
5045 /**
5046  * tracing_update_buffers - used by tracing facility to expand ring buffers
5047  *
5048  * To save memory when tracing is configured in but never used on a
5049  * system, the ring buffers are initially set to a minimum size. Once a
5050  * user starts to use the tracing facility, they need to grow to their
5051  * default size.
5052  *
5053  * This function is to be called when a tracer is about to be used.
5054  */
5055 int tracing_update_buffers(void)
5056 {
5057         int ret = 0;
5058
5059         mutex_lock(&trace_types_lock);
5060         if (!ring_buffer_expanded)
5061                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5062                                                 RING_BUFFER_ALL_CPUS);
5063         mutex_unlock(&trace_types_lock);
5064
5065         return ret;
5066 }
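/*
 * Call sketch (an assumption about typical callers, not a call site in
 * this file): code that is about to turn tracing on first makes sure the
 * ring buffers have been expanded to their full size:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	/@ ... proceed to enable the tracer or event ... @/
 *
 * (The "@" above stands in for "*" to keep this comment well-formed.)
 */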
5067
5068 struct trace_option_dentry;
5069
5070 static void
5071 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5072
5073 /*
5074  * Used to clear out the tracer before deletion of an instance.
5075  * Must have trace_types_lock held.
5076  */
5077 static void tracing_set_nop(struct trace_array *tr)
5078 {
5079         if (tr->current_trace == &nop_trace)
5080                 return;
5081
5082         tr->current_trace->enabled--;
5083
5084         if (tr->current_trace->reset)
5085                 tr->current_trace->reset(tr);
5086
5087         tr->current_trace = &nop_trace;
5088 }
5089
5090 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5091 {
5092         /* Only enable if the directory has been created already. */
5093         if (!tr->dir)
5094                 return;
5095
5096         create_trace_option_files(tr, t);
5097 }
5098
5099 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5100 {
5101         struct tracer *t;
5102 #ifdef CONFIG_TRACER_MAX_TRACE
5103         bool had_max_tr;
5104 #endif
5105         int ret = 0;
5106
5107         mutex_lock(&trace_types_lock);
5108
5109         if (!ring_buffer_expanded) {
5110                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5111                                                 RING_BUFFER_ALL_CPUS);
5112                 if (ret < 0)
5113                         goto out;
5114                 ret = 0;
5115         }
5116
5117         for (t = trace_types; t; t = t->next) {
5118                 if (strcmp(t->name, buf) == 0)
5119                         break;
5120         }
5121         if (!t) {
5122                 ret = -EINVAL;
5123                 goto out;
5124         }
5125         if (t == tr->current_trace)
5126                 goto out;
5127
5128         /* Some tracers are only allowed for the top level buffer */
5129         if (!trace_ok_for_array(t, tr)) {
5130                 ret = -EINVAL;
5131                 goto out;
5132         }
5133
5134         /* If trace pipe files are being read, we can't change the tracer */
5135         if (tr->current_trace->ref) {
5136                 ret = -EBUSY;
5137                 goto out;
5138         }
5139
5140         trace_branch_disable();
5141
5142         tr->current_trace->enabled--;
5143
5144         if (tr->current_trace->reset)
5145                 tr->current_trace->reset(tr);
5146
5147         /* Current trace needs to be nop_trace before synchronize_sched */
5148         tr->current_trace = &nop_trace;
5149
5150 #ifdef CONFIG_TRACER_MAX_TRACE
5151         had_max_tr = tr->allocated_snapshot;
5152
5153         if (had_max_tr && !t->use_max_tr) {
5154                 /*
5155                  * We need to make sure that the update_max_tr sees that
5156                  * current_trace changed to nop_trace to keep it from
5157                  * swapping the buffers after we resize it.
5158                  * update_max_tr() is called with interrupts disabled,
5159                  * so a synchronize_sched() is sufficient.
5160                  */
5161                 synchronize_sched();
5162                 free_snapshot(tr);
5163         }
5164 #endif
5165
5166 #ifdef CONFIG_TRACER_MAX_TRACE
5167         if (t->use_max_tr && !had_max_tr) {
5168                 ret = alloc_snapshot(tr);
5169                 if (ret < 0)
5170                         goto out;
5171         }
5172 #endif
5173
5174         if (t->init) {
5175                 ret = tracer_init(t, tr);
5176                 if (ret)
5177                         goto out;
5178         }
5179
5180         tr->current_trace = t;
5181         tr->current_trace->enabled++;
5182         trace_branch_enable(tr);
5183  out:
5184         mutex_unlock(&trace_types_lock);
5185
5186         return ret;
5187 }
5188
5189 static ssize_t
5190 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5191                         size_t cnt, loff_t *ppos)
5192 {
5193         struct trace_array *tr = filp->private_data;
5194         char buf[MAX_TRACER_SIZE+1];
5195         int i;
5196         size_t ret;
5197         int err;
5198
5199         ret = cnt;
5200
5201         if (cnt > MAX_TRACER_SIZE)
5202                 cnt = MAX_TRACER_SIZE;
5203
5204         if (copy_from_user(buf, ubuf, cnt))
5205                 return -EFAULT;
5206
5207         buf[cnt] = 0;
5208
5209         /* strip trailing whitespace. */
5210         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5211                 buf[i] = 0;
5212
5213         err = tracing_set_tracer(tr, buf);
5214         if (err)
5215                 return err;
5216
5217         *ppos += ret;
5218
5219         return ret;
5220 }
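/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing): the
 * read/write handlers above back the "current_tracer" file. Writing the
 * name of any tracer listed in "available_tracers" switches to it, and
 * writing "nop" switches the tracer plugin off again:
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 *	# echo nop > /sys/kernel/tracing/current_tracer
 */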
5221
5222 static ssize_t
5223 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5224                    size_t cnt, loff_t *ppos)
5225 {
5226         char buf[64];
5227         int r;
5228
5229         r = snprintf(buf, sizeof(buf), "%ld\n",
5230                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5231         if (r > sizeof(buf))
5232                 r = sizeof(buf);
5233         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5234 }
5235
5236 static ssize_t
5237 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5238                     size_t cnt, loff_t *ppos)
5239 {
5240         unsigned long val;
5241         int ret;
5242
5243         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5244         if (ret)
5245                 return ret;
5246
5247         *ptr = val * 1000;
5248
5249         return cnt;
5250 }
5251
5252 static ssize_t
5253 tracing_thresh_read(struct file *filp, char __user *ubuf,
5254                     size_t cnt, loff_t *ppos)
5255 {
5256         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5257 }
5258
5259 static ssize_t
5260 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5261                      size_t cnt, loff_t *ppos)
5262 {
5263         struct trace_array *tr = filp->private_data;
5264         int ret;
5265
5266         mutex_lock(&trace_types_lock);
5267         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5268         if (ret < 0)
5269                 goto out;
5270
5271         if (tr->current_trace->update_thresh) {
5272                 ret = tr->current_trace->update_thresh(tr);
5273                 if (ret < 0)
5274                         goto out;
5275         }
5276
5277         ret = cnt;
5278 out:
5279         mutex_unlock(&trace_types_lock);
5280
5281         return ret;
5282 }
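/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): "tracing_thresh"
 * is read and written in microseconds; tracing_nsecs_write() above stores
 * it internally in nanoseconds (val * 1000). A non-zero value makes the
 * latency tracers record only latencies above the threshold:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *	# cat /sys/kernel/tracing/tracing_thresh
 *	100
 */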
5283
5284 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5285
5286 static ssize_t
5287 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5288                      size_t cnt, loff_t *ppos)
5289 {
5290         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5291 }
5292
5293 static ssize_t
5294 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5295                       size_t cnt, loff_t *ppos)
5296 {
5297         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5298 }
5299
5300 #endif
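/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): with the max
 * tracer or the hwlat tracer configured in, the handlers above back
 * "tracing_max_latency", the largest latency seen so far in microseconds.
 * Writing 0 resets it so a new maximum can be recorded:
 *
 *	# cat /sys/kernel/tracing/tracing_max_latency
 *	428
 *	# echo 0 > /sys/kernel/tracing/tracing_max_latency
 */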
5301
5302 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5303 {
5304         struct trace_array *tr = inode->i_private;
5305         struct trace_iterator *iter;
5306         int ret = 0;
5307
5308         if (tracing_disabled)
5309                 return -ENODEV;
5310
5311         if (trace_array_get(tr) < 0)
5312                 return -ENODEV;
5313
5314         mutex_lock(&trace_types_lock);
5315
5316         /* create a buffer to store the information to pass to userspace */
5317         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5318         if (!iter) {
5319                 ret = -ENOMEM;
5320                 __trace_array_put(tr);
5321                 goto out;
5322         }
5323
5324         trace_seq_init(&iter->seq);
5325         iter->trace = tr->current_trace;
5326
5327         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5328                 ret = -ENOMEM;
5329                 goto fail;
5330         }
5331
5332         /* trace pipe does not show start of buffer */
5333         cpumask_setall(iter->started);
5334
5335         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5336                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5337
5338         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5339         if (trace_clocks[tr->clock_id].in_ns)
5340                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5341
5342         iter->tr = tr;
5343         iter->trace_buffer = &tr->trace_buffer;
5344         iter->cpu_file = tracing_get_cpu(inode);
5345         mutex_init(&iter->mutex);
5346         filp->private_data = iter;
5347
5348         if (iter->trace->pipe_open)
5349                 iter->trace->pipe_open(iter);
5350
5351         nonseekable_open(inode, filp);
5352
5353         tr->current_trace->ref++;
5354 out:
5355         mutex_unlock(&trace_types_lock);
5356         return ret;
5357
5358 fail:
5359         kfree(iter->trace);
5360         kfree(iter);
5361         __trace_array_put(tr);
5362         mutex_unlock(&trace_types_lock);
5363         return ret;
5364 }
5365
5366 static int tracing_release_pipe(struct inode *inode, struct file *file)
5367 {
5368         struct trace_iterator *iter = file->private_data;
5369         struct trace_array *tr = inode->i_private;
5370
5371         mutex_lock(&trace_types_lock);
5372
5373         tr->current_trace->ref--;
5374
5375         if (iter->trace->pipe_close)
5376                 iter->trace->pipe_close(iter);
5377
5378         mutex_unlock(&trace_types_lock);
5379
5380         free_cpumask_var(iter->started);
5381         mutex_destroy(&iter->mutex);
5382         kfree(iter);
5383
5384         trace_array_put(tr);
5385
5386         return 0;
5387 }
5388
5389 static unsigned int
5390 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5391 {
5392         struct trace_array *tr = iter->tr;
5393
5394         /* Iterators are static; they are either filled or empty */
5395         if (trace_buffer_iter(iter, iter->cpu_file))
5396                 return POLLIN | POLLRDNORM;
5397
5398         if (tr->trace_flags & TRACE_ITER_BLOCK)
5399                 /*
5400                  * Always select as readable when in blocking mode
5401                  */
5402                 return POLLIN | POLLRDNORM;
5403         else
5404                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5405                                              filp, poll_table);
5406 }
5407
5408 static unsigned int
5409 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5410 {
5411         struct trace_iterator *iter = filp->private_data;
5412
5413         return trace_poll(iter, filp, poll_table);
5414 }
5415
5416 /* Must be called with iter->mutex held. */
5417 static int tracing_wait_pipe(struct file *filp)
5418 {
5419         struct trace_iterator *iter = filp->private_data;
5420         int ret;
5421
5422         while (trace_empty(iter)) {
5423
5424                 if ((filp->f_flags & O_NONBLOCK)) {
5425                         return -EAGAIN;
5426                 }
5427
5428                 /*
5429                  * We block until we read something and tracing is disabled.
5430                  * We keep blocking while tracing is disabled if we have never
5431                  * read anything. This allows a user to cat this file and
5432                  * then enable tracing. But after we have read something,
5433                  * we give an EOF when tracing is disabled again.
5434                  *
5435                  * iter->pos will be 0 if we haven't read anything.
5436                  */
5437                 if (!tracing_is_on() && iter->pos)
5438                         break;
5439
5440                 mutex_unlock(&iter->mutex);
5441
5442                 ret = wait_on_pipe(iter, false);
5443
5444                 mutex_lock(&iter->mutex);
5445
5446                 if (ret)
5447                         return ret;
5448         }
5449
5450         return 1;
5451 }
5452
5453 /*
5454  * Consumer reader.
5455  */
5456 static ssize_t
5457 tracing_read_pipe(struct file *filp, char __user *ubuf,
5458                   size_t cnt, loff_t *ppos)
5459 {
5460         struct trace_iterator *iter = filp->private_data;
5461         ssize_t sret;
5462
5463         /*
5464          * Avoid more than one consumer on a single file descriptor.
5465          * This is just a matter of trace coherency; the ring buffer itself
5466          * is protected.
5467          */
5468         mutex_lock(&iter->mutex);
5469
5470         /* return any leftover data */
5471         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5472         if (sret != -EBUSY)
5473                 goto out;
5474
5475         trace_seq_init(&iter->seq);
5476
5477         if (iter->trace->read) {
5478                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5479                 if (sret)
5480                         goto out;
5481         }
5482
5483 waitagain:
5484         sret = tracing_wait_pipe(filp);
5485         if (sret <= 0)
5486                 goto out;
5487
5488         /* stop when tracing is finished */
5489         if (trace_empty(iter)) {
5490                 sret = 0;
5491                 goto out;
5492         }
5493
5494         if (cnt >= PAGE_SIZE)
5495                 cnt = PAGE_SIZE - 1;
5496
5497         /* reset all but tr, trace, and overruns */
5498         memset(&iter->seq, 0,
5499                sizeof(struct trace_iterator) -
5500                offsetof(struct trace_iterator, seq));
5501         cpumask_clear(iter->started);
5502         iter->pos = -1;
5503
5504         trace_event_read_lock();
5505         trace_access_lock(iter->cpu_file);
5506         while (trace_find_next_entry_inc(iter) != NULL) {
5507                 enum print_line_t ret;
5508                 int save_len = iter->seq.seq.len;
5509
5510                 ret = print_trace_line(iter);
5511                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5512                         /* don't print partial lines */
5513                         iter->seq.seq.len = save_len;
5514                         break;
5515                 }
5516                 if (ret != TRACE_TYPE_NO_CONSUME)
5517                         trace_consume(iter);
5518
5519                 if (trace_seq_used(&iter->seq) >= cnt)
5520                         break;
5521
5522                 /*
5523                  * The full flag being set means we reached the trace_seq buffer
5524                  * size and we should have left via the partial-line condition above.
5525                  * One of the trace_seq_* functions is not being used properly.
5526                  */
5527                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5528                           iter->ent->type);
5529         }
5530         trace_access_unlock(iter->cpu_file);
5531         trace_event_read_unlock();
5532
5533         /* Now copy what we have to the user */
5534         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5535         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5536                 trace_seq_init(&iter->seq);
5537
5538         /*
5539          * If there was nothing to send to user, in spite of consuming trace
5540          * entries, go back to wait for more entries.
5541          */
5542         if (sret == -EBUSY)
5543                 goto waitagain;
5544
5545 out:
5546         mutex_unlock(&iter->mutex);
5547
5548         return sret;
5549 }
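/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): tracing_read_pipe()
 * above backs the "trace_pipe" file. Unlike "trace", reads consume the
 * entries they return and block (unless O_NONBLOCK is set) until new data
 * arrives, so the file behaves like a pipe of trace events:
 *
 *	# cat /sys/kernel/tracing/trace_pipe > /tmp/trace.log &
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 */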
5550
5551 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5552                                      unsigned int idx)
5553 {
5554         __free_page(spd->pages[idx]);
5555 }
5556
5557 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5558         .can_merge              = 0,
5559         .confirm                = generic_pipe_buf_confirm,
5560         .release                = generic_pipe_buf_release,
5561         .steal                  = generic_pipe_buf_steal,
5562         .get                    = generic_pipe_buf_get,
5563 };
5564
5565 static size_t
5566 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5567 {
5568         size_t count;
5569         int save_len;
5570         int ret;
5571
5572         /* Seq buffer is page-sized, exactly what we need. */
5573         for (;;) {
5574                 save_len = iter->seq.seq.len;
5575                 ret = print_trace_line(iter);
5576
5577                 if (trace_seq_has_overflowed(&iter->seq)) {
5578                         iter->seq.seq.len = save_len;
5579                         break;
5580                 }
5581
5582                 /*
5583                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5584                  * should only be returned if iter->seq overflowed. But check
5585                  * it anyway to be safe.
5586                  */
5587                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5588                         iter->seq.seq.len = save_len;
5589                         break;
5590                 }
5591
5592                 count = trace_seq_used(&iter->seq) - save_len;
5593                 if (rem < count) {
5594                         rem = 0;
5595                         iter->seq.seq.len = save_len;
5596                         break;
5597                 }
5598
5599                 if (ret != TRACE_TYPE_NO_CONSUME)
5600                         trace_consume(iter);
5601                 rem -= count;
5602                 if (!trace_find_next_entry_inc(iter))   {
5603                         rem = 0;
5604                         iter->ent = NULL;
5605                         break;
5606                 }
5607         }
5608
5609         return rem;
5610 }
5611
5612 static ssize_t tracing_splice_read_pipe(struct file *filp,
5613                                         loff_t *ppos,
5614                                         struct pipe_inode_info *pipe,
5615                                         size_t len,
5616                                         unsigned int flags)
5617 {
5618         struct page *pages_def[PIPE_DEF_BUFFERS];
5619         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5620         struct trace_iterator *iter = filp->private_data;
5621         struct splice_pipe_desc spd = {
5622                 .pages          = pages_def,
5623                 .partial        = partial_def,
5624                 .nr_pages       = 0, /* This gets updated below. */
5625                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5626                 .ops            = &tracing_pipe_buf_ops,
5627                 .spd_release    = tracing_spd_release_pipe,
5628         };
5629         ssize_t ret;
5630         size_t rem;
5631         unsigned int i;
5632
5633         if (splice_grow_spd(pipe, &spd))
5634                 return -ENOMEM;
5635
5636         mutex_lock(&iter->mutex);
5637
5638         if (iter->trace->splice_read) {
5639                 ret = iter->trace->splice_read(iter, filp,
5640                                                ppos, pipe, len, flags);
5641                 if (ret)
5642                         goto out_err;
5643         }
5644
5645         ret = tracing_wait_pipe(filp);
5646         if (ret <= 0)
5647                 goto out_err;
5648
5649         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5650                 ret = -EFAULT;
5651                 goto out_err;
5652         }
5653
5654         trace_event_read_lock();
5655         trace_access_lock(iter->cpu_file);
5656
5657         /* Fill as many pages as possible. */
5658         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5659                 spd.pages[i] = alloc_page(GFP_KERNEL);
5660                 if (!spd.pages[i])
5661                         break;
5662
5663                 rem = tracing_fill_pipe_page(rem, iter);
5664
5665                 /* Copy the data into the page, so we can start over. */
5666                 ret = trace_seq_to_buffer(&iter->seq,
5667                                           page_address(spd.pages[i]),
5668                                           trace_seq_used(&iter->seq));
5669                 if (ret < 0) {
5670                         __free_page(spd.pages[i]);
5671                         break;
5672                 }
5673                 spd.partial[i].offset = 0;
5674                 spd.partial[i].len = trace_seq_used(&iter->seq);
5675
5676                 trace_seq_init(&iter->seq);
5677         }
5678
5679         trace_access_unlock(iter->cpu_file);
5680         trace_event_read_unlock();
5681         mutex_unlock(&iter->mutex);
5682
5683         spd.nr_pages = i;
5684
5685         if (i)
5686                 ret = splice_to_pipe(pipe, &spd);
5687         else
5688                 ret = 0;
5689 out:
5690         splice_shrink_spd(&spd);
5691         return ret;
5692
5693 out_err:
5694         mutex_unlock(&iter->mutex);
5695         goto out;
5696 }
5697
5698 static ssize_t
5699 tracing_entries_read(struct file *filp, char __user *ubuf,
5700                      size_t cnt, loff_t *ppos)
5701 {
5702         struct inode *inode = file_inode(filp);
5703         struct trace_array *tr = inode->i_private;
5704         int cpu = tracing_get_cpu(inode);
5705         char buf[64];
5706         int r = 0;
5707         ssize_t ret;
5708
5709         mutex_lock(&trace_types_lock);
5710
5711         if (cpu == RING_BUFFER_ALL_CPUS) {
5712                 int cpu, buf_size_same;
5713                 unsigned long size;
5714
5715                 size = 0;
5716                 buf_size_same = 1;
5717                 /* check if all cpu sizes are the same */
5718                 for_each_tracing_cpu(cpu) {
5719                         /* fill in the size from first enabled cpu */
5720                         if (size == 0)
5721                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5722                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5723                                 buf_size_same = 0;
5724                                 break;
5725                         }
5726                 }
5727
5728                 if (buf_size_same) {
5729                         if (!ring_buffer_expanded)
5730                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5731                                             size >> 10,
5732                                             trace_buf_size >> 10);
5733                         else
5734                                 r = sprintf(buf, "%lu\n", size >> 10);
5735                 } else
5736                         r = sprintf(buf, "X\n");
5737         } else
5738                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5739
5740         mutex_unlock(&trace_types_lock);
5741
5742         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5743         return ret;
5744 }
5745
5746 static ssize_t
5747 tracing_entries_write(struct file *filp, const char __user *ubuf,
5748                       size_t cnt, loff_t *ppos)
5749 {
5750         struct inode *inode = file_inode(filp);
5751         struct trace_array *tr = inode->i_private;
5752         unsigned long val;
5753         int ret;
5754
5755         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5756         if (ret)
5757                 return ret;
5758
5759         /* must have at least 1 entry */
5760         if (!val)
5761                 return -EINVAL;
5762
5763         /* value is in KB */
5764         val <<= 10;
5765         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5766         if (ret < 0)
5767                 return ret;
5768
5769         *ppos += cnt;
5770
5771         return cnt;
5772 }
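/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): the entries
 * read/write handlers above back "buffer_size_kb". The top-level file sets
 * every CPU's buffer, while per_cpu/cpuN/buffer_size_kb resizes a single
 * CPU; a read shows "X" when the per-CPU sizes differ and an
 * "(expanded: ...)" hint before the buffers have been expanded:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *	4096
 */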
5773
5774 static ssize_t
5775 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5776                                 size_t cnt, loff_t *ppos)
5777 {
5778         struct trace_array *tr = filp->private_data;
5779         char buf[64];
5780         int r, cpu;
5781         unsigned long size = 0, expanded_size = 0;
5782
5783         mutex_lock(&trace_types_lock);
5784         for_each_tracing_cpu(cpu) {
5785                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5786                 if (!ring_buffer_expanded)
5787                         expanded_size += trace_buf_size >> 10;
5788         }
5789         if (ring_buffer_expanded)
5790                 r = sprintf(buf, "%lu\n", size);
5791         else
5792                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5793         mutex_unlock(&trace_types_lock);
5794
5795         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5796 }
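/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing):
 * "buffer_total_size_kb" is read-only and reports the sum of the per-CPU
 * buffer sizes computed above:
 *
 *	# cat /sys/kernel/tracing/buffer_total_size_kb
 *	16384
 */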
5797
5798 static ssize_t
5799 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5800                           size_t cnt, loff_t *ppos)
5801 {
5802         /*
5803          * There is no need to read what the user has written; this function
5804          * exists just to make sure that "echo" does not return an error.
5805          */
5806
5807         *ppos += cnt;
5808
5809         return cnt;
5810 }
5811
5812 static int
5813 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5814 {
5815         struct trace_array *tr = inode->i_private;
5816
5817         /* disable tracing? */
5818         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5819                 tracer_tracing_off(tr);
5820         /* resize the ring buffer to 0 */
5821         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5822
5823         trace_array_put(tr);
5824
5825         return 0;
5826 }
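/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): writes to
 * "free_buffer" are accepted but ignored; it is closing the file that
 * shrinks the ring buffer back to its minimum size and, if the
 * "disable_on_free" option (TRACE_ITER_STOP_ON_FREE) is set, also turns
 * tracing off first:
 *
 *	# echo 1 > /sys/kernel/tracing/options/disable_on_free
 *	# echo > /sys/kernel/tracing/free_buffer
 */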
5827
5828 static ssize_t
5829 tracing_mark_write(struct file *filp, const char __user *ubuf,
5830                                         size_t cnt, loff_t *fpos)
5831 {
5832         struct trace_array *tr = filp->private_data;
5833         struct ring_buffer_event *event;
5834         struct ring_buffer *buffer;
5835         struct print_entry *entry;
5836         unsigned long irq_flags;
5837         const char faulted[] = "<faulted>";
5838         ssize_t written;
5839         int size;
5840         int len;
5841
5842 /* Used in tracing_mark_raw_write() as well */
5843 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5844
5845         if (tracing_disabled)
5846                 return -EINVAL;
5847
5848         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5849                 return -EINVAL;
5850
5851         if (cnt > TRACE_BUF_SIZE)
5852                 cnt = TRACE_BUF_SIZE;
5853
5854         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5855
5856         local_save_flags(irq_flags);
5857         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5858
5859         /* If the write is shorter than "<faulted>", make sure we can still add that */
5860         if (cnt < FAULTED_SIZE)
5861                 size += FAULTED_SIZE - cnt;
5862
5863         buffer = tr->trace_buffer.buffer;
5864         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5865                                             irq_flags, preempt_count());
5866         if (unlikely(!event))
5867                 /* Ring buffer disabled, return as if not open for write */
5868                 return -EBADF;
5869
5870         entry = ring_buffer_event_data(event);
5871         entry->ip = _THIS_IP_;
5872
5873         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5874         if (len) {
5875                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5876                 cnt = FAULTED_SIZE;
5877                 written = -EFAULT;
5878         } else
5879                 written = cnt;
5880         len = cnt;
5881
5882         if (entry->buf[cnt - 1] != '\n') {
5883                 entry->buf[cnt] = '\n';
5884                 entry->buf[cnt + 1] = '\0';
5885         } else
5886                 entry->buf[cnt] = '\0';
5887
5888         __buffer_unlock_commit(buffer, event);
5889
5890         if (written > 0)
5891                 *fpos += written;
5892
5893         return written;
5894 }
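/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): user space can
 * annotate a trace through "trace_marker"; the write above lands in the
 * ring buffer as a print event and shows up inline with kernel events:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *	# grep hello /sys/kernel/tracing/trace
 */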
5895
5896 /* Limit it for now to 3K (including tag) */
5897 #define RAW_DATA_MAX_SIZE (1024*3)
5898
5899 static ssize_t
5900 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5901                                         size_t cnt, loff_t *fpos)
5902 {
5903         struct trace_array *tr = filp->private_data;
5904         struct ring_buffer_event *event;
5905         struct ring_buffer *buffer;
5906         struct raw_data_entry *entry;
5907         const char faulted[] = "<faulted>";
5908         unsigned long irq_flags;
5909         ssize_t written;
5910         int size;
5911         int len;
5912
5913 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5914
5915         if (tracing_disabled)
5916                 return -EINVAL;
5917
5918         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5919                 return -EINVAL;
5920
5921         /* The marker must at least have a tag id */
5922         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5923                 return -EINVAL;
5924
5925         if (cnt > TRACE_BUF_SIZE)
5926                 cnt = TRACE_BUF_SIZE;
5927
5928         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5929
5930         local_save_flags(irq_flags);
5931         size = sizeof(*entry) + cnt;
5932         if (cnt < FAULT_SIZE_ID)
5933                 size += FAULT_SIZE_ID - cnt;
5934
5935         buffer = tr->trace_buffer.buffer;
5936         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5937                                             irq_flags, preempt_count());
5938         if (!event)
5939                 /* Ring buffer disabled, return as if not open for write */
5940                 return -EBADF;
5941
5942         entry = ring_buffer_event_data(event);
5943
5944         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5945         if (len) {
5946                 entry->id = -1;
5947                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5948                 written = -EFAULT;
5949         } else
5950                 written = cnt;
5951
5952         __buffer_unlock_commit(buffer, event);
5953
5954         if (written > 0)
5955                 *fpos += written;
5956
5957         return written;
5958 }
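/*
 * Usage sketch (an illustration, not part of this file): "trace_marker_raw"
 * takes binary writes whose first 4 bytes are a tag id followed by a raw
 * payload, as copied into struct raw_data_entry above. A minimal user-space
 * writer, with a made-up tag value:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	write(fd, &rec, sizeof(rec));
 *	close(fd);
 */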
5959
5960 static int tracing_clock_show(struct seq_file *m, void *v)
5961 {
5962         struct trace_array *tr = m->private;
5963         int i;
5964
5965         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5966                 seq_printf(m,
5967                         "%s%s%s%s", i ? " " : "",
5968                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5969                         i == tr->clock_id ? "]" : "");
5970         seq_putc(m, '\n');
5971
5972         return 0;
5973 }
5974
5975 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5976 {
5977         int i;
5978
5979         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5980                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5981                         break;
5982         }
5983         if (i == ARRAY_SIZE(trace_clocks))
5984                 return -EINVAL;
5985
5986         mutex_lock(&trace_types_lock);
5987
5988         tr->clock_id = i;
5989
5990         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5991
5992         /*
5993          * New clock may not be consistent with the previous clock.
5994          * Reset the buffer so that it doesn't have incomparable timestamps.
5995          */
5996         tracing_reset_online_cpus(&tr->trace_buffer);
5997
5998 #ifdef CONFIG_TRACER_MAX_TRACE
5999         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6000                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6001         tracing_reset_online_cpus(&tr->max_buffer);
6002 #endif
6003
6004         mutex_unlock(&trace_types_lock);
6005
6006         return 0;
6007 }
6008
6009 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6010                                    size_t cnt, loff_t *fpos)
6011 {
6012         struct seq_file *m = filp->private_data;
6013         struct trace_array *tr = m->private;
6014         char buf[64];
6015         const char *clockstr;
6016         int ret;
6017
6018         if (cnt >= sizeof(buf))
6019                 return -EINVAL;
6020
6021         if (copy_from_user(buf, ubuf, cnt))
6022                 return -EFAULT;
6023
6024         buf[cnt] = 0;
6025
6026         clockstr = strstrip(buf);
6027
6028         ret = tracing_set_clock(tr, clockstr);
6029         if (ret)
6030                 return ret;
6031
6032         *fpos += cnt;
6033
6034         return cnt;
6035 }
6036
6037 static int tracing_clock_open(struct inode *inode, struct file *file)
6038 {
6039         struct trace_array *tr = inode->i_private;
6040         int ret;
6041
6042         if (tracing_disabled)
6043                 return -ENODEV;
6044
6045         if (trace_array_get(tr))
6046                 return -ENODEV;
6047
6048         ret = single_open(file, tracing_clock_show, inode->i_private);
6049         if (ret < 0)
6050                 trace_array_put(tr);
6051
6052         return ret;
6053 }
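/*
 * Usage sketch (assuming tracefs at /sys/kernel/tracing): "trace_clock"
 * lists the available clocks with the active one in brackets, in the
 * format printed by tracing_clock_show(); writing a name switches clocks
 * and, as noted in tracing_set_clock() above, resets the buffers:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono ...
 *	# echo global > /sys/kernel/tracing/trace_clock
 */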
6054
6055 struct ftrace_buffer_info {
6056         struct trace_iterator   iter;
6057         void                    *spare;
6058         unsigned int            spare_cpu;
6059         unsigned int            read;
6060 };
6061
6062 #ifdef CONFIG_TRACER_SNAPSHOT
6063 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6064 {
6065         struct trace_array *tr = inode->i_private;
6066         struct trace_iterator *iter;
6067         struct seq_file *m;
6068         int ret = 0;
6069
6070         if (trace_array_get(tr) < 0)
6071                 return -ENODEV;
6072
6073         if (file->f_mode & FMODE_READ) {
6074                 iter = __tracing_open(inode, file, true);
6075                 if (IS_ERR(iter))
6076                         ret = PTR_ERR(iter);
6077         } else {
6078                 /* Writes still need the seq_file to hold the private data */
6079                 ret = -ENOMEM;
6080                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6081                 if (!m)
6082                         goto out;
6083                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6084                 if (!iter) {
6085                         kfree(m);
6086                         goto out;
6087                 }
6088                 ret = 0;
6089
6090                 iter->tr = tr;
6091                 iter->trace_buffer = &tr->max_buffer;
6092                 iter->cpu_file = tracing_get_cpu(inode);
6093                 m->private = iter;
6094                 file->private_data = m;
6095         }
6096 out:
6097         if (ret < 0)
6098                 trace_array_put(tr);
6099
6100         return ret;
6101 }
6102
6103 static ssize_t
6104 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6105                        loff_t *ppos)
6106 {
6107         struct seq_file *m = filp->private_data;
6108         struct trace_iterator *iter = m->private;
6109         struct trace_array *tr = iter->tr;
6110         unsigned long val;
6111         int ret;
6112
6113         ret = tracing_update_buffers();
6114         if (ret < 0)
6115                 return ret;
6116
6117         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6118         if (ret)
6119                 return ret;
6120
6121         mutex_lock(&trace_types_lock);
6122
6123         if (tr->current_trace->use_max_tr) {
6124                 ret = -EBUSY;
6125                 goto out;
6126         }
6127
6128         switch (val) {
6129         case 0:
6130                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6131                         ret = -EINVAL;
6132                         break;
6133                 }
6134                 if (tr->allocated_snapshot)
6135                         free_snapshot(tr);
6136                 break;
6137         case 1:
6138 /* Only allow per-cpu swap if the ring buffer supports it */
6139 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6140                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6141                         ret = -EINVAL;
6142                         break;
6143                 }
6144 #endif
6145                 if (!tr->allocated_snapshot) {
6146                         ret = alloc_snapshot(tr);
6147                         if (ret < 0)
6148                                 break;
6149                 }
6150                 local_irq_disable();
6151                 /* Now, we're going to swap */
6152                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6153                         update_max_tr(tr, current, smp_processor_id());
6154                 else
6155                         update_max_tr_single(tr, current, iter->cpu_file);
6156                 local_irq_enable();
6157                 break;
6158         default:
6159                 if (tr->allocated_snapshot) {
6160                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6161                                 tracing_reset_online_cpus(&tr->max_buffer);
6162                         else
6163                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6164                 }
6165                 break;
6166         }
6167
6168         if (ret >= 0) {
6169                 *ppos += cnt;
6170                 ret = cnt;
6171         }
6172 out:
6173         mutex_unlock(&trace_types_lock);
6174         return ret;
6175 }
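/*
 * Usage sketch (assuming CONFIG_TRACER_SNAPSHOT=y and tracefs at
 * /sys/kernel/tracing): the switch above maps writes to "snapshot" as
 * follows: 0 frees the snapshot buffer, 1 allocates it if needed and swaps
 * in the current trace, and any other value just clears the snapshot
 * contents. Reading the file shows the snapshotted trace:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot
 *	# cat /sys/kernel/tracing/snapshot
 *	# echo 0 > /sys/kernel/tracing/snapshot
 */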
6176
6177 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6178 {
6179         struct seq_file *m = file->private_data;
6180         int ret;
6181
6182         ret = tracing_release(inode, file);
6183
6184         if (file->f_mode & FMODE_READ)
6185                 return ret;
6186
6187         /* If write only, the seq_file is just a stub */
6188         if (m)
6189                 kfree(m->private);
6190         kfree(m);
6191
6192         return 0;
6193 }
6194
6195 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6196 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6197                                     size_t count, loff_t *ppos);
6198 static int tracing_buffers_release(struct inode *inode, struct file *file);
6199 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6200                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6201
6202 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6203 {
6204         struct ftrace_buffer_info *info;
6205         int ret;
6206
6207         ret = tracing_buffers_open(inode, filp);
6208         if (ret < 0)
6209                 return ret;
6210
6211         info = filp->private_data;
6212
6213         if (info->iter.trace->use_max_tr) {
6214                 tracing_buffers_release(inode, filp);
6215                 return -EBUSY;
6216         }
6217
6218         info->iter.snapshot = true;
6219         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6220
6221         return ret;
6222 }
6223
6224 #endif /* CONFIG_TRACER_SNAPSHOT */
6225
6226
6227 static const struct file_operations tracing_thresh_fops = {
6228         .open           = tracing_open_generic,
6229         .read           = tracing_thresh_read,
6230         .write          = tracing_thresh_write,
6231         .llseek         = generic_file_llseek,
6232 };
6233
6234 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6235 static const struct file_operations tracing_max_lat_fops = {
6236         .open           = tracing_open_generic,
6237         .read           = tracing_max_lat_read,
6238         .write          = tracing_max_lat_write,
6239         .llseek         = generic_file_llseek,
6240 };
6241 #endif
6242
6243 static const struct file_operations set_tracer_fops = {
6244         .open           = tracing_open_generic,
6245         .read           = tracing_set_trace_read,
6246         .write          = tracing_set_trace_write,
6247         .llseek         = generic_file_llseek,
6248 };
6249
6250 static const struct file_operations tracing_pipe_fops = {
6251         .open           = tracing_open_pipe,
6252         .poll           = tracing_poll_pipe,
6253         .read           = tracing_read_pipe,
6254         .splice_read    = tracing_splice_read_pipe,
6255         .release        = tracing_release_pipe,
6256         .llseek         = no_llseek,
6257 };
6258
6259 static const struct file_operations tracing_entries_fops = {
6260         .open           = tracing_open_generic_tr,
6261         .read           = tracing_entries_read,
6262         .write          = tracing_entries_write,
6263         .llseek         = generic_file_llseek,
6264         .release        = tracing_release_generic_tr,
6265 };
6266
6267 static const struct file_operations tracing_total_entries_fops = {
6268         .open           = tracing_open_generic_tr,
6269         .read           = tracing_total_entries_read,
6270         .llseek         = generic_file_llseek,
6271         .release        = tracing_release_generic_tr,
6272 };
6273
6274 static const struct file_operations tracing_free_buffer_fops = {
6275         .open           = tracing_open_generic_tr,
6276         .write          = tracing_free_buffer_write,
6277         .release        = tracing_free_buffer_release,
6278 };
6279
6280 static const struct file_operations tracing_mark_fops = {
6281         .open           = tracing_open_generic_tr,
6282         .write          = tracing_mark_write,
6283         .llseek         = generic_file_llseek,
6284         .release        = tracing_release_generic_tr,
6285 };
6286
6287 static const struct file_operations tracing_mark_raw_fops = {
6288         .open           = tracing_open_generic_tr,
6289         .write          = tracing_mark_raw_write,
6290         .llseek         = generic_file_llseek,
6291         .release        = tracing_release_generic_tr,
6292 };
6293
6294 static const struct file_operations trace_clock_fops = {
6295         .open           = tracing_clock_open,
6296         .read           = seq_read,
6297         .llseek         = seq_lseek,
6298         .release        = tracing_single_release_tr,
6299         .write          = tracing_clock_write,
6300 };
6301
6302 #ifdef CONFIG_TRACER_SNAPSHOT
6303 static const struct file_operations snapshot_fops = {
6304         .open           = tracing_snapshot_open,
6305         .read           = seq_read,
6306         .write          = tracing_snapshot_write,
6307         .llseek         = tracing_lseek,
6308         .release        = tracing_snapshot_release,
6309 };
6310
6311 static const struct file_operations snapshot_raw_fops = {
6312         .open           = snapshot_raw_open,
6313         .read           = tracing_buffers_read,
6314         .release        = tracing_buffers_release,
6315         .splice_read    = tracing_buffers_splice_read,
6316         .llseek         = no_llseek,
6317 };
6318
6319 #endif /* CONFIG_TRACER_SNAPSHOT */
6320
6321 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6322 {
6323         struct trace_array *tr = inode->i_private;
6324         struct ftrace_buffer_info *info;
6325         int ret;
6326
6327         if (tracing_disabled)
6328                 return -ENODEV;
6329
6330         if (trace_array_get(tr) < 0)
6331                 return -ENODEV;
6332
6333         info = kzalloc(sizeof(*info), GFP_KERNEL);
6334         if (!info) {
6335                 trace_array_put(tr);
6336                 return -ENOMEM;
6337         }
6338
6339         mutex_lock(&trace_types_lock);
6340
6341         info->iter.tr           = tr;
6342         info->iter.cpu_file     = tracing_get_cpu(inode);
6343         info->iter.trace        = tr->current_trace;
6344         info->iter.trace_buffer = &tr->trace_buffer;
6345         info->spare             = NULL;
6346         /* Force reading ring buffer for first read */
6347         info->read              = (unsigned int)-1;
6348
6349         filp->private_data = info;
6350
6351         tr->current_trace->ref++;
6352
6353         mutex_unlock(&trace_types_lock);
6354
6355         ret = nonseekable_open(inode, filp);
6356         if (ret < 0)
6357                 trace_array_put(tr);
6358
6359         return ret;
6360 }
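/*
 * Usage sketch (an assumption about the file these buffer fops back,
 * typically per_cpu/cpuN/trace_pipe_raw under tracefs): reads return raw,
 * page-sized ring-buffer pages for one CPU and consume them, which suits
 * tools that post-process binary trace data:
 *
 *	# dd if=/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw \
 *	     of=/tmp/cpu0.raw bs=4096
 */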
6361
6362 static unsigned int
6363 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6364 {
6365         struct ftrace_buffer_info *info = filp->private_data;
6366         struct trace_iterator *iter = &info->iter;
6367
6368         return trace_poll(iter, filp, poll_table);
6369 }
6370
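/*
 * Read path for the per-CPU trace_pipe_raw file: ring_buffer_read_page()
 * fills the spare page (info->spare) with data from the ring buffer, and
 * that page is then copied out to user space in chunks.  info->read tracks
 * how far into the current spare page the reader has gotten; the open code
 * sets it to (unsigned int)-1 so the very first read pulls a fresh page.
 */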
6371 static ssize_t
6372 tracing_buffers_read(struct file *filp, char __user *ubuf,
6373                      size_t count, loff_t *ppos)
6374 {
6375         struct ftrace_buffer_info *info = filp->private_data;
6376         struct trace_iterator *iter = &info->iter;
6377         ssize_t ret;
6378         ssize_t size;
6379
6380         if (!count)
6381                 return 0;
6382
6383 #ifdef CONFIG_TRACER_MAX_TRACE
6384         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6385                 return -EBUSY;
6386 #endif
6387
6388         if (!info->spare) {
6389                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6390                                                           iter->cpu_file);
6391                 info->spare_cpu = iter->cpu_file;
6392         }
6393         if (!info->spare)
6394                 return -ENOMEM;
6395
6396         /* Do we have previous read data to read? */
6397         if (info->read < PAGE_SIZE)
6398                 goto read;
6399
6400  again:
6401         trace_access_lock(iter->cpu_file);
6402         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6403                                     &info->spare,
6404                                     count,
6405                                     iter->cpu_file, 0);
6406         trace_access_unlock(iter->cpu_file);
6407
6408         if (ret < 0) {
6409                 if (trace_empty(iter)) {
6410                         if ((filp->f_flags & O_NONBLOCK))
6411                                 return -EAGAIN;
6412
6413                         ret = wait_on_pipe(iter, false);
6414                         if (ret)
6415                                 return ret;
6416
6417                         goto again;
6418                 }
6419                 return 0;
6420         }
6421
6422         info->read = 0;
6423  read:
6424         size = PAGE_SIZE - info->read;
6425         if (size > count)
6426                 size = count;
6427
6428         ret = copy_to_user(ubuf, info->spare + info->read, size);
6429         if (ret == size)
6430                 return -EFAULT;
6431
6432         size -= ret;
6433
6434         *ppos += size;
6435         info->read += size;
6436
6437         return size;
6438 }
6439
6440 static int tracing_buffers_release(struct inode *inode, struct file *file)
6441 {
6442         struct ftrace_buffer_info *info = file->private_data;
6443         struct trace_iterator *iter = &info->iter;
6444
6445         mutex_lock(&trace_types_lock);
6446
6447         iter->tr->current_trace->ref--;
6448
6449         __trace_array_put(iter->tr);
6450
6451         if (info->spare)
6452                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6453                                            info->spare_cpu, info->spare);
6454         kfree(info);
6455
6456         mutex_unlock(&trace_types_lock);
6457
6458         return 0;
6459 }
6460
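/*
 * One buffer_ref is allocated for every ring-buffer page handed to the
 * pipe by tracing_buffers_splice_read().  The count starts at 1 and is
 * bumped each time the pipe takes another reference to the buffer
 * (buffer_pipe_buf_get()); the page goes back to the ring buffer only
 * when the last reference is dropped, in buffer_pipe_buf_release() or
 * buffer_spd_release().
 */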
6461 struct buffer_ref {
6462         struct ring_buffer      *buffer;
6463         void                    *page;
6464         int                     cpu;
6465         int                     ref;
6466 };
6467
6468 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6469                                     struct pipe_buffer *buf)
6470 {
6471         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6472
6473         if (--ref->ref)
6474                 return;
6475
6476         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6477         kfree(ref);
6478         buf->private = 0;
6479 }
6480
6481 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6482                                 struct pipe_buffer *buf)
6483 {
6484         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6485
6486         ref->ref++;
6487 }
6488
6489 /* Pipe buffer operations for a buffer. */
6490 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6491         .can_merge              = 0,
6492         .confirm                = generic_pipe_buf_confirm,
6493         .release                = buffer_pipe_buf_release,
6494         .steal                  = generic_pipe_buf_steal,
6495         .get                    = buffer_pipe_buf_get,
6496 };
6497
6498 /*
6499  * Callback from splice_to_pipe(): release any pages left at the end of
6500  * the spd if we errored out while filling the pipe.
6501  */
6502 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6503 {
6504         struct buffer_ref *ref =
6505                 (struct buffer_ref *)spd->partial[i].private;
6506
6507         if (--ref->ref)
6508                 return;
6509
6510         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6511         kfree(ref);
6512         spd->partial[i].private = 0;
6513 }
6514
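/*
 * Splice path for trace_pipe_raw: whole ring-buffer pages are wrapped in
 * buffer_refs and handed to the pipe without copying.  Both *ppos and len
 * must be page aligned; a len that is not a multiple of PAGE_SIZE gets
 * rounded down, and anything smaller than one page is rejected with
 * -EINVAL.
 */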
6515 static ssize_t
6516 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6517                             struct pipe_inode_info *pipe, size_t len,
6518                             unsigned int flags)
6519 {
6520         struct ftrace_buffer_info *info = file->private_data;
6521         struct trace_iterator *iter = &info->iter;
6522         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6523         struct page *pages_def[PIPE_DEF_BUFFERS];
6524         struct splice_pipe_desc spd = {
6525                 .pages          = pages_def,
6526                 .partial        = partial_def,
6527                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6528                 .ops            = &buffer_pipe_buf_ops,
6529                 .spd_release    = buffer_spd_release,
6530         };
6531         struct buffer_ref *ref;
6532         int entries, size, i;
6533         ssize_t ret = 0;
6534
6535 #ifdef CONFIG_TRACER_MAX_TRACE
6536         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6537                 return -EBUSY;
6538 #endif
6539
6540         if (*ppos & (PAGE_SIZE - 1))
6541                 return -EINVAL;
6542
6543         if (len & (PAGE_SIZE - 1)) {
6544                 if (len < PAGE_SIZE)
6545                         return -EINVAL;
6546                 len &= PAGE_MASK;
6547         }
6548
6549         if (splice_grow_spd(pipe, &spd))
6550                 return -ENOMEM;
6551
6552  again:
6553         trace_access_lock(iter->cpu_file);
6554         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6555
6556         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6557                 struct page *page;
6558                 int r;
6559
6560                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6561                 if (!ref) {
6562                         ret = -ENOMEM;
6563                         break;
6564                 }
6565
6566                 ref->ref = 1;
6567                 ref->buffer = iter->trace_buffer->buffer;
6568                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6569                 if (!ref->page) {
6570                         ret = -ENOMEM;
6571                         kfree(ref);
6572                         break;
6573                 }
6574                 ref->cpu = iter->cpu_file;
6575
6576                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6577                                           len, iter->cpu_file, 1);
6578                 if (r < 0) {
6579                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6580                                                    ref->page);
6581                         kfree(ref);
6582                         break;
6583                 }
6584
6585                 /*
6586                  * Zero out any leftover data; this page is going
6587                  * to user land.
6588                  */
6589                 size = ring_buffer_page_len(ref->page);
6590                 if (size < PAGE_SIZE)
6591                         memset(ref->page + size, 0, PAGE_SIZE - size);
6592
6593                 page = virt_to_page(ref->page);
6594
6595                 spd.pages[i] = page;
6596                 spd.partial[i].len = PAGE_SIZE;
6597                 spd.partial[i].offset = 0;
6598                 spd.partial[i].private = (unsigned long)ref;
6599                 spd.nr_pages++;
6600                 *ppos += PAGE_SIZE;
6601
6602                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6603         }
6604
6605         trace_access_unlock(iter->cpu_file);
6606         spd.nr_pages = i;
6607
6608         /* did we read anything? */
6609         if (!spd.nr_pages) {
6610                 if (ret)
6611                         goto out;
6612
6613                 ret = -EAGAIN;
6614                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6615                         goto out;
6616
6617                 ret = wait_on_pipe(iter, true);
6618                 if (ret)
6619                         goto out;
6620
6621                 goto again;
6622         }
6623
6624         ret = splice_to_pipe(pipe, &spd);
6625 out:
6626         splice_shrink_spd(&spd);
6627
6628         return ret;
6629 }
6630
6631 static const struct file_operations tracing_buffers_fops = {
6632         .open           = tracing_buffers_open,
6633         .read           = tracing_buffers_read,
6634         .poll           = tracing_buffers_poll,
6635         .release        = tracing_buffers_release,
6636         .splice_read    = tracing_buffers_splice_read,
6637         .llseek         = no_llseek,
6638 };
6639
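/*
 * Backs the per_cpu/cpuN/stats file.  Illustrative output (the numbers
 * below are made up):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts:  1234.567890
 *   now ts:  1234.678901
 *   dropped events: 0
 *   read events: 512
 */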
6640 static ssize_t
6641 tracing_stats_read(struct file *filp, char __user *ubuf,
6642                    size_t count, loff_t *ppos)
6643 {
6644         struct inode *inode = file_inode(filp);
6645         struct trace_array *tr = inode->i_private;
6646         struct trace_buffer *trace_buf = &tr->trace_buffer;
6647         int cpu = tracing_get_cpu(inode);
6648         struct trace_seq *s;
6649         unsigned long cnt;
6650         unsigned long long t;
6651         unsigned long usec_rem;
6652
6653         s = kmalloc(sizeof(*s), GFP_KERNEL);
6654         if (!s)
6655                 return -ENOMEM;
6656
6657         trace_seq_init(s);
6658
6659         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6660         trace_seq_printf(s, "entries: %ld\n", cnt);
6661
6662         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6663         trace_seq_printf(s, "overrun: %ld\n", cnt);
6664
6665         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6666         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6667
6668         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6669         trace_seq_printf(s, "bytes: %ld\n", cnt);
6670
6671         if (trace_clocks[tr->clock_id].in_ns) {
6672                 /* local or global for trace_clock */
6673                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6674                 usec_rem = do_div(t, USEC_PER_SEC);
6675                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6676                                                                 t, usec_rem);
6677
6678                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6679                 usec_rem = do_div(t, USEC_PER_SEC);
6680                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6681         } else {
6682                 /* counter or tsc mode for trace_clock */
6683                 trace_seq_printf(s, "oldest event ts: %llu\n",
6684                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6685
6686                 trace_seq_printf(s, "now ts: %llu\n",
6687                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6688         }
6689
6690         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6691         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6692
6693         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6694         trace_seq_printf(s, "read events: %ld\n", cnt);
6695
6696         count = simple_read_from_buffer(ubuf, count, ppos,
6697                                         s->buffer, trace_seq_used(s));
6698
6699         kfree(s);
6700
6701         return count;
6702 }
6703
6704 static const struct file_operations tracing_stats_fops = {
6705         .open           = tracing_open_generic_tr,
6706         .read           = tracing_stats_read,
6707         .llseek         = generic_file_llseek,
6708         .release        = tracing_release_generic_tr,
6709 };
6710
6711 #ifdef CONFIG_DYNAMIC_FTRACE
6712
6713 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6714 {
6715         return 0;
6716 }
6717
6718 static ssize_t
6719 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6720                   size_t cnt, loff_t *ppos)
6721 {
6722         static char ftrace_dyn_info_buffer[1024];
6723         static DEFINE_MUTEX(dyn_info_mutex);
6724         unsigned long *p = filp->private_data;
6725         char *buf = ftrace_dyn_info_buffer;
6726         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6727         int r;
6728
6729         mutex_lock(&dyn_info_mutex);
6730         r = sprintf(buf, "%ld ", *p);
6731
6732         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6733         buf[r++] = '\n';
6734
6735         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6736
6737         mutex_unlock(&dyn_info_mutex);
6738
6739         return r;
6740 }
6741
6742 static const struct file_operations tracing_dyn_info_fops = {
6743         .open           = tracing_open_generic,
6744         .read           = tracing_read_dyn_info,
6745         .llseek         = generic_file_llseek,
6746 };
6747 #endif /* CONFIG_DYNAMIC_FTRACE */
6748
6749 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6750 static void
6751 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6752                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6753                 void *data)
6754 {
6755         tracing_snapshot_instance(tr);
6756 }
6757
6758 static void
6759 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6760                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6761                       void *data)
6762 {
6763         struct ftrace_func_mapper *mapper = data;
6764         long *count = NULL;
6765
6766         if (mapper)
6767                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6768
6769         if (count) {
6770
6771                 if (*count <= 0)
6772                         return;
6773
6774                 (*count)--;
6775         }
6776
6777         tracing_snapshot_instance(tr);
6778 }
6779
6780 static int
6781 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6782                       struct ftrace_probe_ops *ops, void *data)
6783 {
6784         struct ftrace_func_mapper *mapper = data;
6785         long *count = NULL;
6786
6787         seq_printf(m, "%ps:", (void *)ip);
6788
6789         seq_puts(m, "snapshot");
6790
6791         if (mapper)
6792                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6793
6794         if (count)
6795                 seq_printf(m, ":count=%ld\n", *count);
6796         else
6797                 seq_puts(m, ":unlimited\n");
6798
6799         return 0;
6800 }
6801
6802 static int
6803 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6804                      unsigned long ip, void *init_data, void **data)
6805 {
6806         struct ftrace_func_mapper *mapper = *data;
6807
6808         if (!mapper) {
6809                 mapper = allocate_ftrace_func_mapper();
6810                 if (!mapper)
6811                         return -ENOMEM;
6812                 *data = mapper;
6813         }
6814
6815         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6816 }
6817
6818 static void
6819 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6820                      unsigned long ip, void *data)
6821 {
6822         struct ftrace_func_mapper *mapper = data;
6823
6824         if (!ip) {
6825                 if (!mapper)
6826                         return;
6827                 free_ftrace_func_mapper(mapper, NULL);
6828                 return;
6829         }
6830
6831         ftrace_func_mapper_remove_ip(mapper, ip);
6832 }
6833
6834 static struct ftrace_probe_ops snapshot_probe_ops = {
6835         .func                   = ftrace_snapshot,
6836         .print                  = ftrace_snapshot_print,
6837 };
6838
6839 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6840         .func                   = ftrace_count_snapshot,
6841         .print                  = ftrace_snapshot_print,
6842         .init                   = ftrace_snapshot_init,
6843         .free                   = ftrace_snapshot_free,
6844 };
6845
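/*
 * Handles the "snapshot" command written to set_ftrace_filter.  The
 * function name below is only an example:
 *
 *   echo 'schedule:snapshot'   > set_ftrace_filter  # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter  # only the first 5 hits
 *   echo '!schedule:snapshot'  > set_ftrace_filter  # remove the probe
 */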
6846 static int
6847 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6848                                char *glob, char *cmd, char *param, int enable)
6849 {
6850         struct ftrace_probe_ops *ops;
6851         void *count = (void *)-1;
6852         char *number;
6853         int ret;
6854
6855         /* hash funcs only work with set_ftrace_filter */
6856         if (!enable)
6857                 return -EINVAL;
6858
6859         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6860
6861         if (glob[0] == '!')
6862                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6863
6864         if (!param)
6865                 goto out_reg;
6866
6867         number = strsep(&param, ":");
6868
6869         if (!strlen(number))
6870                 goto out_reg;
6871
6872         /*
6873          * We use the callback data field (which is a pointer)
6874          * as our counter.
6875          */
6876         ret = kstrtoul(number, 0, (unsigned long *)&count);
6877         if (ret)
6878                 return ret;
6879
6880  out_reg:
6881         ret = alloc_snapshot(tr);
6882         if (ret < 0)
6883                 goto out;
6884
6885         ret = register_ftrace_function_probe(glob, tr, ops, count);
6886
6887  out:
6888         return ret < 0 ? ret : 0;
6889 }
6890
6891 static struct ftrace_func_command ftrace_snapshot_cmd = {
6892         .name                   = "snapshot",
6893         .func                   = ftrace_trace_snapshot_callback,
6894 };
6895
6896 static __init int register_snapshot_cmd(void)
6897 {
6898         return register_ftrace_command(&ftrace_snapshot_cmd);
6899 }
6900 #else
6901 static inline __init int register_snapshot_cmd(void) { return 0; }
6902 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6903
6904 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6905 {
6906         if (WARN_ON(!tr->dir))
6907                 return ERR_PTR(-ENODEV);
6908
6909         /* Top directory uses NULL as the parent */
6910         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6911                 return NULL;
6912
6913         /* All sub buffers have a descriptor */
6914         return tr->dir;
6915 }
6916
6917 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6918 {
6919         struct dentry *d_tracer;
6920
6921         if (tr->percpu_dir)
6922                 return tr->percpu_dir;
6923
6924         d_tracer = tracing_get_dentry(tr);
6925         if (IS_ERR(d_tracer))
6926                 return NULL;
6927
6928         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6929
6930         WARN_ONCE(!tr->percpu_dir,
6931                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6932
6933         return tr->percpu_dir;
6934 }
6935
6936 static struct dentry *
6937 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6938                       void *data, long cpu, const struct file_operations *fops)
6939 {
6940         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6941
6942         if (ret) /* See tracing_get_cpu() */
6943                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6944         return ret;
6945 }
6946
6947 static void
6948 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6949 {
6950         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6951         struct dentry *d_cpu;
6952         char cpu_dir[30]; /* 30 characters should be more than enough */
6953
6954         if (!d_percpu)
6955                 return;
6956
6957         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6958         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6959         if (!d_cpu) {
6960                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6961                 return;
6962         }
6963
6964         /* per cpu trace_pipe */
6965         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6966                                 tr, cpu, &tracing_pipe_fops);
6967
6968         /* per cpu trace */
6969         trace_create_cpu_file("trace", 0644, d_cpu,
6970                                 tr, cpu, &tracing_fops);
6971
6972         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6973                                 tr, cpu, &tracing_buffers_fops);
6974
6975         trace_create_cpu_file("stats", 0444, d_cpu,
6976                                 tr, cpu, &tracing_stats_fops);
6977
6978         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6979                                 tr, cpu, &tracing_entries_fops);
6980
6981 #ifdef CONFIG_TRACER_SNAPSHOT
6982         trace_create_cpu_file("snapshot", 0644, d_cpu,
6983                                 tr, cpu, &snapshot_fops);
6984
6985         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6986                                 tr, cpu, &snapshot_raw_fops);
6987 #endif
6988 }
6989
6990 #ifdef CONFIG_FTRACE_SELFTEST
6991 /* Let selftest have access to static functions in this file */
6992 #include "trace_selftest.c"
6993 #endif
6994
6995 static ssize_t
6996 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6997                         loff_t *ppos)
6998 {
6999         struct trace_option_dentry *topt = filp->private_data;
7000         char *buf;
7001
7002         if (topt->flags->val & topt->opt->bit)
7003                 buf = "1\n";
7004         else
7005                 buf = "0\n";
7006
7007         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7008 }
7009
7010 static ssize_t
7011 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7012                          loff_t *ppos)
7013 {
7014         struct trace_option_dentry *topt = filp->private_data;
7015         unsigned long val;
7016         int ret;
7017
7018         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7019         if (ret)
7020                 return ret;
7021
7022         if (val != 0 && val != 1)
7023                 return -EINVAL;
7024
7025         if (!!(topt->flags->val & topt->opt->bit) != val) {
7026                 mutex_lock(&trace_types_lock);
7027                 ret = __set_tracer_option(topt->tr, topt->flags,
7028                                           topt->opt, !val);
7029                 mutex_unlock(&trace_types_lock);
7030                 if (ret)
7031                         return ret;
7032         }
7033
7034         *ppos += cnt;
7035
7036         return cnt;
7037 }
7038
7039
7040 static const struct file_operations trace_options_fops = {
7041         .open = tracing_open_generic,
7042         .read = trace_options_read,
7043         .write = trace_options_write,
7044         .llseek = generic_file_llseek,
7045 };
7046
7047 /*
7048  * In order to pass in both the trace_array descriptor and the index of
7049  * the flag that a trace option file represents, the trace_array has a
7050  * character array trace_flags_index[], where each entry holds the bit
7051  * index of the flag it stands for: index[0] == 0, index[1] == 1, etc.
7052  * The address of the entry for a given flag is what gets passed to the
7053  * flag option file read/write callbacks.
7054  *
7055  * In order to extract both the index and the trace_array descriptor,
7056  * get_tr_index() uses the following algorithm.
7057  *
7058  *   idx = *ptr;
7059  *
7060  * Since each entry holds its own index (remember index[1] == 1),
7061  * dereferencing the pointer yields the index directly.
7062  *
7063  * To get back to the trace_array descriptor, subtract that index from
7064  * the pointer, which lands on the start of the array:
7065  *
7066  *   ptr - idx == &index[0]
7067  *
7068  * Then a simple container_of() from that pointer gets us to the
7069  * trace_array descriptor.
7070  */
7071 static void get_tr_index(void *data, struct trace_array **ptr,
7072                          unsigned int *pindex)
7073 {
7074         *pindex = *(unsigned char *)data;
7075
7076         *ptr = container_of(data - *pindex, struct trace_array,
7077                             trace_flags_index);
7078 }
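/*
 * Worked example of the above: if data points at tr->trace_flags_index[3],
 * then *pindex == 3 and data - 3 == &tr->trace_flags_index[0], so the
 * container_of() recovers the enclosing trace_array.
 */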
7079
7080 static ssize_t
7081 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7082                         loff_t *ppos)
7083 {
7084         void *tr_index = filp->private_data;
7085         struct trace_array *tr;
7086         unsigned int index;
7087         char *buf;
7088
7089         get_tr_index(tr_index, &tr, &index);
7090
7091         if (tr->trace_flags & (1 << index))
7092                 buf = "1\n";
7093         else
7094                 buf = "0\n";
7095
7096         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7097 }
7098
7099 static ssize_t
7100 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7101                          loff_t *ppos)
7102 {
7103         void *tr_index = filp->private_data;
7104         struct trace_array *tr;
7105         unsigned int index;
7106         unsigned long val;
7107         int ret;
7108
7109         get_tr_index(tr_index, &tr, &index);
7110
7111         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7112         if (ret)
7113                 return ret;
7114
7115         if (val != 0 && val != 1)
7116                 return -EINVAL;
7117
7118         mutex_lock(&trace_types_lock);
7119         ret = set_tracer_flag(tr, 1 << index, val);
7120         mutex_unlock(&trace_types_lock);
7121
7122         if (ret < 0)
7123                 return ret;
7124
7125         *ppos += cnt;
7126
7127         return cnt;
7128 }
7129
7130 static const struct file_operations trace_options_core_fops = {
7131         .open = tracing_open_generic,
7132         .read = trace_options_core_read,
7133         .write = trace_options_core_write,
7134         .llseek = generic_file_llseek,
7135 };
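/*
 * Both flavours of option file accept only "0" or "1".  From user space
 * (flag names come from trace_options[] and from each tracer's own opts):
 *
 *   echo 1 > options/<flag>   # set the flag
 *   echo 0 > options/<flag>   # clear it
 */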
7136
7137 struct dentry *trace_create_file(const char *name,
7138                                  umode_t mode,
7139                                  struct dentry *parent,
7140                                  void *data,
7141                                  const struct file_operations *fops)
7142 {
7143         struct dentry *ret;
7144
7145         ret = tracefs_create_file(name, mode, parent, data, fops);
7146         if (!ret)
7147                 pr_warn("Could not create tracefs '%s' entry\n", name);
7148
7149         return ret;
7150 }
7151
7152
7153 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7154 {
7155         struct dentry *d_tracer;
7156
7157         if (tr->options)
7158                 return tr->options;
7159
7160         d_tracer = tracing_get_dentry(tr);
7161         if (IS_ERR(d_tracer))
7162                 return NULL;
7163
7164         tr->options = tracefs_create_dir("options", d_tracer);
7165         if (!tr->options) {
7166                 pr_warn("Could not create tracefs directory 'options'\n");
7167                 return NULL;
7168         }
7169
7170         return tr->options;
7171 }
7172
7173 static void
7174 create_trace_option_file(struct trace_array *tr,
7175                          struct trace_option_dentry *topt,
7176                          struct tracer_flags *flags,
7177                          struct tracer_opt *opt)
7178 {
7179         struct dentry *t_options;
7180
7181         t_options = trace_options_init_dentry(tr);
7182         if (!t_options)
7183                 return;
7184
7185         topt->flags = flags;
7186         topt->opt = opt;
7187         topt->tr = tr;
7188
7189         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7190                                     &trace_options_fops);
7191
7192 }
7193
7194 static void
7195 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7196 {
7197         struct trace_option_dentry *topts;
7198         struct trace_options *tr_topts;
7199         struct tracer_flags *flags;
7200         struct tracer_opt *opts;
7201         int cnt;
7202         int i;
7203
7204         if (!tracer)
7205                 return;
7206
7207         flags = tracer->flags;
7208
7209         if (!flags || !flags->opts)
7210                 return;
7211
7212         /*
7213          * If this is an instance, only create flags for tracers
7214          * the instance may have.
7215          */
7216         if (!trace_ok_for_array(tracer, tr))
7217                 return;
7218
7219         for (i = 0; i < tr->nr_topts; i++) {
7220                 /* Make sure there are no duplicate flags. */
7221                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7222                         return;
7223         }
7224
7225         opts = flags->opts;
7226
7227         for (cnt = 0; opts[cnt].name; cnt++)
7228                 ;
7229
7230         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7231         if (!topts)
7232                 return;
7233
7234         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7235                             GFP_KERNEL);
7236         if (!tr_topts) {
7237                 kfree(topts);
7238                 return;
7239         }
7240
7241         tr->topts = tr_topts;
7242         tr->topts[tr->nr_topts].tracer = tracer;
7243         tr->topts[tr->nr_topts].topts = topts;
7244         tr->nr_topts++;
7245
7246         for (cnt = 0; opts[cnt].name; cnt++) {
7247                 create_trace_option_file(tr, &topts[cnt], flags,
7248                                          &opts[cnt]);
7249                 WARN_ONCE(topts[cnt].entry == NULL,
7250                           "Failed to create trace option: %s",
7251                           opts[cnt].name);
7252         }
7253 }
7254
7255 static struct dentry *
7256 create_trace_option_core_file(struct trace_array *tr,
7257                               const char *option, long index)
7258 {
7259         struct dentry *t_options;
7260
7261         t_options = trace_options_init_dentry(tr);
7262         if (!t_options)
7263                 return NULL;
7264
7265         return trace_create_file(option, 0644, t_options,
7266                                  (void *)&tr->trace_flags_index[index],
7267                                  &trace_options_core_fops);
7268 }
7269
7270 static void create_trace_options_dir(struct trace_array *tr)
7271 {
7272         struct dentry *t_options;
7273         bool top_level = tr == &global_trace;
7274         int i;
7275
7276         t_options = trace_options_init_dentry(tr);
7277         if (!t_options)
7278                 return;
7279
7280         for (i = 0; trace_options[i]; i++) {
7281                 if (top_level ||
7282                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7283                         create_trace_option_core_file(tr, trace_options[i], i);
7284         }
7285 }
7286
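/*
 * rb_simple_read()/rb_simple_write() back the tracing_on file:
 *
 *   echo 0 > tracing_on   # pause recording (buffers are left intact)
 *   echo 1 > tracing_on   # resume recording
 *   cat tracing_on        # prints 0 or 1
 *
 * Writes also call the current tracer's start()/stop() hooks if it has them.
 */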
7287 static ssize_t
7288 rb_simple_read(struct file *filp, char __user *ubuf,
7289                size_t cnt, loff_t *ppos)
7290 {
7291         struct trace_array *tr = filp->private_data;
7292         char buf[64];
7293         int r;
7294
7295         r = tracer_tracing_is_on(tr);
7296         r = sprintf(buf, "%d\n", r);
7297
7298         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7299 }
7300
7301 static ssize_t
7302 rb_simple_write(struct file *filp, const char __user *ubuf,
7303                 size_t cnt, loff_t *ppos)
7304 {
7305         struct trace_array *tr = filp->private_data;
7306         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7307         unsigned long val;
7308         int ret;
7309
7310         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7311         if (ret)
7312                 return ret;
7313
7314         if (buffer) {
7315                 mutex_lock(&trace_types_lock);
7316                 if (val) {
7317                         tracer_tracing_on(tr);
7318                         if (tr->current_trace->start)
7319                                 tr->current_trace->start(tr);
7320                 } else {
7321                         tracer_tracing_off(tr);
7322                         if (tr->current_trace->stop)
7323                                 tr->current_trace->stop(tr);
7324                 }
7325                 mutex_unlock(&trace_types_lock);
7326         }
7327
7328         (*ppos)++;
7329
7330         return cnt;
7331 }
7332
7333 static const struct file_operations rb_simple_fops = {
7334         .open           = tracing_open_generic_tr,
7335         .read           = rb_simple_read,
7336         .write          = rb_simple_write,
7337         .release        = tracing_release_generic_tr,
7338         .llseek         = default_llseek,
7339 };
7340
7341 struct dentry *trace_instance_dir;
7342
7343 static void
7344 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7345
7346 static int
7347 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7348 {
7349         enum ring_buffer_flags rb_flags;
7350
7351         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7352
7353         buf->tr = tr;
7354
7355         buf->buffer = ring_buffer_alloc(size, rb_flags);
7356         if (!buf->buffer)
7357                 return -ENOMEM;
7358
7359         buf->data = alloc_percpu(struct trace_array_cpu);
7360         if (!buf->data) {
7361                 ring_buffer_free(buf->buffer);
7362                 return -ENOMEM;
7363         }
7364
7365         /* Allocate the first page for all buffers */
7366         set_buffer_entries(&tr->trace_buffer,
7367                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7368
7369         return 0;
7370 }
7371
7372 static int allocate_trace_buffers(struct trace_array *tr, int size)
7373 {
7374         int ret;
7375
7376         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7377         if (ret)
7378                 return ret;
7379
7380 #ifdef CONFIG_TRACER_MAX_TRACE
7381         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7382                                     allocate_snapshot ? size : 1);
7383         if (WARN_ON(ret)) {
7384                 ring_buffer_free(tr->trace_buffer.buffer);
7385                 free_percpu(tr->trace_buffer.data);
7386                 return -ENOMEM;
7387         }
7388         tr->allocated_snapshot = allocate_snapshot;
7389
7390         /*
7391          * Only the top level trace array gets its snapshot allocated
7392          * from the kernel command line.
7393          */
7394         allocate_snapshot = false;
7395 #endif
7396         return 0;
7397 }
7398
7399 static void free_trace_buffer(struct trace_buffer *buf)
7400 {
7401         if (buf->buffer) {
7402                 ring_buffer_free(buf->buffer);
7403                 buf->buffer = NULL;
7404                 free_percpu(buf->data);
7405                 buf->data = NULL;
7406         }
7407 }
7408
7409 static void free_trace_buffers(struct trace_array *tr)
7410 {
7411         if (!tr)
7412                 return;
7413
7414         free_trace_buffer(&tr->trace_buffer);
7415
7416 #ifdef CONFIG_TRACER_MAX_TRACE
7417         free_trace_buffer(&tr->max_buffer);
7418 #endif
7419 }
7420
7421 static void init_trace_flags_index(struct trace_array *tr)
7422 {
7423         int i;
7424
7425         /* Used by the trace options files */
7426         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7427                 tr->trace_flags_index[i] = i;
7428 }
7429
7430 static void __update_tracer_options(struct trace_array *tr)
7431 {
7432         struct tracer *t;
7433
7434         for (t = trace_types; t; t = t->next)
7435                 add_tracer_options(tr, t);
7436 }
7437
7438 static void update_tracer_options(struct trace_array *tr)
7439 {
7440         mutex_lock(&trace_types_lock);
7441         __update_tracer_options(tr);
7442         mutex_unlock(&trace_types_lock);
7443 }
7444
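/*
 * instance_mkdir()/instance_rmdir() are the callbacks behind the
 * "instances" directory (see create_trace_instances() below), so a new,
 * independent trace_array with its own buffers and event files can be
 * created with a plain mkdir.  The path below assumes the usual tracefs
 * mount point:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo  # -EBUSY while still in use
 */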
7445 static int instance_mkdir(const char *name)
7446 {
7447         struct trace_array *tr;
7448         int ret;
7449
7450         mutex_lock(&trace_types_lock);
7451
7452         ret = -EEXIST;
7453         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7454                 if (tr->name && strcmp(tr->name, name) == 0)
7455                         goto out_unlock;
7456         }
7457
7458         ret = -ENOMEM;
7459         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7460         if (!tr)
7461                 goto out_unlock;
7462
7463         tr->name = kstrdup(name, GFP_KERNEL);
7464         if (!tr->name)
7465                 goto out_free_tr;
7466
7467         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7468                 goto out_free_tr;
7469
7470         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7471
7472         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7473
7474         raw_spin_lock_init(&tr->start_lock);
7475
7476         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7477
7478         tr->current_trace = &nop_trace;
7479
7480         INIT_LIST_HEAD(&tr->systems);
7481         INIT_LIST_HEAD(&tr->events);
7482
7483         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7484                 goto out_free_tr;
7485
7486         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7487         if (!tr->dir)
7488                 goto out_free_tr;
7489
7490         ret = event_trace_add_tracer(tr->dir, tr);
7491         if (ret) {
7492                 tracefs_remove_recursive(tr->dir);
7493                 goto out_free_tr;
7494         }
7495
7496         ftrace_init_trace_array(tr);
7497
7498         init_tracer_tracefs(tr, tr->dir);
7499         init_trace_flags_index(tr);
7500         __update_tracer_options(tr);
7501
7502         list_add(&tr->list, &ftrace_trace_arrays);
7503
7504         mutex_unlock(&trace_types_lock);
7505
7506         return 0;
7507
7508  out_free_tr:
7509         free_trace_buffers(tr);
7510         free_cpumask_var(tr->tracing_cpumask);
7511         kfree(tr->name);
7512         kfree(tr);
7513
7514  out_unlock:
7515         mutex_unlock(&trace_types_lock);
7516
7517         return ret;
7518
7519 }
7520
7521 static int instance_rmdir(const char *name)
7522 {
7523         struct trace_array *tr;
7524         int found = 0;
7525         int ret;
7526         int i;
7527
7528         mutex_lock(&trace_types_lock);
7529
7530         ret = -ENODEV;
7531         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7532                 if (tr->name && strcmp(tr->name, name) == 0) {
7533                         found = 1;
7534                         break;
7535                 }
7536         }
7537         if (!found)
7538                 goto out_unlock;
7539
7540         ret = -EBUSY;
7541         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7542                 goto out_unlock;
7543
7544         list_del(&tr->list);
7545
7546         /* Disable all the flags that were enabled coming in */
7547         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7548                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7549                         set_tracer_flag(tr, 1 << i, 0);
7550         }
7551
7552         tracing_set_nop(tr);
7553         event_trace_del_tracer(tr);
7554         ftrace_clear_pids(tr);
7555         ftrace_destroy_function_files(tr);
7556         tracefs_remove_recursive(tr->dir);
7557         free_trace_buffers(tr);
7558
7559         for (i = 0; i < tr->nr_topts; i++) {
7560                 kfree(tr->topts[i].topts);
7561         }
7562         kfree(tr->topts);
7563
7564         kfree(tr->name);
7565         kfree(tr);
7566
7567         ret = 0;
7568
7569  out_unlock:
7570         mutex_unlock(&trace_types_lock);
7571
7572         return ret;
7573 }
7574
7575 static __init void create_trace_instances(struct dentry *d_tracer)
7576 {
7577         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7578                                                          instance_mkdir,
7579                                                          instance_rmdir);
7580         if (WARN_ON(!trace_instance_dir))
7581                 return;
7582 }
7583
7584 static void
7585 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7586 {
7587         int cpu;
7588
7589         trace_create_file("available_tracers", 0444, d_tracer,
7590                         tr, &show_traces_fops);
7591
7592         trace_create_file("current_tracer", 0644, d_tracer,
7593                         tr, &set_tracer_fops);
7594
7595         trace_create_file("tracing_cpumask", 0644, d_tracer,
7596                           tr, &tracing_cpumask_fops);
7597
7598         trace_create_file("trace_options", 0644, d_tracer,
7599                           tr, &tracing_iter_fops);
7600
7601         trace_create_file("trace", 0644, d_tracer,
7602                           tr, &tracing_fops);
7603
7604         trace_create_file("trace_pipe", 0444, d_tracer,
7605                           tr, &tracing_pipe_fops);
7606
7607         trace_create_file("buffer_size_kb", 0644, d_tracer,
7608                           tr, &tracing_entries_fops);
7609
7610         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7611                           tr, &tracing_total_entries_fops);
7612
7613         trace_create_file("free_buffer", 0200, d_tracer,
7614                           tr, &tracing_free_buffer_fops);
7615
7616         trace_create_file("trace_marker", 0220, d_tracer,
7617                           tr, &tracing_mark_fops);
7618
7619         trace_create_file("trace_marker_raw", 0220, d_tracer,
7620                           tr, &tracing_mark_raw_fops);
7621
7622         trace_create_file("trace_clock", 0644, d_tracer, tr,
7623                           &trace_clock_fops);
7624
7625         trace_create_file("tracing_on", 0644, d_tracer,
7626                           tr, &rb_simple_fops);
7627
7628         create_trace_options_dir(tr);
7629
7630 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7631         trace_create_file("tracing_max_latency", 0644, d_tracer,
7632                         &tr->max_latency, &tracing_max_lat_fops);
7633 #endif
7634
7635         if (ftrace_create_function_files(tr, d_tracer))
7636                 WARN(1, "Could not allocate function filter files");
7637
7638 #ifdef CONFIG_TRACER_SNAPSHOT
7639         trace_create_file("snapshot", 0644, d_tracer,
7640                           tr, &snapshot_fops);
7641 #endif
7642
7643         for_each_tracing_cpu(cpu)
7644                 tracing_init_tracefs_percpu(tr, cpu);
7645
7646         ftrace_init_tracefs(tr, d_tracer);
7647 }
7648
7649 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7650 {
7651         struct vfsmount *mnt;
7652         struct file_system_type *type;
7653
7654         /*
7655          * To maintain backward compatibility for tools that mount
7656          * debugfs to get to the tracing facility, tracefs is automatically
7657          * mounted to the debugfs/tracing directory.
7658          */
7659         type = get_fs_type("tracefs");
7660         if (!type)
7661                 return NULL;
7662         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7663         put_filesystem(type);
7664         if (IS_ERR(mnt))
7665                 return NULL;
7666         mntget(mnt);
7667
7668         return mnt;
7669 }
7670
7671 /**
7672  * tracing_init_dentry - initialize top level trace array
7673  *
7674  * This is called when creating files or directories in the tracing
7675  * directory. It is called from the fs_initcall() boot-up code and is
7676  * expected to return the dentry of the top level tracing directory.
7677  */
7678 struct dentry *tracing_init_dentry(void)
7679 {
7680         struct trace_array *tr = &global_trace;
7681
7682         /* The top level trace array uses NULL as parent */
7683         if (tr->dir)
7684                 return NULL;
7685
7686         if (WARN_ON(!tracefs_initialized()) ||
7687                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7688                  WARN_ON(!debugfs_initialized())))
7689                 return ERR_PTR(-ENODEV);
7690
7691         /*
7692          * As there may still be users that expect the tracing
7693          * files to exist in debugfs/tracing, we must automount
7694          * the tracefs file system there, so older tools still
7695          * work with the newer kernel.
7696          */
7697         tr->dir = debugfs_create_automount("tracing", NULL,
7698                                            trace_automount, NULL);
7699         if (!tr->dir) {
7700                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7701                 return ERR_PTR(-ENOMEM);
7702         }
7703
7704         return NULL;
7705 }
7706
7707 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7708 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7709
7710 static void __init trace_enum_init(void)
7711 {
7712         int len;
7713
7714         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7715         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7716 }
7717
7718 #ifdef CONFIG_MODULES
7719 static void trace_module_add_enums(struct module *mod)
7720 {
7721         if (!mod->num_trace_enums)
7722                 return;
7723
7724         /*
7725          * Modules with bad taint do not have events created, so do
7726          * not bother with enums either.
7727          */
7728         if (trace_module_has_bad_taint(mod))
7729                 return;
7730
7731         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7732 }
7733
7734 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7735 static void trace_module_remove_enums(struct module *mod)
7736 {
7737         union trace_enum_map_item *map;
7738         union trace_enum_map_item **last = &trace_enum_maps;
7739
7740         if (!mod->num_trace_enums)
7741                 return;
7742
7743         mutex_lock(&trace_enum_mutex);
7744
7745         map = trace_enum_maps;
7746
7747         while (map) {
7748                 if (map->head.mod == mod)
7749                         break;
7750                 map = trace_enum_jmp_to_tail(map);
7751                 last = &map->tail.next;
7752                 map = map->tail.next;
7753         }
7754         if (!map)
7755                 goto out;
7756
7757         *last = trace_enum_jmp_to_tail(map)->tail.next;
7758         kfree(map);
7759  out:
7760         mutex_unlock(&trace_enum_mutex);
7761 }
7762 #else
7763 static inline void trace_module_remove_enums(struct module *mod) { }
7764 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7765
7766 static int trace_module_notify(struct notifier_block *self,
7767                                unsigned long val, void *data)
7768 {
7769         struct module *mod = data;
7770
7771         switch (val) {
7772         case MODULE_STATE_COMING:
7773                 trace_module_add_enums(mod);
7774                 break;
7775         case MODULE_STATE_GOING:
7776                 trace_module_remove_enums(mod);
7777                 break;
7778         }
7779
7780         return 0;
7781 }
7782
7783 static struct notifier_block trace_module_nb = {
7784         .notifier_call = trace_module_notify,
7785         .priority = 0,
7786 };
7787 #endif /* CONFIG_MODULES */
7788
7789 static __init int tracer_init_tracefs(void)
7790 {
7791         struct dentry *d_tracer;
7792
7793         trace_access_lock_init();
7794
7795         d_tracer = tracing_init_dentry();
7796         if (IS_ERR(d_tracer))
7797                 return 0;
7798
7799         init_tracer_tracefs(&global_trace, d_tracer);
7800         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7801
7802         trace_create_file("tracing_thresh", 0644, d_tracer,
7803                         &global_trace, &tracing_thresh_fops);
7804
7805         trace_create_file("README", 0444, d_tracer,
7806                         NULL, &tracing_readme_fops);
7807
7808         trace_create_file("saved_cmdlines", 0444, d_tracer,
7809                         NULL, &tracing_saved_cmdlines_fops);
7810
7811         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7812                           NULL, &tracing_saved_cmdlines_size_fops);
7813
7814         trace_enum_init();
7815
7816         trace_create_enum_file(d_tracer);
7817
7818 #ifdef CONFIG_MODULES
7819         register_module_notifier(&trace_module_nb);
7820 #endif
7821
7822 #ifdef CONFIG_DYNAMIC_FTRACE
7823         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7824                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7825 #endif
7826
7827         create_trace_instances(d_tracer);
7828
7829         update_tracer_options(&global_trace);
7830
7831         return 0;
7832 }
7833
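/*
 * Panic/die notifiers: when ftrace_dump_on_oops is set, the ftrace buffers
 * are dumped to the console via ftrace_dump() below, so the trace leading
 * up to the crash is not lost.
 */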
7834 static int trace_panic_handler(struct notifier_block *this,
7835                                unsigned long event, void *unused)
7836 {
7837         if (ftrace_dump_on_oops)
7838                 ftrace_dump(ftrace_dump_on_oops);
7839         return NOTIFY_OK;
7840 }
7841
7842 static struct notifier_block trace_panic_notifier = {
7843         .notifier_call  = trace_panic_handler,
7844         .next           = NULL,
7845         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7846 };
7847
7848 static int trace_die_handler(struct notifier_block *self,
7849                              unsigned long val,
7850                              void *data)
7851 {
7852         switch (val) {
7853         case DIE_OOPS:
7854                 if (ftrace_dump_on_oops)
7855                         ftrace_dump(ftrace_dump_on_oops);
7856                 break;
7857         default:
7858                 break;
7859         }
7860         return NOTIFY_OK;
7861 }
7862
7863 static struct notifier_block trace_die_notifier = {
7864         .notifier_call = trace_die_handler,
7865         .priority = 200
7866 };
7867
7868 /*
7869  * printk is set to a max of 1024; we really don't need it that big.
7870  * Nothing should be printing 1000 characters anyway.
7871  */
7872 #define TRACE_MAX_PRINT         1000
7873
7874 /*
7875  * Define here KERN_TRACE so that we have one place to modify
7876  * it if we decide to change what log level the ftrace dump
7877  * should be at.
7878  */
7879 #define KERN_TRACE              KERN_EMERG
7880
7881 void
7882 trace_printk_seq(struct trace_seq *s)
7883 {
7884         /* Probably should print a warning here. */
7885         if (s->seq.len >= TRACE_MAX_PRINT)
7886                 s->seq.len = TRACE_MAX_PRINT;
7887
7888         /*
7889          * More paranoid code. Although the buffer size is set to
7890          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7891          * an extra layer of protection.
7892          */
7893         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7894                 s->seq.len = s->seq.size - 1;
7895
7896         /* should already be zero-terminated, but we are paranoid. */
7897         s->buffer[s->seq.len] = 0;
7898
7899         printk(KERN_TRACE "%s", s->buffer);
7900
7901         trace_seq_init(s);
7902 }
7903
7904 void trace_init_global_iter(struct trace_iterator *iter)
7905 {
7906         iter->tr = &global_trace;
7907         iter->trace = iter->tr->current_trace;
7908         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7909         iter->trace_buffer = &global_trace.trace_buffer;
7910
7911         if (iter->trace && iter->trace->open)
7912                 iter->trace->open(iter);
7913
7914         /* Annotate start of buffers if we had overruns */
7915         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7916                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7917
7918         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7919         if (trace_clocks[iter->tr->clock_id].in_ns)
7920                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7921 }
7922
7923 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7924 {
7925         /* use static because iter can be a bit big for the stack */
7926         static struct trace_iterator iter;
7927         static atomic_t dump_running;
7928         struct trace_array *tr = &global_trace;
7929         unsigned int old_userobj;
7930         unsigned long flags;
7931         int cnt = 0, cpu;
7932
7933         /* Only allow one dump user at a time. */
7934         if (atomic_inc_return(&dump_running) != 1) {
7935                 atomic_dec(&dump_running);
7936                 return;
7937         }
7938
7939         /*
7940          * Always turn off tracing when we dump.
7941          * We don't need to show trace output of what happens
7942          * between multiple crashes.
7943          *
7944          * If the user does a sysrq-z, then they can re-enable
7945          * tracing with echo 1 > tracing_on.
7946          */
7947         tracing_off();
7948
7949         local_irq_save(flags);
7950
7951         /* Simulate the iterator */
7952         trace_init_global_iter(&iter);
7953
7954         for_each_tracing_cpu(cpu) {
7955                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7956         }
7957
7958         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7959
7960         /* don't look at user memory in panic mode */
7961         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7962
7963         switch (oops_dump_mode) {
7964         case DUMP_ALL:
7965                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7966                 break;
7967         case DUMP_ORIG:
7968                 iter.cpu_file = raw_smp_processor_id();
7969                 break;
7970         case DUMP_NONE:
7971                 goto out_enable;
7972         default:
7973                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7974                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7975         }
7976
7977         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7978
7979         /* Did function tracer already get disabled? */
7980         if (ftrace_is_dead()) {
7981                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7982                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7983         }
7984
7985         /*
7986          * We need to stop all tracing on all CPUs to read
7987          * the next buffer. This is a bit expensive, but is
7988          * not done often. We fill in all that we can read,
7989          * and then release the locks again.
7990          */
7991
7992         while (!trace_empty(&iter)) {
7993
7994                 if (!cnt)
7995                         printk(KERN_TRACE "---------------------------------\n");
7996
7997                 cnt++;
7998
7999                 /* reset all but tr, trace, and overruns */
8000                 memset(&iter.seq, 0,
8001                        sizeof(struct trace_iterator) -
8002                        offsetof(struct trace_iterator, seq));
8003                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8004                 iter.pos = -1;
8005
8006                 if (trace_find_next_entry_inc(&iter) != NULL) {
8007                         int ret;
8008
8009                         ret = print_trace_line(&iter);
8010                         if (ret != TRACE_TYPE_NO_CONSUME)
8011                                 trace_consume(&iter);
8012                 }
8013                 touch_nmi_watchdog();
8014
8015                 trace_printk_seq(&iter.seq);
8016         }
8017
8018         if (!cnt)
8019                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8020         else
8021                 printk(KERN_TRACE "---------------------------------\n");
8022
8023  out_enable:
8024         tr->trace_flags |= old_userobj;
8025
8026         for_each_tracing_cpu(cpu) {
8027                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8028         }
8029         atomic_dec(&dump_running);
8030         local_irq_restore(flags);
8031 }
8032 EXPORT_SYMBOL_GPL(ftrace_dump);
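/*
 * A hypothetical usage sketch (not part of this file): since ftrace_dump()
 * is exported, a module's fatal-error path could dump the trace buffers
 * before halting, for example:
 *
 *	ftrace_dump(DUMP_ALL);		(or DUMP_ORIG for just this CPU)
 *
 * Note that the dump turns tracing off; it has to be re-enabled by hand
 * afterwards (echo 1 > tracing_on).
 */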
8033
8034 __init static int tracer_alloc_buffers(void)
8035 {
8036         int ring_buf_size;
8037         int ret = -ENOMEM;
8038
8039         /*
8040          * Make sure we don't accidentally add more trace options
8041          * than we have bits for.
8042          */
8043         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8044
8045         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8046                 goto out;
8047
8048         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8049                 goto out_free_buffer_mask;
8050
8051         /* Only allocate trace_printk buffers if a trace_printk exists */
8052         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8053                 /* Must be called before global_trace.buffer is allocated */
8054                 trace_printk_init_buffers();
8055
8056         /* To save memory, keep the ring buffer size to its minimum */
8057         if (ring_buffer_expanded)
8058                 ring_buf_size = trace_buf_size;
8059         else
8060                 ring_buf_size = 1;
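        /*
         * The buffer is expanded to the full trace_buf_size later (via
         * tracing_update_buffers()) once tracing is actually used.
         */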
8061
8062         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8063         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8064
8065         raw_spin_lock_init(&global_trace.start_lock);
8066
8067         /*
8068          * The prepare callback allocates some memory for the ring buffer. We
8069          * don't free the buffer if the CPU goes down. If we were to free
8070          * the buffer, then the user would lose any trace that was in the
8071          * buffer. The memory will be removed once the "instance" is removed.
8072          */
8073         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8074                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8075                                       NULL);
8076         if (ret < 0)
8077                 goto out_free_cpumask;
8078         /* Used for event triggers */
8079         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8080         if (!temp_buffer)
8081                 goto out_rm_hp_state;
8082
8083         if (trace_create_savedcmd() < 0)
8084                 goto out_free_temp_buffer;
8085
8086         /* TODO: make the number of buffers hot pluggable with CPUs */
8087         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8088                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8089                 WARN_ON(1);
8090                 goto out_free_savedcmd;
8091         }
8092
8093         if (global_trace.buffer_disabled)
8094                 tracing_off();
8095
8096         if (trace_boot_clock) {
8097                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8098                 if (ret < 0)
8099                         pr_warn("Trace clock %s not defined, going back to default\n",
8100                                 trace_boot_clock);
8101         }
8102
8103         /*
8104          * register_tracer() might reference current_trace, so it
8105          * needs to be set before we register anything. This is
8106          * just a bootstrap of current_trace anyway.
8107          */
8108         global_trace.current_trace = &nop_trace;
8109
8110         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8111
8112         ftrace_init_global_array_ops(&global_trace);
8113
8114         init_trace_flags_index(&global_trace);
8115
8116         register_tracer(&nop_trace);
8117
8118         /* Function tracing may start here (via kernel command line) */
8119         init_function_trace();
8120
8121         /* All seems OK, enable tracing */
8122         tracing_disabled = 0;
8123
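        /*
         * These notifiers are what dump the trace buffers to the console
         * on a kernel panic or die event when ftrace_dump_on_oops is set.
         */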
8124         atomic_notifier_chain_register(&panic_notifier_list,
8125                                        &trace_panic_notifier);
8126
8127         register_die_notifier(&trace_die_notifier);
8128
8129         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8130
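        /*
         * The global trace array becomes the first entry on the list of
         * trace arrays; instances created later through the tracefs
         * "instances" directory are added to the same list.
         */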
8131         INIT_LIST_HEAD(&global_trace.systems);
8132         INIT_LIST_HEAD(&global_trace.events);
8133         list_add(&global_trace.list, &ftrace_trace_arrays);
8134
8135         apply_trace_boot_options();
8136
8137         register_snapshot_cmd();
8138
8139         return 0;
8140
8141 out_free_savedcmd:
8142         free_saved_cmdlines_buffer(savedcmd);
8143 out_free_temp_buffer:
8144         ring_buffer_free(temp_buffer);
8145 out_rm_hp_state:
8146         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8147 out_free_cpumask:
8148         free_cpumask_var(global_trace.tracing_cpumask);
8149 out_free_buffer_mask:
8150         free_cpumask_var(tracing_buffer_mask);
8151 out:
8152         return ret;
8153 }
8154
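/*
 * early_trace_init() is called very early from start_kernel() so that the
 * ring buffers (and trace_printk) are usable during early boot;
 * trace_init() runs a bit later and finishes the job by initializing the
 * trace events.
 */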
8155 void __init early_trace_init(void)
8156 {
8157         if (tracepoint_printk) {
8158                 tracepoint_print_iter =
8159                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8160                 if (WARN_ON(!tracepoint_print_iter))
8161                         tracepoint_printk = 0;
8162                 else
8163                         static_key_enable(&tracepoint_printk_key.key);
8164         }
8165         tracer_alloc_buffers();
8166 }
8167
8168 void __init trace_init(void)
8169 {
8170         trace_event_init();
8171 }
8172
8173 __init static int clear_boot_tracer(void)
8174 {
8175         /*
8176          * The default boot-up tracer string lives in an init section
8177          * that is freed after boot. This function is called from a
8178          * late initcall; if the boot tracer was never registered by
8179          * then, clear the pointer out to prevent a later registration
8180          * from accessing memory that is about to be freed.
8181          */
8182         if (!default_bootup_tracer)
8183                 return 0;
8184
8185         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8186                default_bootup_tracer);
8187         default_bootup_tracer = NULL;
8188
8189         return 0;
8190 }
8191
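/*
 * tracer_init_tracefs() creates the tracefs files once the filesystem
 * infrastructure is up; clear_boot_tracer() runs at late_initcall time,
 * after every built-in tracer has had a chance to register.
 */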
8192 fs_initcall(tracer_init_tracefs);
8193 late_initcall(clear_boot_tracer);