1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer such as trace_printk() could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
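
/*
 * Worked example of the rounding above: ns2usecs(1499) returns
 * (1499 + 500) / 1000 = 1, while ns2usecs(1500) returns
 * (1500 + 500) / 1000 = 2, i.e. nanoseconds are rounded to the
 * nearest microsecond.
 */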
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         TRACE_ITER_EVENT_FORK
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If @self is defined, the @task is only added if @self is also
371  * included in @pid_list. This happens on fork, and tasks should
372  * only be added when the parent is listed. If @self is NULL, then
373  * the @task pid will be removed from the list, which happens on
374  * exit of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
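
/*
 * Sketch (hypothetical wrappers, for illustration only) of how the three
 * helpers above are meant to back a seq_file interface:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.show	= trace_pid_show,
 *		// .stop omitted here for brevity
 *	};
 *
 * "example_pid_list" and the wrapper names are made up; real users wire
 * these helpers into the pid filtering files in tracefs.
 */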
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always create a new array; the write is an all-or-nothing
492          * operation. A new array is built whenever the user adds new
493          * pids, and if the operation fails, the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
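
/*
 * Example of the accepted input format, based on the parsing loop above:
 * writing "123 456\n" through a pid filter file sets bits 123 and 456 in
 * a freshly allocated pid_list (plus any bits copied from the old list),
 * while writing only whitespace leaves nr_pids at zero and clears the
 * list. The tracefs files that call this (e.g. set_event_pid) live
 * outside this function.
 */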
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled", which is meant for fast paths such as
592  * the irqsoff tracer, but it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on(), which is a
594  * little slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to the low value of 16384.
613  * If a dump on oops happens, it is much nicer not to have to
614  * wait for a huge amount of output. This can be configured at
615  * both boot time and run time anyway.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
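
/*
 * Typical usage pattern for the helpers above (illustrative only):
 *
 *	trace_access_lock(cpu);
 *	... consume or splice events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock for every cpu
 * buffer at once (the write side of all_cpu_access_lock on SMP).
 */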
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 static void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff) that just want to
767          * know if the ring buffer has been disabled, but can handle
768          * races where it gets disabled while a record is still made.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
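
/*
 * Note: callers normally do not use __trace_puts() directly; it is
 * typically reached through the trace_puts() macro, e.g.
 *
 *	trace_puts("reached the slow path\n");
 *
 * which picks __trace_puts() or __trace_bputs() depending on whether the
 * string is a literal. The example string is, of course, arbitrary.
 */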
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 /**
898  * trace_snapshot - take a snapshot of the current buffer.
899  *
900  * This causes a swap between the snapshot buffer and the current live
901  * tracing buffer. You can use this to take snapshots of the live
902  * trace when some condition is triggered, but continue to trace.
903  *
904  * Note, make sure to allocate the snapshot with either
905  * a tracing_snapshot_alloc(), or by doing it manually
906  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
907  *
908  * If the snapshot buffer is not allocated, it will stop tracing.
909  * Basically making a permanent snapshot.
910  */
911 void tracing_snapshot(void)
912 {
913         struct trace_array *tr = &global_trace;
914         struct tracer *tracer = tr->current_trace;
915         unsigned long flags;
916
917         if (in_nmi()) {
918                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
919                 internal_trace_puts("*** snapshot is being ignored        ***\n");
920                 return;
921         }
922
923         if (!tr->allocated_snapshot) {
924                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
925                 internal_trace_puts("*** stopping trace here!   ***\n");
926                 tracing_off();
927                 return;
928         }
929
930         /* Note, snapshot can not be used when the tracer uses it */
931         if (tracer->use_max_tr) {
932                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
933                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
934                 return;
935         }
936
937         local_irq_save(flags);
938         update_max_tr(tr, current, smp_processor_id());
939         local_irq_restore(flags);
940 }
941 EXPORT_SYMBOL_GPL(tracing_snapshot);
942
943 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
944                                         struct trace_buffer *size_buf, int cpu_id);
945 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
946
947 static int alloc_snapshot(struct trace_array *tr)
948 {
949         int ret;
950
951         if (!tr->allocated_snapshot) {
952
953                 /* allocate spare buffer */
954                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
955                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
956                 if (ret < 0)
957                         return ret;
958
959                 tr->allocated_snapshot = true;
960         }
961
962         return 0;
963 }
964
965 static void free_snapshot(struct trace_array *tr)
966 {
967         /*
968          * We don't free the ring buffer; instead, we resize it, because
969          * the max_tr ring buffer has some state (e.g. ring->clock) and
970          * we want to preserve it.
971          */
972         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
973         set_buffer_entries(&tr->max_buffer, 1);
974         tracing_reset_online_cpus(&tr->max_buffer);
975         tr->allocated_snapshot = false;
976 }
977
978 /**
979  * tracing_alloc_snapshot - allocate snapshot buffer.
980  *
981  * This only allocates the snapshot buffer if it isn't already
982  * allocated - it doesn't also take a snapshot.
983  *
984  * This is meant to be used in cases where the snapshot buffer needs
985  * to be set up for events that can't sleep but need to be able to
986  * trigger a snapshot.
987  */
988 int tracing_alloc_snapshot(void)
989 {
990         struct trace_array *tr = &global_trace;
991         int ret;
992
993         ret = alloc_snapshot(tr);
994         WARN_ON(ret < 0);
995
996         return ret;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999
1000 /**
1001  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1002  *
1003  * This is similar to trace_snapshot(), but it will allocate the
1004  * snapshot buffer if it isn't already allocated. Use this only
1005  * where it is safe to sleep, as the allocation may sleep.
1006  *
1007  * This causes a swap between the snapshot buffer and the current live
1008  * tracing buffer. You can use this to take snapshots of the live
1009  * trace when some condition is triggered, but continue to trace.
1010  */
1011 void tracing_snapshot_alloc(void)
1012 {
1013         int ret;
1014
1015         ret = tracing_alloc_snapshot();
1016         if (ret < 0)
1017                 return;
1018
1019         tracing_snapshot();
1020 }
1021 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1022 #else
1023 void tracing_snapshot(void)
1024 {
1025         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot);
1028 int tracing_alloc_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1031         return -ENODEV;
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1034 void tracing_snapshot_alloc(void)
1035 {
1036         /* Give warning */
1037         tracing_snapshot();
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1040 #endif /* CONFIG_TRACER_SNAPSHOT */
1041
1042 static void tracer_tracing_off(struct trace_array *tr)
1043 {
1044         if (tr->trace_buffer.buffer)
1045                 ring_buffer_record_off(tr->trace_buffer.buffer);
1046         /*
1047          * This flag is looked at when buffers haven't been allocated
1048          * yet, or by some tracers (like irqsoff) that just want to
1049          * know if the ring buffer has been disabled, but can handle
1050          * races where it gets disabled while a record is still made.
1051          * As the check is in the fast path of the tracers, it is more
1052          * important to be fast than accurate.
1053          */
1054         tr->buffer_disabled = 1;
1055         /* Make the flag seen by readers */
1056         smp_wmb();
1057 }
1058
1059 /**
1060  * tracing_off - turn off tracing buffers
1061  *
1062  * This function stops the tracing buffers from recording data.
1063  * It does not disable any overhead the tracers themselves may
1064  * be causing. This function simply causes all recording to
1065  * the ring buffers to fail.
1066  */
1067 void tracing_off(void)
1068 {
1069         tracer_tracing_off(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_off);
1072
1073 void disable_trace_on_warning(void)
1074 {
1075         if (__disable_trace_on_warning)
1076                 tracing_off();
1077 }
1078
1079 /**
1080  * tracer_tracing_is_on - show real state of ring buffer enabled
1081  * @tr : the trace array to know if ring buffer is enabled
1082  *
1083  * Shows real state of the ring buffer if it is enabled or not.
1084  */
1085 int tracer_tracing_is_on(struct trace_array *tr)
1086 {
1087         if (tr->trace_buffer.buffer)
1088                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1089         return !tr->buffer_disabled;
1090 }
1091
1092 /**
1093  * tracing_is_on - show state of ring buffers enabled
1094  */
1095 int tracing_is_on(void)
1096 {
1097         return tracer_tracing_is_on(&global_trace);
1098 }
1099 EXPORT_SYMBOL_GPL(tracing_is_on);
1100
1101 static int __init set_buf_size(char *str)
1102 {
1103         unsigned long buf_size;
1104
1105         if (!str)
1106                 return 0;
1107         buf_size = memparse(str, &str);
1108         /* nr_entries can not be zero */
1109         if (buf_size == 0)
1110                 return 0;
1111         trace_buf_size = buf_size;
1112         return 1;
1113 }
1114 __setup("trace_buf_size=", set_buf_size);
1115
1116 static int __init set_tracing_thresh(char *str)
1117 {
1118         unsigned long threshold;
1119         int ret;
1120
1121         if (!str)
1122                 return 0;
1123         ret = kstrtoul(str, 0, &threshold);
1124         if (ret < 0)
1125                 return 0;
1126         tracing_thresh = threshold * 1000;
1127         return 1;
1128 }
1129 __setup("tracing_thresh=", set_tracing_thresh);
1130
1131 unsigned long nsecs_to_usecs(unsigned long nsecs)
1132 {
1133         return nsecs / 1000;
1134 }
1135
1136 /*
1137  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1138  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1139  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1140  * of strings in the order that the enums were defined.
1141  */
1142 #undef C
1143 #define C(a, b) b
1144
1145 /* These must match the bit positions in trace_iterator_flags */
1146 static const char *trace_options[] = {
1147         TRACE_FLAGS
1148         NULL
1149 };
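
/*
 * Illustration of the C(a, b) trick above, using made-up flag entries:
 * if TRACE_FLAGS were defined as
 *
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * then with "#define C(a, b) b" the array above expands to
 * { "print-parent", "sym-offset", NULL }, matching the enum order used
 * for the bit masks.
 */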
1150
1151 static struct {
1152         u64 (*func)(void);
1153         const char *name;
1154         int in_ns;              /* is this clock in nanoseconds? */
1155 } trace_clocks[] = {
1156         { trace_clock_local,            "local",        1 },
1157         { trace_clock_global,           "global",       1 },
1158         { trace_clock_counter,          "counter",      0 },
1159         { trace_clock_jiffies,          "uptime",       0 },
1160         { trace_clock,                  "perf",         1 },
1161         { ktime_get_mono_fast_ns,       "mono",         1 },
1162         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1163         { ktime_get_boot_fast_ns,       "boot",         1 },
1164         ARCH_TRACE_CLOCKS
1165 };
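
/*
 * For illustration: a clock from the table above can be selected at boot
 * with the "trace_clock=" parameter handled earlier (e.g.
 * "trace_clock=global"), or at run time by writing one of the names,
 * such as "mono", to the trace_clock file in tracefs.
 */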
1166
1167 /*
1168  * trace_parser_get_init - gets the buffer for trace parser
1169  */
1170 int trace_parser_get_init(struct trace_parser *parser, int size)
1171 {
1172         memset(parser, 0, sizeof(*parser));
1173
1174         parser->buffer = kmalloc(size, GFP_KERNEL);
1175         if (!parser->buffer)
1176                 return 1;
1177
1178         parser->size = size;
1179         return 0;
1180 }
1181
1182 /*
1183  * trace_parser_put - frees the buffer for trace parser
1184  */
1185 void trace_parser_put(struct trace_parser *parser)
1186 {
1187         kfree(parser->buffer);
1188         parser->buffer = NULL;
1189 }
1190
1191 /*
1192  * trace_get_user - reads the user input string separated by space
1193  * (matched by isspace(ch))
1194  *
1195  * For each string found the 'struct trace_parser' is updated,
1196  * and the function returns.
1197  *
1198  * Returns number of bytes read.
1199  *
1200  * See kernel/trace/trace.h for 'struct trace_parser' details.
1201  */
1202 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1203         size_t cnt, loff_t *ppos)
1204 {
1205         char ch;
1206         size_t read = 0;
1207         ssize_t ret;
1208
1209         if (!*ppos)
1210                 trace_parser_clear(parser);
1211
1212         ret = get_user(ch, ubuf++);
1213         if (ret)
1214                 goto out;
1215
1216         read++;
1217         cnt--;
1218
1219         /*
1220          * The parser is not finished with the last write,
1221          * continue reading the user input without skipping spaces.
1222          */
1223         if (!parser->cont) {
1224                 /* skip white space */
1225                 while (cnt && isspace(ch)) {
1226                         ret = get_user(ch, ubuf++);
1227                         if (ret)
1228                                 goto out;
1229                         read++;
1230                         cnt--;
1231                 }
1232
1233                 /* only spaces were written */
1234                 if (isspace(ch)) {
1235                         *ppos += read;
1236                         ret = read;
1237                         goto out;
1238                 }
1239
1240                 parser->idx = 0;
1241         }
1242
1243         /* read the non-space input */
1244         while (cnt && !isspace(ch)) {
1245                 if (parser->idx < parser->size - 1)
1246                         parser->buffer[parser->idx++] = ch;
1247                 else {
1248                         ret = -EINVAL;
1249                         goto out;
1250                 }
1251                 ret = get_user(ch, ubuf++);
1252                 if (ret)
1253                         goto out;
1254                 read++;
1255                 cnt--;
1256         }
1257
1258         /* We either got finished input or we have to wait for another call. */
1259         if (isspace(ch)) {
1260                 parser->buffer[parser->idx] = 0;
1261                 parser->cont = false;
1262         } else if (parser->idx < parser->size - 1) {
1263                 parser->cont = true;
1264                 parser->buffer[parser->idx++] = ch;
1265         } else {
1266                 ret = -EINVAL;
1267                 goto out;
1268         }
1269
1270         *ppos += read;
1271         ret = read;
1272
1273 out:
1274         return ret;
1275 }
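
/*
 * Behavioural example, for illustration: if user space writes "123 456",
 * the first call fills parser->buffer with "123" and returns the number
 * of bytes consumed up to and including the separating space; the caller
 * is expected to call again with the remaining input to get "456". If a
 * write ends mid-token, parser->cont is set so the next call continues
 * the same token.
 */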
1276
1277 /* TODO add a seq_buf_to_buffer() */
1278 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1279 {
1280         int len;
1281
1282         if (trace_seq_used(s) <= s->seq.readpos)
1283                 return -EBUSY;
1284
1285         len = trace_seq_used(s) - s->seq.readpos;
1286         if (cnt > len)
1287                 cnt = len;
1288         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1289
1290         s->seq.readpos += cnt;
1291         return cnt;
1292 }
1293
1294 unsigned long __read_mostly     tracing_thresh;
1295
1296 #ifdef CONFIG_TRACER_MAX_TRACE
1297 /*
1298  * Copy the new maximum trace into the separate maximum-trace
1299  * structure. (this way the maximum trace is permanently saved,
1300  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1301  */
1302 static void
1303 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1304 {
1305         struct trace_buffer *trace_buf = &tr->trace_buffer;
1306         struct trace_buffer *max_buf = &tr->max_buffer;
1307         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1308         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1309
1310         max_buf->cpu = cpu;
1311         max_buf->time_start = data->preempt_timestamp;
1312
1313         max_data->saved_latency = tr->max_latency;
1314         max_data->critical_start = data->critical_start;
1315         max_data->critical_end = data->critical_end;
1316
1317         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1318         max_data->pid = tsk->pid;
1319         /*
1320          * If tsk == current, then use current_uid(), as that does not use
1321          * RCU. The irq tracer can be called out of RCU scope.
1322          */
1323         if (tsk == current)
1324                 max_data->uid = current_uid();
1325         else
1326                 max_data->uid = task_uid(tsk);
1327
1328         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1329         max_data->policy = tsk->policy;
1330         max_data->rt_priority = tsk->rt_priority;
1331
1332         /* record this task's comm */
1333         tracing_record_cmdline(tsk);
1334 }
1335
1336 /**
1337  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1338  * @tr: tracer
1339  * @tsk: the task with the latency
1340  * @cpu: The cpu that initiated the trace.
1341  *
1342  * Flip the buffers between the @tr and the max_tr and record information
1343  * about which task was the cause of this latency.
1344  */
1345 void
1346 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1347 {
1348         struct ring_buffer *buf;
1349
1350         if (tr->stop_count)
1351                 return;
1352
1353         WARN_ON_ONCE(!irqs_disabled());
1354
1355         if (!tr->allocated_snapshot) {
1356                 /* Only the nop tracer should hit this when disabling */
1357                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358                 return;
1359         }
1360
1361         arch_spin_lock(&tr->max_lock);
1362
1363         buf = tr->trace_buffer.buffer;
1364         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1365         tr->max_buffer.buffer = buf;
1366
1367         __update_max_tr(tr, tsk, cpu);
1368         arch_spin_unlock(&tr->max_lock);
1369 }
1370
1371 /**
1372  * update_max_tr_single - only copy one trace over, and reset the rest
1373  * @tr: tracer
1374  * @tsk: task with the latency
1375  * @cpu: the cpu of the buffer to copy.
1376  *
1377  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1378  */
1379 void
1380 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1381 {
1382         int ret;
1383
1384         if (tr->stop_count)
1385                 return;
1386
1387         WARN_ON_ONCE(!irqs_disabled());
1388         if (!tr->allocated_snapshot) {
1389                 /* Only the nop tracer should hit this when disabling */
1390                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1391                 return;
1392         }
1393
1394         arch_spin_lock(&tr->max_lock);
1395
1396         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1397
1398         if (ret == -EBUSY) {
1399                 /*
1400                  * We failed to swap the buffer due to a commit taking
1401                  * place on this CPU. We fail to record, but we reset
1402                  * the max trace buffer (no one writes directly to it)
1403                  * and flag that it failed.
1404                  */
1405                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1406                         "Failed to swap buffers due to commit in progress\n");
1407         }
1408
1409         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1410
1411         __update_max_tr(tr, tsk, cpu);
1412         arch_spin_unlock(&tr->max_lock);
1413 }
1414 #endif /* CONFIG_TRACER_MAX_TRACE */
1415
1416 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1417 {
1418         /* Iterators are static, they should be filled or empty */
1419         if (trace_buffer_iter(iter, iter->cpu_file))
1420                 return 0;
1421
1422         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1423                                 full);
1424 }
1425
1426 #ifdef CONFIG_FTRACE_STARTUP_TEST
1427 static int run_tracer_selftest(struct tracer *type)
1428 {
1429         struct trace_array *tr = &global_trace;
1430         struct tracer *saved_tracer = tr->current_trace;
1431         int ret;
1432
1433         if (!type->selftest || tracing_selftest_disabled)
1434                 return 0;
1435
1436         /*
1437          * Run a selftest on this tracer.
1438          * Here we reset the trace buffer, and set the current
1439          * tracer to be this tracer. The tracer can then run some
1440          * internal tracing to verify that everything is in order.
1441          * If we fail, we do not register this tracer.
1442          */
1443         tracing_reset_online_cpus(&tr->trace_buffer);
1444
1445         tr->current_trace = type;
1446
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448         if (type->use_max_tr) {
1449                 /* If we expanded the buffers, make sure the max is expanded too */
1450                 if (ring_buffer_expanded)
1451                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1452                                            RING_BUFFER_ALL_CPUS);
1453                 tr->allocated_snapshot = true;
1454         }
1455 #endif
1456
1457         /* the test is responsible for initializing and enabling */
1458         pr_info("Testing tracer %s: ", type->name);
1459         ret = type->selftest(type, tr);
1460         /* the test is responsible for resetting too */
1461         tr->current_trace = saved_tracer;
1462         if (ret) {
1463                 printk(KERN_CONT "FAILED!\n");
1464                 /* Add the warning after printing 'FAILED' */
1465                 WARN_ON(1);
1466                 return -1;
1467         }
1468         /* Only reset on passing, to avoid touching corrupted buffers */
1469         tracing_reset_online_cpus(&tr->trace_buffer);
1470
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472         if (type->use_max_tr) {
1473                 tr->allocated_snapshot = false;
1474
1475                 /* Shrink the max buffer again */
1476                 if (ring_buffer_expanded)
1477                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1478                                            RING_BUFFER_ALL_CPUS);
1479         }
1480 #endif
1481
1482         printk(KERN_CONT "PASSED\n");
1483         return 0;
1484 }
1485 #else
1486 static inline int run_tracer_selftest(struct tracer *type)
1487 {
1488         return 0;
1489 }
1490 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1491
1492 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1493
1494 static void __init apply_trace_boot_options(void);
1495
1496 /**
1497  * register_tracer - register a tracer with the ftrace system.
1498  * @type: the plugin for the tracer
1499  *
1500  * Register a new plugin tracer.
1501  */
1502 int __init register_tracer(struct tracer *type)
1503 {
1504         struct tracer *t;
1505         int ret = 0;
1506
1507         if (!type->name) {
1508                 pr_info("Tracer must have a name\n");
1509                 return -1;
1510         }
1511
1512         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1513                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1514                 return -1;
1515         }
1516
1517         mutex_lock(&trace_types_lock);
1518
1519         tracing_selftest_running = true;
1520
1521         for (t = trace_types; t; t = t->next) {
1522                 if (strcmp(type->name, t->name) == 0) {
1523                         /* already found */
1524                         pr_info("Tracer %s already registered\n",
1525                                 type->name);
1526                         ret = -1;
1527                         goto out;
1528                 }
1529         }
1530
1531         if (!type->set_flag)
1532                 type->set_flag = &dummy_set_flag;
1533         if (!type->flags) {
1534                 /* allocate a dummy tracer_flags */
1535                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1536                 if (!type->flags) {
1537                         ret = -ENOMEM;
1538                         goto out;
1539                 }
1540                 type->flags->val = 0;
1541                 type->flags->opts = dummy_tracer_opt;
1542         } else
1543                 if (!type->flags->opts)
1544                         type->flags->opts = dummy_tracer_opt;
1545
1546         /* store the tracer for __set_tracer_option */
1547         type->flags->trace = type;
1548
1549         ret = run_tracer_selftest(type);
1550         if (ret < 0)
1551                 goto out;
1552
1553         type->next = trace_types;
1554         trace_types = type;
1555         add_tracer_options(&global_trace, type);
1556
1557  out:
1558         tracing_selftest_running = false;
1559         mutex_unlock(&trace_types_lock);
1560
1561         if (ret || !default_bootup_tracer)
1562                 goto out_unlock;
1563
1564         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1565                 goto out_unlock;
1566
1567         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1568         /* Do we want this tracer to start on bootup? */
1569         tracing_set_tracer(&global_trace, type->name);
1570         default_bootup_tracer = NULL;
1571
1572         apply_trace_boot_options();
1573
1574         /* disable other selftests, since this will break them. */
1575         tracing_selftest_disabled = true;
1576 #ifdef CONFIG_FTRACE_STARTUP_TEST
1577         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1578                type->name);
1579 #endif
1580
1581  out_unlock:
1582         return ret;
1583 }
1584
1585 void tracing_reset(struct trace_buffer *buf, int cpu)
1586 {
1587         struct ring_buffer *buffer = buf->buffer;
1588
1589         if (!buffer)
1590                 return;
1591
1592         ring_buffer_record_disable(buffer);
1593
1594         /* Make sure all commits have finished */
1595         synchronize_sched();
1596         ring_buffer_reset_cpu(buffer, cpu);
1597
1598         ring_buffer_record_enable(buffer);
1599 }
1600
1601 void tracing_reset_online_cpus(struct trace_buffer *buf)
1602 {
1603         struct ring_buffer *buffer = buf->buffer;
1604         int cpu;
1605
1606         if (!buffer)
1607                 return;
1608
1609         ring_buffer_record_disable(buffer);
1610
1611         /* Make sure all commits have finished */
1612         synchronize_sched();
1613
1614         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1615
1616         for_each_online_cpu(cpu)
1617                 ring_buffer_reset_cpu(buffer, cpu);
1618
1619         ring_buffer_record_enable(buffer);
1620 }
1621
1622 /* Must have trace_types_lock held */
1623 void tracing_reset_all_online_cpus(void)
1624 {
1625         struct trace_array *tr;
1626
1627         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1628                 tracing_reset_online_cpus(&tr->trace_buffer);
1629 #ifdef CONFIG_TRACER_MAX_TRACE
1630                 tracing_reset_online_cpus(&tr->max_buffer);
1631 #endif
1632         }
1633 }
1634
1635 #define SAVED_CMDLINES_DEFAULT 128
1636 #define NO_CMDLINE_MAP UINT_MAX
1637 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1638 struct saved_cmdlines_buffer {
1639         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1640         unsigned *map_cmdline_to_pid;
1641         unsigned cmdline_num;
1642         int cmdline_idx;
1643         char *saved_cmdlines;
1644 };
1645 static struct saved_cmdlines_buffer *savedcmd;
1646
1647 /* temporary disable recording */
1648 static atomic_t trace_record_cmdline_disabled __read_mostly;
1649
1650 static inline char *get_saved_cmdlines(int idx)
1651 {
1652         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1653 }
1654
1655 static inline void set_cmdline(int idx, const char *cmdline)
1656 {
1657         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1658 }
1659
1660 static int allocate_cmdlines_buffer(unsigned int val,
1661                                     struct saved_cmdlines_buffer *s)
1662 {
1663         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1664                                         GFP_KERNEL);
1665         if (!s->map_cmdline_to_pid)
1666                 return -ENOMEM;
1667
1668         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1669         if (!s->saved_cmdlines) {
1670                 kfree(s->map_cmdline_to_pid);
1671                 return -ENOMEM;
1672         }
1673
1674         s->cmdline_idx = 0;
1675         s->cmdline_num = val;
1676         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1677                sizeof(s->map_pid_to_cmdline));
1678         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1679                val * sizeof(*s->map_cmdline_to_pid));
1680
1681         return 0;
1682 }
1683
1684 static int trace_create_savedcmd(void)
1685 {
1686         int ret;
1687
1688         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1689         if (!savedcmd)
1690                 return -ENOMEM;
1691
1692         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1693         if (ret < 0) {
1694                 kfree(savedcmd);
1695                 savedcmd = NULL;
1696                 return -ENOMEM;
1697         }
1698
1699         return 0;
1700 }
1701
1702 int is_tracing_stopped(void)
1703 {
1704         return global_trace.stop_count;
1705 }
1706
1707 /**
1708  * tracing_start - quick start of the tracer
1709  *
1710  * If tracing is enabled but was stopped by tracing_stop,
1711  * this will start the tracer back up.
1712  */
1713 void tracing_start(void)
1714 {
1715         struct ring_buffer *buffer;
1716         unsigned long flags;
1717
1718         if (tracing_disabled)
1719                 return;
1720
1721         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1722         if (--global_trace.stop_count) {
1723                 if (global_trace.stop_count < 0) {
1724                         /* Someone screwed up their debugging */
1725                         WARN_ON_ONCE(1);
1726                         global_trace.stop_count = 0;
1727                 }
1728                 goto out;
1729         }
1730
1731         /* Prevent the buffers from switching */
1732         arch_spin_lock(&global_trace.max_lock);
1733
1734         buffer = global_trace.trace_buffer.buffer;
1735         if (buffer)
1736                 ring_buffer_record_enable(buffer);
1737
1738 #ifdef CONFIG_TRACER_MAX_TRACE
1739         buffer = global_trace.max_buffer.buffer;
1740         if (buffer)
1741                 ring_buffer_record_enable(buffer);
1742 #endif
1743
1744         arch_spin_unlock(&global_trace.max_lock);
1745
1746  out:
1747         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1748 }
1749
1750 static void tracing_start_tr(struct trace_array *tr)
1751 {
1752         struct ring_buffer *buffer;
1753         unsigned long flags;
1754
1755         if (tracing_disabled)
1756                 return;
1757
1758         /* If global, we need to also start the max tracer */
1759         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1760                 return tracing_start();
1761
1762         raw_spin_lock_irqsave(&tr->start_lock, flags);
1763
1764         if (--tr->stop_count) {
1765                 if (tr->stop_count < 0) {
1766                         /* Someone screwed up their debugging */
1767                         WARN_ON_ONCE(1);
1768                         tr->stop_count = 0;
1769                 }
1770                 goto out;
1771         }
1772
1773         buffer = tr->trace_buffer.buffer;
1774         if (buffer)
1775                 ring_buffer_record_enable(buffer);
1776
1777  out:
1778         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1779 }
1780
1781 /**
1782  * tracing_stop - quick stop of the tracer
1783  *
1784  * Lightweight way to stop tracing. Use in conjunction with
1785  * tracing_start.
1786  */
1787 void tracing_stop(void)
1788 {
1789         struct ring_buffer *buffer;
1790         unsigned long flags;
1791
1792         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1793         if (global_trace.stop_count++)
1794                 goto out;
1795
1796         /* Prevent the buffers from switching */
1797         arch_spin_lock(&global_trace.max_lock);
1798
1799         buffer = global_trace.trace_buffer.buffer;
1800         if (buffer)
1801                 ring_buffer_record_disable(buffer);
1802
1803 #ifdef CONFIG_TRACER_MAX_TRACE
1804         buffer = global_trace.max_buffer.buffer;
1805         if (buffer)
1806                 ring_buffer_record_disable(buffer);
1807 #endif
1808
1809         arch_spin_unlock(&global_trace.max_lock);
1810
1811  out:
1812         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1813 }
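
/*
 * Usage sketch (illustrative only): tracing_stop()/tracing_start()
 * nest via stop_count, so a caller can bracket a noisy region and
 * recording only resumes at the outermost start:
 *
 *	tracing_stop();
 *	do_noisy_work();	// hypothetical helper; nothing is recorded here
 *	tracing_stop();		// nested stop, count goes to 2
 *	tracing_start();	// count back to 1, still stopped
 *	tracing_start();	// count reaches 0, recording resumes
 */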
1814
1815 static void tracing_stop_tr(struct trace_array *tr)
1816 {
1817         struct ring_buffer *buffer;
1818         unsigned long flags;
1819
1820         /* If global, we need to also stop the max tracer */
1821         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1822                 return tracing_stop();
1823
1824         raw_spin_lock_irqsave(&tr->start_lock, flags);
1825         if (tr->stop_count++)
1826                 goto out;
1827
1828         buffer = tr->trace_buffer.buffer;
1829         if (buffer)
1830                 ring_buffer_record_disable(buffer);
1831
1832  out:
1833         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1834 }
1835
1836 void trace_stop_cmdline_recording(void);
1837
1838 static int trace_save_cmdline(struct task_struct *tsk)
1839 {
1840         unsigned pid, idx;
1841
1842         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1843                 return 0;
1844
1845         /*
1846          * It's not the end of the world if we don't get
1847          * the lock, but we also don't want to spin
1848          * nor do we want to disable interrupts,
1849          * so if we miss here, then better luck next time.
1850          */
1851         if (!arch_spin_trylock(&trace_cmdline_lock))
1852                 return 0;
1853
1854         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1855         if (idx == NO_CMDLINE_MAP) {
1856                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1857
1858                 /*
1859                  * Check whether the cmdline buffer at idx has a pid
1860                  * mapped. We are going to overwrite that entry so we
1861                  * need to clear the map_pid_to_cmdline. Otherwise we
1862                  * would read the new comm for the old pid.
1863                  */
1864                 pid = savedcmd->map_cmdline_to_pid[idx];
1865                 if (pid != NO_CMDLINE_MAP)
1866                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1867
1868                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1869                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1870
1871                 savedcmd->cmdline_idx = idx;
1872         }
1873
1874         set_cmdline(idx, tsk->comm);
1875
1876         arch_spin_unlock(&trace_cmdline_lock);
1877
1878         return 1;
1879 }
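
/*
 * Illustrative sketch of the saved-cmdlines mapping maintained above:
 * map_pid_to_cmdline[] turns a pid into a slot index, while
 * map_cmdline_to_pid[] remembers which pid currently owns the slot so
 * a recycled slot can invalidate the old pid first. With example
 * values (pid 1234, slot 7, comm "bash"):
 *
 *	savedcmd->map_pid_to_cmdline[1234]	-> 7
 *	savedcmd->map_cmdline_to_pid[7]		-> 1234
 *	get_saved_cmdlines(7)			-> "bash"
 */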
1880
1881 static void __trace_find_cmdline(int pid, char comm[])
1882 {
1883         unsigned map;
1884
1885         if (!pid) {
1886                 strcpy(comm, "<idle>");
1887                 return;
1888         }
1889
1890         if (WARN_ON_ONCE(pid < 0)) {
1891                 strcpy(comm, "<XXX>");
1892                 return;
1893         }
1894
1895         if (pid > PID_MAX_DEFAULT) {
1896                 strcpy(comm, "<...>");
1897                 return;
1898         }
1899
1900         map = savedcmd->map_pid_to_cmdline[pid];
1901         if (map != NO_CMDLINE_MAP)
1902                 strcpy(comm, get_saved_cmdlines(map));
1903         else
1904                 strcpy(comm, "<...>");
1905 }
1906
1907 void trace_find_cmdline(int pid, char comm[])
1908 {
1909         preempt_disable();
1910         arch_spin_lock(&trace_cmdline_lock);
1911
1912         __trace_find_cmdline(pid, comm);
1913
1914         arch_spin_unlock(&trace_cmdline_lock);
1915         preempt_enable();
1916 }
1917
1918 void tracing_record_cmdline(struct task_struct *tsk)
1919 {
1920         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1921                 return;
1922
1923         if (!__this_cpu_read(trace_cmdline_save))
1924                 return;
1925
1926         if (trace_save_cmdline(tsk))
1927                 __this_cpu_write(trace_cmdline_save, false);
1928 }
1929
1930 void
1931 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1932                              int pc)
1933 {
1934         struct task_struct *tsk = current;
1935
1936         entry->preempt_count            = pc & 0xff;
1937         entry->pid                      = (tsk) ? tsk->pid : 0;
1938         entry->flags =
1939 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1940                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1941 #else
1942                 TRACE_FLAG_IRQS_NOSUPPORT |
1943 #endif
1944                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1945                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1946                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1947                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1948                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1949 }
1950 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1951
1952 struct ring_buffer_event *
1953 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1954                           int type,
1955                           unsigned long len,
1956                           unsigned long flags, int pc)
1957 {
1958         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
1959 }
1960
1961 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1962 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1963 static int trace_buffered_event_ref;
1964
1965 /**
1966  * trace_buffered_event_enable - enable buffering events
1967  *
1968  * When events are being filtered, it is quicker to use a temporary
1969  * buffer to write the event data into if there's a likely chance
1970  * that it will not be committed. The discard of the ring buffer
1971  * is not as fast as committing, and is much slower than copying
1972  * a commit.
1973  *
1974  * When an event is to be filtered, allocate per cpu buffers to
1975  * write the event data into, and if the event is filtered and discarded
1976  * it is simply dropped, otherwise, the entire data is to be committed
1977  * in one shot.
1978  */
1979 void trace_buffered_event_enable(void)
1980 {
1981         struct ring_buffer_event *event;
1982         struct page *page;
1983         int cpu;
1984
1985         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1986
1987         if (trace_buffered_event_ref++)
1988                 return;
1989
1990         for_each_tracing_cpu(cpu) {
1991                 page = alloc_pages_node(cpu_to_node(cpu),
1992                                         GFP_KERNEL | __GFP_NORETRY, 0);
1993                 if (!page)
1994                         goto failed;
1995
1996                 event = page_address(page);
1997                 memset(event, 0, sizeof(*event));
1998
1999                 per_cpu(trace_buffered_event, cpu) = event;
2000
2001                 preempt_disable();
2002                 if (cpu == smp_processor_id() &&
2003                     this_cpu_read(trace_buffered_event) !=
2004                     per_cpu(trace_buffered_event, cpu))
2005                         WARN_ON_ONCE(1);
2006                 preempt_enable();
2007         }
2008
2009         return;
2010  failed:
2011         trace_buffered_event_disable();
2012 }
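
/*
 * Pairing sketch for the enable/disable calls (both must run under
 * event_mutex, as the WARN_ON_ONCE() checks above and below enforce):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();	// takes a reference, allocates pages
 *	...				// install an event filter
 *	mutex_unlock(&event_mutex);
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();	// drops the reference; pages are
 *	mutex_unlock(&event_mutex);	// freed once the count hits zero
 */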
2013
2014 static void enable_trace_buffered_event(void *data)
2015 {
2016         /* Probably not needed, but do it anyway */
2017         smp_rmb();
2018         this_cpu_dec(trace_buffered_event_cnt);
2019 }
2020
2021 static void disable_trace_buffered_event(void *data)
2022 {
2023         this_cpu_inc(trace_buffered_event_cnt);
2024 }
2025
2026 /**
2027  * trace_buffered_event_disable - disable buffering events
2028  *
2029  * When a filter is removed, it is faster to not use the buffered
2030  * events, and to commit directly into the ring buffer. Free up
2031  * the temp buffers when there are no more users. This requires
2032  * special synchronization with current events.
2033  */
2034 void trace_buffered_event_disable(void)
2035 {
2036         int cpu;
2037
2038         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2039
2040         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2041                 return;
2042
2043         if (--trace_buffered_event_ref)
2044                 return;
2045
2046         preempt_disable();
2047         /* For each CPU, set the buffer as used. */
2048         smp_call_function_many(tracing_buffer_mask,
2049                                disable_trace_buffered_event, NULL, 1);
2050         preempt_enable();
2051
2052         /* Wait for all current users to finish */
2053         synchronize_sched();
2054
2055         for_each_tracing_cpu(cpu) {
2056                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2057                 per_cpu(trace_buffered_event, cpu) = NULL;
2058         }
2059         /*
2060          * Make sure trace_buffered_event is NULL before clearing
2061          * trace_buffered_event_cnt.
2062          */
2063         smp_wmb();
2064
2065         preempt_disable();
2066         /* Do the work on each cpu */
2067         smp_call_function_many(tracing_buffer_mask,
2068                                enable_trace_buffered_event, NULL, 1);
2069         preempt_enable();
2070 }
2071
2072 static struct ring_buffer *temp_buffer;
2073
2074 struct ring_buffer_event *
2075 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2076                           struct trace_event_file *trace_file,
2077                           int type, unsigned long len,
2078                           unsigned long flags, int pc)
2079 {
2080         struct ring_buffer_event *entry;
2081         int val;
2082
2083         *current_rb = trace_file->tr->trace_buffer.buffer;
2084
2085         if ((trace_file->flags &
2086              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2087             (entry = this_cpu_read(trace_buffered_event))) {
2088                 /* Try to use the per cpu buffer first */
2089                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2090                 if (val == 1) {
2091                         trace_event_setup(entry, type, flags, pc);
2092                         entry->array[0] = len;
2093                         return entry;
2094                 }
2095                 this_cpu_dec(trace_buffered_event_cnt);
2096         }
2097
2098         entry = __trace_buffer_lock_reserve(*current_rb,
2099                                             type, len, flags, pc);
2100         /*
2101          * If tracing is off, but we have triggers enabled
2102          * we still need to look at the event data. Use the temp_buffer
2103          * to store the trace event for the trigger to use. It's recursion
2104          * safe and will not be recorded anywhere.
2105          */
2106         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2107                 *current_rb = temp_buffer;
2108                 entry = __trace_buffer_lock_reserve(*current_rb,
2109                                                     type, len, flags, pc);
2110         }
2111         return entry;
2112 }
2113 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
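
/*
 * Minimal reserve/commit sketch for the helper above; trace_file,
 * type, len, flags and pc are assumed to be set up by the caller:
 *
 *	struct ring_buffer *buffer;
 *	struct ring_buffer_event *event;
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						type, len, flags, pc);
 *	if (event) {
 *		void *entry = ring_buffer_event_data(event);
 *		// fill in the entry payload here
 *		event_trigger_unlock_commit(trace_file, buffer, event,
 *					    entry, flags, pc);
 *	}
 */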
2114
2115 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2116 static DEFINE_MUTEX(tracepoint_printk_mutex);
2117
2118 static void output_printk(struct trace_event_buffer *fbuffer)
2119 {
2120         struct trace_event_call *event_call;
2121         struct trace_event *event;
2122         unsigned long flags;
2123         struct trace_iterator *iter = tracepoint_print_iter;
2124
2125         /* We should never get here if iter is NULL */
2126         if (WARN_ON_ONCE(!iter))
2127                 return;
2128
2129         event_call = fbuffer->trace_file->event_call;
2130         if (!event_call || !event_call->event.funcs ||
2131             !event_call->event.funcs->trace)
2132                 return;
2133
2134         event = &fbuffer->trace_file->event_call->event;
2135
2136         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2137         trace_seq_init(&iter->seq);
2138         iter->ent = fbuffer->entry;
2139         event_call->event.funcs->trace(iter, 0, event);
2140         trace_seq_putc(&iter->seq, 0);
2141         printk("%s", iter->seq.buffer);
2142
2143         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2144 }
2145
2146 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2147                              void __user *buffer, size_t *lenp,
2148                              loff_t *ppos)
2149 {
2150         int save_tracepoint_printk;
2151         int ret;
2152
2153         mutex_lock(&tracepoint_printk_mutex);
2154         save_tracepoint_printk = tracepoint_printk;
2155
2156         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2157
2158         /*
2159          * This will force exiting early, as tracepoint_printk
2160          * is always zero when tracepoint_print_iter is not allocated.
2161          */
2162         if (!tracepoint_print_iter)
2163                 tracepoint_printk = 0;
2164
2165         if (save_tracepoint_printk == tracepoint_printk)
2166                 goto out;
2167
2168         if (tracepoint_printk)
2169                 static_key_enable(&tracepoint_printk_key.key);
2170         else
2171                 static_key_disable(&tracepoint_printk_key.key);
2172
2173  out:
2174         mutex_unlock(&tracepoint_printk_mutex);
2175
2176         return ret;
2177 }
2178
2179 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2180 {
2181         if (static_key_false(&tracepoint_printk_key.key))
2182                 output_printk(fbuffer);
2183
2184         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2185                                     fbuffer->event, fbuffer->entry,
2186                                     fbuffer->flags, fbuffer->pc);
2187 }
2188 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2189
2190 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2191                                      struct ring_buffer *buffer,
2192                                      struct ring_buffer_event *event,
2193                                      unsigned long flags, int pc,
2194                                      struct pt_regs *regs)
2195 {
2196         __buffer_unlock_commit(buffer, event);
2197
2198         /*
2199          * If regs is not set, then skip the following callers:
2200          *   trace_buffer_unlock_commit_regs
2201          *   event_trigger_unlock_commit
2202          *   trace_event_buffer_commit
2203          *   trace_event_raw_event_sched_switch
2204          * Note, we can still get here via blktrace, wakeup tracer
2205          * and mmiotrace, but that's ok if they lose a function or
2206          * two. They are not that meaningful.
2207          */
2208         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2209         ftrace_trace_userstack(buffer, flags, pc);
2210 }
2211
2212 /*
2213  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2214  */
2215 void
2216 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2217                                    struct ring_buffer_event *event)
2218 {
2219         __buffer_unlock_commit(buffer, event);
2220 }
2221
2222 static void
2223 trace_process_export(struct trace_export *export,
2224                struct ring_buffer_event *event)
2225 {
2226         struct trace_entry *entry;
2227         unsigned int size = 0;
2228
2229         entry = ring_buffer_event_data(event);
2230         size = ring_buffer_event_length(event);
2231         export->write(entry, size);
2232 }
2233
2234 static DEFINE_MUTEX(ftrace_export_lock);
2235
2236 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2237
2238 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2239
2240 static inline void ftrace_exports_enable(void)
2241 {
2242         static_branch_enable(&ftrace_exports_enabled);
2243 }
2244
2245 static inline void ftrace_exports_disable(void)
2246 {
2247         static_branch_disable(&ftrace_exports_enabled);
2248 }
2249
2250 void ftrace_exports(struct ring_buffer_event *event)
2251 {
2252         struct trace_export *export;
2253
2254         preempt_disable_notrace();
2255
2256         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2257         while (export) {
2258                 trace_process_export(export, event);
2259                 export = rcu_dereference_raw_notrace(export->next);
2260         }
2261
2262         preempt_enable_notrace();
2263 }
2264
2265 static inline void
2266 add_trace_export(struct trace_export **list, struct trace_export *export)
2267 {
2268         rcu_assign_pointer(export->next, *list);
2269         /*
2270          * We are entering export into the list but another
2271          * CPU might be walking that list. We need to make sure
2272          * the export->next pointer is valid before another CPU sees
2273          * the export pointer included in the list.
2274          */
2275         rcu_assign_pointer(*list, export);
2276 }
2277
2278 static inline int
2279 rm_trace_export(struct trace_export **list, struct trace_export *export)
2280 {
2281         struct trace_export **p;
2282
2283         for (p = list; *p != NULL; p = &(*p)->next)
2284                 if (*p == export)
2285                         break;
2286
2287         if (*p != export)
2288                 return -1;
2289
2290         rcu_assign_pointer(*p, (*p)->next);
2291
2292         return 0;
2293 }
2294
2295 static inline void
2296 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2297 {
2298         if (*list == NULL)
2299                 ftrace_exports_enable();
2300
2301         add_trace_export(list, export);
2302 }
2303
2304 static inline int
2305 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2306 {
2307         int ret;
2308
2309         ret = rm_trace_export(list, export);
2310         if (*list == NULL)
2311                 ftrace_exports_disable();
2312
2313         return ret;
2314 }
2315
2316 int register_ftrace_export(struct trace_export *export)
2317 {
2318         if (WARN_ON_ONCE(!export->write))
2319                 return -1;
2320
2321         mutex_lock(&ftrace_export_lock);
2322
2323         add_ftrace_export(&ftrace_exports_list, export);
2324
2325         mutex_unlock(&ftrace_export_lock);
2326
2327         return 0;
2328 }
2329 EXPORT_SYMBOL_GPL(register_ftrace_export);
2330
2331 int unregister_ftrace_export(struct trace_export *export)
2332 {
2333         int ret;
2334
2335         mutex_lock(&ftrace_export_lock);
2336
2337         ret = rm_ftrace_export(&ftrace_exports_list, export);
2338
2339         mutex_unlock(&ftrace_export_lock);
2340
2341         return ret;
2342 }
2343 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
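
/*
 * Registration sketch for the export hooks above; my_export and its
 * write callback are example names only. The callback receives the
 * raw entry and its length, exactly as trace_process_export() passes
 * them:
 *
 *	static void my_export_write(const void *buf, unsigned int len)
 *	{
 *		// forward the raw trace entry to some other sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */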
2344
2345 void
2346 trace_function(struct trace_array *tr,
2347                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2348                int pc)
2349 {
2350         struct trace_event_call *call = &event_function;
2351         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2352         struct ring_buffer_event *event;
2353         struct ftrace_entry *entry;
2354
2355         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2356                                             flags, pc);
2357         if (!event)
2358                 return;
2359         entry   = ring_buffer_event_data(event);
2360         entry->ip                       = ip;
2361         entry->parent_ip                = parent_ip;
2362
2363         if (!call_filter_check_discard(call, entry, buffer, event)) {
2364                 if (static_branch_unlikely(&ftrace_exports_enabled))
2365                         ftrace_exports(event);
2366                 __buffer_unlock_commit(buffer, event);
2367         }
2368 }
2369
2370 #ifdef CONFIG_STACKTRACE
2371
2372 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2373 struct ftrace_stack {
2374         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2375 };
2376
2377 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2378 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2379
2380 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2381                                  unsigned long flags,
2382                                  int skip, int pc, struct pt_regs *regs)
2383 {
2384         struct trace_event_call *call = &event_kernel_stack;
2385         struct ring_buffer_event *event;
2386         struct stack_entry *entry;
2387         struct stack_trace trace;
2388         int use_stack;
2389         int size = FTRACE_STACK_ENTRIES;
2390
2391         trace.nr_entries        = 0;
2392         trace.skip              = skip;
2393
2394         /*
2395          * Add two, for this function and the call to save_stack_trace()
2396          * If regs is set, then these functions will not be in the way.
2397          */
2398         if (!regs)
2399                 trace.skip += 2;
2400
2401         /*
2402          * Since events can happen in NMIs there's no safe way to
2403          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2404          * or NMI comes in, it will just have to use the default
2405          * FTRACE_STACK_ENTRIES.
2406          */
2407         preempt_disable_notrace();
2408
2409         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2410         /*
2411          * We don't need any atomic variables, just a barrier.
2412          * If an interrupt comes in, we don't care, because it would
2413          * have exited and put the counter back to what we want.
2414          * We just need a barrier to keep gcc from moving things
2415          * around.
2416          */
2417         barrier();
2418         if (use_stack == 1) {
2419                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2420                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2421
2422                 if (regs)
2423                         save_stack_trace_regs(regs, &trace);
2424                 else
2425                         save_stack_trace(&trace);
2426
2427                 if (trace.nr_entries > size)
2428                         size = trace.nr_entries;
2429         } else
2430                 /* From now on, use_stack is a boolean */
2431                 use_stack = 0;
2432
2433         size *= sizeof(unsigned long);
2434
2435         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2436                                             sizeof(*entry) + size, flags, pc);
2437         if (!event)
2438                 goto out;
2439         entry = ring_buffer_event_data(event);
2440
2441         memset(&entry->caller, 0, size);
2442
2443         if (use_stack)
2444                 memcpy(&entry->caller, trace.entries,
2445                        trace.nr_entries * sizeof(unsigned long));
2446         else {
2447                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2448                 trace.entries           = entry->caller;
2449                 if (regs)
2450                         save_stack_trace_regs(regs, &trace);
2451                 else
2452                         save_stack_trace(&trace);
2453         }
2454
2455         entry->size = trace.nr_entries;
2456
2457         if (!call_filter_check_discard(call, entry, buffer, event))
2458                 __buffer_unlock_commit(buffer, event);
2459
2460  out:
2461         /* Again, don't let gcc optimize things here */
2462         barrier();
2463         __this_cpu_dec(ftrace_stack_reserve);
2464         preempt_enable_notrace();
2465
2466 }
2467
2468 static inline void ftrace_trace_stack(struct trace_array *tr,
2469                                       struct ring_buffer *buffer,
2470                                       unsigned long flags,
2471                                       int skip, int pc, struct pt_regs *regs)
2472 {
2473         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2474                 return;
2475
2476         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2477 }
2478
2479 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2480                    int pc)
2481 {
2482         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2483 }
2484
2485 /**
2486  * trace_dump_stack - record a stack back trace in the trace buffer
2487  * @skip: Number of functions to skip (helper handlers)
2488  */
2489 void trace_dump_stack(int skip)
2490 {
2491         unsigned long flags;
2492
2493         if (tracing_disabled || tracing_selftest_running)
2494                 return;
2495
2496         local_save_flags(flags);
2497
2498         /*
2499          * Skip 3 more, which seems to get us to the caller of
2500          * this function.
2501          */
2502         skip += 3;
2503         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2504                              flags, skip, preempt_count(), NULL);
2505 }
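
/*
 * Typical call-site sketch: drop this into a code path under
 * investigation to record who reached it (the surrounding function is
 * hypothetical):
 *
 *	void my_driver_path(void)
 *	{
 *		trace_dump_stack(0);	// record the current backtrace
 *	}
 */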
2506
2507 static DEFINE_PER_CPU(int, user_stack_count);
2508
2509 void
2510 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2511 {
2512         struct trace_event_call *call = &event_user_stack;
2513         struct ring_buffer_event *event;
2514         struct userstack_entry *entry;
2515         struct stack_trace trace;
2516
2517         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2518                 return;
2519
2520         /*
2521          * NMIs cannot handle page faults, even with fix-ups.
2522          * Saving the user stack can (and often does) fault.
2523          */
2524         if (unlikely(in_nmi()))
2525                 return;
2526
2527         /*
2528          * prevent recursion, since the user stack tracing may
2529          * trigger other kernel events.
2530          */
2531         preempt_disable();
2532         if (__this_cpu_read(user_stack_count))
2533                 goto out;
2534
2535         __this_cpu_inc(user_stack_count);
2536
2537         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2538                                             sizeof(*entry), flags, pc);
2539         if (!event)
2540                 goto out_drop_count;
2541         entry   = ring_buffer_event_data(event);
2542
2543         entry->tgid             = current->tgid;
2544         memset(&entry->caller, 0, sizeof(entry->caller));
2545
2546         trace.nr_entries        = 0;
2547         trace.max_entries       = FTRACE_STACK_ENTRIES;
2548         trace.skip              = 0;
2549         trace.entries           = entry->caller;
2550
2551         save_stack_trace_user(&trace);
2552         if (!call_filter_check_discard(call, entry, buffer, event))
2553                 __buffer_unlock_commit(buffer, event);
2554
2555  out_drop_count:
2556         __this_cpu_dec(user_stack_count);
2557  out:
2558         preempt_enable();
2559 }
2560
2561 #ifdef UNUSED
2562 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2563 {
2564         ftrace_trace_userstack(tr, flags, preempt_count());
2565 }
2566 #endif /* UNUSED */
2567
2568 #endif /* CONFIG_STACKTRACE */
2569
2570 /* created for use with alloc_percpu */
2571 struct trace_buffer_struct {
2572         int nesting;
2573         char buffer[4][TRACE_BUF_SIZE];
2574 };
2575
2576 static struct trace_buffer_struct *trace_percpu_buffer;
2577
2578 /*
2579  * This allows for lockless recording.  If we're nested too deeply, then
2580  * this returns NULL.
2581  */
2582 static char *get_trace_buf(void)
2583 {
2584         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2585
2586         if (!buffer || buffer->nesting >= 4)
2587                 return NULL;
2588
2589         return &buffer->buffer[buffer->nesting++][0];
2590 }
2591
2592 static void put_trace_buf(void)
2593 {
2594         this_cpu_dec(trace_percpu_buffer->nesting);
2595 }
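
/*
 * Usage pattern sketch for the per-cpu printk buffers, mirroring how
 * trace_vbprintk() and __trace_array_vprintk() below use them (buf is
 * a local of the example):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format at most TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */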
2596
2597 static int alloc_percpu_trace_buffer(void)
2598 {
2599         struct trace_buffer_struct *buffers;
2600
2601         buffers = alloc_percpu(struct trace_buffer_struct);
2602         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2603                 return -ENOMEM;
2604
2605         trace_percpu_buffer = buffers;
2606         return 0;
2607 }
2608
2609 static int buffers_allocated;
2610
2611 void trace_printk_init_buffers(void)
2612 {
2613         if (buffers_allocated)
2614                 return;
2615
2616         if (alloc_percpu_trace_buffer())
2617                 return;
2618
2619         /* trace_printk() is for debug use only. Don't use it in production. */
2620
2621         pr_warn("\n");
2622         pr_warn("**********************************************************\n");
2623         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2624         pr_warn("**                                                      **\n");
2625         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2626         pr_warn("**                                                      **\n");
2627         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2628         pr_warn("** unsafe for production use.                           **\n");
2629         pr_warn("**                                                      **\n");
2630         pr_warn("** If you see this message and you are not debugging    **\n");
2631         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2632         pr_warn("**                                                      **\n");
2633         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2634         pr_warn("**********************************************************\n");
2635
2636         /* Expand the buffers to set size */
2637         tracing_update_buffers();
2638
2639         buffers_allocated = 1;
2640
2641         /*
2642          * trace_printk_init_buffers() can be called by modules.
2643          * If that happens, then we need to start cmdline recording
2644          * directly here. If the global_trace.buffer is already
2645          * allocated here, then this was called by module code.
2646          */
2647         if (global_trace.trace_buffer.buffer)
2648                 tracing_start_cmdline_record();
2649 }
2650
2651 void trace_printk_start_comm(void)
2652 {
2653         /* Start tracing comms if trace printk is set */
2654         if (!buffers_allocated)
2655                 return;
2656         tracing_start_cmdline_record();
2657 }
2658
2659 static void trace_printk_start_stop_comm(int enabled)
2660 {
2661         if (!buffers_allocated)
2662                 return;
2663
2664         if (enabled)
2665                 tracing_start_cmdline_record();
2666         else
2667                 tracing_stop_cmdline_record();
2668 }
2669
2670 /**
2671  * trace_vbprintk - write binary msg to tracing buffer
2672  * @ip:   The address of the caller
 * @fmt:  The string format to write to the buffer
 * @args: Arguments for @fmt
2673  */
2674 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2675 {
2676         struct trace_event_call *call = &event_bprint;
2677         struct ring_buffer_event *event;
2678         struct ring_buffer *buffer;
2679         struct trace_array *tr = &global_trace;
2680         struct bprint_entry *entry;
2681         unsigned long flags;
2682         char *tbuffer;
2683         int len = 0, size, pc;
2684
2685         if (unlikely(tracing_selftest_running || tracing_disabled))
2686                 return 0;
2687
2688         /* Don't pollute graph traces with trace_vprintk internals */
2689         pause_graph_tracing();
2690
2691         pc = preempt_count();
2692         preempt_disable_notrace();
2693
2694         tbuffer = get_trace_buf();
2695         if (!tbuffer) {
2696                 len = 0;
2697                 goto out_nobuffer;
2698         }
2699
2700         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2701
2702         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2703                 goto out;
2704
2705         local_save_flags(flags);
2706         size = sizeof(*entry) + sizeof(u32) * len;
2707         buffer = tr->trace_buffer.buffer;
2708         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2709                                             flags, pc);
2710         if (!event)
2711                 goto out;
2712         entry = ring_buffer_event_data(event);
2713         entry->ip                       = ip;
2714         entry->fmt                      = fmt;
2715
2716         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2717         if (!call_filter_check_discard(call, entry, buffer, event)) {
2718                 __buffer_unlock_commit(buffer, event);
2719                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2720         }
2721
2722 out:
2723         put_trace_buf();
2724
2725 out_nobuffer:
2726         preempt_enable_notrace();
2727         unpause_graph_tracing();
2728
2729         return len;
2730 }
2731 EXPORT_SYMBOL_GPL(trace_vbprintk);
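
/*
 * trace_vbprintk() is normally reached through the trace_printk()
 * macro; a minimal debug-only call looks like:
 *
 *	trace_printk("queue depth %d on cpu %d\n", depth, cpu);
 *
 * where depth and cpu stand in for whatever the caller wants logged.
 */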
2732
2733 static int
2734 __trace_array_vprintk(struct ring_buffer *buffer,
2735                       unsigned long ip, const char *fmt, va_list args)
2736 {
2737         struct trace_event_call *call = &event_print;
2738         struct ring_buffer_event *event;
2739         int len = 0, size, pc;
2740         struct print_entry *entry;
2741         unsigned long flags;
2742         char *tbuffer;
2743
2744         if (tracing_disabled || tracing_selftest_running)
2745                 return 0;
2746
2747         /* Don't pollute graph traces with trace_vprintk internals */
2748         pause_graph_tracing();
2749
2750         pc = preempt_count();
2751         preempt_disable_notrace();
2752
2753
2754         tbuffer = get_trace_buf();
2755         if (!tbuffer) {
2756                 len = 0;
2757                 goto out_nobuffer;
2758         }
2759
2760         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2761
2762         local_save_flags(flags);
2763         size = sizeof(*entry) + len + 1;
2764         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2765                                             flags, pc);
2766         if (!event)
2767                 goto out;
2768         entry = ring_buffer_event_data(event);
2769         entry->ip = ip;
2770
2771         memcpy(&entry->buf, tbuffer, len + 1);
2772         if (!call_filter_check_discard(call, entry, buffer, event)) {
2773                 __buffer_unlock_commit(buffer, event);
2774                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2775         }
2776
2777 out:
2778         put_trace_buf();
2779
2780 out_nobuffer:
2781         preempt_enable_notrace();
2782         unpause_graph_tracing();
2783
2784         return len;
2785 }
2786
2787 int trace_array_vprintk(struct trace_array *tr,
2788                         unsigned long ip, const char *fmt, va_list args)
2789 {
2790         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2791 }
2792
2793 int trace_array_printk(struct trace_array *tr,
2794                        unsigned long ip, const char *fmt, ...)
2795 {
2796         int ret;
2797         va_list ap;
2798
2799         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2800                 return 0;
2801
2802         va_start(ap, fmt);
2803         ret = trace_array_vprintk(tr, ip, fmt, ap);
2804         va_end(ap);
2805         return ret;
2806 }
2807
2808 int trace_array_printk_buf(struct ring_buffer *buffer,
2809                            unsigned long ip, const char *fmt, ...)
2810 {
2811         int ret;
2812         va_list ap;
2813
2814         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2815                 return 0;
2816
2817         va_start(ap, fmt);
2818         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2819         va_end(ap);
2820         return ret;
2821 }
2822
2823 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2824 {
2825         return trace_array_vprintk(&global_trace, ip, fmt, args);
2826 }
2827 EXPORT_SYMBOL_GPL(trace_vprintk);
2828
2829 static void trace_iterator_increment(struct trace_iterator *iter)
2830 {
2831         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2832
2833         iter->idx++;
2834         if (buf_iter)
2835                 ring_buffer_read(buf_iter, NULL);
2836 }
2837
2838 static struct trace_entry *
2839 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2840                 unsigned long *lost_events)
2841 {
2842         struct ring_buffer_event *event;
2843         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2844
2845         if (buf_iter)
2846                 event = ring_buffer_iter_peek(buf_iter, ts);
2847         else
2848                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2849                                          lost_events);
2850
2851         if (event) {
2852                 iter->ent_size = ring_buffer_event_length(event);
2853                 return ring_buffer_event_data(event);
2854         }
2855         iter->ent_size = 0;
2856         return NULL;
2857 }
2858
2859 static struct trace_entry *
2860 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2861                   unsigned long *missing_events, u64 *ent_ts)
2862 {
2863         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2864         struct trace_entry *ent, *next = NULL;
2865         unsigned long lost_events = 0, next_lost = 0;
2866         int cpu_file = iter->cpu_file;
2867         u64 next_ts = 0, ts;
2868         int next_cpu = -1;
2869         int next_size = 0;
2870         int cpu;
2871
2872         /*
2873          * If we are in a per_cpu trace file, don't bother iterating over
2874          * all CPUs; just peek at that CPU's buffer directly.
2875          */
2876         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2877                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2878                         return NULL;
2879                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2880                 if (ent_cpu)
2881                         *ent_cpu = cpu_file;
2882
2883                 return ent;
2884         }
2885
2886         for_each_tracing_cpu(cpu) {
2887
2888                 if (ring_buffer_empty_cpu(buffer, cpu))
2889                         continue;
2890
2891                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2892
2893                 /*
2894                  * Pick the entry with the smallest timestamp:
2895                  */
2896                 if (ent && (!next || ts < next_ts)) {
2897                         next = ent;
2898                         next_cpu = cpu;
2899                         next_ts = ts;
2900                         next_lost = lost_events;
2901                         next_size = iter->ent_size;
2902                 }
2903         }
2904
2905         iter->ent_size = next_size;
2906
2907         if (ent_cpu)
2908                 *ent_cpu = next_cpu;
2909
2910         if (ent_ts)
2911                 *ent_ts = next_ts;
2912
2913         if (missing_events)
2914                 *missing_events = next_lost;
2915
2916         return next;
2917 }
2918
2919 /* Find the next real entry, without updating the iterator itself */
2920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2921                                           int *ent_cpu, u64 *ent_ts)
2922 {
2923         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2924 }
2925
2926 /* Find the next real entry, and increment the iterator to the next entry */
2927 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2928 {
2929         iter->ent = __find_next_entry(iter, &iter->cpu,
2930                                       &iter->lost_events, &iter->ts);
2931
2932         if (iter->ent)
2933                 trace_iterator_increment(iter);
2934
2935         return iter->ent ? iter : NULL;
2936 }
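
/*
 * Consumption sketch: walking every entry in timestamp order with the
 * iterator helpers above (iterator setup and teardown are assumed to
 * happen elsewhere, and process_entry() is a hypothetical consumer):
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		// iter->ent, iter->cpu and iter->ts describe the entry
 *		process_entry(iter);
 *	}
 */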
2937
2938 static void trace_consume(struct trace_iterator *iter)
2939 {
2940         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2941                             &iter->lost_events);
2942 }
2943
2944 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2945 {
2946         struct trace_iterator *iter = m->private;
2947         int i = (int)*pos;
2948         void *ent;
2949
2950         WARN_ON_ONCE(iter->leftover);
2951
2952         (*pos)++;
2953
2954         /* can't go backwards */
2955         if (iter->idx > i)
2956                 return NULL;
2957
2958         if (iter->idx < 0)
2959                 ent = trace_find_next_entry_inc(iter);
2960         else
2961                 ent = iter;
2962
2963         while (ent && iter->idx < i)
2964                 ent = trace_find_next_entry_inc(iter);
2965
2966         iter->pos = *pos;
2967
2968         return ent;
2969 }
2970
2971 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2972 {
2973         struct ring_buffer_event *event;
2974         struct ring_buffer_iter *buf_iter;
2975         unsigned long entries = 0;
2976         u64 ts;
2977
2978         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2979
2980         buf_iter = trace_buffer_iter(iter, cpu);
2981         if (!buf_iter)
2982                 return;
2983
2984         ring_buffer_iter_reset(buf_iter);
2985
2986         /*
2987          * We could have the case with the max latency tracers
2988          * that a reset never took place on a cpu. This is evident
2989          * by the timestamp being before the start of the buffer.
2990          */
2991         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2992                 if (ts >= iter->trace_buffer->time_start)
2993                         break;
2994                 entries++;
2995                 ring_buffer_read(buf_iter, NULL);
2996         }
2997
2998         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2999 }
3000
3001 /*
3002  * The current tracer is copied to avoid taking a global lock
3003  * all around.
3004  */
3005 static void *s_start(struct seq_file *m, loff_t *pos)
3006 {
3007         struct trace_iterator *iter = m->private;
3008         struct trace_array *tr = iter->tr;
3009         int cpu_file = iter->cpu_file;
3010         void *p = NULL;
3011         loff_t l = 0;
3012         int cpu;
3013
3014         /*
3015          * copy the tracer to avoid using a global lock all around.
3016          * iter->trace is a copy of current_trace, the pointer to the
3017          * name may be used instead of a strcmp(), as iter->trace->name
3018          * will point to the same string as current_trace->name.
3019          */
3020         mutex_lock(&trace_types_lock);
3021         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3022                 *iter->trace = *tr->current_trace;
3023         mutex_unlock(&trace_types_lock);
3024
3025 #ifdef CONFIG_TRACER_MAX_TRACE
3026         if (iter->snapshot && iter->trace->use_max_tr)
3027                 return ERR_PTR(-EBUSY);
3028 #endif
3029
3030         if (!iter->snapshot)
3031                 atomic_inc(&trace_record_cmdline_disabled);
3032
3033         if (*pos != iter->pos) {
3034                 iter->ent = NULL;
3035                 iter->cpu = 0;
3036                 iter->idx = -1;
3037
3038                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3039                         for_each_tracing_cpu(cpu)
3040                                 tracing_iter_reset(iter, cpu);
3041                 } else
3042                         tracing_iter_reset(iter, cpu_file);
3043
3044                 iter->leftover = 0;
3045                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3046                         ;
3047
3048         } else {
3049                 /*
3050                  * If we overflowed the seq_file before, then we want
3051                  * to just reuse the trace_seq buffer again.
3052                  */
3053                 if (iter->leftover)
3054                         p = iter;
3055                 else {
3056                         l = *pos - 1;
3057                         p = s_next(m, p, &l);
3058                 }
3059         }
3060
3061         trace_event_read_lock();
3062         trace_access_lock(cpu_file);
3063         return p;
3064 }
3065
3066 static void s_stop(struct seq_file *m, void *p)
3067 {
3068         struct trace_iterator *iter = m->private;
3069
3070 #ifdef CONFIG_TRACER_MAX_TRACE
3071         if (iter->snapshot && iter->trace->use_max_tr)
3072                 return;
3073 #endif
3074
3075         if (!iter->snapshot)
3076                 atomic_dec(&trace_record_cmdline_disabled);
3077
3078         trace_access_unlock(iter->cpu_file);
3079         trace_event_read_unlock();
3080 }
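
/*
 * s_start()/s_next()/s_stop() plug into the seq_file machinery
 * together with a ->show() callback; a wiring sketch, assuming an
 * s_show() defined elsewhere in this file:
 *
 *	static const struct seq_operations trace_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 */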
3081
3082 static void
3083 get_total_entries(struct trace_buffer *buf,
3084                   unsigned long *total, unsigned long *entries)
3085 {
3086         unsigned long count;
3087         int cpu;
3088
3089         *total = 0;
3090         *entries = 0;
3091
3092         for_each_tracing_cpu(cpu) {
3093                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3094                 /*
3095                  * If this buffer has skipped entries, then we hold all
3096                  * entries for the trace and we need to ignore the
3097                  * ones before the time stamp.
3098                  */
3099                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3100                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3101                         /* total is the same as the entries */
3102                         *total += count;
3103                 } else
3104                         *total += count +
3105                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3106                 *entries += count;
3107         }
3108 }
3109
3110 static void print_lat_help_header(struct seq_file *m)
3111 {
3112         seq_puts(m, "#                  _------=> CPU#            \n"
3113                     "#                 / _-----=> irqs-off        \n"
3114                     "#                | / _----=> need-resched    \n"
3115                     "#                || / _---=> hardirq/softirq \n"
3116                     "#                ||| / _--=> preempt-depth   \n"
3117                     "#                |||| /     delay            \n"
3118                     "#  cmd     pid   ||||| time  |   caller      \n"
3119                     "#     \\   /      |||||  \\    |   /         \n");
3120 }
3121
3122 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3123 {
3124         unsigned long total;
3125         unsigned long entries;
3126
3127         get_total_entries(buf, &total, &entries);
3128         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3129                    entries, total, num_online_cpus());
3130         seq_puts(m, "#\n");
3131 }
3132
3133 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3134 {
3135         print_event_info(buf, m);
3136         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3137                     "#              | |       |          |         |\n");
3138 }
3139
3140 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3141 {
3142         print_event_info(buf, m);
3143         seq_puts(m, "#                              _-----=> irqs-off\n"
3144                     "#                             / _----=> need-resched\n"
3145                     "#                            | / _---=> hardirq/softirq\n"
3146                     "#                            || / _--=> preempt-depth\n"
3147                     "#                            ||| /     delay\n"
3148                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3149                     "#              | |       |   ||||       |         |\n");
3150 }
3151
3152 void
3153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3154 {
3155         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3156         struct trace_buffer *buf = iter->trace_buffer;
3157         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3158         struct tracer *type = iter->trace;
3159         unsigned long entries;
3160         unsigned long total;
3161         const char *name = "preemption";
3162
3163         name = type->name;
3164
3165         get_total_entries(buf, &total, &entries);
3166
3167         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3168                    name, UTS_RELEASE);
3169         seq_puts(m, "# -----------------------------------"
3170                  "---------------------------------\n");
3171         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3172                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3173                    nsecs_to_usecs(data->saved_latency),
3174                    entries,
3175                    total,
3176                    buf->cpu,
3177 #if defined(CONFIG_PREEMPT_NONE)
3178                    "server",
3179 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3180                    "desktop",
3181 #elif defined(CONFIG_PREEMPT)
3182                    "preempt",
3183 #else
3184                    "unknown",
3185 #endif
3186                    /* These are reserved for later use */
3187                    0, 0, 0, 0);
3188 #ifdef CONFIG_SMP
3189         seq_printf(m, " #P:%d)\n", num_online_cpus());
3190 #else
3191         seq_puts(m, ")\n");
3192 #endif
3193         seq_puts(m, "#    -----------------\n");
3194         seq_printf(m, "#    | task: %.16s-%d "
3195                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3196                    data->comm, data->pid,
3197                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3198                    data->policy, data->rt_priority);
3199         seq_puts(m, "#    -----------------\n");
3200
3201         if (data->critical_start) {
3202                 seq_puts(m, "#  => started at: ");
3203                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3204                 trace_print_seq(m, &iter->seq);
3205                 seq_puts(m, "\n#  => ended at:   ");
3206                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3207                 trace_print_seq(m, &iter->seq);
3208                 seq_puts(m, "\n#\n");
3209         }
3210
3211         seq_puts(m, "#\n");
3212 }
3213
3214 static void test_cpu_buff_start(struct trace_iterator *iter)
3215 {
3216         struct trace_seq *s = &iter->seq;
3217         struct trace_array *tr = iter->tr;
3218
3219         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3220                 return;
3221
3222         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3223                 return;
3224
3225         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3226                 return;
3227
3228         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3229                 return;
3230
3231         if (iter->started)
3232                 cpumask_set_cpu(iter->cpu, iter->started);
3233
3234         /* Don't print started cpu buffer for the first entry of the trace */
3235         if (iter->idx > 1)
3236                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3237                                 iter->cpu);
3238 }
3239
3240 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3241 {
3242         struct trace_array *tr = iter->tr;
3243         struct trace_seq *s = &iter->seq;
3244         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3245         struct trace_entry *entry;
3246         struct trace_event *event;
3247
3248         entry = iter->ent;
3249
3250         test_cpu_buff_start(iter);
3251
3252         event = ftrace_find_event(entry->type);
3253
3254         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3255                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3256                         trace_print_lat_context(iter);
3257                 else
3258                         trace_print_context(iter);
3259         }
3260
3261         if (trace_seq_has_overflowed(s))
3262                 return TRACE_TYPE_PARTIAL_LINE;
3263
3264         if (event)
3265                 return event->funcs->trace(iter, sym_flags, event);
3266
3267         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3268
3269         return trace_handle_return(s);
3270 }
3271
3272 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3273 {
3274         struct trace_array *tr = iter->tr;
3275         struct trace_seq *s = &iter->seq;
3276         struct trace_entry *entry;
3277         struct trace_event *event;
3278
3279         entry = iter->ent;
3280
3281         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3282                 trace_seq_printf(s, "%d %d %llu ",
3283                                  entry->pid, iter->cpu, iter->ts);
3284
3285         if (trace_seq_has_overflowed(s))
3286                 return TRACE_TYPE_PARTIAL_LINE;
3287
3288         event = ftrace_find_event(entry->type);
3289         if (event)
3290                 return event->funcs->raw(iter, 0, event);
3291
3292         trace_seq_printf(s, "%d ?\n", entry->type);
3293
3294         return trace_handle_return(s);
3295 }
3296
3297 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3298 {
3299         struct trace_array *tr = iter->tr;
3300         struct trace_seq *s = &iter->seq;
3301         unsigned char newline = '\n';
3302         struct trace_entry *entry;
3303         struct trace_event *event;
3304
3305         entry = iter->ent;
3306
3307         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3308                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3309                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3310                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3311                 if (trace_seq_has_overflowed(s))
3312                         return TRACE_TYPE_PARTIAL_LINE;
3313         }
3314
3315         event = ftrace_find_event(entry->type);
3316         if (event) {
3317                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3318                 if (ret != TRACE_TYPE_HANDLED)
3319                         return ret;
3320         }
3321
3322         SEQ_PUT_FIELD(s, newline);
3323
3324         return trace_handle_return(s);
3325 }
3326
3327 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3328 {
3329         struct trace_array *tr = iter->tr;
3330         struct trace_seq *s = &iter->seq;
3331         struct trace_entry *entry;
3332         struct trace_event *event;
3333
3334         entry = iter->ent;
3335
3336         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3337                 SEQ_PUT_FIELD(s, entry->pid);
3338                 SEQ_PUT_FIELD(s, iter->cpu);
3339                 SEQ_PUT_FIELD(s, iter->ts);
3340                 if (trace_seq_has_overflowed(s))
3341                         return TRACE_TYPE_PARTIAL_LINE;
3342         }
3343
3344         event = ftrace_find_event(entry->type);
3345         return event ? event->funcs->binary(iter, 0, event) :
3346                 TRACE_TYPE_HANDLED;
3347 }
3348
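/*
 * trace_empty - return 1 when there is nothing left to read.
 * Checks only the per-CPU buffer selected by iter->cpu_file, or every
 * tracing CPU for RING_BUFFER_ALL_CPUS, falling back to
 * ring_buffer_empty_cpu() when no buffer iterator has been set up.
 */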
3349 int trace_empty(struct trace_iterator *iter)
3350 {
3351         struct ring_buffer_iter *buf_iter;
3352         int cpu;
3353
3354         /* If we are looking at one CPU buffer, only check that one */
3355         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3356                 cpu = iter->cpu_file;
3357                 buf_iter = trace_buffer_iter(iter, cpu);
3358                 if (buf_iter) {
3359                         if (!ring_buffer_iter_empty(buf_iter))
3360                                 return 0;
3361                 } else {
3362                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3363                                 return 0;
3364                 }
3365                 return 1;
3366         }
3367
3368         for_each_tracing_cpu(cpu) {
3369                 buf_iter = trace_buffer_iter(iter, cpu);
3370                 if (buf_iter) {
3371                         if (!ring_buffer_iter_empty(buf_iter))
3372                                 return 0;
3373                 } else {
3374                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3375                                 return 0;
3376                 }
3377         }
3378
3379         return 1;
3380 }
3381
3382 /*  Called with trace_event_read_lock() held. */
3383 enum print_line_t print_trace_line(struct trace_iterator *iter)
3384 {
3385         struct trace_array *tr = iter->tr;
3386         unsigned long trace_flags = tr->trace_flags;
3387         enum print_line_t ret;
3388
3389         if (iter->lost_events) {
3390                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3391                                  iter->cpu, iter->lost_events);
3392                 if (trace_seq_has_overflowed(&iter->seq))
3393                         return TRACE_TYPE_PARTIAL_LINE;
3394         }
3395
3396         if (iter->trace && iter->trace->print_line) {
3397                 ret = iter->trace->print_line(iter);
3398                 if (ret != TRACE_TYPE_UNHANDLED)
3399                         return ret;
3400         }
3401
3402         if (iter->ent->type == TRACE_BPUTS &&
3403                         trace_flags & TRACE_ITER_PRINTK &&
3404                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3405                 return trace_print_bputs_msg_only(iter);
3406
3407         if (iter->ent->type == TRACE_BPRINT &&
3408                         trace_flags & TRACE_ITER_PRINTK &&
3409                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3410                 return trace_print_bprintk_msg_only(iter);
3411
3412         if (iter->ent->type == TRACE_PRINT &&
3413                         trace_flags & TRACE_ITER_PRINTK &&
3414                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3415                 return trace_print_printk_msg_only(iter);
3416
3417         if (trace_flags & TRACE_ITER_BIN)
3418                 return print_bin_fmt(iter);
3419
3420         if (trace_flags & TRACE_ITER_HEX)
3421                 return print_hex_fmt(iter);
3422
3423         if (trace_flags & TRACE_ITER_RAW)
3424                 return print_raw_fmt(iter);
3425
3426         return print_trace_fmt(iter);
3427 }
3428
3429 void trace_latency_header(struct seq_file *m)
3430 {
3431         struct trace_iterator *iter = m->private;
3432         struct trace_array *tr = iter->tr;
3433
3434         /* print nothing if the buffers are empty */
3435         if (trace_empty(iter))
3436                 return;
3437
3438         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3439                 print_trace_header(m, iter);
3440
3441         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3442                 print_lat_help_header(m);
3443 }
3444
3445 void trace_default_header(struct seq_file *m)
3446 {
3447         struct trace_iterator *iter = m->private;
3448         struct trace_array *tr = iter->tr;
3449         unsigned long trace_flags = tr->trace_flags;
3450
3451         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3452                 return;
3453
3454         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3455                 /* print nothing if the buffers are empty */
3456                 if (trace_empty(iter))
3457                         return;
3458                 print_trace_header(m, iter);
3459                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3460                         print_lat_help_header(m);
3461         } else {
3462                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3463                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3464                                 print_func_help_header_irq(iter->trace_buffer, m);
3465                         else
3466                                 print_func_help_header(iter->trace_buffer, m);
3467                 }
3468         }
3469 }
3470
3471 static void test_ftrace_alive(struct seq_file *m)
3472 {
3473         if (!ftrace_is_dead())
3474                 return;
3475         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3476                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3477 }
3478
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 static void show_snapshot_main_help(struct seq_file *m)
3481 {
3482         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3483                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3484                     "#                      Takes a snapshot of the main buffer.\n"
3485                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3486                     "#                      (Doesn't have to be '2'; works with any number that\n"
3487                     "#                       is not a '0' or '1')\n");
3488 }
3489
3490 static void show_snapshot_percpu_help(struct seq_file *m)
3491 {
3492         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3494         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3495                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3496 #else
3497         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3498                     "#                     Must use main snapshot file to allocate.\n");
3499 #endif
3500         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3501                     "#                      (Doesn't have to be '2'; works with any number that\n"
3502                     "#                       is not a '0' or '1')\n");
3503 }
3504
3505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3506 {
3507         if (iter->tr->allocated_snapshot)
3508                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3509         else
3510                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3511
3512         seq_puts(m, "# Snapshot commands:\n");
3513         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3514                 show_snapshot_main_help(m);
3515         else
3516                 show_snapshot_percpu_help(m);
3517 }
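
/*
 * Typical use of the snapshot file from user space; illustrative only,
 * and the paths assume tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate and take a snapshot
 *   cat /sys/kernel/tracing/snapshot        # read the frozen copy
 *   echo 0 > /sys/kernel/tracing/snapshot   # clear and free the snapshot buffer
 */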
3518 #else
3519 /* Should never be called */
3520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3521 #endif
3522
3523 static int s_show(struct seq_file *m, void *v)
3524 {
3525         struct trace_iterator *iter = v;
3526         int ret;
3527
3528         if (iter->ent == NULL) {
3529                 if (iter->tr) {
3530                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3531                         seq_puts(m, "#\n");
3532                         test_ftrace_alive(m);
3533                 }
3534                 if (iter->snapshot && trace_empty(iter))
3535                         print_snapshot_help(m, iter);
3536                 else if (iter->trace && iter->trace->print_header)
3537                         iter->trace->print_header(m);
3538                 else
3539                         trace_default_header(m);
3540
3541         } else if (iter->leftover) {
3542                 /*
3543                  * If we filled the seq_file buffer earlier, we
3544                  * want to just show it now.
3545                  */
3546                 ret = trace_print_seq(m, &iter->seq);
3547
3548                 /* ret should this time be zero, but you never know */
3549                 iter->leftover = ret;
3550
3551         } else {
3552                 print_trace_line(iter);
3553                 ret = trace_print_seq(m, &iter->seq);
3554                 /*
3555                  * If we overflow the seq_file buffer, then it will
3556                  * ask us for this data again at start up.
3557                  * Use that instead.
3558                  *  ret is 0 if seq_file write succeeded.
3559                  *        -1 otherwise.
3560                  */
3561                 iter->leftover = ret;
3562         }
3563
3564         return 0;
3565 }
3566
3567 /*
3568  * Should be used after trace_array_get(); trace_types_lock
3569  * ensures that i_cdev was already initialized.
3570  */
3571 static inline int tracing_get_cpu(struct inode *inode)
3572 {
3573         if (inode->i_cdev) /* See trace_create_cpu_file() */
3574                 return (long)inode->i_cdev - 1;
3575         return RING_BUFFER_ALL_CPUS;
3576 }
3577
3578 static const struct seq_operations tracer_seq_ops = {
3579         .start          = s_start,
3580         .next           = s_next,
3581         .stop           = s_stop,
3582         .show           = s_show,
3583 };
3584
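/*
 * __tracing_open - build the trace_iterator behind the "trace" file.
 * A copy of the current tracer is used so a concurrent tracer switch
 * cannot change it under the reader, the max_buffer is selected when a
 * snapshot (or a latency tracer with print_max) is being read, and
 * tracing is paused for a plain read so the buffer contents stay put.
 */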
3585 static struct trace_iterator *
3586 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3587 {
3588         struct trace_array *tr = inode->i_private;
3589         struct trace_iterator *iter;
3590         int cpu;
3591
3592         if (tracing_disabled)
3593                 return ERR_PTR(-ENODEV);
3594
3595         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3596         if (!iter)
3597                 return ERR_PTR(-ENOMEM);
3598
3599         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3600                                     GFP_KERNEL);
3601         if (!iter->buffer_iter)
3602                 goto release;
3603
3604         /*
3605          * We make a copy of the current tracer to avoid concurrent
3606          * changes to it while we are reading.
3607          */
3608         mutex_lock(&trace_types_lock);
3609         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3610         if (!iter->trace)
3611                 goto fail;
3612
3613         *iter->trace = *tr->current_trace;
3614
3615         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3616                 goto fail;
3617
3618         iter->tr = tr;
3619
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621         /* Currently only the top directory has a snapshot */
3622         if (tr->current_trace->print_max || snapshot)
3623                 iter->trace_buffer = &tr->max_buffer;
3624         else
3625 #endif
3626                 iter->trace_buffer = &tr->trace_buffer;
3627         iter->snapshot = snapshot;
3628         iter->pos = -1;
3629         iter->cpu_file = tracing_get_cpu(inode);
3630         mutex_init(&iter->mutex);
3631
3632         /* Notify the tracer early; before we stop tracing. */
3633         if (iter->trace && iter->trace->open)
3634                 iter->trace->open(iter);
3635
3636         /* Annotate start of buffers if we had overruns */
3637         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3638                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3639
3640         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3641         if (trace_clocks[tr->clock_id].in_ns)
3642                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3643
3644         /* stop the trace while dumping if we are not opening "snapshot" */
3645         if (!iter->snapshot)
3646                 tracing_stop_tr(tr);
3647
3648         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3649                 for_each_tracing_cpu(cpu) {
3650                         iter->buffer_iter[cpu] =
3651                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3652                 }
3653                 ring_buffer_read_prepare_sync();
3654                 for_each_tracing_cpu(cpu) {
3655                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3656                         tracing_iter_reset(iter, cpu);
3657                 }
3658         } else {
3659                 cpu = iter->cpu_file;
3660                 iter->buffer_iter[cpu] =
3661                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3662                 ring_buffer_read_prepare_sync();
3663                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3664                 tracing_iter_reset(iter, cpu);
3665         }
3666
3667         mutex_unlock(&trace_types_lock);
3668
3669         return iter;
3670
3671  fail:
3672         mutex_unlock(&trace_types_lock);
3673         kfree(iter->trace);
3674         kfree(iter->buffer_iter);
3675 release:
3676         seq_release_private(inode, file);
3677         return ERR_PTR(-ENOMEM);
3678 }
3679
3680 int tracing_open_generic(struct inode *inode, struct file *filp)
3681 {
3682         if (tracing_disabled)
3683                 return -ENODEV;
3684
3685         filp->private_data = inode->i_private;
3686         return 0;
3687 }
3688
3689 bool tracing_is_disabled(void)
3690 {
3691         return (tracing_disabled) ? true : false;
3692 }
3693
3694 /*
3695  * Open and update trace_array ref count.
3696  * Must have the current trace_array passed to it.
3697  */
3698 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3699 {
3700         struct trace_array *tr = inode->i_private;
3701
3702         if (tracing_disabled)
3703                 return -ENODEV;
3704
3705         if (trace_array_get(tr) < 0)
3706                 return -ENODEV;
3707
3708         filp->private_data = inode->i_private;
3709
3710         return 0;
3711 }
3712
3713 static int tracing_release(struct inode *inode, struct file *file)
3714 {
3715         struct trace_array *tr = inode->i_private;
3716         struct seq_file *m = file->private_data;
3717         struct trace_iterator *iter;
3718         int cpu;
3719
3720         if (!(file->f_mode & FMODE_READ)) {
3721                 trace_array_put(tr);
3722                 return 0;
3723         }
3724
3725         /* Writes do not use seq_file */
3726         iter = m->private;
3727         mutex_lock(&trace_types_lock);
3728
3729         for_each_tracing_cpu(cpu) {
3730                 if (iter->buffer_iter[cpu])
3731                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3732         }
3733
3734         if (iter->trace && iter->trace->close)
3735                 iter->trace->close(iter);
3736
3737         if (!iter->snapshot)
3738                 /* reenable tracing if it was previously enabled */
3739                 tracing_start_tr(tr);
3740
3741         __trace_array_put(tr);
3742
3743         mutex_unlock(&trace_types_lock);
3744
3745         mutex_destroy(&iter->mutex);
3746         free_cpumask_var(iter->started);
3747         kfree(iter->trace);
3748         kfree(iter->buffer_iter);
3749         seq_release_private(inode, file);
3750
3751         return 0;
3752 }
3753
3754 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3755 {
3756         struct trace_array *tr = inode->i_private;
3757
3758         trace_array_put(tr);
3759         return 0;
3760 }
3761
3762 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3763 {
3764         struct trace_array *tr = inode->i_private;
3765
3766         trace_array_put(tr);
3767
3768         return single_release(inode, file);
3769 }
3770
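/*
 * tracing_open - open handler for the "trace" file.  Opening for write
 * with O_TRUNC clears the selected CPU buffer(s); opening for read
 * builds the full iterator via __tracing_open() and honours the
 * latency-format option.
 */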
3771 static int tracing_open(struct inode *inode, struct file *file)
3772 {
3773         struct trace_array *tr = inode->i_private;
3774         struct trace_iterator *iter;
3775         int ret = 0;
3776
3777         if (trace_array_get(tr) < 0)
3778                 return -ENODEV;
3779
3780         /* If this file was open for write, then erase contents */
3781         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3782                 int cpu = tracing_get_cpu(inode);
3783
3784                 if (cpu == RING_BUFFER_ALL_CPUS)
3785                         tracing_reset_online_cpus(&tr->trace_buffer);
3786                 else
3787                         tracing_reset(&tr->trace_buffer, cpu);
3788         }
3789
3790         if (file->f_mode & FMODE_READ) {
3791                 iter = __tracing_open(inode, file, false);
3792                 if (IS_ERR(iter))
3793                         ret = PTR_ERR(iter);
3794                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3795                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3796         }
3797
3798         if (ret < 0)
3799                 trace_array_put(tr);
3800
3801         return ret;
3802 }
3803
3804 /*
3805  * Some tracers are not suitable for instance buffers.
3806  * A tracer is always available for the global array (toplevel)
3807  * or if it explicitly states that it is.
3808  */
3809 static bool
3810 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3811 {
3812         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3813 }
3814
3815 /* Find the next tracer that this trace array may use */
3816 static struct tracer *
3817 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3818 {
3819         while (t && !trace_ok_for_array(t, tr))
3820                 t = t->next;
3821
3822         return t;
3823 }
3824
3825 static void *
3826 t_next(struct seq_file *m, void *v, loff_t *pos)
3827 {
3828         struct trace_array *tr = m->private;
3829         struct tracer *t = v;
3830
3831         (*pos)++;
3832
3833         if (t)
3834                 t = get_tracer_for_array(tr, t->next);
3835
3836         return t;
3837 }
3838
3839 static void *t_start(struct seq_file *m, loff_t *pos)
3840 {
3841         struct trace_array *tr = m->private;
3842         struct tracer *t;
3843         loff_t l = 0;
3844
3845         mutex_lock(&trace_types_lock);
3846
3847         t = get_tracer_for_array(tr, trace_types);
3848         for (; t && l < *pos; t = t_next(m, t, &l))
3849                 ;
3850
3851         return t;
3852 }
3853
3854 static void t_stop(struct seq_file *m, void *p)
3855 {
3856         mutex_unlock(&trace_types_lock);
3857 }
3858
3859 static int t_show(struct seq_file *m, void *v)
3860 {
3861         struct tracer *t = v;
3862
3863         if (!t)
3864                 return 0;
3865
3866         seq_puts(m, t->name);
3867         if (t->next)
3868                 seq_putc(m, ' ');
3869         else
3870                 seq_putc(m, '\n');
3871
3872         return 0;
3873 }
3874
3875 static const struct seq_operations show_traces_seq_ops = {
3876         .start          = t_start,
3877         .next           = t_next,
3878         .stop           = t_stop,
3879         .show           = t_show,
3880 };
3881
3882 static int show_traces_open(struct inode *inode, struct file *file)
3883 {
3884         struct trace_array *tr = inode->i_private;
3885         struct seq_file *m;
3886         int ret;
3887
3888         if (tracing_disabled)
3889                 return -ENODEV;
3890
3891         ret = seq_open(file, &show_traces_seq_ops);
3892         if (ret)
3893                 return ret;
3894
3895         m = file->private_data;
3896         m->private = tr;
3897
3898         return 0;
3899 }
3900
3901 static ssize_t
3902 tracing_write_stub(struct file *filp, const char __user *ubuf,
3903                    size_t count, loff_t *ppos)
3904 {
3905         return count;
3906 }
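/*
 * tracing_lseek - llseek handler shared by the trace files.  Readers go
 * through seq_lseek(); write-only opens have no seq_file state, so
 * their position is simply reset to zero.
 */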
3907
3908 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3909 {
3910         int ret;
3911
3912         if (file->f_mode & FMODE_READ)
3913                 ret = seq_lseek(file, offset, whence);
3914         else
3915                 file->f_pos = ret = 0;
3916
3917         return ret;
3918 }
3919
3920 static const struct file_operations tracing_fops = {
3921         .open           = tracing_open,
3922         .read           = seq_read,
3923         .write          = tracing_write_stub,
3924         .llseek         = tracing_lseek,
3925         .release        = tracing_release,
3926 };
3927
3928 static const struct file_operations show_traces_fops = {
3929         .open           = show_traces_open,
3930         .read           = seq_read,
3931         .release        = seq_release,
3932         .llseek         = seq_lseek,
3933 };
3934
3935 /*
3936  * The tracer itself will not take this lock, but we still want
3937  * to provide a consistent cpumask to user-space:
3938  */
3939 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3940
3941 /*
3942  * Temporary storage for the character representation of the
3943  * CPU bitmask (and one more byte for the newline):
3944  */
3945 static char mask_str[NR_CPUS + 1];
3946
3947 static ssize_t
3948 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3949                      size_t count, loff_t *ppos)
3950 {
3951         struct trace_array *tr = file_inode(filp)->i_private;
3952         int len;
3953
3954         mutex_lock(&tracing_cpumask_update_lock);
3955
3956         len = snprintf(mask_str, count, "%*pb\n",
3957                        cpumask_pr_args(tr->tracing_cpumask));
3958         if (len >= count) {
3959                 count = -EINVAL;
3960                 goto out_err;
3961         }
3962         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3963
3964 out_err:
3965         mutex_unlock(&tracing_cpumask_update_lock);
3966
3967         return count;
3968 }
3969
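/*
 * tracing_cpumask_write - update which CPUs may record events.  Each
 * CPU whose bit flips has its per-CPU "disabled" count and its ring
 * buffer record state adjusted under max_lock with IRQs off, before
 * the new mask is copied into tr->tracing_cpumask.
 */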
3970 static ssize_t
3971 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3972                       size_t count, loff_t *ppos)
3973 {
3974         struct trace_array *tr = file_inode(filp)->i_private;
3975         cpumask_var_t tracing_cpumask_new;
3976         int err, cpu;
3977
3978         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3979                 return -ENOMEM;
3980
3981         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3982         if (err)
3983                 goto err_unlock;
3984
3985         mutex_lock(&tracing_cpumask_update_lock);
3986
3987         local_irq_disable();
3988         arch_spin_lock(&tr->max_lock);
3989         for_each_tracing_cpu(cpu) {
3990                 /*
3991                  * Increase/decrease the disabled counter if we are
3992                  * about to flip a bit in the cpumask:
3993                  */
3994                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3995                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3996                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3997                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3998                 }
3999                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4000                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4001                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4002                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4003                 }
4004         }
4005         arch_spin_unlock(&tr->max_lock);
4006         local_irq_enable();
4007
4008         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4009
4010         mutex_unlock(&tracing_cpumask_update_lock);
4011         free_cpumask_var(tracing_cpumask_new);
4012
4013         return count;
4014
4015 err_unlock:
4016         free_cpumask_var(tracing_cpumask_new);
4017
4018         return err;
4019 }
4020
4021 static const struct file_operations tracing_cpumask_fops = {
4022         .open           = tracing_open_generic_tr,
4023         .read           = tracing_cpumask_read,
4024         .write          = tracing_cpumask_write,
4025         .release        = tracing_release_generic_tr,
4026         .llseek         = generic_file_llseek,
4027 };
4028
4029 static int tracing_trace_options_show(struct seq_file *m, void *v)
4030 {
4031         struct tracer_opt *trace_opts;
4032         struct trace_array *tr = m->private;
4033         u32 tracer_flags;
4034         int i;
4035
4036         mutex_lock(&trace_types_lock);
4037         tracer_flags = tr->current_trace->flags->val;
4038         trace_opts = tr->current_trace->flags->opts;
4039
4040         for (i = 0; trace_options[i]; i++) {
4041                 if (tr->trace_flags & (1 << i))
4042                         seq_printf(m, "%s\n", trace_options[i]);
4043                 else
4044                         seq_printf(m, "no%s\n", trace_options[i]);
4045         }
4046
4047         for (i = 0; trace_opts[i].name; i++) {
4048                 if (tracer_flags & trace_opts[i].bit)
4049                         seq_printf(m, "%s\n", trace_opts[i].name);
4050                 else
4051                         seq_printf(m, "no%s\n", trace_opts[i].name);
4052         }
4053         mutex_unlock(&trace_types_lock);
4054
4055         return 0;
4056 }
4057
4058 static int __set_tracer_option(struct trace_array *tr,
4059                                struct tracer_flags *tracer_flags,
4060                                struct tracer_opt *opts, int neg)
4061 {
4062         struct tracer *trace = tracer_flags->trace;
4063         int ret;
4064
4065         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4066         if (ret)
4067                 return ret;
4068
4069         if (neg)
4070                 tracer_flags->val &= ~opts->bit;
4071         else
4072                 tracer_flags->val |= opts->bit;
4073         return 0;
4074 }
4075
4076 /* Try to assign a tracer specific option */
4077 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4078 {
4079         struct tracer *trace = tr->current_trace;
4080         struct tracer_flags *tracer_flags = trace->flags;
4081         struct tracer_opt *opts = NULL;
4082         int i;
4083
4084         for (i = 0; tracer_flags->opts[i].name; i++) {
4085                 opts = &tracer_flags->opts[i];
4086
4087                 if (strcmp(cmp, opts->name) == 0)
4088                         return __set_tracer_option(tr, trace->flags, opts, neg);
4089         }
4090
4091         return -EINVAL;
4092 }
4093
4094 /* Some tracers require overwrite to stay enabled */
4095 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4096 {
4097         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4098                 return -1;
4099
4100         return 0;
4101 }
4102
4103 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4104 {
4105         /* do nothing if flag is already set */
4106         if (!!(tr->trace_flags & mask) == !!enabled)
4107                 return 0;
4108
4109         /* Give the tracer a chance to approve the change */
4110         if (tr->current_trace->flag_changed)
4111                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4112                         return -EINVAL;
4113
4114         if (enabled)
4115                 tr->trace_flags |= mask;
4116         else
4117                 tr->trace_flags &= ~mask;
4118
4119         if (mask == TRACE_ITER_RECORD_CMD)
4120                 trace_event_enable_cmd_record(enabled);
4121
4122         if (mask == TRACE_ITER_EVENT_FORK)
4123                 trace_event_follow_fork(tr, enabled);
4124
4125         if (mask == TRACE_ITER_OVERWRITE) {
4126                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4127 #ifdef CONFIG_TRACER_MAX_TRACE
4128                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4129 #endif
4130         }
4131
4132         if (mask == TRACE_ITER_PRINTK) {
4133                 trace_printk_start_stop_comm(enabled);
4134                 trace_printk_control(enabled);
4135         }
4136
4137         return 0;
4138 }
4139
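/*
 * trace_set_options - apply a single option name, optionally prefixed
 * with "no" to clear it.  Generic trace_options[] flags are tried
 * first; anything unmatched is handed to the current tracer's private
 * options via set_tracer_option().
 */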
4140 static int trace_set_options(struct trace_array *tr, char *option)
4141 {
4142         char *cmp;
4143         int neg = 0;
4144         int ret = -ENODEV;
4145         int i;
4146         size_t orig_len = strlen(option);
4147
4148         cmp = strstrip(option);
4149
4150         if (strncmp(cmp, "no", 2) == 0) {
4151                 neg = 1;
4152                 cmp += 2;
4153         }
4154
4155         mutex_lock(&trace_types_lock);
4156
4157         for (i = 0; trace_options[i]; i++) {
4158                 if (strcmp(cmp, trace_options[i]) == 0) {
4159                         ret = set_tracer_flag(tr, 1 << i, !neg);
4160                         break;
4161                 }
4162         }
4163
4164         /* If no option could be set, test the specific tracer options */
4165         if (!trace_options[i])
4166                 ret = set_tracer_option(tr, cmp, neg);
4167
4168         mutex_unlock(&trace_types_lock);
4169
4170         /*
4171          * If the first trailing whitespace is replaced with '\0' by strstrip,
4172          * turn it back into a space.
4173          */
4174         if (orig_len > strlen(option))
4175                 option[strlen(option)] = ' ';
4176
4177         return ret;
4178 }
4179
4180 static void __init apply_trace_boot_options(void)
4181 {
4182         char *buf = trace_boot_options_buf;
4183         char *option;
4184
4185         while (true) {
4186                 option = strsep(&buf, ",");
4187
4188                 if (!option)
4189                         break;
4190
4191                 if (*option)
4192                         trace_set_options(&global_trace, option);
4193
4194                 /* Put back the comma to allow this to be called again */
4195                 if (buf)
4196                         *(buf - 1) = ',';
4197         }
4198 }
4199
4200 static ssize_t
4201 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4202                         size_t cnt, loff_t *ppos)
4203 {
4204         struct seq_file *m = filp->private_data;
4205         struct trace_array *tr = m->private;
4206         char buf[64];
4207         int ret;
4208
4209         if (cnt >= sizeof(buf))
4210                 return -EINVAL;
4211
4212         if (copy_from_user(buf, ubuf, cnt))
4213                 return -EFAULT;
4214
4215         buf[cnt] = 0;
4216
4217         ret = trace_set_options(tr, buf);
4218         if (ret < 0)
4219                 return ret;
4220
4221         *ppos += cnt;
4222
4223         return cnt;
4224 }
4225
4226 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4227 {
4228         struct trace_array *tr = inode->i_private;
4229         int ret;
4230
4231         if (tracing_disabled)
4232                 return -ENODEV;
4233
4234         if (trace_array_get(tr) < 0)
4235                 return -ENODEV;
4236
4237         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4238         if (ret < 0)
4239                 trace_array_put(tr);
4240
4241         return ret;
4242 }
4243
4244 static const struct file_operations tracing_iter_fops = {
4245         .open           = tracing_trace_options_open,
4246         .read           = seq_read,
4247         .llseek         = seq_lseek,
4248         .release        = tracing_single_release_tr,
4249         .write          = tracing_trace_options_write,
4250 };
4251
4252 static const char readme_msg[] =
4253         "tracing mini-HOWTO:\n\n"
4254         "# echo 0 > tracing_on : quick way to disable tracing\n"
4255         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4256         " Important files:\n"
4257         "  trace\t\t\t- The static contents of the buffer\n"
4258         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4259         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4260         "  current_tracer\t- function and latency tracers\n"
4261         "  available_tracers\t- list of configured tracers for current_tracer\n"
4262         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4263         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4264         "  trace_clock\t\t- change the clock used to order events\n"
4265         "       local:   Per cpu clock but may not be synced across CPUs\n"
4266         "      global:   Synced across CPUs but slows tracing down.\n"
4267         "     counter:   Not a clock, but just an increment\n"
4268         "      uptime:   Jiffy counter from time of boot\n"
4269         "        perf:   Same clock that perf events use\n"
4270 #ifdef CONFIG_X86_64
4271         "     x86-tsc:   TSC cycle counter\n"
4272 #endif
4273         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4274         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4275         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4276         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4277         "\t\t\t  Remove sub-buffer with rmdir\n"
4278         "  trace_options\t\t- Set format or modify how tracing happens\n"
4279         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4280         "\t\t\t  option name\n"
4281         "  saved_cmdlines_size\t- echo the number of comm-pid entries to keep in here\n"
4282 #ifdef CONFIG_DYNAMIC_FTRACE
4283         "\n  available_filter_functions - list of functions that can be filtered on\n"
4284         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4285         "\t\t\t  functions\n"
4286         "\t     accepts: func_full_name or glob-matching-pattern\n"
4287         "\t     modules: Can select a group via module\n"
4288         "\t      Format: :mod:<module-name>\n"
4289         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4290         "\t    triggers: a command to perform when function is hit\n"
4291         "\t      Format: <function>:<trigger>[:count]\n"
4292         "\t     trigger: traceon, traceoff\n"
4293         "\t\t      enable_event:<system>:<event>\n"
4294         "\t\t      disable_event:<system>:<event>\n"
4295 #ifdef CONFIG_STACKTRACE
4296         "\t\t      stacktrace\n"
4297 #endif
4298 #ifdef CONFIG_TRACER_SNAPSHOT
4299         "\t\t      snapshot\n"
4300 #endif
4301         "\t\t      dump\n"
4302         "\t\t      cpudump\n"
4303         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4304         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4305         "\t     The first one will disable tracing every time do_fault is hit\n"
4306         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4307         "\t       The first time do_trap is hit and it disables tracing, the\n"
4308         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4309         "\t       the counter will not decrement. It only decrements when the\n"
4310         "\t       trigger did work\n"
4311         "\t     To remove a trigger without a count:\n"
4312         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4313         "\t     To remove a trigger with a count:\n"
4314         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4315         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4316         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4317         "\t    modules: Can select a group via module command :mod:\n"
4318         "\t    Does not accept triggers\n"
4319 #endif /* CONFIG_DYNAMIC_FTRACE */
4320 #ifdef CONFIG_FUNCTION_TRACER
4321         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4322         "\t\t    (function)\n"
4323 #endif
4324 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4325         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4326         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4327         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4328 #endif
4329 #ifdef CONFIG_TRACER_SNAPSHOT
4330         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4331         "\t\t\t  snapshot buffer. Read the contents for more\n"
4332         "\t\t\t  information\n"
4333 #endif
4334 #ifdef CONFIG_STACK_TRACER
4335         "  stack_trace\t\t- Shows the max stack trace when active\n"
4336         "  stack_max_size\t- Shows current max stack size that was traced\n"
4337         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4338         "\t\t\t  new trace)\n"
4339 #ifdef CONFIG_DYNAMIC_FTRACE
4340         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4341         "\t\t\t  traces\n"
4342 #endif
4343 #endif /* CONFIG_STACK_TRACER */
4344 #ifdef CONFIG_KPROBE_EVENTS
4345         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4346         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4347 #endif
4348 #ifdef CONFIG_UPROBE_EVENTS
4349         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4350         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4351 #endif
4352 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4353         "\t  accepts: event-definitions (one definition per line)\n"
4354         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4355         "\t           -:[<group>/]<event>\n"
4356 #ifdef CONFIG_KPROBE_EVENTS
4357         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4358         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4359 #endif
4360 #ifdef CONFIG_UPROBE_EVENTS
4361         "\t    place: <path>:<offset>\n"
4362 #endif
4363         "\t     args: <name>=fetcharg[:type]\n"
4364         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4365         "\t           $stack<index>, $stack, $retval, $comm\n"
4366         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4367         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4368 #endif
4369         "  events/\t\t- Directory containing all trace event subsystems:\n"
4370         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4371         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4372         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4373         "\t\t\t  events\n"
4374         "      filter\t\t- If set, only events passing filter are traced\n"
4375         "  events/<system>/<event>/\t- Directory containing control files for\n"
4376         "\t\t\t  <event>:\n"
4377         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4378         "      filter\t\t- If set, only events passing filter are traced\n"
4379         "      trigger\t\t- If set, a command to perform when event is hit\n"
4380         "\t    Format: <trigger>[:count][if <filter>]\n"
4381         "\t   trigger: traceon, traceoff\n"
4382         "\t            enable_event:<system>:<event>\n"
4383         "\t            disable_event:<system>:<event>\n"
4384 #ifdef CONFIG_HIST_TRIGGERS
4385         "\t            enable_hist:<system>:<event>\n"
4386         "\t            disable_hist:<system>:<event>\n"
4387 #endif
4388 #ifdef CONFIG_STACKTRACE
4389         "\t\t    stacktrace\n"
4390 #endif
4391 #ifdef CONFIG_TRACER_SNAPSHOT
4392         "\t\t    snapshot\n"
4393 #endif
4394 #ifdef CONFIG_HIST_TRIGGERS
4395         "\t\t    hist (see below)\n"
4396 #endif
4397         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4398         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4399         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4400         "\t                  events/block/block_unplug/trigger\n"
4401         "\t   The first disables tracing every time block_unplug is hit.\n"
4402         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4403         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4404         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4405         "\t   Like function triggers, the counter is only decremented if it\n"
4406         "\t    enabled or disabled tracing.\n"
4407         "\t   To remove a trigger without a count:\n"
4408         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4409         "\t   To remove a trigger with a count:\n"
4410         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4411         "\t   Filters can be ignored when removing a trigger.\n"
4412 #ifdef CONFIG_HIST_TRIGGERS
4413         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4414         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4415         "\t            [:values=<field1[,field2,...]>]\n"
4416         "\t            [:sort=<field1[,field2,...]>]\n"
4417         "\t            [:size=#entries]\n"
4418         "\t            [:pause][:continue][:clear]\n"
4419         "\t            [:name=histname1]\n"
4420         "\t            [if <filter>]\n\n"
4421         "\t    When a matching event is hit, an entry is added to a hash\n"
4422         "\t    table using the key(s) and value(s) named, and the value of a\n"
4423         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4424         "\t    correspond to fields in the event's format description.  Keys\n"
4425         "\t    can be any field, or the special string 'stacktrace'.\n"
4426         "\t    Compound keys consisting of up to two fields can be specified\n"
4427         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4428         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4429         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4430         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4431         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4432         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4433         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4434         "\t    its histogram data will be shared with other triggers of the\n"
4435         "\t    same name, and trigger hits will update this common data.\n\n"
4436         "\t    Reading the 'hist' file for the event will dump the hash\n"
4437         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4438         "\t    triggers attached to an event, there will be a table for each\n"
4439         "\t    trigger in the output.  The table displayed for a named\n"
4440         "\t    trigger will be the same as any other instance having the\n"
4441         "\t    same name.  The default format used to display a given field\n"
4442         "\t    can be modified by appending any of the following modifiers\n"
4443         "\t    to the field name, as applicable:\n\n"
4444         "\t            .hex        display a number as a hex value\n"
4445         "\t            .sym        display an address as a symbol\n"
4446         "\t            .sym-offset display an address as a symbol and offset\n"
4447         "\t            .execname   display a common_pid as a program name\n"
4448         "\t            .syscall    display a syscall id as a syscall name\n"
4449         "\t            .log2       display log2 value rather than raw number\n\n"
4450         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4451         "\t    trigger or to start a hist trigger but not log any events\n"
4452         "\t    until told to do so.  'continue' can be used to start or\n"
4453         "\t    restart a paused hist trigger.\n\n"
4454         "\t    The 'clear' parameter will clear the contents of a running\n"
4455         "\t    hist trigger and leave its current paused/active state\n"
4456         "\t    unchanged.\n\n"
4457         "\t    The enable_hist and disable_hist triggers can be used to\n"
4458         "\t    have one event conditionally start and stop another event's\n"
4459         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4460         "\t    the enable_event and disable_event triggers.\n"
4461 #endif
4462 ;
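
/*
 * Illustrative only: one hist trigger of the form documented above,
 * aggregating kmalloc request sizes per call site.  The path assumes
 * tracefs is mounted at /sys/kernel/tracing and that the kmem:kmalloc
 * event exposes 'call_site' and 'bytes_req' fields:
 *
 *   echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *        > /sys/kernel/tracing/events/kmem/kmalloc/trigger
 *   cat /sys/kernel/tracing/events/kmem/kmalloc/hist
 */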
4463
4464 static ssize_t
4465 tracing_readme_read(struct file *filp, char __user *ubuf,
4466                        size_t cnt, loff_t *ppos)
4467 {
4468         return simple_read_from_buffer(ubuf, cnt, ppos,
4469                                         readme_msg, strlen(readme_msg));
4470 }
4471
4472 static const struct file_operations tracing_readme_fops = {
4473         .open           = tracing_open_generic,
4474         .read           = tracing_readme_read,
4475         .llseek         = generic_file_llseek,
4476 };
4477
4478 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4479 {
4480         unsigned int *ptr = v;
4481
4482         if (*pos || m->count)
4483                 ptr++;
4484
4485         (*pos)++;
4486
4487         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4488              ptr++) {
4489                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4490                         continue;
4491
4492                 return ptr;
4493         }
4494
4495         return NULL;
4496 }
4497
4498 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4499 {
4500         void *v;
4501         loff_t l = 0;
4502
4503         preempt_disable();
4504         arch_spin_lock(&trace_cmdline_lock);
4505
4506         v = &savedcmd->map_cmdline_to_pid[0];
4507         while (l <= *pos) {
4508                 v = saved_cmdlines_next(m, v, &l);
4509                 if (!v)
4510                         return NULL;
4511         }
4512
4513         return v;
4514 }
4515
4516 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4517 {
4518         arch_spin_unlock(&trace_cmdline_lock);
4519         preempt_enable();
4520 }
4521
4522 static int saved_cmdlines_show(struct seq_file *m, void *v)
4523 {
4524         char buf[TASK_COMM_LEN];
4525         unsigned int *pid = v;
4526
4527         __trace_find_cmdline(*pid, buf);
4528         seq_printf(m, "%d %s\n", *pid, buf);
4529         return 0;
4530 }
4531
4532 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4533         .start          = saved_cmdlines_start,
4534         .next           = saved_cmdlines_next,
4535         .stop           = saved_cmdlines_stop,
4536         .show           = saved_cmdlines_show,
4537 };
4538
4539 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4540 {
4541         if (tracing_disabled)
4542                 return -ENODEV;
4543
4544         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4545 }
4546
4547 static const struct file_operations tracing_saved_cmdlines_fops = {
4548         .open           = tracing_saved_cmdlines_open,
4549         .read           = seq_read,
4550         .llseek         = seq_lseek,
4551         .release        = seq_release,
4552 };
4553
4554 static ssize_t
4555 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4556                                  size_t cnt, loff_t *ppos)
4557 {
4558         char buf[64];
4559         int r;
4560
4561         arch_spin_lock(&trace_cmdline_lock);
4562         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4563         arch_spin_unlock(&trace_cmdline_lock);
4564
4565         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4566 }
4567
4568 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4569 {
4570         kfree(s->saved_cmdlines);
4571         kfree(s->map_cmdline_to_pid);
4572         kfree(s);
4573 }
4574
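/*
 * tracing_resize_saved_cmdlines - allocate a new comm/pid cache with
 * @val entries and swap it in under trace_cmdline_lock; the old buffer
 * is freed only after the swap, so readers never see a half-built map.
 */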
4575 static int tracing_resize_saved_cmdlines(unsigned int val)
4576 {
4577         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4578
4579         s = kmalloc(sizeof(*s), GFP_KERNEL);
4580         if (!s)
4581                 return -ENOMEM;
4582
4583         if (allocate_cmdlines_buffer(val, s) < 0) {
4584                 kfree(s);
4585                 return -ENOMEM;
4586         }
4587
4588         arch_spin_lock(&trace_cmdline_lock);
4589         savedcmd_temp = savedcmd;
4590         savedcmd = s;
4591         arch_spin_unlock(&trace_cmdline_lock);
4592         free_saved_cmdlines_buffer(savedcmd_temp);
4593
4594         return 0;
4595 }
4596
4597 static ssize_t
4598 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4599                                   size_t cnt, loff_t *ppos)
4600 {
4601         unsigned long val;
4602         int ret;
4603
4604         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4605         if (ret)
4606                 return ret;
4607
4608         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
4609         if (!val || val > PID_MAX_DEFAULT)
4610                 return -EINVAL;
4611
4612         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4613         if (ret < 0)
4614                 return ret;
4615
4616         *ppos += cnt;
4617
4618         return cnt;
4619 }
4620
4621 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4622         .open           = tracing_open_generic,
4623         .read           = tracing_saved_cmdlines_size_read,
4624         .write          = tracing_saved_cmdlines_size_write,
4625 };
4626
4627 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4628 static union trace_enum_map_item *
4629 update_enum_map(union trace_enum_map_item *ptr)
4630 {
4631         if (!ptr->map.enum_string) {
4632                 if (ptr->tail.next) {
4633                         ptr = ptr->tail.next;
4634                         /* Set ptr to the next real item (skip head) */
4635                         ptr++;
4636                 } else
4637                         return NULL;
4638         }
4639         return ptr;
4640 }
4641
4642 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4643 {
4644         union trace_enum_map_item *ptr = v;
4645
4646         /*
4647          * Paranoid! If ptr points to end, we don't want to increment past it.
4648          * This really should never happen.
4649          */
4650         ptr = update_enum_map(ptr);
4651         if (WARN_ON_ONCE(!ptr))
4652                 return NULL;
4653
4654         ptr++;
4655
4656         (*pos)++;
4657
4658         ptr = update_enum_map(ptr);
4659
4660         return ptr;
4661 }
4662
4663 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4664 {
4665         union trace_enum_map_item *v;
4666         loff_t l = 0;
4667
4668         mutex_lock(&trace_enum_mutex);
4669
4670         v = trace_enum_maps;
4671         if (v)
4672                 v++;
4673
4674         while (v && l < *pos) {
4675                 v = enum_map_next(m, v, &l);
4676         }
4677
4678         return v;
4679 }
4680
4681 static void enum_map_stop(struct seq_file *m, void *v)
4682 {
4683         mutex_unlock(&trace_enum_mutex);
4684 }
4685
4686 static int enum_map_show(struct seq_file *m, void *v)
4687 {
4688         union trace_enum_map_item *ptr = v;
4689
4690         seq_printf(m, "%s %ld (%s)\n",
4691                    ptr->map.enum_string, ptr->map.enum_value,
4692                    ptr->map.system);
4693
4694         return 0;
4695 }
4696
4697 static const struct seq_operations tracing_enum_map_seq_ops = {
4698         .start          = enum_map_start,
4699         .next           = enum_map_next,
4700         .stop           = enum_map_stop,
4701         .show           = enum_map_show,
4702 };
4703
4704 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4705 {
4706         if (tracing_disabled)
4707                 return -ENODEV;
4708
4709         return seq_open(filp, &tracing_enum_map_seq_ops);
4710 }
4711
4712 static const struct file_operations tracing_enum_map_fops = {
4713         .open           = tracing_enum_map_open,
4714         .read           = seq_read,
4715         .llseek         = seq_lseek,
4716         .release        = seq_release,
4717 };
4718
4719 static inline union trace_enum_map_item *
4720 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4721 {
4722         /* Return tail of array given the head */
4723         return ptr + ptr->head.length + 1;
4724 }
4725
4726 static void
4727 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4728                            int len)
4729 {
4730         struct trace_enum_map **stop;
4731         struct trace_enum_map **map;
4732         union trace_enum_map_item *map_array;
4733         union trace_enum_map_item *ptr;
4734
4735         stop = start + len;
4736
4737         /*
4738          * The trace_enum_maps list contains the map plus a head and tail item,
4739          * where the head holds the module and the length of the array, and the
4740          * tail holds a pointer to the next list.
4741          */
4742         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4743         if (!map_array) {
4744                 pr_warn("Unable to allocate trace enum mapping\n");
4745                 return;
4746         }
4747
4748         mutex_lock(&trace_enum_mutex);
4749
4750         if (!trace_enum_maps)
4751                 trace_enum_maps = map_array;
4752         else {
4753                 ptr = trace_enum_maps;
4754                 for (;;) {
4755                         ptr = trace_enum_jmp_to_tail(ptr);
4756                         if (!ptr->tail.next)
4757                                 break;
4758                         ptr = ptr->tail.next;
4759
4760                 }
4761                 ptr->tail.next = map_array;
4762         }
4763         map_array->head.mod = mod;
4764         map_array->head.length = len;
4765         map_array++;
4766
4767         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4768                 map_array->map = **map;
4769                 map_array++;
4770         }
4771         memset(map_array, 0, sizeof(*map_array));
4772
4773         mutex_unlock(&trace_enum_mutex);
4774 }
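/*
 * Illustrative sketch (added for clarity, not in the original source): the
 * block allocated above ends up laid out as
 *
 *   [ head(mod, length) | map[0] | map[1] | ... | map[len-1] | tail(next) ]
 *
 * trace_enum_jmp_to_tail() skips from the head over the maps to the tail,
 * and the tail's next pointer chains the per-module blocks together.
 */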
4775
4776 static void trace_create_enum_file(struct dentry *d_tracer)
4777 {
4778         trace_create_file("enum_map", 0444, d_tracer,
4779                           NULL, &tracing_enum_map_fops);
4780 }
4781
4782 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4783 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4784 static inline void trace_insert_enum_map_file(struct module *mod,
4785                               struct trace_enum_map **start, int len) { }
4786 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4787
4788 static void trace_insert_enum_map(struct module *mod,
4789                                   struct trace_enum_map **start, int len)
4790 {
4791         struct trace_enum_map **map;
4792
4793         if (len <= 0)
4794                 return;
4795
4796         map = start;
4797
4798         trace_event_enum_update(map, len);
4799
4800         trace_insert_enum_map_file(mod, start, len);
4801 }
4802
4803 static ssize_t
4804 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4805                        size_t cnt, loff_t *ppos)
4806 {
4807         struct trace_array *tr = filp->private_data;
4808         char buf[MAX_TRACER_SIZE+2];
4809         int r;
4810
4811         mutex_lock(&trace_types_lock);
4812         r = sprintf(buf, "%s\n", tr->current_trace->name);
4813         mutex_unlock(&trace_types_lock);
4814
4815         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4816 }
4817
4818 int tracer_init(struct tracer *t, struct trace_array *tr)
4819 {
4820         tracing_reset_online_cpus(&tr->trace_buffer);
4821         return t->init(tr);
4822 }
4823
4824 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4825 {
4826         int cpu;
4827
4828         for_each_tracing_cpu(cpu)
4829                 per_cpu_ptr(buf->data, cpu)->entries = val;
4830 }
4831
4832 #ifdef CONFIG_TRACER_MAX_TRACE
4833 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4834 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4835                                         struct trace_buffer *size_buf, int cpu_id)
4836 {
4837         int cpu, ret = 0;
4838
4839         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4840                 for_each_tracing_cpu(cpu) {
4841                         ret = ring_buffer_resize(trace_buf->buffer,
4842                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4843                         if (ret < 0)
4844                                 break;
4845                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4846                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4847                 }
4848         } else {
4849                 ret = ring_buffer_resize(trace_buf->buffer,
4850                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4851                 if (ret == 0)
4852                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4853                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4854         }
4855
4856         return ret;
4857 }
4858 #endif /* CONFIG_TRACER_MAX_TRACE */
4859
4860 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4861                                         unsigned long size, int cpu)
4862 {
4863         int ret;
4864
4865         /*
4866          * If the kernel or the user changes the size of the ring buffer,
4867          * we use the size that was given, and we can forget about
4868          * expanding it later.
4869          */
4870         ring_buffer_expanded = true;
4871
4872         /* May be called before buffers are initialized */
4873         if (!tr->trace_buffer.buffer)
4874                 return 0;
4875
4876         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4877         if (ret < 0)
4878                 return ret;
4879
4880 #ifdef CONFIG_TRACER_MAX_TRACE
4881         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4882             !tr->current_trace->use_max_tr)
4883                 goto out;
4884
4885         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4886         if (ret < 0) {
4887                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4888                                                      &tr->trace_buffer, cpu);
4889                 if (r < 0) {
4890                         /*
4891                          * AARGH! We are left with a different
4892                          * sized max buffer!!!!
4893                          * The max buffer is our "snapshot" buffer.
4894                          * When a tracer needs a snapshot (one of the
4895                          * latency tracers), it swaps the max buffer
4896                          * with the saved snapshot. We succeeded in
4897                          * updating the size of the main buffer, but failed
4898                          * to update the size of the max buffer. But when we
4899                          * tried to reset the main buffer to its original
4900                          * size, we failed there too. This is very unlikely to
4901                          * happen, but if it does, warn and kill all
4902                          * tracing.
4903                          */
4904                         WARN_ON(1);
4905                         tracing_disabled = 1;
4906                 }
4907                 return ret;
4908         }
4909
4910         if (cpu == RING_BUFFER_ALL_CPUS)
4911                 set_buffer_entries(&tr->max_buffer, size);
4912         else
4913                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4914
4915  out:
4916 #endif /* CONFIG_TRACER_MAX_TRACE */
4917
4918         if (cpu == RING_BUFFER_ALL_CPUS)
4919                 set_buffer_entries(&tr->trace_buffer, size);
4920         else
4921                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4922
4923         return ret;
4924 }
4925
4926 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4927                                           unsigned long size, int cpu_id)
4928 {
4929         int ret = size;
4930
4931         mutex_lock(&trace_types_lock);
4932
4933         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4934                 /* make sure this CPU is enabled in the tracing buffer mask */
4935                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4936                         ret = -EINVAL;
4937                         goto out;
4938                 }
4939         }
4940
4941         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4942         if (ret < 0)
4943                 ret = -ENOMEM;
4944
4945 out:
4946         mutex_unlock(&trace_types_lock);
4947
4948         return ret;
4949 }
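/*
 * Note (editorial): tracing_resize_ring_buffer() is reached from the tracefs
 * interface, typically via writes to "buffer_size_kb" (see
 * tracing_entries_write() below) and via the "free_buffer" file, whose
 * release path shrinks the buffers back to zero.
 */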
4950
4951
4952 /**
4953  * tracing_update_buffers - used by the tracing facility to expand ring buffers
4954  *
4955  * To save memory on systems where tracing is configured in but never
4956  * used, the ring buffers start out at a minimum size. Once a user
4957  * starts to use the tracing facility, they need to grow to their
4958  * default size.
4959  *
4960  * This function is to be called when a tracer is about to be used.
4961  */
4962 int tracing_update_buffers(void)
4963 {
4964         int ret = 0;
4965
4966         mutex_lock(&trace_types_lock);
4967         if (!ring_buffer_expanded)
4968                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4969                                                 RING_BUFFER_ALL_CPUS);
4970         mutex_unlock(&trace_types_lock);
4971
4972         return ret;
4973 }
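/*
 * Illustrative example (editorial, not part of the original source): writing
 * to the "snapshot" file calls this (see tracing_snapshot_write() below), and
 * enabling a trace event typically does as well, expanding the boot-time
 * minimal buffers to trace_buf_size for all CPUs.
 */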
4974
4975 struct trace_option_dentry;
4976
4977 static void
4978 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4979
4980 /*
4981  * Used to clear out the tracer before deletion of an instance.
4982  * Must have trace_types_lock held.
4983  */
4984 static void tracing_set_nop(struct trace_array *tr)
4985 {
4986         if (tr->current_trace == &nop_trace)
4987                 return;
4988
4989         tr->current_trace->enabled--;
4990
4991         if (tr->current_trace->reset)
4992                 tr->current_trace->reset(tr);
4993
4994         tr->current_trace = &nop_trace;
4995 }
4996
4997 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4998 {
4999         /* Only enable if the directory has been created already. */
5000         if (!tr->dir)
5001                 return;
5002
5003         create_trace_option_files(tr, t);
5004 }
5005
5006 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5007 {
5008         struct tracer *t;
5009 #ifdef CONFIG_TRACER_MAX_TRACE
5010         bool had_max_tr;
5011 #endif
5012         int ret = 0;
5013
5014         mutex_lock(&trace_types_lock);
5015
5016         if (!ring_buffer_expanded) {
5017                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5018                                                 RING_BUFFER_ALL_CPUS);
5019                 if (ret < 0)
5020                         goto out;
5021                 ret = 0;
5022         }
5023
5024         for (t = trace_types; t; t = t->next) {
5025                 if (strcmp(t->name, buf) == 0)
5026                         break;
5027         }
5028         if (!t) {
5029                 ret = -EINVAL;
5030                 goto out;
5031         }
5032         if (t == tr->current_trace)
5033                 goto out;
5034
5035         /* Some tracers are only allowed for the top level buffer */
5036         if (!trace_ok_for_array(t, tr)) {
5037                 ret = -EINVAL;
5038                 goto out;
5039         }
5040
5041         /* If trace pipe files are being read, we can't change the tracer */
5042         if (tr->current_trace->ref) {
5043                 ret = -EBUSY;
5044                 goto out;
5045         }
5046
5047         trace_branch_disable();
5048
5049         tr->current_trace->enabled--;
5050
5051         if (tr->current_trace->reset)
5052                 tr->current_trace->reset(tr);
5053
5054         /* Current trace needs to be nop_trace before synchronize_sched */
5055         tr->current_trace = &nop_trace;
5056
5057 #ifdef CONFIG_TRACER_MAX_TRACE
5058         had_max_tr = tr->allocated_snapshot;
5059
5060         if (had_max_tr && !t->use_max_tr) {
5061                 /*
5062                  * We need to make sure that the update_max_tr sees that
5063                  * current_trace changed to nop_trace to keep it from
5064                  * swapping the buffers after we resize it.
5065                  * update_max_tr() is called with interrupts disabled,
5066                  * so a synchronize_sched() is sufficient.
5067                  */
5068                 synchronize_sched();
5069                 free_snapshot(tr);
5070         }
5071 #endif
5072
5073 #ifdef CONFIG_TRACER_MAX_TRACE
5074         if (t->use_max_tr && !had_max_tr) {
5075                 ret = alloc_snapshot(tr);
5076                 if (ret < 0)
5077                         goto out;
5078         }
5079 #endif
5080
5081         if (t->init) {
5082                 ret = tracer_init(t, tr);
5083                 if (ret)
5084                         goto out;
5085         }
5086
5087         tr->current_trace = t;
5088         tr->current_trace->enabled++;
5089         trace_branch_enable(tr);
5090  out:
5091         mutex_unlock(&trace_types_lock);
5092
5093         return ret;
5094 }
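/*
 * Illustrative usage (added for clarity): switching tracers from user space,
 * e.g. "echo function > /sys/kernel/tracing/current_tracer", lands in
 * tracing_set_trace_write() below, which strips the trailing newline and
 * calls tracing_set_tracer() with the tracer name.
 */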
5095
5096 static ssize_t
5097 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5098                         size_t cnt, loff_t *ppos)
5099 {
5100         struct trace_array *tr = filp->private_data;
5101         char buf[MAX_TRACER_SIZE+1];
5102         int i;
5103         size_t ret;
5104         int err;
5105
5106         ret = cnt;
5107
5108         if (cnt > MAX_TRACER_SIZE)
5109                 cnt = MAX_TRACER_SIZE;
5110
5111         if (copy_from_user(buf, ubuf, cnt))
5112                 return -EFAULT;
5113
5114         buf[cnt] = 0;
5115
5116         /* strip trailing whitespace */
5117         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5118                 buf[i] = 0;
5119
5120         err = tracing_set_tracer(tr, buf);
5121         if (err)
5122                 return err;
5123
5124         *ppos += ret;
5125
5126         return ret;
5127 }
5128
5129 static ssize_t
5130 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5131                    size_t cnt, loff_t *ppos)
5132 {
5133         char buf[64];
5134         int r;
5135
5136         r = snprintf(buf, sizeof(buf), "%ld\n",
5137                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5138         if (r > sizeof(buf))
5139                 r = sizeof(buf);
5140         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5141 }
5142
5143 static ssize_t
5144 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5145                     size_t cnt, loff_t *ppos)
5146 {
5147         unsigned long val;
5148         int ret;
5149
5150         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5151         if (ret)
5152                 return ret;
5153
5154         *ptr = val * 1000;
5155
5156         return cnt;
5157 }
5158
5159 static ssize_t
5160 tracing_thresh_read(struct file *filp, char __user *ubuf,
5161                     size_t cnt, loff_t *ppos)
5162 {
5163         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5164 }
5165
5166 static ssize_t
5167 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5168                      size_t cnt, loff_t *ppos)
5169 {
5170         struct trace_array *tr = filp->private_data;
5171         int ret;
5172
5173         mutex_lock(&trace_types_lock);
5174         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5175         if (ret < 0)
5176                 goto out;
5177
5178         if (tr->current_trace->update_thresh) {
5179                 ret = tr->current_trace->update_thresh(tr);
5180                 if (ret < 0)
5181                         goto out;
5182         }
5183
5184         ret = cnt;
5185 out:
5186         mutex_unlock(&trace_types_lock);
5187
5188         return ret;
5189 }
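/*
 * Note (editorial): the "tracing_thresh" file takes a value in microseconds;
 * tracing_nsecs_write() stores it internally in nanoseconds (val * 1000) and
 * tracing_nsecs_read() converts it back with nsecs_to_usecs(). For example,
 * "echo 100 > tracing_thresh" sets a 100 usec threshold for the latency
 * tracers that honor it.
 */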
5190
5191 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5192
5193 static ssize_t
5194 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5195                      size_t cnt, loff_t *ppos)
5196 {
5197         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5198 }
5199
5200 static ssize_t
5201 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5202                       size_t cnt, loff_t *ppos)
5203 {
5204         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5205 }
5206
5207 #endif
5208
5209 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5210 {
5211         struct trace_array *tr = inode->i_private;
5212         struct trace_iterator *iter;
5213         int ret = 0;
5214
5215         if (tracing_disabled)
5216                 return -ENODEV;
5217
5218         if (trace_array_get(tr) < 0)
5219                 return -ENODEV;
5220
5221         mutex_lock(&trace_types_lock);
5222
5223         /* create a buffer to store the information to pass to userspace */
5224         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5225         if (!iter) {
5226                 ret = -ENOMEM;
5227                 __trace_array_put(tr);
5228                 goto out;
5229         }
5230
5231         trace_seq_init(&iter->seq);
5232         iter->trace = tr->current_trace;
5233
5234         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5235                 ret = -ENOMEM;
5236                 goto fail;
5237         }
5238
5239         /* trace pipe does not show start of buffer */
5240         cpumask_setall(iter->started);
5241
5242         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5243                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5244
5245         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5246         if (trace_clocks[tr->clock_id].in_ns)
5247                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5248
5249         iter->tr = tr;
5250         iter->trace_buffer = &tr->trace_buffer;
5251         iter->cpu_file = tracing_get_cpu(inode);
5252         mutex_init(&iter->mutex);
5253         filp->private_data = iter;
5254
5255         if (iter->trace->pipe_open)
5256                 iter->trace->pipe_open(iter);
5257
5258         nonseekable_open(inode, filp);
5259
5260         tr->current_trace->ref++;
5261 out:
5262         mutex_unlock(&trace_types_lock);
5263         return ret;
5264
5265 fail:
5266         kfree(iter->trace);
5267         kfree(iter);
5268         __trace_array_put(tr);
5269         mutex_unlock(&trace_types_lock);
5270         return ret;
5271 }
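/*
 * Illustrative usage (editorial, not part of the original source):
 * "trace_pipe" is a consuming, blocking reader. Something like
 * "cat /sys/kernel/tracing/trace_pipe" opens it here, and each read removes
 * the returned events from the ring buffer; while a reader holds it open,
 * the current_trace->ref count keeps the tracer from being changed.
 */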
5272
5273 static int tracing_release_pipe(struct inode *inode, struct file *file)
5274 {
5275         struct trace_iterator *iter = file->private_data;
5276         struct trace_array *tr = inode->i_private;
5277
5278         mutex_lock(&trace_types_lock);
5279
5280         tr->current_trace->ref--;
5281
5282         if (iter->trace->pipe_close)
5283                 iter->trace->pipe_close(iter);
5284
5285         mutex_unlock(&trace_types_lock);
5286
5287         free_cpumask_var(iter->started);
5288         mutex_destroy(&iter->mutex);
5289         kfree(iter);
5290
5291         trace_array_put(tr);
5292
5293         return 0;
5294 }
5295
5296 static unsigned int
5297 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5298 {
5299         struct trace_array *tr = iter->tr;
5300
5301         /* Iterators are static; they should be either filled or empty */
5302         if (trace_buffer_iter(iter, iter->cpu_file))
5303                 return POLLIN | POLLRDNORM;
5304
5305         if (tr->trace_flags & TRACE_ITER_BLOCK)
5306                 /*
5307                  * Always select as readable when in blocking mode
5308                  */
5309                 return POLLIN | POLLRDNORM;
5310         else
5311                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5312                                              filp, poll_table);
5313 }
5314
5315 static unsigned int
5316 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5317 {
5318         struct trace_iterator *iter = filp->private_data;
5319
5320         return trace_poll(iter, filp, poll_table);
5321 }
5322
5323 /* Must be called with iter->mutex held. */
5324 static int tracing_wait_pipe(struct file *filp)
5325 {
5326         struct trace_iterator *iter = filp->private_data;
5327         int ret;
5328
5329         while (trace_empty(iter)) {
5330
5331                 if ((filp->f_flags & O_NONBLOCK)) {
5332                         return -EAGAIN;
5333                 }
5334
5335                 /*
5336                  * We block until we read something and tracing is disabled.
5337                  * We still block if tracing is disabled, as long as we have
5338                  * never read anything. This allows a user to cat this file and
5339                  * then enable tracing. But after we have read something,
5340                  * we give an EOF when tracing is disabled again.
5341                  *
5342                  * iter->pos will be 0 if we haven't read anything.
5343                  */
5344                 if (!tracing_is_on() && iter->pos)
5345                         break;
5346
5347                 mutex_unlock(&iter->mutex);
5348
5349                 ret = wait_on_pipe(iter, false);
5350
5351                 mutex_lock(&iter->mutex);
5352
5353                 if (ret)
5354                         return ret;
5355         }
5356
5357         return 1;
5358 }
5359
5360 /*
5361  * Consumer reader.
5362  */
5363 static ssize_t
5364 tracing_read_pipe(struct file *filp, char __user *ubuf,
5365                   size_t cnt, loff_t *ppos)
5366 {
5367         struct trace_iterator *iter = filp->private_data;
5368         ssize_t sret;
5369
5370         /*
5371          * Avoid more than one consumer on a single file descriptor.
5372          * This is just a matter of trace coherency; the ring buffer itself
5373          * is protected.
5374          */
5375         mutex_lock(&iter->mutex);
5376
5377         /* return any leftover data */
5378         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5379         if (sret != -EBUSY)
5380                 goto out;
5381
5382         trace_seq_init(&iter->seq);
5383
5384         if (iter->trace->read) {
5385                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5386                 if (sret)
5387                         goto out;
5388         }
5389
5390 waitagain:
5391         sret = tracing_wait_pipe(filp);
5392         if (sret <= 0)
5393                 goto out;
5394
5395         /* stop when tracing is finished */
5396         if (trace_empty(iter)) {
5397                 sret = 0;
5398                 goto out;
5399         }
5400
5401         if (cnt >= PAGE_SIZE)
5402                 cnt = PAGE_SIZE - 1;
5403
5404         /* reset all but tr, trace, and overruns */
5405         memset(&iter->seq, 0,
5406                sizeof(struct trace_iterator) -
5407                offsetof(struct trace_iterator, seq));
5408         cpumask_clear(iter->started);
5409         iter->pos = -1;
5410
5411         trace_event_read_lock();
5412         trace_access_lock(iter->cpu_file);
5413         while (trace_find_next_entry_inc(iter) != NULL) {
5414                 enum print_line_t ret;
5415                 int save_len = iter->seq.seq.len;
5416
5417                 ret = print_trace_line(iter);
5418                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5419                         /* don't print partial lines */
5420                         iter->seq.seq.len = save_len;
5421                         break;
5422                 }
5423                 if (ret != TRACE_TYPE_NO_CONSUME)
5424                         trace_consume(iter);
5425
5426                 if (trace_seq_used(&iter->seq) >= cnt)
5427                         break;
5428
5429                 /*
5430                  * Setting the full flag means we reached the trace_seq buffer
5431                  * size and we should have left via the partial-output condition above.
5432                  * One of the trace_seq_* functions is not used properly.
5433                  */
5434                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5435                           iter->ent->type);
5436         }
5437         trace_access_unlock(iter->cpu_file);
5438         trace_event_read_unlock();
5439
5440         /* Now copy what we have to the user */
5441         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5442         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5443                 trace_seq_init(&iter->seq);
5444
5445         /*
5446          * If there was nothing to send to user, in spite of consuming trace
5447          * entries, go back to wait for more entries.
5448          */
5449         if (sret == -EBUSY)
5450                 goto waitagain;
5451
5452 out:
5453         mutex_unlock(&iter->mutex);
5454
5455         return sret;
5456 }
5457
5458 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5459                                      unsigned int idx)
5460 {
5461         __free_page(spd->pages[idx]);
5462 }
5463
5464 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5465         .can_merge              = 0,
5466         .confirm                = generic_pipe_buf_confirm,
5467         .release                = generic_pipe_buf_release,
5468         .steal                  = generic_pipe_buf_steal,
5469         .get                    = generic_pipe_buf_get,
5470 };
5471
5472 static size_t
5473 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5474 {
5475         size_t count;
5476         int save_len;
5477         int ret;
5478
5479         /* Seq buffer is page-sized, exactly what we need. */
5480         for (;;) {
5481                 save_len = iter->seq.seq.len;
5482                 ret = print_trace_line(iter);
5483
5484                 if (trace_seq_has_overflowed(&iter->seq)) {
5485                         iter->seq.seq.len = save_len;
5486                         break;
5487                 }
5488
5489                 /*
5490                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5491                  * should only be returned if iter->seq overflowed, which is
5492                  * handled above. But check it anyway to be safe.
5493                  */
5494                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5495                         iter->seq.seq.len = save_len;
5496                         break;
5497                 }
5498
5499                 count = trace_seq_used(&iter->seq) - save_len;
5500                 if (rem < count) {
5501                         rem = 0;
5502                         iter->seq.seq.len = save_len;
5503                         break;
5504                 }
5505
5506                 if (ret != TRACE_TYPE_NO_CONSUME)
5507                         trace_consume(iter);
5508                 rem -= count;
5509                 if (!trace_find_next_entry_inc(iter))   {
5510                         rem = 0;
5511                         iter->ent = NULL;
5512                         break;
5513                 }
5514         }
5515
5516         return rem;
5517 }
5518
5519 static ssize_t tracing_splice_read_pipe(struct file *filp,
5520                                         loff_t *ppos,
5521                                         struct pipe_inode_info *pipe,
5522                                         size_t len,
5523                                         unsigned int flags)
5524 {
5525         struct page *pages_def[PIPE_DEF_BUFFERS];
5526         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5527         struct trace_iterator *iter = filp->private_data;
5528         struct splice_pipe_desc spd = {
5529                 .pages          = pages_def,
5530                 .partial        = partial_def,
5531                 .nr_pages       = 0, /* This gets updated below. */
5532                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5533                 .flags          = flags,
5534                 .ops            = &tracing_pipe_buf_ops,
5535                 .spd_release    = tracing_spd_release_pipe,
5536         };
5537         ssize_t ret;
5538         size_t rem;
5539         unsigned int i;
5540
5541         if (splice_grow_spd(pipe, &spd))
5542                 return -ENOMEM;
5543
5544         mutex_lock(&iter->mutex);
5545
5546         if (iter->trace->splice_read) {
5547                 ret = iter->trace->splice_read(iter, filp,
5548                                                ppos, pipe, len, flags);
5549                 if (ret)
5550                         goto out_err;
5551         }
5552
5553         ret = tracing_wait_pipe(filp);
5554         if (ret <= 0)
5555                 goto out_err;
5556
5557         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5558                 ret = -EFAULT;
5559                 goto out_err;
5560         }
5561
5562         trace_event_read_lock();
5563         trace_access_lock(iter->cpu_file);
5564
5565         /* Fill as many pages as possible. */
5566         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5567                 spd.pages[i] = alloc_page(GFP_KERNEL);
5568                 if (!spd.pages[i])
5569                         break;
5570
5571                 rem = tracing_fill_pipe_page(rem, iter);
5572
5573                 /* Copy the data into the page, so we can start over. */
5574                 ret = trace_seq_to_buffer(&iter->seq,
5575                                           page_address(spd.pages[i]),
5576                                           trace_seq_used(&iter->seq));
5577                 if (ret < 0) {
5578                         __free_page(spd.pages[i]);
5579                         break;
5580                 }
5581                 spd.partial[i].offset = 0;
5582                 spd.partial[i].len = trace_seq_used(&iter->seq);
5583
5584                 trace_seq_init(&iter->seq);
5585         }
5586
5587         trace_access_unlock(iter->cpu_file);
5588         trace_event_read_unlock();
5589         mutex_unlock(&iter->mutex);
5590
5591         spd.nr_pages = i;
5592
5593         if (i)
5594                 ret = splice_to_pipe(pipe, &spd);
5595         else
5596                 ret = 0;
5597 out:
5598         splice_shrink_spd(&spd);
5599         return ret;
5600
5601 out_err:
5602         mutex_unlock(&iter->mutex);
5603         goto out;
5604 }
5605
5606 static ssize_t
5607 tracing_entries_read(struct file *filp, char __user *ubuf,
5608                      size_t cnt, loff_t *ppos)
5609 {
5610         struct inode *inode = file_inode(filp);
5611         struct trace_array *tr = inode->i_private;
5612         int cpu = tracing_get_cpu(inode);
5613         char buf[64];
5614         int r = 0;
5615         ssize_t ret;
5616
5617         mutex_lock(&trace_types_lock);
5618
5619         if (cpu == RING_BUFFER_ALL_CPUS) {
5620                 int cpu, buf_size_same;
5621                 unsigned long size;
5622
5623                 size = 0;
5624                 buf_size_same = 1;
5625                 /* check if all CPU buffer sizes are the same */
5626                 for_each_tracing_cpu(cpu) {
5627                         /* fill in the size from the first enabled CPU */
5628                         if (size == 0)
5629                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5630                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5631                                 buf_size_same = 0;
5632                                 break;
5633                         }
5634                 }
5635
5636                 if (buf_size_same) {
5637                         if (!ring_buffer_expanded)
5638                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5639                                             size >> 10,
5640                                             trace_buf_size >> 10);
5641                         else
5642                                 r = sprintf(buf, "%lu\n", size >> 10);
5643                 } else
5644                         r = sprintf(buf, "X\n");
5645         } else
5646                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5647
5648         mutex_unlock(&trace_types_lock);
5649
5650         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5651         return ret;
5652 }
5653
5654 static ssize_t
5655 tracing_entries_write(struct file *filp, const char __user *ubuf,
5656                       size_t cnt, loff_t *ppos)
5657 {
5658         struct inode *inode = file_inode(filp);
5659         struct trace_array *tr = inode->i_private;
5660         unsigned long val;
5661         int ret;
5662
5663         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5664         if (ret)
5665                 return ret;
5666
5667         /* must have at least 1 entry */
5668         if (!val)
5669                 return -EINVAL;
5670
5671         /* value is in KB */
5672         val <<= 10;
5673         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5674         if (ret < 0)
5675                 return ret;
5676
5677         *ppos += cnt;
5678
5679         return cnt;
5680 }
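/*
 * Illustrative usage (added for clarity): "buffer_size_kb" takes kilobytes,
 * so "echo 1408 > buffer_size_kb" resizes each per-CPU buffer to 1408 KB
 * (val <<= 10 above). Writing to per_cpu/cpuN/buffer_size_kb resizes only
 * that CPU, via tracing_get_cpu(inode).
 */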
5681
5682 static ssize_t
5683 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5684                                 size_t cnt, loff_t *ppos)
5685 {
5686         struct trace_array *tr = filp->private_data;
5687         char buf[64];
5688         int r, cpu;
5689         unsigned long size = 0, expanded_size = 0;
5690
5691         mutex_lock(&trace_types_lock);
5692         for_each_tracing_cpu(cpu) {
5693                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5694                 if (!ring_buffer_expanded)
5695                         expanded_size += trace_buf_size >> 10;
5696         }
5697         if (ring_buffer_expanded)
5698                 r = sprintf(buf, "%lu\n", size);
5699         else
5700                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5701         mutex_unlock(&trace_types_lock);
5702
5703         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5704 }
5705
5706 static ssize_t
5707 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5708                           size_t cnt, loff_t *ppos)
5709 {
5710         /*
5711          * There is no need to read what the user has written; this function
5712          * exists just to make sure that there is no error when "echo" is used.
5713          */
5714
5715         *ppos += cnt;
5716
5717         return cnt;
5718 }
5719
5720 static int
5721 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5722 {
5723         struct trace_array *tr = inode->i_private;
5724
5725         /* disable tracing? */
5726         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5727                 tracer_tracing_off(tr);
5728         /* resize the ring buffer to 0 */
5729         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5730
5731         trace_array_put(tr);
5732
5733         return 0;
5734 }
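/*
 * Note (editorial): closing the "free_buffer" file shrinks the ring buffers
 * to zero, and with the TRACE_ITER_STOP_ON_FREE flag set it also turns
 * tracing off first; writes to the file are accepted but ignored, so that
 * "echo > free_buffer" works from the shell.
 */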
5735
5736 static ssize_t
5737 tracing_mark_write(struct file *filp, const char __user *ubuf,
5738                                         size_t cnt, loff_t *fpos)
5739 {
5740         struct trace_array *tr = filp->private_data;
5741         struct ring_buffer_event *event;
5742         struct ring_buffer *buffer;
5743         struct print_entry *entry;
5744         unsigned long irq_flags;
5745         const char faulted[] = "<faulted>";
5746         ssize_t written;
5747         int size;
5748         int len;
5749
5750 /* Used in tracing_mark_raw_write() as well */
5751 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5752
5753         if (tracing_disabled)
5754                 return -EINVAL;
5755
5756         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5757                 return -EINVAL;
5758
5759         if (cnt > TRACE_BUF_SIZE)
5760                 cnt = TRACE_BUF_SIZE;
5761
5762         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5763
5764         local_save_flags(irq_flags);
5765         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5766
5767         /* If less than "<faulted>", then make sure we can still add that */
5768         if (cnt < FAULTED_SIZE)
5769                 size += FAULTED_SIZE - cnt;
5770
5771         buffer = tr->trace_buffer.buffer;
5772         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5773                                             irq_flags, preempt_count());
5774         if (unlikely(!event))
5775                 /* Ring buffer disabled, return as if not open for write */
5776                 return -EBADF;
5777
5778         entry = ring_buffer_event_data(event);
5779         entry->ip = _THIS_IP_;
5780
5781         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5782         if (len) {
5783                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5784                 cnt = FAULTED_SIZE;
5785                 written = -EFAULT;
5786         } else
5787                 written = cnt;
5788         len = cnt;
5789
5790         if (entry->buf[cnt - 1] != '\n') {
5791                 entry->buf[cnt] = '\n';
5792                 entry->buf[cnt + 1] = '\0';
5793         } else
5794                 entry->buf[cnt] = '\0';
5795
5796         __buffer_unlock_commit(buffer, event);
5797
5798         if (written > 0)
5799                 *fpos += written;
5800
5801         return written;
5802 }
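/*
 * Illustrative usage (editorial, not part of the original source): user space
 * can annotate a trace with "echo hello > /sys/kernel/tracing/trace_marker";
 * the string is recorded as a TRACE_PRINT entry, and "<faulted>" is
 * substituted if the user page could not be copied atomically.
 */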
5803
5804 /* Limit it for now to 3K (including tag) */
5805 #define RAW_DATA_MAX_SIZE (1024*3)
5806
5807 static ssize_t
5808 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5809                                         size_t cnt, loff_t *fpos)
5810 {
5811         struct trace_array *tr = filp->private_data;
5812         struct ring_buffer_event *event;
5813         struct ring_buffer *buffer;
5814         struct raw_data_entry *entry;
5815         const char faulted[] = "<faulted>";
5816         unsigned long irq_flags;
5817         ssize_t written;
5818         int size;
5819         int len;
5820
5821 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5822
5823         if (tracing_disabled)
5824                 return -EINVAL;
5825
5826         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5827                 return -EINVAL;
5828
5829         /* The marker must at least have a tag id */
5830         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5831                 return -EINVAL;
5832
5833         if (cnt > TRACE_BUF_SIZE)
5834                 cnt = TRACE_BUF_SIZE;
5835
5836         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5837
5838         local_save_flags(irq_flags);
5839         size = sizeof(*entry) + cnt;
5840         if (cnt < FAULT_SIZE_ID)
5841                 size += FAULT_SIZE_ID - cnt;
5842
5843         buffer = tr->trace_buffer.buffer;
5844         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5845                                             irq_flags, preempt_count());
5846         if (!event)
5847                 /* Ring buffer disabled, return as if not open for write */
5848                 return -EBADF;
5849
5850         entry = ring_buffer_event_data(event);
5851
5852         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5853         if (len) {
5854                 entry->id = -1;
5855                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5856                 written = -EFAULT;
5857         } else
5858                 written = cnt;
5859
5860         __buffer_unlock_commit(buffer, event);
5861
5862         if (written > 0)
5863                 *fpos += written;
5864
5865         return written;
5866 }
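/*
 * Note (editorial): "trace_marker_raw" expects a binary payload that starts
 * with a 4-byte tag id (hence the cnt < sizeof(unsigned int) check above);
 * the id is set to -1 and "<faulted>" recorded if the copy from user space
 * fails.
 */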
5867
5868 static int tracing_clock_show(struct seq_file *m, void *v)
5869 {
5870         struct trace_array *tr = m->private;
5871         int i;
5872
5873         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5874                 seq_printf(m,
5875                         "%s%s%s%s", i ? " " : "",
5876                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5877                         i == tr->clock_id ? "]" : "");
5878         seq_putc(m, '\n');
5879
5880         return 0;
5881 }
5882
5883 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5884 {
5885         int i;
5886
5887         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5888                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5889                         break;
5890         }
5891         if (i == ARRAY_SIZE(trace_clocks))
5892                 return -EINVAL;
5893
5894         mutex_lock(&trace_types_lock);
5895
5896         tr->clock_id = i;
5897
5898         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5899
5900         /*
5901          * New clock may not be consistent with the previous clock.
5902          * Reset the buffer so that it doesn't have incomparable timestamps.
5903          */
5904         tracing_reset_online_cpus(&tr->trace_buffer);
5905
5906 #ifdef CONFIG_TRACER_MAX_TRACE
5907         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5908                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5909         tracing_reset_online_cpus(&tr->max_buffer);
5910 #endif
5911
5912         mutex_unlock(&trace_types_lock);
5913
5914         return 0;
5915 }
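/*
 * Illustrative usage (added for clarity): "cat trace_clock" lists the
 * available clocks with the current one in brackets (see tracing_clock_show()
 * above), and e.g. "echo global > trace_clock" switches clocks. The buffers
 * are reset on a switch because timestamps from different clocks are not
 * comparable.
 */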
5916
5917 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5918                                    size_t cnt, loff_t *fpos)
5919 {
5920         struct seq_file *m = filp->private_data;
5921         struct trace_array *tr = m->private;
5922         char buf[64];
5923         const char *clockstr;
5924         int ret;
5925
5926         if (cnt >= sizeof(buf))
5927                 return -EINVAL;
5928
5929         if (copy_from_user(buf, ubuf, cnt))
5930                 return -EFAULT;
5931
5932         buf[cnt] = 0;
5933
5934         clockstr = strstrip(buf);
5935
5936         ret = tracing_set_clock(tr, clockstr);
5937         if (ret)
5938                 return ret;
5939
5940         *fpos += cnt;
5941
5942         return cnt;
5943 }
5944
5945 static int tracing_clock_open(struct inode *inode, struct file *file)
5946 {
5947         struct trace_array *tr = inode->i_private;
5948         int ret;
5949
5950         if (tracing_disabled)
5951                 return -ENODEV;
5952
5953         if (trace_array_get(tr))
5954                 return -ENODEV;
5955
5956         ret = single_open(file, tracing_clock_show, inode->i_private);
5957         if (ret < 0)
5958                 trace_array_put(tr);
5959
5960         return ret;
5961 }
5962
5963 struct ftrace_buffer_info {
5964         struct trace_iterator   iter;
5965         void                    *spare;
5966         unsigned int            read;
5967 };
5968
5969 #ifdef CONFIG_TRACER_SNAPSHOT
5970 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5971 {
5972         struct trace_array *tr = inode->i_private;
5973         struct trace_iterator *iter;
5974         struct seq_file *m;
5975         int ret = 0;
5976
5977         if (trace_array_get(tr) < 0)
5978                 return -ENODEV;
5979
5980         if (file->f_mode & FMODE_READ) {
5981                 iter = __tracing_open(inode, file, true);
5982                 if (IS_ERR(iter))
5983                         ret = PTR_ERR(iter);
5984         } else {
5985                 /* Writes still need the seq_file to hold the private data */
5986                 ret = -ENOMEM;
5987                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5988                 if (!m)
5989                         goto out;
5990                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5991                 if (!iter) {
5992                         kfree(m);
5993                         goto out;
5994                 }
5995                 ret = 0;
5996
5997                 iter->tr = tr;
5998                 iter->trace_buffer = &tr->max_buffer;
5999                 iter->cpu_file = tracing_get_cpu(inode);
6000                 m->private = iter;
6001                 file->private_data = m;
6002         }
6003 out:
6004         if (ret < 0)
6005                 trace_array_put(tr);
6006
6007         return ret;
6008 }
6009
6010 static ssize_t
6011 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6012                        loff_t *ppos)
6013 {
6014         struct seq_file *m = filp->private_data;
6015         struct trace_iterator *iter = m->private;
6016         struct trace_array *tr = iter->tr;
6017         unsigned long val;
6018         int ret;
6019
6020         ret = tracing_update_buffers();
6021         if (ret < 0)
6022                 return ret;
6023
6024         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6025         if (ret)
6026                 return ret;
6027
6028         mutex_lock(&trace_types_lock);
6029
6030         if (tr->current_trace->use_max_tr) {
6031                 ret = -EBUSY;
6032                 goto out;
6033         }
6034
6035         switch (val) {
6036         case 0:
6037                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6038                         ret = -EINVAL;
6039                         break;
6040                 }
6041                 if (tr->allocated_snapshot)
6042                         free_snapshot(tr);
6043                 break;
6044         case 1:
6045 /* Only allow per-cpu swap if the ring buffer supports it */
6046 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6047                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6048                         ret = -EINVAL;
6049                         break;
6050                 }
6051 #endif
6052                 if (!tr->allocated_snapshot) {
6053                         ret = alloc_snapshot(tr);
6054                         if (ret < 0)
6055                                 break;
6056                 }
6057                 local_irq_disable();
6058                 /* Now, we're going to swap */
6059                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6060                         update_max_tr(tr, current, smp_processor_id());
6061                 else
6062                         update_max_tr_single(tr, current, iter->cpu_file);
6063                 local_irq_enable();
6064                 break;
6065         default:
6066                 if (tr->allocated_snapshot) {
6067                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6068                                 tracing_reset_online_cpus(&tr->max_buffer);
6069                         else
6070                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6071                 }
6072                 break;
6073         }
6074
6075         if (ret >= 0) {
6076                 *ppos += cnt;
6077                 ret = cnt;
6078         }
6079 out:
6080         mutex_unlock(&trace_types_lock);
6081         return ret;
6082 }
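/*
 * Note (editorial): the values written to the "snapshot" file map to the
 * cases above: 0 frees the snapshot buffer, 1 allocates it if needed and
 * swaps it with the live buffer, and any other value clears the snapshot
 * buffer contents. For example, "echo 1 > snapshot" followed by
 * "cat snapshot" captures and then reads a snapshot.
 */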
6083
6084 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6085 {
6086         struct seq_file *m = file->private_data;
6087         int ret;
6088
6089         ret = tracing_release(inode, file);
6090
6091         if (file->f_mode & FMODE_READ)
6092                 return ret;
6093
6094         /* If write only, the seq_file is just a stub */
6095         if (m)
6096                 kfree(m->private);
6097         kfree(m);
6098
6099         return 0;
6100 }
6101
6102 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6103 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6104                                     size_t count, loff_t *ppos);
6105 static int tracing_buffers_release(struct inode *inode, struct file *file);
6106 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6107                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6108
6109 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6110 {
6111         struct ftrace_buffer_info *info;
6112         int ret;
6113
6114         ret = tracing_buffers_open(inode, filp);
6115         if (ret < 0)
6116                 return ret;
6117
6118         info = filp->private_data;
6119
6120         if (info->iter.trace->use_max_tr) {
6121                 tracing_buffers_release(inode, filp);
6122                 return -EBUSY;
6123         }
6124
6125         info->iter.snapshot = true;
6126         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6127
6128         return ret;
6129 }
6130
6131 #endif /* CONFIG_TRACER_SNAPSHOT */
6132
6133
6134 static const struct file_operations tracing_thresh_fops = {
6135         .open           = tracing_open_generic,
6136         .read           = tracing_thresh_read,
6137         .write          = tracing_thresh_write,
6138         .llseek         = generic_file_llseek,
6139 };
6140
6141 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6142 static const struct file_operations tracing_max_lat_fops = {
6143         .open           = tracing_open_generic,
6144         .read           = tracing_max_lat_read,
6145         .write          = tracing_max_lat_write,
6146         .llseek         = generic_file_llseek,
6147 };
6148 #endif
6149
6150 static const struct file_operations set_tracer_fops = {
6151         .open           = tracing_open_generic,
6152         .read           = tracing_set_trace_read,
6153         .write          = tracing_set_trace_write,
6154         .llseek         = generic_file_llseek,
6155 };
6156
6157 static const struct file_operations tracing_pipe_fops = {
6158         .open           = tracing_open_pipe,
6159         .poll           = tracing_poll_pipe,
6160         .read           = tracing_read_pipe,
6161         .splice_read    = tracing_splice_read_pipe,
6162         .release        = tracing_release_pipe,
6163         .llseek         = no_llseek,
6164 };
6165
6166 static const struct file_operations tracing_entries_fops = {
6167         .open           = tracing_open_generic_tr,
6168         .read           = tracing_entries_read,
6169         .write          = tracing_entries_write,
6170         .llseek         = generic_file_llseek,
6171         .release        = tracing_release_generic_tr,
6172 };
6173
6174 static const struct file_operations tracing_total_entries_fops = {
6175         .open           = tracing_open_generic_tr,
6176         .read           = tracing_total_entries_read,
6177         .llseek         = generic_file_llseek,
6178         .release        = tracing_release_generic_tr,
6179 };
6180
6181 static const struct file_operations tracing_free_buffer_fops = {
6182         .open           = tracing_open_generic_tr,
6183         .write          = tracing_free_buffer_write,
6184         .release        = tracing_free_buffer_release,
6185 };
6186
6187 static const struct file_operations tracing_mark_fops = {
6188         .open           = tracing_open_generic_tr,
6189         .write          = tracing_mark_write,
6190         .llseek         = generic_file_llseek,
6191         .release        = tracing_release_generic_tr,
6192 };
6193
6194 static const struct file_operations tracing_mark_raw_fops = {
6195         .open           = tracing_open_generic_tr,
6196         .write          = tracing_mark_raw_write,
6197         .llseek         = generic_file_llseek,
6198         .release        = tracing_release_generic_tr,
6199 };
6200
6201 static const struct file_operations trace_clock_fops = {
6202         .open           = tracing_clock_open,
6203         .read           = seq_read,
6204         .llseek         = seq_lseek,
6205         .release        = tracing_single_release_tr,
6206         .write          = tracing_clock_write,
6207 };
6208
6209 #ifdef CONFIG_TRACER_SNAPSHOT
6210 static const struct file_operations snapshot_fops = {
6211         .open           = tracing_snapshot_open,
6212         .read           = seq_read,
6213         .write          = tracing_snapshot_write,
6214         .llseek         = tracing_lseek,
6215         .release        = tracing_snapshot_release,
6216 };
6217
6218 static const struct file_operations snapshot_raw_fops = {
6219         .open           = snapshot_raw_open,
6220         .read           = tracing_buffers_read,
6221         .release        = tracing_buffers_release,
6222         .splice_read    = tracing_buffers_splice_read,
6223         .llseek         = no_llseek,
6224 };
6225
6226 #endif /* CONFIG_TRACER_SNAPSHOT */
6227
6228 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6229 {
6230         struct trace_array *tr = inode->i_private;
6231         struct ftrace_buffer_info *info;
6232         int ret;
6233
6234         if (tracing_disabled)
6235                 return -ENODEV;
6236
6237         if (trace_array_get(tr) < 0)
6238                 return -ENODEV;
6239
6240         info = kzalloc(sizeof(*info), GFP_KERNEL);
6241         if (!info) {
6242                 trace_array_put(tr);
6243                 return -ENOMEM;
6244         }
6245
6246         mutex_lock(&trace_types_lock);
6247
6248         info->iter.tr           = tr;
6249         info->iter.cpu_file     = tracing_get_cpu(inode);
6250         info->iter.trace        = tr->current_trace;
6251         info->iter.trace_buffer = &tr->trace_buffer;
6252         info->spare             = NULL;
6253         /* Force reading ring buffer for first read */
6254         info->read              = (unsigned int)-1;
6255
6256         filp->private_data = info;
6257
6258         tr->current_trace->ref++;
6259
6260         mutex_unlock(&trace_types_lock);
6261
6262         ret = nonseekable_open(inode, filp);
6263         if (ret < 0)
6264                 trace_array_put(tr);
6265
6266         return ret;
6267 }
6268
6269 static unsigned int
6270 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6271 {
6272         struct ftrace_buffer_info *info = filp->private_data;
6273         struct trace_iterator *iter = &info->iter;
6274
6275         return trace_poll(iter, filp, poll_table);
6276 }
6277
6278 static ssize_t
6279 tracing_buffers_read(struct file *filp, char __user *ubuf,
6280                      size_t count, loff_t *ppos)
6281 {
6282         struct ftrace_buffer_info *info = filp->private_data;
6283         struct trace_iterator *iter = &info->iter;
6284         ssize_t ret;
6285         ssize_t size;
6286
6287         if (!count)
6288                 return 0;
6289
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6292                 return -EBUSY;
6293 #endif
6294
6295         if (!info->spare)
6296                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6297                                                           iter->cpu_file);
6298         if (!info->spare)
6299                 return -ENOMEM;
6300
6301         /* Do we have previous read data to read? */
6302         if (info->read < PAGE_SIZE)
6303                 goto read;
6304
6305  again:
6306         trace_access_lock(iter->cpu_file);
6307         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6308                                     &info->spare,
6309                                     count,
6310                                     iter->cpu_file, 0);
6311         trace_access_unlock(iter->cpu_file);
6312
6313         if (ret < 0) {
6314                 if (trace_empty(iter)) {
6315                         if ((filp->f_flags & O_NONBLOCK))
6316                                 return -EAGAIN;
6317
6318                         ret = wait_on_pipe(iter, false);
6319                         if (ret)
6320                                 return ret;
6321
6322                         goto again;
6323                 }
6324                 return 0;
6325         }
6326
6327         info->read = 0;
6328  read:
6329         size = PAGE_SIZE - info->read;
6330         if (size > count)
6331                 size = count;
6332
6333         ret = copy_to_user(ubuf, info->spare + info->read, size);
6334         if (ret == size)
6335                 return -EFAULT;
6336
6337         size -= ret;
6338
6339         *ppos += size;
6340         info->read += size;
6341
6342         return size;
6343 }
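/*
 * Illustrative note (editorial, not part of the original source): this read
 * path backs the per_cpu/cpuN/trace_pipe_raw files, handing whole
 * ring-buffer pages of binary data to user space; info->spare caches one
 * page between partial reads, and info->read tracks how much of that page
 * has already been consumed.
 */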
6344
6345 static int tracing_buffers_release(struct inode *inode, struct file *file)
6346 {
6347         struct ftrace_buffer_info *info = file->private_data;
6348         struct trace_iterator *iter = &info->iter;
6349
6350         mutex_lock(&trace_types_lock);
6351
6352         iter->tr->current_trace->ref--;
6353
6354         __trace_array_put(iter->tr);
6355
6356         if (info->spare)
6357                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6358         kfree(info);
6359
6360         mutex_unlock(&trace_types_lock);
6361
6362         return 0;
6363 }
6364
6365 struct buffer_ref {
6366         struct ring_buffer      *buffer;
6367         void                    *page;
6368         int                     ref;
6369 };
6370
6371 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6372                                     struct pipe_buffer *buf)
6373 {
6374         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6375
6376         if (--ref->ref)
6377                 return;
6378
6379         ring_buffer_free_read_page(ref->buffer, ref->page);
6380         kfree(ref);
6381         buf->private = 0;
6382 }
6383
6384 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6385                                 struct pipe_buffer *buf)
6386 {
6387         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6388
6389         ref->ref++;
6390 }
6391
6392 /* Pipe buffer operations for a buffer. */
6393 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6394         .can_merge              = 0,
6395         .confirm                = generic_pipe_buf_confirm,
6396         .release                = buffer_pipe_buf_release,
6397         .steal                  = generic_pipe_buf_steal,
6398         .get                    = buffer_pipe_buf_get,
6399 };
6400
6401 /*
6402  * Callback from splice_to_pipe(): release any pages left in the spd
6403  * in case we errored out while filling the pipe.
6404  */
6405 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6406 {
6407         struct buffer_ref *ref =
6408                 (struct buffer_ref *)spd->partial[i].private;
6409
6410         if (--ref->ref)
6411                 return;
6412
6413         ring_buffer_free_read_page(ref->buffer, ref->page);
6414         kfree(ref);
6415         spd->partial[i].private = 0;
6416 }
6417
6418 static ssize_t
6419 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6420                             struct pipe_inode_info *pipe, size_t len,
6421                             unsigned int flags)
6422 {
6423         struct ftrace_buffer_info *info = file->private_data;
6424         struct trace_iterator *iter = &info->iter;
6425         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6426         struct page *pages_def[PIPE_DEF_BUFFERS];
6427         struct splice_pipe_desc spd = {
6428                 .pages          = pages_def,
6429                 .partial        = partial_def,
6430                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6431                 .flags          = flags,
6432                 .ops            = &buffer_pipe_buf_ops,
6433                 .spd_release    = buffer_spd_release,
6434         };
6435         struct buffer_ref *ref;
6436         int entries, size, i;
6437         ssize_t ret = 0;
6438
6439 #ifdef CONFIG_TRACER_MAX_TRACE
6440         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6441                 return -EBUSY;
6442 #endif
6443
6444         if (*ppos & (PAGE_SIZE - 1))
6445                 return -EINVAL;
6446
6447         if (len & (PAGE_SIZE - 1)) {
6448                 if (len < PAGE_SIZE)
6449                         return -EINVAL;
6450                 len &= PAGE_MASK;
6451         }
6452
6453         if (splice_grow_spd(pipe, &spd))
6454                 return -ENOMEM;
6455
6456  again:
6457         trace_access_lock(iter->cpu_file);
6458         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6459
6460         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6461                 struct page *page;
6462                 int r;
6463
6464                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6465                 if (!ref) {
6466                         ret = -ENOMEM;
6467                         break;
6468                 }
6469
6470                 ref->ref = 1;
6471                 ref->buffer = iter->trace_buffer->buffer;
6472                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6473                 if (!ref->page) {
6474                         ret = -ENOMEM;
6475                         kfree(ref);
6476                         break;
6477                 }
6478
6479                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6480                                           len, iter->cpu_file, 1);
6481                 if (r < 0) {
6482                         ring_buffer_free_read_page(ref->buffer, ref->page);
6483                         kfree(ref);
6484                         break;
6485                 }
6486
6487                 /*
6488                  * Zero out any leftover data; this page is going
6489                  * to user land.
6490                  */
6491                 size = ring_buffer_page_len(ref->page);
6492                 if (size < PAGE_SIZE)
6493                         memset(ref->page + size, 0, PAGE_SIZE - size);
6494
6495                 page = virt_to_page(ref->page);
6496
6497                 spd.pages[i] = page;
6498                 spd.partial[i].len = PAGE_SIZE;
6499                 spd.partial[i].offset = 0;
6500                 spd.partial[i].private = (unsigned long)ref;
6501                 spd.nr_pages++;
6502                 *ppos += PAGE_SIZE;
6503
6504                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6505         }
6506
6507         trace_access_unlock(iter->cpu_file);
6508         spd.nr_pages = i;
6509
6510         /* did we read anything? */
6511         if (!spd.nr_pages) {
6512                 if (ret)
6513                         goto out;
6514
6515                 ret = -EAGAIN;
6516                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6517                         goto out;
6518
6519                 ret = wait_on_pipe(iter, true);
6520                 if (ret)
6521                         goto out;
6522
6523                 goto again;
6524         }
6525
6526         ret = splice_to_pipe(pipe, &spd);
6527 out:
6528         splice_shrink_spd(&spd);
6529
6530         return ret;
6531 }
6532
6533 static const struct file_operations tracing_buffers_fops = {
6534         .open           = tracing_buffers_open,
6535         .read           = tracing_buffers_read,
6536         .poll           = tracing_buffers_poll,
6537         .release        = tracing_buffers_release,
6538         .splice_read    = tracing_buffers_splice_read,
6539         .llseek         = no_llseek,
6540 };
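
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * trace_pipe_raw file backed by the fops above hands out raw ring
 * buffer pages.  read() copies data out through the per-open spare
 * page allocated in tracing_buffers_read(), while splice() moves whole
 * pages without copying via the buffer_ref machinery, roughly:
 *
 *	int fd  = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	int out = open("cpu0.raw", O_WRONLY | O_CREAT, 0644);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	splice(fd, NULL, pfd[1], NULL, 4096, 0);
 *	splice(pfd[0], NULL, out, NULL, 4096, 0);
 *
 * The file names and the 4096-byte chunk are only assumptions for the
 * example; tracing_buffers_splice_read() only accepts page-aligned
 * offsets and at least a page of data.  Tools such as trace-cmd
 * implement this loop in earnest.
 */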
6541
6542 static ssize_t
6543 tracing_stats_read(struct file *filp, char __user *ubuf,
6544                    size_t count, loff_t *ppos)
6545 {
6546         struct inode *inode = file_inode(filp);
6547         struct trace_array *tr = inode->i_private;
6548         struct trace_buffer *trace_buf = &tr->trace_buffer;
6549         int cpu = tracing_get_cpu(inode);
6550         struct trace_seq *s;
6551         unsigned long cnt;
6552         unsigned long long t;
6553         unsigned long usec_rem;
6554
6555         s = kmalloc(sizeof(*s), GFP_KERNEL);
6556         if (!s)
6557                 return -ENOMEM;
6558
6559         trace_seq_init(s);
6560
6561         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6562         trace_seq_printf(s, "entries: %ld\n", cnt);
6563
6564         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6565         trace_seq_printf(s, "overrun: %ld\n", cnt);
6566
6567         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6568         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6569
6570         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6571         trace_seq_printf(s, "bytes: %ld\n", cnt);
6572
6573         if (trace_clocks[tr->clock_id].in_ns) {
6574                 /* local or global for trace_clock */
6575                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6576                 usec_rem = do_div(t, USEC_PER_SEC);
6577                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6578                                                                 t, usec_rem);
6579
6580                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6581                 usec_rem = do_div(t, USEC_PER_SEC);
6582                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6583         } else {
6584                 /* counter or tsc mode for trace_clock */
6585                 trace_seq_printf(s, "oldest event ts: %llu\n",
6586                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6587
6588                 trace_seq_printf(s, "now ts: %llu\n",
6589                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6590         }
6591
6592         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6593         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6594
6595         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6596         trace_seq_printf(s, "read events: %ld\n", cnt);
6597
6598         count = simple_read_from_buffer(ubuf, count, ppos,
6599                                         s->buffer, trace_seq_used(s));
6600
6601         kfree(s);
6602
6603         return count;
6604 }
6605
6606 static const struct file_operations tracing_stats_fops = {
6607         .open           = tracing_open_generic_tr,
6608         .read           = tracing_stats_read,
6609         .llseek         = generic_file_llseek,
6610         .release        = tracing_release_generic_tr,
6611 };
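
/*
 * Example of what a read of per_cpu/cpuN/stats produces with the code
 * above (the numbers are made up; the two "ts" lines use the
 * seconds.microseconds form only when the current trace clock counts
 * in nanoseconds):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7724
 *	oldest event ts: 124503.123456
 *	now ts: 124505.654321
 *	dropped events: 0
 *	read events: 129
 */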
6612
6613 #ifdef CONFIG_DYNAMIC_FTRACE
6614
6615 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6616 {
6617         return 0;
6618 }
6619
6620 static ssize_t
6621 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6622                   size_t cnt, loff_t *ppos)
6623 {
6624         static char ftrace_dyn_info_buffer[1024];
6625         static DEFINE_MUTEX(dyn_info_mutex);
6626         unsigned long *p = filp->private_data;
6627         char *buf = ftrace_dyn_info_buffer;
6628         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6629         int r;
6630
6631         mutex_lock(&dyn_info_mutex);
6632         r = sprintf(buf, "%ld ", *p);
6633
6634         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6635         buf[r++] = '\n';
6636
6637         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6638
6639         mutex_unlock(&dyn_info_mutex);
6640
6641         return r;
6642 }
6643
6644 static const struct file_operations tracing_dyn_info_fops = {
6645         .open           = tracing_open_generic,
6646         .read           = tracing_read_dyn_info,
6647         .llseek         = generic_file_llseek,
6648 };
6649 #endif /* CONFIG_DYNAMIC_FTRACE */
6650
6651 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6652 static void
6653 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6654 {
6655         tracing_snapshot();
6656 }
6657
6658 static void
6659 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6660 {
6661         unsigned long *count = (unsigned long *)data;
6662
6663         if (!*count)
6664                 return;
6665
6666         if (*count != -1)
6667                 (*count)--;
6668
6669         tracing_snapshot();
6670 }
6671
6672 static int
6673 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6674                       struct ftrace_probe_ops *ops, void *data)
6675 {
6676         long count = (long)data;
6677
6678         seq_printf(m, "%ps:", (void *)ip);
6679
6680         seq_puts(m, "snapshot");
6681
6682         if (count == -1)
6683                 seq_puts(m, ":unlimited\n");
6684         else
6685                 seq_printf(m, ":count=%ld\n", count);
6686
6687         return 0;
6688 }
6689
6690 static struct ftrace_probe_ops snapshot_probe_ops = {
6691         .func                   = ftrace_snapshot,
6692         .print                  = ftrace_snapshot_print,
6693 };
6694
6695 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6696         .func                   = ftrace_count_snapshot,
6697         .print                  = ftrace_snapshot_print,
6698 };
6699
6700 static int
6701 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6702                                char *glob, char *cmd, char *param, int enable)
6703 {
6704         struct ftrace_probe_ops *ops;
6705         void *count = (void *)-1;
6706         char *number;
6707         int ret;
6708
6709         /* hash funcs only work with set_ftrace_filter */
6710         if (!enable)
6711                 return -EINVAL;
6712
6713         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6714
6715         if (glob[0] == '!') {
6716                 unregister_ftrace_function_probe_func(glob+1, ops);
6717                 return 0;
6718         }
6719
6720         if (!param)
6721                 goto out_reg;
6722
6723         number = strsep(&param, ":");
6724
6725         if (!strlen(number))
6726                 goto out_reg;
6727
6728         /*
6729          * We use the callback data field (which is a pointer)
6730          * as our counter.
6731          */
6732         ret = kstrtoul(number, 0, (unsigned long *)&count);
6733         if (ret)
6734                 return ret;
6735
6736  out_reg:
6737         ret = alloc_snapshot(&global_trace);
6738         if (ret < 0)
6739                 goto out;
6740
6741         ret = register_ftrace_function_probe(glob, ops, count);
6742
6743  out:
6744         return ret < 0 ? ret : 0;
6745 }
6746
6747 static struct ftrace_func_command ftrace_snapshot_cmd = {
6748         .name                   = "snapshot",
6749         .func                   = ftrace_trace_snapshot_callback,
6750 };
6751
6752 static __init int register_snapshot_cmd(void)
6753 {
6754         return register_ftrace_command(&ftrace_snapshot_cmd);
6755 }
6756 #else
6757 static inline __init int register_snapshot_cmd(void) { return 0; }
6758 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
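
/*
 * Example use of the "snapshot" command registered above, written to
 * the set_ftrace_filter file ("schedule" is only an example function;
 * a trailing count limits how many snapshots are taken, and a leading
 * '!' removes the probe again):
 *
 *	echo 'schedule:snapshot'   > set_ftrace_filter
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *	echo '!schedule:snapshot'  > set_ftrace_filter
 */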
6759
6760 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6761 {
6762         if (WARN_ON(!tr->dir))
6763                 return ERR_PTR(-ENODEV);
6764
6765         /* Top directory uses NULL as the parent */
6766         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6767                 return NULL;
6768
6769         /* All sub buffers have a descriptor */
6770         return tr->dir;
6771 }
6772
6773 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6774 {
6775         struct dentry *d_tracer;
6776
6777         if (tr->percpu_dir)
6778                 return tr->percpu_dir;
6779
6780         d_tracer = tracing_get_dentry(tr);
6781         if (IS_ERR(d_tracer))
6782                 return NULL;
6783
6784         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6785
6786         WARN_ONCE(!tr->percpu_dir,
6787                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6788
6789         return tr->percpu_dir;
6790 }
6791
6792 static struct dentry *
6793 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6794                       void *data, long cpu, const struct file_operations *fops)
6795 {
6796         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6797
6798         if (ret) /* See tracing_get_cpu() */
6799                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6800         return ret;
6801 }
6802
6803 static void
6804 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6805 {
6806         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6807         struct dentry *d_cpu;
6808         char cpu_dir[30]; /* 30 characters should be more than enough */
6809
6810         if (!d_percpu)
6811                 return;
6812
6813         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6814         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6815         if (!d_cpu) {
6816                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6817                 return;
6818         }
6819
6820         /* per cpu trace_pipe */
6821         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6822                                 tr, cpu, &tracing_pipe_fops);
6823
6824         /* per cpu trace */
6825         trace_create_cpu_file("trace", 0644, d_cpu,
6826                                 tr, cpu, &tracing_fops);
6827
6828         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6829                                 tr, cpu, &tracing_buffers_fops);
6830
6831         trace_create_cpu_file("stats", 0444, d_cpu,
6832                                 tr, cpu, &tracing_stats_fops);
6833
6834         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6835                                 tr, cpu, &tracing_entries_fops);
6836
6837 #ifdef CONFIG_TRACER_SNAPSHOT
6838         trace_create_cpu_file("snapshot", 0644, d_cpu,
6839                                 tr, cpu, &snapshot_fops);
6840
6841         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6842                                 tr, cpu, &snapshot_raw_fops);
6843 #endif
6844 }
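
/*
 * The function above gives each CPU its own per_cpu/cpuN/ directory
 * holding trace_pipe, trace, trace_pipe_raw, stats and buffer_size_kb
 * (plus snapshot and snapshot_raw with CONFIG_TRACER_SNAPSHOT), each
 * operating only on that CPU's portion of the buffer.
 */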
6845
6846 #ifdef CONFIG_FTRACE_SELFTEST
6847 /* Let selftest have access to static functions in this file */
6848 #include "trace_selftest.c"
6849 #endif
6850
6851 static ssize_t
6852 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6853                         loff_t *ppos)
6854 {
6855         struct trace_option_dentry *topt = filp->private_data;
6856         char *buf;
6857
6858         if (topt->flags->val & topt->opt->bit)
6859                 buf = "1\n";
6860         else
6861                 buf = "0\n";
6862
6863         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6864 }
6865
6866 static ssize_t
6867 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6868                          loff_t *ppos)
6869 {
6870         struct trace_option_dentry *topt = filp->private_data;
6871         unsigned long val;
6872         int ret;
6873
6874         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6875         if (ret)
6876                 return ret;
6877
6878         if (val != 0 && val != 1)
6879                 return -EINVAL;
6880
6881         if (!!(topt->flags->val & topt->opt->bit) != val) {
6882                 mutex_lock(&trace_types_lock);
6883                 ret = __set_tracer_option(topt->tr, topt->flags,
6884                                           topt->opt, !val);
6885                 mutex_unlock(&trace_types_lock);
6886                 if (ret)
6887                         return ret;
6888         }
6889
6890         *ppos += cnt;
6891
6892         return cnt;
6893 }
6894
6895
6896 static const struct file_operations trace_options_fops = {
6897         .open = tracing_open_generic,
6898         .read = trace_options_read,
6899         .write = trace_options_write,
6900         .llseek = generic_file_llseek,
6901 };
6902
6903 /*
6904  * In order to pass in both the trace_array descriptor as well as the index
6905  * to the flag that the trace option file represents, the trace_array
6906  * has a character array of trace_flags_index[], which holds the index
6907  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6908  * The address of this character array is passed to the flag option file
6909  * read/write callbacks.
6910  *
6911  * In order to extract both the index and the trace_array descriptor,
6912  * get_tr_index() uses the following algorithm.
6913  *
6914  *   idx = *ptr;
6915  *
6916  * This works because the pointer points into trace_flags_index[], whose
6917  * entries hold their own position (remember index[1] == 1), so the value
6918  * read is also the pointer's offset from &index[0].
6919  *
6920  * Subtracting that index from the pointer therefore gives the start of the array:
6921  *
6922  *   ptr - idx == &index[0]
6923  *
6924  * Then a simple container_of() from that pointer gets us to the
6925  * trace_array descriptor.
6926  */
6927 static void get_tr_index(void *data, struct trace_array **ptr,
6928                          unsigned int *pindex)
6929 {
6930         *pindex = *(unsigned char *)data;
6931
6932         *ptr = container_of(data - *pindex, struct trace_array,
6933                             trace_flags_index);
6934 }
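
/*
 * Worked example of the scheme described above: if @data points at
 * tr->trace_flags_index[3], then *pindex becomes 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address with
 * the trace_flags_index member recovers the enclosing trace_array.
 */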
6935
6936 static ssize_t
6937 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6938                         loff_t *ppos)
6939 {
6940         void *tr_index = filp->private_data;
6941         struct trace_array *tr;
6942         unsigned int index;
6943         char *buf;
6944
6945         get_tr_index(tr_index, &tr, &index);
6946
6947         if (tr->trace_flags & (1 << index))
6948                 buf = "1\n";
6949         else
6950                 buf = "0\n";
6951
6952         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6953 }
6954
6955 static ssize_t
6956 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6957                          loff_t *ppos)
6958 {
6959         void *tr_index = filp->private_data;
6960         struct trace_array *tr;
6961         unsigned int index;
6962         unsigned long val;
6963         int ret;
6964
6965         get_tr_index(tr_index, &tr, &index);
6966
6967         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6968         if (ret)
6969                 return ret;
6970
6971         if (val != 0 && val != 1)
6972                 return -EINVAL;
6973
6974         mutex_lock(&trace_types_lock);
6975         ret = set_tracer_flag(tr, 1 << index, val);
6976         mutex_unlock(&trace_types_lock);
6977
6978         if (ret < 0)
6979                 return ret;
6980
6981         *ppos += cnt;
6982
6983         return cnt;
6984 }
6985
6986 static const struct file_operations trace_options_core_fops = {
6987         .open = tracing_open_generic,
6988         .read = trace_options_core_read,
6989         .write = trace_options_core_write,
6990         .llseek = generic_file_llseek,
6991 };
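
/*
 * Each core trace flag gets a file under options/ backed by the fops
 * above.  Reads return "0\n" or "1\n", and writes of 0 or 1 flip the
 * flag through set_tracer_flag(); anything else is rejected with
 * -EINVAL.  For example, using the "overwrite" flag:
 *
 *	cat options/overwrite
 *	echo 0 > options/overwrite
 */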
6992
6993 struct dentry *trace_create_file(const char *name,
6994                                  umode_t mode,
6995                                  struct dentry *parent,
6996                                  void *data,
6997                                  const struct file_operations *fops)
6998 {
6999         struct dentry *ret;
7000
7001         ret = tracefs_create_file(name, mode, parent, data, fops);
7002         if (!ret)
7003                 pr_warn("Could not create tracefs '%s' entry\n", name);
7004
7005         return ret;
7006 }
7007
7008
7009 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7010 {
7011         struct dentry *d_tracer;
7012
7013         if (tr->options)
7014                 return tr->options;
7015
7016         d_tracer = tracing_get_dentry(tr);
7017         if (IS_ERR(d_tracer))
7018                 return NULL;
7019
7020         tr->options = tracefs_create_dir("options", d_tracer);
7021         if (!tr->options) {
7022                 pr_warn("Could not create tracefs directory 'options'\n");
7023                 return NULL;
7024         }
7025
7026         return tr->options;
7027 }
7028
7029 static void
7030 create_trace_option_file(struct trace_array *tr,
7031                          struct trace_option_dentry *topt,
7032                          struct tracer_flags *flags,
7033                          struct tracer_opt *opt)
7034 {
7035         struct dentry *t_options;
7036
7037         t_options = trace_options_init_dentry(tr);
7038         if (!t_options)
7039                 return;
7040
7041         topt->flags = flags;
7042         topt->opt = opt;
7043         topt->tr = tr;
7044
7045         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7046                                     &trace_options_fops);
7047
7048 }
7049
7050 static void
7051 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7052 {
7053         struct trace_option_dentry *topts;
7054         struct trace_options *tr_topts;
7055         struct tracer_flags *flags;
7056         struct tracer_opt *opts;
7057         int cnt;
7058         int i;
7059
7060         if (!tracer)
7061                 return;
7062
7063         flags = tracer->flags;
7064
7065         if (!flags || !flags->opts)
7066                 return;
7067
7068         /*
7069          * If this is an instance, only create flags for tracers
7070          * the instance may have.
7071          */
7072         if (!trace_ok_for_array(tracer, tr))
7073                 return;
7074
7075         for (i = 0; i < tr->nr_topts; i++) {
7076                 /* Make sure there are no duplicate flags. */
7077                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7078                         return;
7079         }
7080
7081         opts = flags->opts;
7082
7083         for (cnt = 0; opts[cnt].name; cnt++)
7084                 ;
7085
7086         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7087         if (!topts)
7088                 return;
7089
7090         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7091                             GFP_KERNEL);
7092         if (!tr_topts) {
7093                 kfree(topts);
7094                 return;
7095         }
7096
7097         tr->topts = tr_topts;
7098         tr->topts[tr->nr_topts].tracer = tracer;
7099         tr->topts[tr->nr_topts].topts = topts;
7100         tr->nr_topts++;
7101
7102         for (cnt = 0; opts[cnt].name; cnt++) {
7103                 create_trace_option_file(tr, &topts[cnt], flags,
7104                                          &opts[cnt]);
7105                 WARN_ONCE(topts[cnt].entry == NULL,
7106                           "Failed to create trace option: %s",
7107                           opts[cnt].name);
7108         }
7109 }
7110
7111 static struct dentry *
7112 create_trace_option_core_file(struct trace_array *tr,
7113                               const char *option, long index)
7114 {
7115         struct dentry *t_options;
7116
7117         t_options = trace_options_init_dentry(tr);
7118         if (!t_options)
7119                 return NULL;
7120
7121         return trace_create_file(option, 0644, t_options,
7122                                  (void *)&tr->trace_flags_index[index],
7123                                  &trace_options_core_fops);
7124 }
7125
7126 static void create_trace_options_dir(struct trace_array *tr)
7127 {
7128         struct dentry *t_options;
7129         bool top_level = tr == &global_trace;
7130         int i;
7131
7132         t_options = trace_options_init_dentry(tr);
7133         if (!t_options)
7134                 return;
7135
7136         for (i = 0; trace_options[i]; i++) {
7137                 if (top_level ||
7138                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7139                         create_trace_option_core_file(tr, trace_options[i], i);
7140         }
7141 }
7142
7143 static ssize_t
7144 rb_simple_read(struct file *filp, char __user *ubuf,
7145                size_t cnt, loff_t *ppos)
7146 {
7147         struct trace_array *tr = filp->private_data;
7148         char buf[64];
7149         int r;
7150
7151         r = tracer_tracing_is_on(tr);
7152         r = sprintf(buf, "%d\n", r);
7153
7154         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7155 }
7156
7157 static ssize_t
7158 rb_simple_write(struct file *filp, const char __user *ubuf,
7159                 size_t cnt, loff_t *ppos)
7160 {
7161         struct trace_array *tr = filp->private_data;
7162         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7163         unsigned long val;
7164         int ret;
7165
7166         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7167         if (ret)
7168                 return ret;
7169
7170         if (buffer) {
7171                 mutex_lock(&trace_types_lock);
7172                 if (val) {
7173                         tracer_tracing_on(tr);
7174                         if (tr->current_trace->start)
7175                                 tr->current_trace->start(tr);
7176                 } else {
7177                         tracer_tracing_off(tr);
7178                         if (tr->current_trace->stop)
7179                                 tr->current_trace->stop(tr);
7180                 }
7181                 mutex_unlock(&trace_types_lock);
7182         }
7183
7184         (*ppos)++;
7185
7186         return cnt;
7187 }
7188
7189 static const struct file_operations rb_simple_fops = {
7190         .open           = tracing_open_generic_tr,
7191         .read           = rb_simple_read,
7192         .write          = rb_simple_write,
7193         .release        = tracing_release_generic_tr,
7194         .llseek         = default_llseek,
7195 };
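
/*
 * The fops above back the per-instance "tracing_on" file: a read
 * reports whether the ring buffer is currently recording, and a write
 * of 0 or 1 turns recording off or on, also invoking the current
 * tracer's stop()/start() callbacks:
 *
 *	echo 0 > tracing_on	# stop recording, tracer stays selected
 *	echo 1 > tracing_on	# resume recording
 */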
7196
7197 struct dentry *trace_instance_dir;
7198
7199 static void
7200 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7201
7202 static int
7203 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7204 {
7205         enum ring_buffer_flags rb_flags;
7206
7207         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7208
7209         buf->tr = tr;
7210
7211         buf->buffer = ring_buffer_alloc(size, rb_flags);
7212         if (!buf->buffer)
7213                 return -ENOMEM;
7214
7215         buf->data = alloc_percpu(struct trace_array_cpu);
7216         if (!buf->data) {
7217                 ring_buffer_free(buf->buffer);
7218                 return -ENOMEM;
7219         }
7220
7221         /* Allocate the first page for all buffers */
7222         set_buffer_entries(&tr->trace_buffer,
7223                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7224
7225         return 0;
7226 }
7227
7228 static int allocate_trace_buffers(struct trace_array *tr, int size)
7229 {
7230         int ret;
7231
7232         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7233         if (ret)
7234                 return ret;
7235
7236 #ifdef CONFIG_TRACER_MAX_TRACE
7237         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7238                                     allocate_snapshot ? size : 1);
7239         if (WARN_ON(ret)) {
7240                 ring_buffer_free(tr->trace_buffer.buffer);
7241                 free_percpu(tr->trace_buffer.data);
7242                 return -ENOMEM;
7243         }
7244         tr->allocated_snapshot = allocate_snapshot;
7245
7246         /*
7247          * Only the top level trace array gets its snapshot allocated
7248          * from the kernel command line.
7249          */
7250         allocate_snapshot = false;
7251 #endif
7252         return 0;
7253 }
7254
7255 static void free_trace_buffer(struct trace_buffer *buf)
7256 {
7257         if (buf->buffer) {
7258                 ring_buffer_free(buf->buffer);
7259                 buf->buffer = NULL;
7260                 free_percpu(buf->data);
7261                 buf->data = NULL;
7262         }
7263 }
7264
7265 static void free_trace_buffers(struct trace_array *tr)
7266 {
7267         if (!tr)
7268                 return;
7269
7270         free_trace_buffer(&tr->trace_buffer);
7271
7272 #ifdef CONFIG_TRACER_MAX_TRACE
7273         free_trace_buffer(&tr->max_buffer);
7274 #endif
7275 }
7276
7277 static void init_trace_flags_index(struct trace_array *tr)
7278 {
7279         int i;
7280
7281         /* Used by the trace options files */
7282         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7283                 tr->trace_flags_index[i] = i;
7284 }
7285
7286 static void __update_tracer_options(struct trace_array *tr)
7287 {
7288         struct tracer *t;
7289
7290         for (t = trace_types; t; t = t->next)
7291                 add_tracer_options(tr, t);
7292 }
7293
7294 static void update_tracer_options(struct trace_array *tr)
7295 {
7296         mutex_lock(&trace_types_lock);
7297         __update_tracer_options(tr);
7298         mutex_unlock(&trace_types_lock);
7299 }
7300
7301 static int instance_mkdir(const char *name)
7302 {
7303         struct trace_array *tr;
7304         int ret;
7305
7306         mutex_lock(&trace_types_lock);
7307
7308         ret = -EEXIST;
7309         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7310                 if (tr->name && strcmp(tr->name, name) == 0)
7311                         goto out_unlock;
7312         }
7313
7314         ret = -ENOMEM;
7315         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7316         if (!tr)
7317                 goto out_unlock;
7318
7319         tr->name = kstrdup(name, GFP_KERNEL);
7320         if (!tr->name)
7321                 goto out_free_tr;
7322
7323         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7324                 goto out_free_tr;
7325
7326         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7327
7328         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7329
7330         raw_spin_lock_init(&tr->start_lock);
7331
7332         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7333
7334         tr->current_trace = &nop_trace;
7335
7336         INIT_LIST_HEAD(&tr->systems);
7337         INIT_LIST_HEAD(&tr->events);
7338
7339         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7340                 goto out_free_tr;
7341
7342         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7343         if (!tr->dir)
7344                 goto out_free_tr;
7345
7346         ret = event_trace_add_tracer(tr->dir, tr);
7347         if (ret) {
7348                 tracefs_remove_recursive(tr->dir);
7349                 goto out_free_tr;
7350         }
7351
7352         init_tracer_tracefs(tr, tr->dir);
7353         init_trace_flags_index(tr);
7354         __update_tracer_options(tr);
7355
7356         list_add(&tr->list, &ftrace_trace_arrays);
7357
7358         mutex_unlock(&trace_types_lock);
7359
7360         return 0;
7361
7362  out_free_tr:
7363         free_trace_buffers(tr);
7364         free_cpumask_var(tr->tracing_cpumask);
7365         kfree(tr->name);
7366         kfree(tr);
7367
7368  out_unlock:
7369         mutex_unlock(&trace_types_lock);
7370
7371         return ret;
7372
7373 }
7374
7375 static int instance_rmdir(const char *name)
7376 {
7377         struct trace_array *tr;
7378         int found = 0;
7379         int ret;
7380         int i;
7381
7382         mutex_lock(&trace_types_lock);
7383
7384         ret = -ENODEV;
7385         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7386                 if (tr->name && strcmp(tr->name, name) == 0) {
7387                         found = 1;
7388                         break;
7389                 }
7390         }
7391         if (!found)
7392                 goto out_unlock;
7393
7394         ret = -EBUSY;
7395         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7396                 goto out_unlock;
7397
7398         list_del(&tr->list);
7399
7400         /* Disable all the flags that were enabled coming in */
7401         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7402                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7403                         set_tracer_flag(tr, 1 << i, 0);
7404         }
7405
7406         tracing_set_nop(tr);
7407         event_trace_del_tracer(tr);
7408         ftrace_clear_pids(tr);
7409         ftrace_destroy_function_files(tr);
7410         tracefs_remove_recursive(tr->dir);
7411         free_trace_buffers(tr);
7412
7413         for (i = 0; i < tr->nr_topts; i++) {
7414                 kfree(tr->topts[i].topts);
7415         }
7416         kfree(tr->topts);
7417
7418         kfree(tr->name);
7419         kfree(tr);
7420
7421         ret = 0;
7422
7423  out_unlock:
7424         mutex_unlock(&trace_types_lock);
7425
7426         return ret;
7427 }
7428
7429 static __init void create_trace_instances(struct dentry *d_tracer)
7430 {
7431         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7432                                                          instance_mkdir,
7433                                                          instance_rmdir);
7434         if (WARN_ON(!trace_instance_dir))
7435                 return;
7436 }
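
/*
 * Instances are managed purely with mkdir/rmdir in the directory
 * created above, for example:
 *
 *	mkdir /sys/kernel/tracing/instances/foo	  # instance_mkdir()
 *	rmdir /sys/kernel/tracing/instances/foo	  # instance_rmdir()
 *
 * rmdir returns -EBUSY while the instance (or its current tracer) is
 * still referenced, and -ENODEV if no instance of that name exists.
 * The path assumes the usual tracefs mount point.
 */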
7437
7438 static void
7439 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7440 {
7441         int cpu;
7442
7443         trace_create_file("available_tracers", 0444, d_tracer,
7444                         tr, &show_traces_fops);
7445
7446         trace_create_file("current_tracer", 0644, d_tracer,
7447                         tr, &set_tracer_fops);
7448
7449         trace_create_file("tracing_cpumask", 0644, d_tracer,
7450                           tr, &tracing_cpumask_fops);
7451
7452         trace_create_file("trace_options", 0644, d_tracer,
7453                           tr, &tracing_iter_fops);
7454
7455         trace_create_file("trace", 0644, d_tracer,
7456                           tr, &tracing_fops);
7457
7458         trace_create_file("trace_pipe", 0444, d_tracer,
7459                           tr, &tracing_pipe_fops);
7460
7461         trace_create_file("buffer_size_kb", 0644, d_tracer,
7462                           tr, &tracing_entries_fops);
7463
7464         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7465                           tr, &tracing_total_entries_fops);
7466
7467         trace_create_file("free_buffer", 0200, d_tracer,
7468                           tr, &tracing_free_buffer_fops);
7469
7470         trace_create_file("trace_marker", 0220, d_tracer,
7471                           tr, &tracing_mark_fops);
7472
7473         trace_create_file("trace_marker_raw", 0220, d_tracer,
7474                           tr, &tracing_mark_raw_fops);
7475
7476         trace_create_file("trace_clock", 0644, d_tracer, tr,
7477                           &trace_clock_fops);
7478
7479         trace_create_file("tracing_on", 0644, d_tracer,
7480                           tr, &rb_simple_fops);
7481
7482         create_trace_options_dir(tr);
7483
7484 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7485         trace_create_file("tracing_max_latency", 0644, d_tracer,
7486                         &tr->max_latency, &tracing_max_lat_fops);
7487 #endif
7488
7489         if (ftrace_create_function_files(tr, d_tracer))
7490                 WARN(1, "Could not allocate function filter files");
7491
7492 #ifdef CONFIG_TRACER_SNAPSHOT
7493         trace_create_file("snapshot", 0644, d_tracer,
7494                           tr, &snapshot_fops);
7495 #endif
7496
7497         for_each_tracing_cpu(cpu)
7498                 tracing_init_tracefs_percpu(tr, cpu);
7499
7500         ftrace_init_tracefs(tr, d_tracer);
7501 }
7502
7503 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7504 {
7505         struct vfsmount *mnt;
7506         struct file_system_type *type;
7507
7508         /*
7509          * To maintain backward compatibility for tools that mount
7510          * debugfs to get to the tracing facility, tracefs is automatically
7511          * mounted to the debugfs/tracing directory.
7512          */
7513         type = get_fs_type("tracefs");
7514         if (!type)
7515                 return NULL;
7516         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7517         put_filesystem(type);
7518         if (IS_ERR(mnt))
7519                 return NULL;
7520         mntget(mnt);
7521
7522         return mnt;
7523 }
7524
7525 /**
7526  * tracing_init_dentry - initialize top level trace array
7527  *
7528  * This is called when creating files or directories in the tracing
7529  * directory. It is called via fs_initcall() by any of the boot up code
7530  * and expects to return the dentry of the top level tracing directory.
7531  */
7532 struct dentry *tracing_init_dentry(void)
7533 {
7534         struct trace_array *tr = &global_trace;
7535
7536         /* The top level trace array uses NULL as parent */
7537         if (tr->dir)
7538                 return NULL;
7539
7540         if (WARN_ON(!tracefs_initialized()) ||
7541                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7542                  WARN_ON(!debugfs_initialized())))
7543                 return ERR_PTR(-ENODEV);
7544
7545         /*
7546          * As there may still be users that expect the tracing
7547          * files to exist in debugfs/tracing, we must automount
7548          * the tracefs file system there, so older tools still
7549          * work with the newer kernel.
7550          */
7551         tr->dir = debugfs_create_automount("tracing", NULL,
7552                                            trace_automount, NULL);
7553         if (!tr->dir) {
7554                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7555                 return ERR_PTR(-ENOMEM);
7556         }
7557
7558         return NULL;
7559 }
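
/*
 * In practice this means the same tracing files are reachable both at
 * the tracefs mount point and, through the automount set up above,
 * under debugfs (paths below assume the conventional mount points):
 *
 *	mount -t tracefs nodev /sys/kernel/tracing
 *	ls /sys/kernel/debug/tracing	# automounts tracefs here
 */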
7560
7561 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7562 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7563
7564 static void __init trace_enum_init(void)
7565 {
7566         int len;
7567
7568         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7569         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7570 }
7571
7572 #ifdef CONFIG_MODULES
7573 static void trace_module_add_enums(struct module *mod)
7574 {
7575         if (!mod->num_trace_enums)
7576                 return;
7577
7578         /*
7579          * Modules with bad taint do not have events created; do
7580          * not bother with enums either.
7581          */
7582         if (trace_module_has_bad_taint(mod))
7583                 return;
7584
7585         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7586 }
7587
7588 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7589 static void trace_module_remove_enums(struct module *mod)
7590 {
7591         union trace_enum_map_item *map;
7592         union trace_enum_map_item **last = &trace_enum_maps;
7593
7594         if (!mod->num_trace_enums)
7595                 return;
7596
7597         mutex_lock(&trace_enum_mutex);
7598
7599         map = trace_enum_maps;
7600
7601         while (map) {
7602                 if (map->head.mod == mod)
7603                         break;
7604                 map = trace_enum_jmp_to_tail(map);
7605                 last = &map->tail.next;
7606                 map = map->tail.next;
7607         }
7608         if (!map)
7609                 goto out;
7610
7611         *last = trace_enum_jmp_to_tail(map)->tail.next;
7612         kfree(map);
7613  out:
7614         mutex_unlock(&trace_enum_mutex);
7615 }
7616 #else
7617 static inline void trace_module_remove_enums(struct module *mod) { }
7618 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7619
7620 static int trace_module_notify(struct notifier_block *self,
7621                                unsigned long val, void *data)
7622 {
7623         struct module *mod = data;
7624
7625         switch (val) {
7626         case MODULE_STATE_COMING:
7627                 trace_module_add_enums(mod);
7628                 break;
7629         case MODULE_STATE_GOING:
7630                 trace_module_remove_enums(mod);
7631                 break;
7632         }
7633
7634         return 0;
7635 }
7636
7637 static struct notifier_block trace_module_nb = {
7638         .notifier_call = trace_module_notify,
7639         .priority = 0,
7640 };
7641 #endif /* CONFIG_MODULES */
7642
7643 static __init int tracer_init_tracefs(void)
7644 {
7645         struct dentry *d_tracer;
7646
7647         trace_access_lock_init();
7648
7649         d_tracer = tracing_init_dentry();
7650         if (IS_ERR(d_tracer))
7651                 return 0;
7652
7653         init_tracer_tracefs(&global_trace, d_tracer);
7654         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7655
7656         trace_create_file("tracing_thresh", 0644, d_tracer,
7657                         &global_trace, &tracing_thresh_fops);
7658
7659         trace_create_file("README", 0444, d_tracer,
7660                         NULL, &tracing_readme_fops);
7661
7662         trace_create_file("saved_cmdlines", 0444, d_tracer,
7663                         NULL, &tracing_saved_cmdlines_fops);
7664
7665         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7666                           NULL, &tracing_saved_cmdlines_size_fops);
7667
7668         trace_enum_init();
7669
7670         trace_create_enum_file(d_tracer);
7671
7672 #ifdef CONFIG_MODULES
7673         register_module_notifier(&trace_module_nb);
7674 #endif
7675
7676 #ifdef CONFIG_DYNAMIC_FTRACE
7677         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7678                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7679 #endif
7680
7681         create_trace_instances(d_tracer);
7682
7683         update_tracer_options(&global_trace);
7684
7685         return 0;
7686 }
7687
7688 static int trace_panic_handler(struct notifier_block *this,
7689                                unsigned long event, void *unused)
7690 {
7691         if (ftrace_dump_on_oops)
7692                 ftrace_dump(ftrace_dump_on_oops);
7693         return NOTIFY_OK;
7694 }
7695
7696 static struct notifier_block trace_panic_notifier = {
7697         .notifier_call  = trace_panic_handler,
7698         .next           = NULL,
7699         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7700 };
7701
7702 static int trace_die_handler(struct notifier_block *self,
7703                              unsigned long val,
7704                              void *data)
7705 {
7706         switch (val) {
7707         case DIE_OOPS:
7708                 if (ftrace_dump_on_oops)
7709                         ftrace_dump(ftrace_dump_on_oops);
7710                 break;
7711         default:
7712                 break;
7713         }
7714         return NOTIFY_OK;
7715 }
7716
7717 static struct notifier_block trace_die_notifier = {
7718         .notifier_call = trace_die_handler,
7719         .priority = 200
7720 };
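
/*
 * The two notifiers above honor ftrace_dump_on_oops, which is usually
 * enabled with the ftrace_dump_on_oops kernel command line parameter or
 * the kernel.ftrace_dump_on_oops sysctl; a dump can also be forced by
 * hand with sysrq-z, as noted in ftrace_dump() below.
 */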
7721
7722 /*
7723  * printk is set to max of 1024, we really don't need it that big.
7724  * Nothing should be printing 1000 characters anyway.
7725  */
7726 #define TRACE_MAX_PRINT         1000
7727
7728 /*
7729  * Define here KERN_TRACE so that we have one place to modify
7730  * it if we decide to change what log level the ftrace dump
7731  * should be at.
7732  */
7733 #define KERN_TRACE              KERN_EMERG
7734
7735 void
7736 trace_printk_seq(struct trace_seq *s)
7737 {
7738         /* Probably should print a warning here. */
7739         if (s->seq.len >= TRACE_MAX_PRINT)
7740                 s->seq.len = TRACE_MAX_PRINT;
7741
7742         /*
7743          * More paranoid code. Although the buffer size is set to
7744          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7745          * an extra layer of protection.
7746          */
7747         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7748                 s->seq.len = s->seq.size - 1;
7749
7750         /* Should already be NUL-terminated, but we are paranoid. */
7751         s->buffer[s->seq.len] = 0;
7752
7753         printk(KERN_TRACE "%s", s->buffer);
7754
7755         trace_seq_init(s);
7756 }
7757
7758 void trace_init_global_iter(struct trace_iterator *iter)
7759 {
7760         iter->tr = &global_trace;
7761         iter->trace = iter->tr->current_trace;
7762         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7763         iter->trace_buffer = &global_trace.trace_buffer;
7764
7765         if (iter->trace && iter->trace->open)
7766                 iter->trace->open(iter);
7767
7768         /* Annotate start of buffers if we had overruns */
7769         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7770                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7771
7772         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7773         if (trace_clocks[iter->tr->clock_id].in_ns)
7774                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7775 }
7776
7777 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7778 {
7779         /* use static because iter can be a bit big for the stack */
7780         static struct trace_iterator iter;
7781         static atomic_t dump_running;
7782         struct trace_array *tr = &global_trace;
7783         unsigned int old_userobj;
7784         unsigned long flags;
7785         int cnt = 0, cpu;
7786
7787         /* Only allow one dump user at a time. */
7788         if (atomic_inc_return(&dump_running) != 1) {
7789                 atomic_dec(&dump_running);
7790                 return;
7791         }
7792
7793         /*
7794          * Always turn off tracing when we dump.
7795          * We don't need to show trace output of what happens
7796          * between multiple crashes.
7797          *
7798          * If the user does a sysrq-z, then they can re-enable
7799          * tracing with echo 1 > tracing_on.
7800          */
7801         tracing_off();
7802
7803         local_irq_save(flags);
7804
7805         /* Simulate the iterator */
7806         trace_init_global_iter(&iter);
7807
7808         for_each_tracing_cpu(cpu) {
7809                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7810         }
7811
7812         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7813
7814         /* don't look at user memory in panic mode */
7815         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7816
7817         switch (oops_dump_mode) {
7818         case DUMP_ALL:
7819                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7820                 break;
7821         case DUMP_ORIG:
7822                 iter.cpu_file = raw_smp_processor_id();
7823                 break;
7824         case DUMP_NONE:
7825                 goto out_enable;
7826         default:
7827                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7828                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7829         }
7830
7831         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7832
7833         /* Did function tracer already get disabled? */
7834         if (ftrace_is_dead()) {
7835                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7836                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7837         }
7838
7839         /*
7840          * We need to stop all tracing on all CPUs to read
7841          * the next buffer. This is a bit expensive, but is
7842          * not done often. We fill all we can read,
7843          * and then release the locks again.
7844          */
7845
7846         while (!trace_empty(&iter)) {
7847
7848                 if (!cnt)
7849                         printk(KERN_TRACE "---------------------------------\n");
7850
7851                 cnt++;
7852
7853                 /* reset all but tr, trace, and overruns */
7854                 memset(&iter.seq, 0,
7855                        sizeof(struct trace_iterator) -
7856                        offsetof(struct trace_iterator, seq));
7857                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7858                 iter.pos = -1;
7859
7860                 if (trace_find_next_entry_inc(&iter) != NULL) {
7861                         int ret;
7862
7863                         ret = print_trace_line(&iter);
7864                         if (ret != TRACE_TYPE_NO_CONSUME)
7865                                 trace_consume(&iter);
7866                 }
7867                 touch_nmi_watchdog();
7868
7869                 trace_printk_seq(&iter.seq);
7870         }
7871
7872         if (!cnt)
7873                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7874         else
7875                 printk(KERN_TRACE "---------------------------------\n");
7876
7877  out_enable:
7878         tr->trace_flags |= old_userobj;
7879
7880         for_each_tracing_cpu(cpu) {
7881                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7882         }
7883         atomic_dec(&dump_running);
7884         local_irq_restore(flags);
7885 }
7886 EXPORT_SYMBOL_GPL(ftrace_dump);
7887
7888 __init static int tracer_alloc_buffers(void)
7889 {
7890         int ring_buf_size;
7891         int ret = -ENOMEM;
7892
7893         /*
7894          * Make sure we don't accidentally add more trace options
7895          * than we have bits for.
7896          */
7897         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7898
7899         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7900                 goto out;
7901
7902         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7903                 goto out_free_buffer_mask;
7904
7905         /* Only allocate trace_printk buffers if a trace_printk exists */
7906         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7907                 /* Must be called before global_trace.buffer is allocated */
7908                 trace_printk_init_buffers();
7909
7910         /* To save memory, keep the ring buffer size to its minimum */
7911         if (ring_buffer_expanded)
7912                 ring_buf_size = trace_buf_size;
7913         else
7914                 ring_buf_size = 1;
7915
7916         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7917         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7918
7919         raw_spin_lock_init(&global_trace.start_lock);
7920
7921         /*
7922          * The prepare callback allocates some memory for the ring buffer. We
7923          * don't free the buffer if the CPU goes down. If we were to free
7924          * the buffer, then the user would lose any trace that was in the
7925          * buffer. The memory will be removed once the "instance" is removed.
7926          */
7927         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
7928                                       "trace/RB:prepare", trace_rb_cpu_prepare,
7929                                       NULL);
7930         if (ret < 0)
7931                 goto out_free_cpumask;
7932         /* Used for event triggers */
7933         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7934         if (!temp_buffer)
7935                 goto out_rm_hp_state;
7936
7937         if (trace_create_savedcmd() < 0)
7938                 goto out_free_temp_buffer;
7939
7940         /* TODO: make the number of buffers hot pluggable with CPUS */
7941         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7942                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7943                 WARN_ON(1);
7944                 goto out_free_savedcmd;
7945         }
7946
7947         if (global_trace.buffer_disabled)
7948                 tracing_off();
7949
7950         if (trace_boot_clock) {
7951                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7952                 if (ret < 0)
7953                         pr_warn("Trace clock %s not defined, going back to default\n",
7954                                 trace_boot_clock);
7955         }
7956
7957         /*
7958          * register_tracer() might reference current_trace, so it
7959          * needs to be set before we register anything. This is
7960          * just a bootstrap of current_trace anyway.
7961          */
7962         global_trace.current_trace = &nop_trace;
7963
7964         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7965
7966         ftrace_init_global_array_ops(&global_trace);
7967
7968         init_trace_flags_index(&global_trace);
7969
7970         register_tracer(&nop_trace);
7971
7972         /* All seems OK, enable tracing */
7973         tracing_disabled = 0;
7974
7975         atomic_notifier_chain_register(&panic_notifier_list,
7976                                        &trace_panic_notifier);
7977
7978         register_die_notifier(&trace_die_notifier);
7979
7980         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7981
7982         INIT_LIST_HEAD(&global_trace.systems);
7983         INIT_LIST_HEAD(&global_trace.events);
7984         list_add(&global_trace.list, &ftrace_trace_arrays);
7985
7986         apply_trace_boot_options();
7987
7988         register_snapshot_cmd();
7989
7990         return 0;
7991
7992 out_free_savedcmd:
7993         free_saved_cmdlines_buffer(savedcmd);
7994 out_free_temp_buffer:
7995         ring_buffer_free(temp_buffer);
7996 out_rm_hp_state:
7997         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
7998 out_free_cpumask:
7999         free_cpumask_var(global_trace.tracing_cpumask);
8000 out_free_buffer_mask:
8001         free_cpumask_var(tracing_buffer_mask);
8002 out:
8003         return ret;
8004 }
8005
8006 void __init trace_init(void)
8007 {
8008         if (tracepoint_printk) {
8009                 tracepoint_print_iter =
8010                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8011                 if (WARN_ON(!tracepoint_print_iter))
8012                         tracepoint_printk = 0;
8013                 else
8014                         static_key_enable(&tracepoint_printk_key.key);
8015         }
8016         tracer_alloc_buffers();
8017         trace_event_init();
8018 }
8019
8020 __init static int clear_boot_tracer(void)
8021 {
8022         /*
8023          * The default bootup tracer name is stored in an init section
8024          * buffer. This function is called at late_initcall time; if the
8025          * boot tracer was never found and registered, clear the pointer
8026          * so that a later registration cannot access the buffer that is
8027          * about to be freed.
8028          */
8029         if (!default_bootup_tracer)
8030                 return 0;
8031
8032         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8033                default_bootup_tracer);
8034         default_bootup_tracer = NULL;
8035
8036         return 0;
8037 }
8038
8039 fs_initcall(tracer_init_tracefs);
8040 late_initcall(clear_boot_tracer);