1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk(), could
60  * occur at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will be cleared to zero if the initialization
95  * of the tracer is successful. That is the only place that sets
96  * it back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
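/*
 * Illustrative layout sketch (editor's addition, not from the original
 * source): for N saved maps, the array described above conceptually
 * looks like
 *
 *	[ head (length = N, mod) ][ map 0 ] ... [ map N-1 ][ tail (next, end) ]
 *
 * i.e. skip the head, read "length" map entries, then follow tail.next
 * to the next such array, if any.
 */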
155
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
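/* Convert nanoseconds to microseconds, rounding to the nearest microsecond */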
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         TRACE_ITER_EVENT_FORK
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
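/*
 * Illustrative sketch (editor's addition, hypothetical names): the three
 * helpers above are meant to back a seq_file interface, roughly:
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= p_start,	(wraps trace_pid_start())
 *		.next	= p_next,	(wraps trace_pid_next())
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * where p_start/p_next/p_stop are thin wrappers that supply the
 * trace_pid_list of the file being read.
 */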
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always create a new array. The write is an all-or-nothing
492          * operation: a fresh array is built whenever the user adds new
493          * pids, and if the operation fails the current list is left
494          * unmodified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled", which is meant for fast paths such
592  * as the irqsoff tracer, but it may be inaccurate due to races. If
593  * you need to know the accurate state, use tracing_is_on(), which is
594  * a little slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to the low value of 16384 so that,
613  * if a dump on oops happens, we do not have to wait for an
614  * excessive amount of output. It is configurable at both boot
615  * time and run time anyway.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
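/*
 * Illustrative usage sketch (editor's addition): a reader of a single cpu
 * buffer is expected to bracket its accesses like
 *
 *	trace_access_lock(cpu);
 *	... peek or consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while a reader that touches all buffers passes RING_BUFFER_ALL_CPUS,
 * which takes the access lock exclusively.
 */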
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 static void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
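/*
 * Editor's note (sketch): callers normally use the trace_puts() helper,
 * defined elsewhere, rather than these functions directly; it picks
 * __trace_bputs() when the string is a build-time constant (so only the
 * pointer needs to be recorded) and falls back to __trace_puts()
 * otherwise, e.g.
 *
 *	trace_puts("reached the slow path\n");
 */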
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 /**
898  * tracing_snapshot - take a snapshot of the current buffer.
899  *
900  * This causes a swap between the snapshot buffer and the current live
901  * tracing buffer. You can use this to take snapshots of the live
902  * trace when some condition is triggered, but continue to trace.
903  *
904  * Note, make sure to allocate the snapshot either with
905  * tracing_snapshot_alloc(), or by doing it manually
906  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
907  *
908  * If the snapshot buffer is not allocated, it will stop tracing.
909  * Basically making a permanent snapshot.
910  */
911 void tracing_snapshot(void)
912 {
913         struct trace_array *tr = &global_trace;
914         struct tracer *tracer = tr->current_trace;
915         unsigned long flags;
916
917         if (in_nmi()) {
918                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
919                 internal_trace_puts("*** snapshot is being ignored        ***\n");
920                 return;
921         }
922
923         if (!tr->allocated_snapshot) {
924                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
925                 internal_trace_puts("*** stopping trace here!   ***\n");
926                 tracing_off();
927                 return;
928         }
929
930         /* Note, snapshot can not be used when the tracer uses it */
931         if (tracer->use_max_tr) {
932                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
933                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
934                 return;
935         }
936
937         local_irq_save(flags);
938         update_max_tr(tr, current, smp_processor_id());
939         local_irq_restore(flags);
940 }
941 EXPORT_SYMBOL_GPL(tracing_snapshot);
942
943 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
944                                         struct trace_buffer *size_buf, int cpu_id);
945 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
946
947 static int alloc_snapshot(struct trace_array *tr)
948 {
949         int ret;
950
951         if (!tr->allocated_snapshot) {
952
953                 /* allocate spare buffer */
954                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
955                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
956                 if (ret < 0)
957                         return ret;
958
959                 tr->allocated_snapshot = true;
960         }
961
962         return 0;
963 }
964
965 static void free_snapshot(struct trace_array *tr)
966 {
967         /*
968          * We don't free the ring buffer; instead, we resize it because
969          * the max_tr ring buffer has some state (e.g. ring->clock) that
970          * we want to preserve.
971          */
972         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
973         set_buffer_entries(&tr->max_buffer, 1);
974         tracing_reset_online_cpus(&tr->max_buffer);
975         tr->allocated_snapshot = false;
976 }
977
978 /**
979  * tracing_alloc_snapshot - allocate snapshot buffer.
980  *
981  * This only allocates the snapshot buffer if it isn't already
982  * allocated - it doesn't also take a snapshot.
983  *
984  * This is meant to be used in cases where the snapshot buffer needs
985  * to be set up for events that can't sleep but need to be able to
986  * trigger a snapshot.
987  */
988 int tracing_alloc_snapshot(void)
989 {
990         struct trace_array *tr = &global_trace;
991         int ret;
992
993         ret = alloc_snapshot(tr);
994         WARN_ON(ret < 0);
995
996         return ret;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999
1000 /**
1001  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1002  *
1003  * This is similar to tracing_snapshot(), but it will allocate the
1004  * snapshot buffer if it isn't already allocated. Use this only
1005  * where it is safe to sleep, as the allocation may sleep.
1006  *
1007  * This causes a swap between the snapshot buffer and the current live
1008  * tracing buffer. You can use this to take snapshots of the live
1009  * trace when some condition is triggered, but continue to trace.
1010  */
1011 void tracing_snapshot_alloc(void)
1012 {
1013         int ret;
1014
1015         ret = tracing_alloc_snapshot();
1016         if (ret < 0)
1017                 return;
1018
1019         tracing_snapshot();
1020 }
1021 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1022 #else
1023 void tracing_snapshot(void)
1024 {
1025         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot);
1028 int tracing_alloc_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1031         return -ENODEV;
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1034 void tracing_snapshot_alloc(void)
1035 {
1036         /* Give warning */
1037         tracing_snapshot();
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1040 #endif /* CONFIG_TRACER_SNAPSHOT */
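/*
 * Illustrative usage sketch (editor's addition): a typical user of the
 * snapshot API allocates the spare buffer from sleepable context and then
 * triggers snapshots wherever the condition of interest is detected (but
 * not from NMI context):
 *
 *	tracing_snapshot_alloc();	// may sleep, do this early
 *	...
 *	if (condition_hit)
 *		tracing_snapshot();	// swaps live and snapshot buffers
 */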
1041
1042 static void tracer_tracing_off(struct trace_array *tr)
1043 {
1044         if (tr->trace_buffer.buffer)
1045                 ring_buffer_record_off(tr->trace_buffer.buffer);
1046         /*
1047          * This flag is looked at when buffers haven't been allocated
1048          * yet, or by some tracers (like irqsoff), that just want to
1049          * know if the ring buffer has been disabled, but it can handle
1050          * races of where it gets disabled but we still do a record.
1051          * As the check is in the fast path of the tracers, it is more
1052          * important to be fast than accurate.
1053          */
1054         tr->buffer_disabled = 1;
1055         /* Make the flag seen by readers */
1056         smp_wmb();
1057 }
1058
1059 /**
1060  * tracing_off - turn off tracing buffers
1061  *
1062  * This function stops the tracing buffers from recording data.
1063  * It does not disable any overhead the tracers themselves may
1064  * be causing. This function simply causes all recording to
1065  * the ring buffers to fail.
1066  */
1067 void tracing_off(void)
1068 {
1069         tracer_tracing_off(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_off);
1072
1073 void disable_trace_on_warning(void)
1074 {
1075         if (__disable_trace_on_warning)
1076                 tracing_off();
1077 }
1078
1079 /**
1080  * tracer_tracing_is_on - show real state of ring buffer enabled
1081  * @tr : the trace array to know if ring buffer is enabled
1082  *
1083  * Shows real state of the ring buffer if it is enabled or not.
1084  */
1085 int tracer_tracing_is_on(struct trace_array *tr)
1086 {
1087         if (tr->trace_buffer.buffer)
1088                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1089         return !tr->buffer_disabled;
1090 }
1091
1092 /**
1093  * tracing_is_on - show state of ring buffers enabled
1094  */
1095 int tracing_is_on(void)
1096 {
1097         return tracer_tracing_is_on(&global_trace);
1098 }
1099 EXPORT_SYMBOL_GPL(tracing_is_on);
1100
1101 static int __init set_buf_size(char *str)
1102 {
1103         unsigned long buf_size;
1104
1105         if (!str)
1106                 return 0;
1107         buf_size = memparse(str, &str);
1108         /* nr_entries can not be zero */
1109         if (buf_size == 0)
1110                 return 0;
1111         trace_buf_size = buf_size;
1112         return 1;
1113 }
1114 __setup("trace_buf_size=", set_buf_size);
1115
1116 static int __init set_tracing_thresh(char *str)
1117 {
1118         unsigned long threshold;
1119         int ret;
1120
1121         if (!str)
1122                 return 0;
1123         ret = kstrtoul(str, 0, &threshold);
1124         if (ret < 0)
1125                 return 0;
1126         tracing_thresh = threshold * 1000;
1127         return 1;
1128 }
1129 __setup("tracing_thresh=", set_tracing_thresh);
1130
1131 unsigned long nsecs_to_usecs(unsigned long nsecs)
1132 {
1133         return nsecs / 1000;
1134 }
1135
1136 /*
1137  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1138  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1139  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1140  * of strings in the order that the enums were defined.
1141  */
1142 #undef C
1143 #define C(a, b) b
1144
1145 /* These must match the bit positions in trace_iterator_flags */
1146 static const char *trace_options[] = {
1147         TRACE_FLAGS
1148         NULL
1149 };
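/*
 * Illustrative sketch (editor's addition, hypothetical names): the C()
 * X-macro trick above lets one list generate both the enum bits and the
 * matching option strings, e.g.
 *
 *	#define EXAMPLE_FLAGS	C(FOO, "foo"), C(BAR, "bar"),
 *
 *	#undef C
 *	#define C(a, b) EXAMPLE_ITER_##a##_BIT
 *	enum { EXAMPLE_FLAGS };			// EXAMPLE_ITER_FOO_BIT, ...
 *
 *	#undef C
 *	#define C(a, b) b
 *	static const char *example_opts[] = { EXAMPLE_FLAGS NULL };
 */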
1150
1151 static struct {
1152         u64 (*func)(void);
1153         const char *name;
1154         int in_ns;              /* is this clock in nanoseconds? */
1155 } trace_clocks[] = {
1156         { trace_clock_local,            "local",        1 },
1157         { trace_clock_global,           "global",       1 },
1158         { trace_clock_counter,          "counter",      0 },
1159         { trace_clock_jiffies,          "uptime",       0 },
1160         { trace_clock,                  "perf",         1 },
1161         { ktime_get_mono_fast_ns,       "mono",         1 },
1162         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1163         { ktime_get_boot_fast_ns,       "boot",         1 },
1164         ARCH_TRACE_CLOCKS
1165 };
1166
1167 /*
1168  * trace_parser_get_init - gets the buffer for trace parser
1169  */
1170 int trace_parser_get_init(struct trace_parser *parser, int size)
1171 {
1172         memset(parser, 0, sizeof(*parser));
1173
1174         parser->buffer = kmalloc(size, GFP_KERNEL);
1175         if (!parser->buffer)
1176                 return 1;
1177
1178         parser->size = size;
1179         return 0;
1180 }
1181
1182 /*
1183  * trace_parser_put - frees the buffer for trace parser
1184  */
1185 void trace_parser_put(struct trace_parser *parser)
1186 {
1187         kfree(parser->buffer);
1188         parser->buffer = NULL;
1189 }
1190
1191 /*
1192  * trace_get_user - reads the user input string separated by space
1193  * (matched by isspace(ch))
1194  *
1195  * For each string found the 'struct trace_parser' is updated,
1196  * and the function returns.
1197  *
1198  * Returns number of bytes read.
1199  *
1200  * See kernel/trace/trace.h for 'struct trace_parser' details.
1201  */
1202 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1203         size_t cnt, loff_t *ppos)
1204 {
1205         char ch;
1206         size_t read = 0;
1207         ssize_t ret;
1208
1209         if (!*ppos)
1210                 trace_parser_clear(parser);
1211
1212         ret = get_user(ch, ubuf++);
1213         if (ret)
1214                 goto out;
1215
1216         read++;
1217         cnt--;
1218
1219         /*
1220          * The parser is not finished with the last write,
1221          * continue reading the user input without skipping spaces.
1222          */
1223         if (!parser->cont) {
1224                 /* skip white space */
1225                 while (cnt && isspace(ch)) {
1226                         ret = get_user(ch, ubuf++);
1227                         if (ret)
1228                                 goto out;
1229                         read++;
1230                         cnt--;
1231                 }
1232
1233                 /* only spaces were written */
1234                 if (isspace(ch)) {
1235                         *ppos += read;
1236                         ret = read;
1237                         goto out;
1238                 }
1239
1240                 parser->idx = 0;
1241         }
1242
1243         /* read the non-space input */
1244         while (cnt && !isspace(ch)) {
1245                 if (parser->idx < parser->size - 1)
1246                         parser->buffer[parser->idx++] = ch;
1247                 else {
1248                         ret = -EINVAL;
1249                         goto out;
1250                 }
1251                 ret = get_user(ch, ubuf++);
1252                 if (ret)
1253                         goto out;
1254                 read++;
1255                 cnt--;
1256         }
1257
1258         /* We either got finished input or we have to wait for another call. */
1259         if (isspace(ch)) {
1260                 parser->buffer[parser->idx] = 0;
1261                 parser->cont = false;
1262         } else if (parser->idx < parser->size - 1) {
1263                 parser->cont = true;
1264                 parser->buffer[parser->idx++] = ch;
1265         } else {
1266                 ret = -EINVAL;
1267                 goto out;
1268         }
1269
1270         *ppos += read;
1271         ret = read;
1272
1273 out:
1274         return ret;
1275 }
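/*
 * Illustrative example (editor's addition): given the user input
 * "123 456", the first call fills parser->buffer with "123" and returns
 * the number of bytes consumed; a later call continues with "456". If a
 * token is cut off by the end of the written data, parser->cont is set so
 * that the next call continues the same token instead of starting a new
 * one.
 */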
1276
1277 /* TODO add a seq_buf_to_buffer() */
1278 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1279 {
1280         int len;
1281
1282         if (trace_seq_used(s) <= s->seq.readpos)
1283                 return -EBUSY;
1284
1285         len = trace_seq_used(s) - s->seq.readpos;
1286         if (cnt > len)
1287                 cnt = len;
1288         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1289
1290         s->seq.readpos += cnt;
1291         return cnt;
1292 }
1293
1294 unsigned long __read_mostly     tracing_thresh;
1295
1296 #ifdef CONFIG_TRACER_MAX_TRACE
1297 /*
1298  * Copy the new maximum trace into the separate maximum-trace
1299  * structure. (this way the maximum trace is permanently saved,
1300  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1301  */
1302 static void
1303 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1304 {
1305         struct trace_buffer *trace_buf = &tr->trace_buffer;
1306         struct trace_buffer *max_buf = &tr->max_buffer;
1307         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1308         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1309
1310         max_buf->cpu = cpu;
1311         max_buf->time_start = data->preempt_timestamp;
1312
1313         max_data->saved_latency = tr->max_latency;
1314         max_data->critical_start = data->critical_start;
1315         max_data->critical_end = data->critical_end;
1316
1317         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1318         max_data->pid = tsk->pid;
1319         /*
1320          * If tsk == current, then use current_uid(), as that does not use
1321          * RCU. The irq tracer can be called out of RCU scope.
1322          */
1323         if (tsk == current)
1324                 max_data->uid = current_uid();
1325         else
1326                 max_data->uid = task_uid(tsk);
1327
1328         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1329         max_data->policy = tsk->policy;
1330         max_data->rt_priority = tsk->rt_priority;
1331
1332         /* record this tasks comm */
1333         tracing_record_cmdline(tsk);
1334 }
1335
1336 /**
1337  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1338  * @tr: tracer
1339  * @tsk: the task with the latency
1340  * @cpu: The cpu that initiated the trace.
1341  *
1342  * Flip the buffers between the @tr and the max_tr and record information
1343  * about which task was the cause of this latency.
1344  */
1345 void
1346 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1347 {
1348         struct ring_buffer *buf;
1349
1350         if (tr->stop_count)
1351                 return;
1352
1353         WARN_ON_ONCE(!irqs_disabled());
1354
1355         if (!tr->allocated_snapshot) {
1356                 /* Only the nop tracer should hit this when disabling */
1357                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358                 return;
1359         }
1360
1361         arch_spin_lock(&tr->max_lock);
1362
1363         buf = tr->trace_buffer.buffer;
1364         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1365         tr->max_buffer.buffer = buf;
1366
1367         __update_max_tr(tr, tsk, cpu);
1368         arch_spin_unlock(&tr->max_lock);
1369 }
1370
1371 /**
1372  * update_max_tr_single - only copy one trace over, and reset the rest
1373  * @tr: tracer
1374  * @tsk: task with the latency
1375  * @cpu: the cpu of the buffer to copy.
1376  *
1377  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1378  */
1379 void
1380 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1381 {
1382         int ret;
1383
1384         if (tr->stop_count)
1385                 return;
1386
1387         WARN_ON_ONCE(!irqs_disabled());
1388         if (!tr->allocated_snapshot) {
1389                 /* Only the nop tracer should hit this when disabling */
1390                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1391                 return;
1392         }
1393
1394         arch_spin_lock(&tr->max_lock);
1395
1396         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1397
1398         if (ret == -EBUSY) {
1399                 /*
1400                  * We failed to swap the buffer due to a commit taking
1401                  * place on this CPU. We fail to record, but we reset
1402                  * the max trace buffer (no one writes directly to it)
1403                  * and flag that it failed.
1404                  */
1405                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1406                         "Failed to swap buffers due to commit in progress\n");
1407         }
1408
1409         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1410
1411         __update_max_tr(tr, tsk, cpu);
1412         arch_spin_unlock(&tr->max_lock);
1413 }
1414 #endif /* CONFIG_TRACER_MAX_TRACE */
1415
1416 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1417 {
1418         /* Iterators are static, they should be filled or empty */
1419         if (trace_buffer_iter(iter, iter->cpu_file))
1420                 return 0;
1421
1422         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1423                                 full);
1424 }
1425
1426 #ifdef CONFIG_FTRACE_STARTUP_TEST
1427 static int run_tracer_selftest(struct tracer *type)
1428 {
1429         struct trace_array *tr = &global_trace;
1430         struct tracer *saved_tracer = tr->current_trace;
1431         int ret;
1432
1433         if (!type->selftest || tracing_selftest_disabled)
1434                 return 0;
1435
1436         /*
1437          * Run a selftest on this tracer.
1438          * Here we reset the trace buffer, and set the current
1439          * tracer to be this tracer. The tracer can then run some
1440          * internal tracing to verify that everything is in order.
1441          * If we fail, we do not register this tracer.
1442          */
1443         tracing_reset_online_cpus(&tr->trace_buffer);
1444
1445         tr->current_trace = type;
1446
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448         if (type->use_max_tr) {
1449                 /* If we expanded the buffers, make sure the max is expanded too */
1450                 if (ring_buffer_expanded)
1451                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1452                                            RING_BUFFER_ALL_CPUS);
1453                 tr->allocated_snapshot = true;
1454         }
1455 #endif
1456
1457         /* the test is responsible for initializing and enabling */
1458         pr_info("Testing tracer %s: ", type->name);
1459         ret = type->selftest(type, tr);
1460         /* the test is responsible for resetting too */
1461         tr->current_trace = saved_tracer;
1462         if (ret) {
1463                 printk(KERN_CONT "FAILED!\n");
1464                 /* Add the warning after printing 'FAILED' */
1465                 WARN_ON(1);
1466                 return -1;
1467         }
1468         /* Only reset on passing, to avoid touching corrupted buffers */
1469         tracing_reset_online_cpus(&tr->trace_buffer);
1470
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472         if (type->use_max_tr) {
1473                 tr->allocated_snapshot = false;
1474
1475                 /* Shrink the max buffer again */
1476                 if (ring_buffer_expanded)
1477                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1478                                            RING_BUFFER_ALL_CPUS);
1479         }
1480 #endif
1481
1482         printk(KERN_CONT "PASSED\n");
1483         return 0;
1484 }
1485 #else
1486 static inline int run_tracer_selftest(struct tracer *type)
1487 {
1488         return 0;
1489 }
1490 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1491
1492 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1493
1494 static void __init apply_trace_boot_options(void);
1495
1496 /**
1497  * register_tracer - register a tracer with the ftrace system.
1498  * @type: the plugin for the tracer
1499  *
1500  * Register a new plugin tracer.
1501  */
1502 int __init register_tracer(struct tracer *type)
1503 {
1504         struct tracer *t;
1505         int ret = 0;
1506
1507         if (!type->name) {
1508                 pr_info("Tracer must have a name\n");
1509                 return -1;
1510         }
1511
1512         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1513                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1514                 return -1;
1515         }
1516
1517         mutex_lock(&trace_types_lock);
1518
1519         tracing_selftest_running = true;
1520
1521         for (t = trace_types; t; t = t->next) {
1522                 if (strcmp(type->name, t->name) == 0) {
1523                         /* already found */
1524                         pr_info("Tracer %s already registered\n",
1525                                 type->name);
1526                         ret = -1;
1527                         goto out;
1528                 }
1529         }
1530
1531         if (!type->set_flag)
1532                 type->set_flag = &dummy_set_flag;
1533         if (!type->flags) {
1534                 /* allocate a dummy tracer_flags */
1535                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1536                 if (!type->flags) {
1537                         ret = -ENOMEM;
1538                         goto out;
1539                 }
1540                 type->flags->val = 0;
1541                 type->flags->opts = dummy_tracer_opt;
1542         } else
1543                 if (!type->flags->opts)
1544                         type->flags->opts = dummy_tracer_opt;
1545
1546         /* store the tracer for __set_tracer_option */
1547         type->flags->trace = type;
1548
1549         ret = run_tracer_selftest(type);
1550         if (ret < 0)
1551                 goto out;
1552
1553         type->next = trace_types;
1554         trace_types = type;
1555         add_tracer_options(&global_trace, type);
1556
1557  out:
1558         tracing_selftest_running = false;
1559         mutex_unlock(&trace_types_lock);
1560
1561         if (ret || !default_bootup_tracer)
1562                 goto out_unlock;
1563
1564         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1565                 goto out_unlock;
1566
1567         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1568         /* Do we want this tracer to start on bootup? */
1569         tracing_set_tracer(&global_trace, type->name);
1570         default_bootup_tracer = NULL;
1571
1572         apply_trace_boot_options();
1573
1574         /* disable other selftests, since this will break them. */
1575         tracing_selftest_disabled = true;
1576 #ifdef CONFIG_FTRACE_STARTUP_TEST
1577         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1578                type->name);
1579 #endif
1580
1581  out_unlock:
1582         return ret;
1583 }
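/*
 * Illustrative sketch (editor's addition, hypothetical tracer): a minimal
 * plugin registers itself from an initcall, roughly:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */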
1584
1585 void tracing_reset(struct trace_buffer *buf, int cpu)
1586 {
1587         struct ring_buffer *buffer = buf->buffer;
1588
1589         if (!buffer)
1590                 return;
1591
1592         ring_buffer_record_disable(buffer);
1593
1594         /* Make sure all commits have finished */
1595         synchronize_sched();
1596         ring_buffer_reset_cpu(buffer, cpu);
1597
1598         ring_buffer_record_enable(buffer);
1599 }
1600
1601 void tracing_reset_online_cpus(struct trace_buffer *buf)
1602 {
1603         struct ring_buffer *buffer = buf->buffer;
1604         int cpu;
1605
1606         if (!buffer)
1607                 return;
1608
1609         ring_buffer_record_disable(buffer);
1610
1611         /* Make sure all commits have finished */
1612         synchronize_sched();
1613
1614         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1615
1616         for_each_online_cpu(cpu)
1617                 ring_buffer_reset_cpu(buffer, cpu);
1618
1619         ring_buffer_record_enable(buffer);
1620 }
1621
1622 /* Must have trace_types_lock held */
1623 void tracing_reset_all_online_cpus(void)
1624 {
1625         struct trace_array *tr;
1626
1627         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1628                 tracing_reset_online_cpus(&tr->trace_buffer);
1629 #ifdef CONFIG_TRACER_MAX_TRACE
1630                 tracing_reset_online_cpus(&tr->max_buffer);
1631 #endif
1632         }
1633 }
1634
1635 #define SAVED_CMDLINES_DEFAULT 128
1636 #define NO_CMDLINE_MAP UINT_MAX
1637 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1638 struct saved_cmdlines_buffer {
1639         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1640         unsigned *map_cmdline_to_pid;
1641         unsigned cmdline_num;
1642         int cmdline_idx;
1643         char *saved_cmdlines;
1644 };
1645 static struct saved_cmdlines_buffer *savedcmd;
1646
1647 /* temporary disable recording */
1648 static atomic_t trace_record_cmdline_disabled __read_mostly;
1649
1650 static inline char *get_saved_cmdlines(int idx)
1651 {
1652         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1653 }
1654
1655 static inline void set_cmdline(int idx, const char *cmdline)
1656 {
1657         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1658 }
1659
1660 static int allocate_cmdlines_buffer(unsigned int val,
1661                                     struct saved_cmdlines_buffer *s)
1662 {
1663         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1664                                         GFP_KERNEL);
1665         if (!s->map_cmdline_to_pid)
1666                 return -ENOMEM;
1667
1668         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1669         if (!s->saved_cmdlines) {
1670                 kfree(s->map_cmdline_to_pid);
1671                 return -ENOMEM;
1672         }
1673
1674         s->cmdline_idx = 0;
1675         s->cmdline_num = val;
1676         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1677                sizeof(s->map_pid_to_cmdline));
1678         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1679                val * sizeof(*s->map_cmdline_to_pid));
1680
1681         return 0;
1682 }
1683
1684 static int trace_create_savedcmd(void)
1685 {
1686         int ret;
1687
1688         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1689         if (!savedcmd)
1690                 return -ENOMEM;
1691
1692         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1693         if (ret < 0) {
1694                 kfree(savedcmd);
1695                 savedcmd = NULL;
1696                 return -ENOMEM;
1697         }
1698
1699         return 0;
1700 }
1701
1702 int is_tracing_stopped(void)
1703 {
1704         return global_trace.stop_count;
1705 }
1706
1707 /**
1708  * tracing_start - quick start of the tracer
1709  *
1710  * If tracing is enabled but was stopped by tracing_stop,
1711  * this will start the tracer back up.
1712  */
1713 void tracing_start(void)
1714 {
1715         struct ring_buffer *buffer;
1716         unsigned long flags;
1717
1718         if (tracing_disabled)
1719                 return;
1720
1721         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1722         if (--global_trace.stop_count) {
1723                 if (global_trace.stop_count < 0) {
1724                         /* Someone screwed up their debugging */
1725                         WARN_ON_ONCE(1);
1726                         global_trace.stop_count = 0;
1727                 }
1728                 goto out;
1729         }
1730
1731         /* Prevent the buffers from switching */
1732         arch_spin_lock(&global_trace.max_lock);
1733
1734         buffer = global_trace.trace_buffer.buffer;
1735         if (buffer)
1736                 ring_buffer_record_enable(buffer);
1737
1738 #ifdef CONFIG_TRACER_MAX_TRACE
1739         buffer = global_trace.max_buffer.buffer;
1740         if (buffer)
1741                 ring_buffer_record_enable(buffer);
1742 #endif
1743
1744         arch_spin_unlock(&global_trace.max_lock);
1745
1746  out:
1747         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1748 }
1749
1750 static void tracing_start_tr(struct trace_array *tr)
1751 {
1752         struct ring_buffer *buffer;
1753         unsigned long flags;
1754
1755         if (tracing_disabled)
1756                 return;
1757
1758         /* If global, we need to also start the max tracer */
1759         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1760                 return tracing_start();
1761
1762         raw_spin_lock_irqsave(&tr->start_lock, flags);
1763
1764         if (--tr->stop_count) {
1765                 if (tr->stop_count < 0) {
1766                         /* Someone screwed up their debugging */
1767                         WARN_ON_ONCE(1);
1768                         tr->stop_count = 0;
1769                 }
1770                 goto out;
1771         }
1772
1773         buffer = tr->trace_buffer.buffer;
1774         if (buffer)
1775                 ring_buffer_record_enable(buffer);
1776
1777  out:
1778         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1779 }
1780
1781 /**
1782  * tracing_stop - quick stop of the tracer
1783  *
1784  * Light weight way to stop tracing. Use in conjunction with
1785  * tracing_start.
1786  */
1787 void tracing_stop(void)
1788 {
1789         struct ring_buffer *buffer;
1790         unsigned long flags;
1791
1792         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1793         if (global_trace.stop_count++)
1794                 goto out;
1795
1796         /* Prevent the buffers from switching */
1797         arch_spin_lock(&global_trace.max_lock);
1798
1799         buffer = global_trace.trace_buffer.buffer;
1800         if (buffer)
1801                 ring_buffer_record_disable(buffer);
1802
1803 #ifdef CONFIG_TRACER_MAX_TRACE
1804         buffer = global_trace.max_buffer.buffer;
1805         if (buffer)
1806                 ring_buffer_record_disable(buffer);
1807 #endif
1808
1809         arch_spin_unlock(&global_trace.max_lock);
1810
1811  out:
1812         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1813 }
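
/*
 * Illustrative sketch (not part of trace.c): tracing_stop() and
 * tracing_start() nest via stop_count, so a debugging helper that wants
 * the ring buffer frozen around a suspect code path can simply bracket
 * it with the pair.  The helper below is hypothetical.
 */
static void example_trace_quiet_section(void (*suspect)(void))
{
        tracing_stop();         /* recording stays off while suspect() runs */
        suspect();
        tracing_start();        /* resumes once stop_count drops back to zero */
}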
1814
1815 static void tracing_stop_tr(struct trace_array *tr)
1816 {
1817         struct ring_buffer *buffer;
1818         unsigned long flags;
1819
1820         /* If global, we need to also stop the max tracer */
1821         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1822                 return tracing_stop();
1823
1824         raw_spin_lock_irqsave(&tr->start_lock, flags);
1825         if (tr->stop_count++)
1826                 goto out;
1827
1828         buffer = tr->trace_buffer.buffer;
1829         if (buffer)
1830                 ring_buffer_record_disable(buffer);
1831
1832  out:
1833         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1834 }
1835
1836 void trace_stop_cmdline_recording(void);
1837
1838 static int trace_save_cmdline(struct task_struct *tsk)
1839 {
1840         unsigned pid, idx;
1841
1842         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1843                 return 0;
1844
1845         /*
1846          * It's not the end of the world if we don't get
1847          * the lock, but we also don't want to spin
1848          * nor do we want to disable interrupts,
1849          * so if we miss here, then better luck next time.
1850          */
1851         if (!arch_spin_trylock(&trace_cmdline_lock))
1852                 return 0;
1853
1854         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1855         if (idx == NO_CMDLINE_MAP) {
1856                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1857
1858                 /*
1859                  * Check whether the cmdline buffer at idx has a pid
1860                  * mapped. We are going to overwrite that entry so we
1861                  * need to clear the map_pid_to_cmdline. Otherwise we
1862                  * would read the new comm for the old pid.
1863                  */
1864                 pid = savedcmd->map_cmdline_to_pid[idx];
1865                 if (pid != NO_CMDLINE_MAP)
1866                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1867
1868                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1869                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1870
1871                 savedcmd->cmdline_idx = idx;
1872         }
1873
1874         set_cmdline(idx, tsk->comm);
1875
1876         arch_spin_unlock(&trace_cmdline_lock);
1877
1878         return 1;
1879 }
1880
1881 static void __trace_find_cmdline(int pid, char comm[])
1882 {
1883         unsigned map;
1884
1885         if (!pid) {
1886                 strcpy(comm, "<idle>");
1887                 return;
1888         }
1889
1890         if (WARN_ON_ONCE(pid < 0)) {
1891                 strcpy(comm, "<XXX>");
1892                 return;
1893         }
1894
1895         if (pid > PID_MAX_DEFAULT) {
1896                 strcpy(comm, "<...>");
1897                 return;
1898         }
1899
1900         map = savedcmd->map_pid_to_cmdline[pid];
1901         if (map != NO_CMDLINE_MAP)
1902                 strcpy(comm, get_saved_cmdlines(map));
1903         else
1904                 strcpy(comm, "<...>");
1905 }
1906
1907 void trace_find_cmdline(int pid, char comm[])
1908 {
1909         preempt_disable();
1910         arch_spin_lock(&trace_cmdline_lock);
1911
1912         __trace_find_cmdline(pid, comm);
1913
1914         arch_spin_unlock(&trace_cmdline_lock);
1915         preempt_enable();
1916 }
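
/*
 * Illustrative sketch (not part of trace.c): resolving a pid recorded in
 * a trace entry back to a command name.  The buffer must hold at least
 * TASK_COMM_LEN bytes; unknown pids come back as "<...>" and pid 0 as
 * "<idle>".  The helper name is hypothetical.
 */
static void example_show_comm(int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        pr_info("pid %d last ran as %s\n", pid, comm);
}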
1917
1918 void tracing_record_cmdline(struct task_struct *tsk)
1919 {
1920         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1921                 return;
1922
1923         if (!__this_cpu_read(trace_cmdline_save))
1924                 return;
1925
1926         if (trace_save_cmdline(tsk))
1927                 __this_cpu_write(trace_cmdline_save, false);
1928 }
1929
1930 void
1931 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1932                              int pc)
1933 {
1934         struct task_struct *tsk = current;
1935
1936         entry->preempt_count            = pc & 0xff;
1937         entry->pid                      = (tsk) ? tsk->pid : 0;
1938         entry->flags =
1939 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1940                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1941 #else
1942                 TRACE_FLAG_IRQS_NOSUPPORT |
1943 #endif
1944                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1945                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1946                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1947                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1948                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1949 }
1950 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
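
/*
 * Illustrative sketch (not part of trace.c): the state packed into
 * entry->flags above can be unpacked with the same TRACE_FLAG_* bits,
 * which is essentially what the latency output columns report.  The
 * helper name is hypothetical.
 */
static void example_decode_entry_flags(const struct trace_entry *entry)
{
        pr_info("irqs-off=%d hardirq=%d softirq=%d nmi=%d preempt=%d\n",
                !!(entry->flags & TRACE_FLAG_IRQS_OFF),
                !!(entry->flags & TRACE_FLAG_HARDIRQ),
                !!(entry->flags & TRACE_FLAG_SOFTIRQ),
                !!(entry->flags & TRACE_FLAG_NMI),
                entry->preempt_count);
}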
1951
1952 struct ring_buffer_event *
1953 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1954                           int type,
1955                           unsigned long len,
1956                           unsigned long flags, int pc)
1957 {
1958         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
1959 }
1960
1961 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1962 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1963 static int trace_buffered_event_ref;
1964
1965 /**
1966  * trace_buffered_event_enable - enable buffering events
1967  *
1968  * When events are being filtered, it is quicker to use a temporary
1969  * buffer to write the event data into if there's a likely chance
1970  * that it will not be committed. Discarding an event from the ring
1971  * buffer is not as fast as committing one, and is much slower than
1972  * copying the data in and committing it in one go.
1973  *
1974  * When an event is to be filtered, allocate per cpu buffers to
1975  * write the event data into, and if the event is filtered and discarded
1976  * it is simply dropped; otherwise, the entire data is committed
1977  * in one shot.
1978  */
1979 void trace_buffered_event_enable(void)
1980 {
1981         struct ring_buffer_event *event;
1982         struct page *page;
1983         int cpu;
1984
1985         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1986
1987         if (trace_buffered_event_ref++)
1988                 return;
1989
1990         for_each_tracing_cpu(cpu) {
1991                 page = alloc_pages_node(cpu_to_node(cpu),
1992                                         GFP_KERNEL | __GFP_NORETRY, 0);
1993                 if (!page)
1994                         goto failed;
1995
1996                 event = page_address(page);
1997                 memset(event, 0, sizeof(*event));
1998
1999                 per_cpu(trace_buffered_event, cpu) = event;
2000
2001                 preempt_disable();
2002                 if (cpu == smp_processor_id() &&
2003                     this_cpu_read(trace_buffered_event) !=
2004                     per_cpu(trace_buffered_event, cpu))
2005                         WARN_ON_ONCE(1);
2006                 preempt_enable();
2007         }
2008
2009         return;
2010  failed:
2011         trace_buffered_event_disable();
2012 }
2013
2014 static void enable_trace_buffered_event(void *data)
2015 {
2016         /* Probably not needed, but do it anyway */
2017         smp_rmb();
2018         this_cpu_dec(trace_buffered_event_cnt);
2019 }
2020
2021 static void disable_trace_buffered_event(void *data)
2022 {
2023         this_cpu_inc(trace_buffered_event_cnt);
2024 }
2025
2026 /**
2027  * trace_buffered_event_disable - disable buffering events
2028  *
2029  * When a filter is removed, it is faster to not use the buffered
2030  * events, and to commit directly into the ring buffer. Free up
2031  * the temp buffers when there are no more users. This requires
2032  * special synchronization with current events.
2033  */
2034 void trace_buffered_event_disable(void)
2035 {
2036         int cpu;
2037
2038         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2039
2040         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2041                 return;
2042
2043         if (--trace_buffered_event_ref)
2044                 return;
2045
2046         preempt_disable();
2047         /* For each CPU, set the buffer as used. */
2048         smp_call_function_many(tracing_buffer_mask,
2049                                disable_trace_buffered_event, NULL, 1);
2050         preempt_enable();
2051
2052         /* Wait for all current users to finish */
2053         synchronize_sched();
2054
2055         for_each_tracing_cpu(cpu) {
2056                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2057                 per_cpu(trace_buffered_event, cpu) = NULL;
2058         }
2059         /*
2060          * Make sure trace_buffered_event is NULL before clearing
2061          * trace_buffered_event_cnt.
2062          */
2063         smp_wmb();
2064
2065         preempt_disable();
2066         /* Do the work on each cpu */
2067         smp_call_function_many(tracing_buffer_mask,
2068                                enable_trace_buffered_event, NULL, 1);
2069         preempt_enable();
2070 }
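
/*
 * Illustrative sketch (not part of trace.c): the pair above is
 * reference counted and, as the WARN_ON_ONCE()s document, must be
 * called under event_mutex.  A filter setup/teardown path would use
 * it roughly like this (hypothetical callers).
 */
static void example_filter_installed(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();  /* allocates the per-cpu scratch pages */
        mutex_unlock(&event_mutex);
}

static void example_filter_removed(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_disable(); /* freed once the last user is gone */
        mutex_unlock(&event_mutex);
}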
2071
2072 static struct ring_buffer *temp_buffer;
2073
2074 struct ring_buffer_event *
2075 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2076                           struct trace_event_file *trace_file,
2077                           int type, unsigned long len,
2078                           unsigned long flags, int pc)
2079 {
2080         struct ring_buffer_event *entry;
2081         int val;
2082
2083         *current_rb = trace_file->tr->trace_buffer.buffer;
2084
2085         if ((trace_file->flags &
2086              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2087             (entry = this_cpu_read(trace_buffered_event))) {
2088                 /* Try to use the per cpu buffer first */
2089                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2090                 if (val == 1) {
2091                         trace_event_setup(entry, type, flags, pc);
2092                         entry->array[0] = len;
2093                         return entry;
2094                 }
2095                 this_cpu_dec(trace_buffered_event_cnt);
2096         }
2097
2098         entry = __trace_buffer_lock_reserve(*current_rb,
2099                                             type, len, flags, pc);
2100         /*
2101          * If tracing is off, but we have triggers enabled
2102          * we still need to look at the event data. Use the temp_buffer
2103          * to store the trace event for the trigger to use. It's recursion
2104          * safe and will not be recorded anywhere.
2105          */
2106         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2107                 *current_rb = temp_buffer;
2108                 entry = __trace_buffer_lock_reserve(*current_rb,
2109                                                     type, len, flags, pc);
2110         }
2111         return entry;
2112 }
2113 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2114
2115 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2116 static DEFINE_MUTEX(tracepoint_printk_mutex);
2117
2118 static void output_printk(struct trace_event_buffer *fbuffer)
2119 {
2120         struct trace_event_call *event_call;
2121         struct trace_event *event;
2122         unsigned long flags;
2123         struct trace_iterator *iter = tracepoint_print_iter;
2124
2125         /* We should never get here if iter is NULL */
2126         if (WARN_ON_ONCE(!iter))
2127                 return;
2128
2129         event_call = fbuffer->trace_file->event_call;
2130         if (!event_call || !event_call->event.funcs ||
2131             !event_call->event.funcs->trace)
2132                 return;
2133
2134         event = &fbuffer->trace_file->event_call->event;
2135
2136         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2137         trace_seq_init(&iter->seq);
2138         iter->ent = fbuffer->entry;
2139         event_call->event.funcs->trace(iter, 0, event);
2140         trace_seq_putc(&iter->seq, 0);
2141         printk("%s", iter->seq.buffer);
2142
2143         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2144 }
2145
2146 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2147                              void __user *buffer, size_t *lenp,
2148                              loff_t *ppos)
2149 {
2150         int save_tracepoint_printk;
2151         int ret;
2152
2153         mutex_lock(&tracepoint_printk_mutex);
2154         save_tracepoint_printk = tracepoint_printk;
2155
2156         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2157
2158         /*
2159          * This forces an early exit, as tracepoint_printk
2160          * is always zero when tracepoint_print_iter is not allocated.
2161          */
2162         if (!tracepoint_print_iter)
2163                 tracepoint_printk = 0;
2164
2165         if (save_tracepoint_printk == tracepoint_printk)
2166                 goto out;
2167
2168         if (tracepoint_printk)
2169                 static_key_enable(&tracepoint_printk_key.key);
2170         else
2171                 static_key_disable(&tracepoint_printk_key.key);
2172
2173  out:
2174         mutex_unlock(&tracepoint_printk_mutex);
2175
2176         return ret;
2177 }
2178
2179 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2180 {
2181         if (static_key_false(&tracepoint_printk_key.key))
2182                 output_printk(fbuffer);
2183
2184         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2185                                     fbuffer->event, fbuffer->entry,
2186                                     fbuffer->flags, fbuffer->pc);
2187 }
2188 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2189
2190 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2191                                      struct ring_buffer *buffer,
2192                                      struct ring_buffer_event *event,
2193                                      unsigned long flags, int pc,
2194                                      struct pt_regs *regs)
2195 {
2196         __buffer_unlock_commit(buffer, event);
2197
2198         /*
2199          * If regs is not set, then skip the following callers:
2200          *   trace_buffer_unlock_commit_regs
2201          *   event_trigger_unlock_commit
2202          *   trace_event_buffer_commit
2203          *   trace_event_raw_event_sched_switch
2204          * Note, we can still get here via blktrace, wakeup tracer
2205          * and mmiotrace, but that's ok if they lose a function or
2206          * two. They are not that meaningful.
2207          */
2208         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2209         ftrace_trace_userstack(buffer, flags, pc);
2210 }
2211
2212 /*
2213  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2214  */
2215 void
2216 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2217                                    struct ring_buffer_event *event)
2218 {
2219         __buffer_unlock_commit(buffer, event);
2220 }
2221
2222 static void
2223 trace_process_export(struct trace_export *export,
2224                struct ring_buffer_event *event)
2225 {
2226         struct trace_entry *entry;
2227         unsigned int size = 0;
2228
2229         entry = ring_buffer_event_data(event);
2230         size = ring_buffer_event_length(event);
2231         export->write(entry, size);
2232 }
2233
2234 static DEFINE_MUTEX(ftrace_export_lock);
2235
2236 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2237
2238 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2239
2240 static inline void ftrace_exports_enable(void)
2241 {
2242         static_branch_enable(&ftrace_exports_enabled);
2243 }
2244
2245 static inline void ftrace_exports_disable(void)
2246 {
2247         static_branch_disable(&ftrace_exports_enabled);
2248 }
2249
2250 void ftrace_exports(struct ring_buffer_event *event)
2251 {
2252         struct trace_export *export;
2253
2254         preempt_disable_notrace();
2255
2256         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2257         while (export) {
2258                 trace_process_export(export, event);
2259                 export = rcu_dereference_raw_notrace(export->next);
2260         }
2261
2262         preempt_enable_notrace();
2263 }
2264
2265 static inline void
2266 add_trace_export(struct trace_export **list, struct trace_export *export)
2267 {
2268         rcu_assign_pointer(export->next, *list);
2269         /*
2270          * We are adding export to the list, but another
2271          * CPU might be walking that list. We need to make sure
2272          * the export->next pointer is valid before another CPU sees
2273          * the export pointer inserted into the list.
2274          */
2275         rcu_assign_pointer(*list, export);
2276 }
2277
2278 static inline int
2279 rm_trace_export(struct trace_export **list, struct trace_export *export)
2280 {
2281         struct trace_export **p;
2282
2283         for (p = list; *p != NULL; p = &(*p)->next)
2284                 if (*p == export)
2285                         break;
2286
2287         if (*p != export)
2288                 return -1;
2289
2290         rcu_assign_pointer(*p, (*p)->next);
2291
2292         return 0;
2293 }
2294
2295 static inline void
2296 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2297 {
2298         if (*list == NULL)
2299                 ftrace_exports_enable();
2300
2301         add_trace_export(list, export);
2302 }
2303
2304 static inline int
2305 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2306 {
2307         int ret;
2308
2309         ret = rm_trace_export(list, export);
2310         if (*list == NULL)
2311                 ftrace_exports_disable();
2312
2313         return ret;
2314 }
2315
2316 int register_ftrace_export(struct trace_export *export)
2317 {
2318         if (WARN_ON_ONCE(!export->write))
2319                 return -1;
2320
2321         mutex_lock(&ftrace_export_lock);
2322
2323         add_ftrace_export(&ftrace_exports_list, export);
2324
2325         mutex_unlock(&ftrace_export_lock);
2326
2327         return 0;
2328 }
2329 EXPORT_SYMBOL_GPL(register_ftrace_export);
2330
2331 int unregister_ftrace_export(struct trace_export *export)
2332 {
2333         int ret;
2334
2335         mutex_lock(&ftrace_export_lock);
2336
2337         ret = rm_ftrace_export(&ftrace_exports_list, export);
2338
2339         mutex_unlock(&ftrace_export_lock);
2340
2341         return ret;
2342 }
2343 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
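
/*
 * Illustrative sketch (not part of trace.c): a minimal trace export.
 * The two-argument ->write() signature is assumed from the way
 * trace_process_export() invokes it above; the names are hypothetical.
 */
static void example_export_write(const void *ent, unsigned int size)
{
        pr_debug("ftrace export: %u byte entry\n", size);
}

static struct trace_export example_export = {
        .write  = example_export_write,
};

/*
 * register_ftrace_export(&example_export) hooks the callback into the
 * function tracer; unregister_ftrace_export(&example_export) removes
 * it again and drops the static branch when the list becomes empty.
 */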
2344
2345 void
2346 trace_function(struct trace_array *tr,
2347                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2348                int pc)
2349 {
2350         struct trace_event_call *call = &event_function;
2351         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2352         struct ring_buffer_event *event;
2353         struct ftrace_entry *entry;
2354
2355         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2356                                             flags, pc);
2357         if (!event)
2358                 return;
2359         entry   = ring_buffer_event_data(event);
2360         entry->ip                       = ip;
2361         entry->parent_ip                = parent_ip;
2362
2363         if (!call_filter_check_discard(call, entry, buffer, event)) {
2364                 if (static_branch_unlikely(&ftrace_exports_enabled))
2365                         ftrace_exports(event);
2366                 __buffer_unlock_commit(buffer, event);
2367         }
2368 }
2369
2370 #ifdef CONFIG_STACKTRACE
2371
2372 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2373 struct ftrace_stack {
2374         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2375 };
2376
2377 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2378 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2379
2380 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2381                                  unsigned long flags,
2382                                  int skip, int pc, struct pt_regs *regs)
2383 {
2384         struct trace_event_call *call = &event_kernel_stack;
2385         struct ring_buffer_event *event;
2386         struct stack_entry *entry;
2387         struct stack_trace trace;
2388         int use_stack;
2389         int size = FTRACE_STACK_ENTRIES;
2390
2391         trace.nr_entries        = 0;
2392         trace.skip              = skip;
2393
2394         /*
2395          * Add two, for this function and the call to save_stack_trace().
2396          * If regs is set, then these functions will not be in the way.
2397          */
2398         if (!regs)
2399                 trace.skip += 2;
2400
2401         /*
2402          * Since events can happen in NMIs, there's no safe way to
2403          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2404          * or NMI comes in, it will just have to use the default
2405          * FTRACE_STACK_ENTRIES.
2406          */
2407         preempt_disable_notrace();
2408
2409         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2410         /*
2411          * We don't need any atomic variables, just a barrier.
2412          * If an interrupt comes in, we don't care, because it would
2413          * have exited and put the counter back to what we want.
2414          * We just need a barrier to keep gcc from moving things
2415          * around.
2416          */
2417         barrier();
2418         if (use_stack == 1) {
2419                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2420                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2421
2422                 if (regs)
2423                         save_stack_trace_regs(regs, &trace);
2424                 else
2425                         save_stack_trace(&trace);
2426
2427                 if (trace.nr_entries > size)
2428                         size = trace.nr_entries;
2429         } else
2430                 /* From now on, use_stack is a boolean */
2431                 use_stack = 0;
2432
2433         size *= sizeof(unsigned long);
2434
2435         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2436                                             sizeof(*entry) + size, flags, pc);
2437         if (!event)
2438                 goto out;
2439         entry = ring_buffer_event_data(event);
2440
2441         memset(&entry->caller, 0, size);
2442
2443         if (use_stack)
2444                 memcpy(&entry->caller, trace.entries,
2445                        trace.nr_entries * sizeof(unsigned long));
2446         else {
2447                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2448                 trace.entries           = entry->caller;
2449                 if (regs)
2450                         save_stack_trace_regs(regs, &trace);
2451                 else
2452                         save_stack_trace(&trace);
2453         }
2454
2455         entry->size = trace.nr_entries;
2456
2457         if (!call_filter_check_discard(call, entry, buffer, event))
2458                 __buffer_unlock_commit(buffer, event);
2459
2460  out:
2461         /* Again, don't let gcc optimize things here */
2462         barrier();
2463         __this_cpu_dec(ftrace_stack_reserve);
2464         preempt_enable_notrace();
2465
2466 }
2467
2468 static inline void ftrace_trace_stack(struct trace_array *tr,
2469                                       struct ring_buffer *buffer,
2470                                       unsigned long flags,
2471                                       int skip, int pc, struct pt_regs *regs)
2472 {
2473         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2474                 return;
2475
2476         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2477 }
2478
2479 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2480                    int pc)
2481 {
2482         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2483 }
2484
2485 /**
2486  * trace_dump_stack - record a stack back trace in the trace buffer
2487  * @skip: Number of functions to skip (helper handlers)
2488  */
2489 void trace_dump_stack(int skip)
2490 {
2491         unsigned long flags;
2492
2493         if (tracing_disabled || tracing_selftest_running)
2494                 return;
2495
2496         local_save_flags(flags);
2497
2498         /*
2499          * Skip 3 more frames; that seems to get us to the caller
2500          * of this function.
2501          */
2502         skip += 3;
2503         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2504                              flags, skip, preempt_count(), NULL);
2505 }
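
/*
 * Illustrative sketch (not part of trace.c): recording a kernel stack
 * trace at a point of interest, e.g. in a suspect error path.  A skip
 * of 0 starts the dump at the caller of trace_dump_stack() itself.
 */
static void example_mark_error_path(void)
{
        trace_dump_stack(0);
}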
2506
2507 static DEFINE_PER_CPU(int, user_stack_count);
2508
2509 void
2510 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2511 {
2512         struct trace_event_call *call = &event_user_stack;
2513         struct ring_buffer_event *event;
2514         struct userstack_entry *entry;
2515         struct stack_trace trace;
2516
2517         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2518                 return;
2519
2520         /*
2521          * NMIs cannot handle page faults, even with fixups.
2522          * Saving the user stack can (and often does) fault.
2523          */
2524         if (unlikely(in_nmi()))
2525                 return;
2526
2527         /*
2528          * prevent recursion, since the user stack tracing may
2529          * trigger other kernel events.
2530          */
2531         preempt_disable();
2532         if (__this_cpu_read(user_stack_count))
2533                 goto out;
2534
2535         __this_cpu_inc(user_stack_count);
2536
2537         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2538                                             sizeof(*entry), flags, pc);
2539         if (!event)
2540                 goto out_drop_count;
2541         entry   = ring_buffer_event_data(event);
2542
2543         entry->tgid             = current->tgid;
2544         memset(&entry->caller, 0, sizeof(entry->caller));
2545
2546         trace.nr_entries        = 0;
2547         trace.max_entries       = FTRACE_STACK_ENTRIES;
2548         trace.skip              = 0;
2549         trace.entries           = entry->caller;
2550
2551         save_stack_trace_user(&trace);
2552         if (!call_filter_check_discard(call, entry, buffer, event))
2553                 __buffer_unlock_commit(buffer, event);
2554
2555  out_drop_count:
2556         __this_cpu_dec(user_stack_count);
2557  out:
2558         preempt_enable();
2559 }
2560
2561 #ifdef UNUSED
2562 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2563 {
2564         ftrace_trace_userstack(tr, flags, preempt_count());
2565 }
2566 #endif /* UNUSED */
2567
2568 #endif /* CONFIG_STACKTRACE */
2569
2570 /* created for use with alloc_percpu */
2571 struct trace_buffer_struct {
2572         int nesting;
2573         char buffer[4][TRACE_BUF_SIZE];
2574 };
2575
2576 static struct trace_buffer_struct *trace_percpu_buffer;
2577
2578 /*
2579  * This allows for lockless recording.  If we're nested too deeply, then
2580  * this returns NULL.
2581  */
2582 static char *get_trace_buf(void)
2583 {
2584         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2585
2586         if (!buffer || buffer->nesting >= 4)
2587                 return NULL;
2588
2589         return &buffer->buffer[buffer->nesting++][0];
2590 }
2591
2592 static void put_trace_buf(void)
2593 {
2594         this_cpu_dec(trace_percpu_buffer->nesting);
2595 }
2596
2597 static int alloc_percpu_trace_buffer(void)
2598 {
2599         struct trace_buffer_struct *buffers;
2600
2601         buffers = alloc_percpu(struct trace_buffer_struct);
2602         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2603                 return -ENOMEM;
2604
2605         trace_percpu_buffer = buffers;
2606         return 0;
2607 }
2608
2609 static int buffers_allocated;
2610
2611 void trace_printk_init_buffers(void)
2612 {
2613         if (buffers_allocated)
2614                 return;
2615
2616         if (alloc_percpu_trace_buffer())
2617                 return;
2618
2619         /* trace_printk() is for debug use only. Don't use it in production. */
2620
2621         pr_warn("\n");
2622         pr_warn("**********************************************************\n");
2623         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2624         pr_warn("**                                                      **\n");
2625         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2626         pr_warn("**                                                      **\n");
2627         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2628         pr_warn("** unsafe for production use.                           **\n");
2629         pr_warn("**                                                      **\n");
2630         pr_warn("** If you see this message and you are not debugging    **\n");
2631         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2632         pr_warn("**                                                      **\n");
2633         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2634         pr_warn("**********************************************************\n");
2635
2636         /* Expand the buffers to set size */
2637         tracing_update_buffers();
2638
2639         buffers_allocated = 1;
2640
2641         /*
2642          * trace_printk_init_buffers() can be called by modules.
2643          * If that happens, then we need to start cmdline recording
2644          * directly here. If the global_trace.buffer is already
2645          * allocated here, then this was called by module code.
2646          */
2647         if (global_trace.trace_buffer.buffer)
2648                 tracing_start_cmdline_record();
2649 }
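
/*
 * Illustrative sketch (not part of trace.c): the trace_printk() macro
 * (from linux/kernel.h) is what feeds these buffers.  As the banner
 * above stresses, it is for debugging only; the helper is hypothetical.
 */
static void example_debug_value(unsigned long val)
{
        trace_printk("val=%lu at %pS\n", val, (void *)_THIS_IP_);
}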
2650
2651 void trace_printk_start_comm(void)
2652 {
2653         /* Start tracing comms if trace printk is set */
2654         if (!buffers_allocated)
2655                 return;
2656         tracing_start_cmdline_record();
2657 }
2658
2659 static void trace_printk_start_stop_comm(int enabled)
2660 {
2661         if (!buffers_allocated)
2662                 return;
2663
2664         if (enabled)
2665                 tracing_start_cmdline_record();
2666         else
2667                 tracing_stop_cmdline_record();
2668 }
2669
2670 /**
2671  * trace_vbprintk - write binary msg to tracing buffer
2672  *
2673  */
2674 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2675 {
2676         struct trace_event_call *call = &event_bprint;
2677         struct ring_buffer_event *event;
2678         struct ring_buffer *buffer;
2679         struct trace_array *tr = &global_trace;
2680         struct bprint_entry *entry;
2681         unsigned long flags;
2682         char *tbuffer;
2683         int len = 0, size, pc;
2684
2685         if (unlikely(tracing_selftest_running || tracing_disabled))
2686                 return 0;
2687
2688         /* Don't pollute graph traces with trace_vprintk internals */
2689         pause_graph_tracing();
2690
2691         pc = preempt_count();
2692         preempt_disable_notrace();
2693
2694         tbuffer = get_trace_buf();
2695         if (!tbuffer) {
2696                 len = 0;
2697                 goto out_nobuffer;
2698         }
2699
2700         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2701
2702         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2703                 goto out;
2704
2705         local_save_flags(flags);
2706         size = sizeof(*entry) + sizeof(u32) * len;
2707         buffer = tr->trace_buffer.buffer;
2708         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2709                                             flags, pc);
2710         if (!event)
2711                 goto out;
2712         entry = ring_buffer_event_data(event);
2713         entry->ip                       = ip;
2714         entry->fmt                      = fmt;
2715
2716         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2717         if (!call_filter_check_discard(call, entry, buffer, event)) {
2718                 __buffer_unlock_commit(buffer, event);
2719                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2720         }
2721
2722 out:
2723         put_trace_buf();
2724
2725 out_nobuffer:
2726         preempt_enable_notrace();
2727         unpause_graph_tracing();
2728
2729         return len;
2730 }
2731 EXPORT_SYMBOL_GPL(trace_vbprintk);
2732
2733 static int
2734 __trace_array_vprintk(struct ring_buffer *buffer,
2735                       unsigned long ip, const char *fmt, va_list args)
2736 {
2737         struct trace_event_call *call = &event_print;
2738         struct ring_buffer_event *event;
2739         int len = 0, size, pc;
2740         struct print_entry *entry;
2741         unsigned long flags;
2742         char *tbuffer;
2743
2744         if (tracing_disabled || tracing_selftest_running)
2745                 return 0;
2746
2747         /* Don't pollute graph traces with trace_vprintk internals */
2748         pause_graph_tracing();
2749
2750         pc = preempt_count();
2751         preempt_disable_notrace();
2752
2753
2754         tbuffer = get_trace_buf();
2755         if (!tbuffer) {
2756                 len = 0;
2757                 goto out_nobuffer;
2758         }
2759
2760         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2761
2762         local_save_flags(flags);
2763         size = sizeof(*entry) + len + 1;
2764         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2765                                             flags, pc);
2766         if (!event)
2767                 goto out;
2768         entry = ring_buffer_event_data(event);
2769         entry->ip = ip;
2770
2771         memcpy(&entry->buf, tbuffer, len + 1);
2772         if (!call_filter_check_discard(call, entry, buffer, event)) {
2773                 __buffer_unlock_commit(buffer, event);
2774                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2775         }
2776
2777 out:
2778         put_trace_buf();
2779
2780 out_nobuffer:
2781         preempt_enable_notrace();
2782         unpause_graph_tracing();
2783
2784         return len;
2785 }
2786
2787 int trace_array_vprintk(struct trace_array *tr,
2788                         unsigned long ip, const char *fmt, va_list args)
2789 {
2790         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2791 }
2792
2793 int trace_array_printk(struct trace_array *tr,
2794                        unsigned long ip, const char *fmt, ...)
2795 {
2796         int ret;
2797         va_list ap;
2798
2799         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2800                 return 0;
2801
2802         va_start(ap, fmt);
2803         ret = trace_array_vprintk(tr, ip, fmt, ap);
2804         va_end(ap);
2805         return ret;
2806 }
2807
2808 int trace_array_printk_buf(struct ring_buffer *buffer,
2809                            unsigned long ip, const char *fmt, ...)
2810 {
2811         int ret;
2812         va_list ap;
2813
2814         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2815                 return 0;
2816
2817         va_start(ap, fmt);
2818         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2819         va_end(ap);
2820         return ret;
2821 }
2822
2823 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2824 {
2825         return trace_array_vprintk(&global_trace, ip, fmt, args);
2826 }
2827 EXPORT_SYMBOL_GPL(trace_vprintk);
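
/*
 * Illustrative sketch (not part of trace.c): trace_vprintk() is the
 * va_list flavour exported above, so a varargs wrapper is just this
 * (the wrapper name is hypothetical).
 */
static __printf(2, 3) int example_tprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}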
2828
2829 static void trace_iterator_increment(struct trace_iterator *iter)
2830 {
2831         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2832
2833         iter->idx++;
2834         if (buf_iter)
2835                 ring_buffer_read(buf_iter, NULL);
2836 }
2837
2838 static struct trace_entry *
2839 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2840                 unsigned long *lost_events)
2841 {
2842         struct ring_buffer_event *event;
2843         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2844
2845         if (buf_iter)
2846                 event = ring_buffer_iter_peek(buf_iter, ts);
2847         else
2848                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2849                                          lost_events);
2850
2851         if (event) {
2852                 iter->ent_size = ring_buffer_event_length(event);
2853                 return ring_buffer_event_data(event);
2854         }
2855         iter->ent_size = 0;
2856         return NULL;
2857 }
2858
2859 static struct trace_entry *
2860 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2861                   unsigned long *missing_events, u64 *ent_ts)
2862 {
2863         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2864         struct trace_entry *ent, *next = NULL;
2865         unsigned long lost_events = 0, next_lost = 0;
2866         int cpu_file = iter->cpu_file;
2867         u64 next_ts = 0, ts;
2868         int next_cpu = -1;
2869         int next_size = 0;
2870         int cpu;
2871
2872         /*
2873          * If we are in a per_cpu trace file, don't bother iterating over
2874          * all CPUs; just peek at that CPU directly.
2875          */
2876         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2877                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2878                         return NULL;
2879                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2880                 if (ent_cpu)
2881                         *ent_cpu = cpu_file;
2882
2883                 return ent;
2884         }
2885
2886         for_each_tracing_cpu(cpu) {
2887
2888                 if (ring_buffer_empty_cpu(buffer, cpu))
2889                         continue;
2890
2891                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2892
2893                 /*
2894                  * Pick the entry with the smallest timestamp:
2895                  */
2896                 if (ent && (!next || ts < next_ts)) {
2897                         next = ent;
2898                         next_cpu = cpu;
2899                         next_ts = ts;
2900                         next_lost = lost_events;
2901                         next_size = iter->ent_size;
2902                 }
2903         }
2904
2905         iter->ent_size = next_size;
2906
2907         if (ent_cpu)
2908                 *ent_cpu = next_cpu;
2909
2910         if (ent_ts)
2911                 *ent_ts = next_ts;
2912
2913         if (missing_events)
2914                 *missing_events = next_lost;
2915
2916         return next;
2917 }
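
/*
 * Illustrative sketch (not part of trace.c): the per-cpu merge above
 * boils down to "take the entry with the smallest timestamp".  A
 * stripped-down analogue of that selection over an array of candidate
 * timestamps (0 meaning "buffer empty") looks like this.
 */
static int example_pick_earliest(const u64 *ts, int nr_cpus)
{
        int cpu, best = -1;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                if (!ts[cpu])
                        continue;
                if (best < 0 || ts[cpu] < ts[best])
                        best = cpu;
        }

        return best;    /* -1 when every buffer is empty */
}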
2918
2919 /* Find the next real entry, without updating the iterator itself */
2920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2921                                           int *ent_cpu, u64 *ent_ts)
2922 {
2923         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2924 }
2925
2926 /* Find the next real entry, and increment the iterator to the next entry */
2927 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2928 {
2929         iter->ent = __find_next_entry(iter, &iter->cpu,
2930                                       &iter->lost_events, &iter->ts);
2931
2932         if (iter->ent)
2933                 trace_iterator_increment(iter);
2934
2935         return iter->ent ? iter : NULL;
2936 }
2937
2938 static void trace_consume(struct trace_iterator *iter)
2939 {
2940         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2941                             &iter->lost_events);
2942 }
2943
2944 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2945 {
2946         struct trace_iterator *iter = m->private;
2947         int i = (int)*pos;
2948         void *ent;
2949
2950         WARN_ON_ONCE(iter->leftover);
2951
2952         (*pos)++;
2953
2954         /* can't go backwards */
2955         if (iter->idx > i)
2956                 return NULL;
2957
2958         if (iter->idx < 0)
2959                 ent = trace_find_next_entry_inc(iter);
2960         else
2961                 ent = iter;
2962
2963         while (ent && iter->idx < i)
2964                 ent = trace_find_next_entry_inc(iter);
2965
2966         iter->pos = *pos;
2967
2968         return ent;
2969 }
2970
2971 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2972 {
2973         struct ring_buffer_event *event;
2974         struct ring_buffer_iter *buf_iter;
2975         unsigned long entries = 0;
2976         u64 ts;
2977
2978         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2979
2980         buf_iter = trace_buffer_iter(iter, cpu);
2981         if (!buf_iter)
2982                 return;
2983
2984         ring_buffer_iter_reset(buf_iter);
2985
2986         /*
2987          * We could have the case with the max latency tracers
2988          * that a reset never took place on a cpu. This is evidenced
2989          * by the timestamp being before the start of the buffer.
2990          */
2991         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2992                 if (ts >= iter->trace_buffer->time_start)
2993                         break;
2994                 entries++;
2995                 ring_buffer_read(buf_iter, NULL);
2996         }
2997
2998         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2999 }
3000
3001 /*
3002  * The current tracer is copied to avoid taking a global lock
3003  * all around.
3004  */
3005 static void *s_start(struct seq_file *m, loff_t *pos)
3006 {
3007         struct trace_iterator *iter = m->private;
3008         struct trace_array *tr = iter->tr;
3009         int cpu_file = iter->cpu_file;
3010         void *p = NULL;
3011         loff_t l = 0;
3012         int cpu;
3013
3014         /*
3015          * copy the tracer to avoid using a global lock all around.
3016          * iter->trace is a copy of current_trace; the pointer to the
3017          * name may be used instead of a strcmp(), as iter->trace->name
3018          * will point to the same string as current_trace->name.
3019          */
3020         mutex_lock(&trace_types_lock);
3021         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3022                 *iter->trace = *tr->current_trace;
3023         mutex_unlock(&trace_types_lock);
3024
3025 #ifdef CONFIG_TRACER_MAX_TRACE
3026         if (iter->snapshot && iter->trace->use_max_tr)
3027                 return ERR_PTR(-EBUSY);
3028 #endif
3029
3030         if (!iter->snapshot)
3031                 atomic_inc(&trace_record_cmdline_disabled);
3032
3033         if (*pos != iter->pos) {
3034                 iter->ent = NULL;
3035                 iter->cpu = 0;
3036                 iter->idx = -1;
3037
3038                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3039                         for_each_tracing_cpu(cpu)
3040                                 tracing_iter_reset(iter, cpu);
3041                 } else
3042                         tracing_iter_reset(iter, cpu_file);
3043
3044                 iter->leftover = 0;
3045                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3046                         ;
3047
3048         } else {
3049                 /*
3050                  * If we overflowed the seq_file before, then we want
3051                  * to just reuse the trace_seq buffer again.
3052                  */
3053                 if (iter->leftover)
3054                         p = iter;
3055                 else {
3056                         l = *pos - 1;
3057                         p = s_next(m, p, &l);
3058                 }
3059         }
3060
3061         trace_event_read_lock();
3062         trace_access_lock(cpu_file);
3063         return p;
3064 }
3065
3066 static void s_stop(struct seq_file *m, void *p)
3067 {
3068         struct trace_iterator *iter = m->private;
3069
3070 #ifdef CONFIG_TRACER_MAX_TRACE
3071         if (iter->snapshot && iter->trace->use_max_tr)
3072                 return;
3073 #endif
3074
3075         if (!iter->snapshot)
3076                 atomic_dec(&trace_record_cmdline_disabled);
3077
3078         trace_access_unlock(iter->cpu_file);
3079         trace_event_read_unlock();
3080 }
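
/*
 * Illustrative sketch (not part of trace.c): s_start/s_next/s_stop and a
 * show callback plug into the standard seq_file iterator contract, i.e.
 * they end up in a struct seq_operations like the hypothetical one below
 * and are driven by seq_read().
 */
static int example_show(struct seq_file *m, void *v)
{
        return 0;       /* a real ->show() would format one trace entry into m */
}

static const struct seq_operations example_seq_ops = {
        .start  = s_start,
        .next   = s_next,
        .stop   = s_stop,
        .show   = example_show,
};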
3081
3082 static void
3083 get_total_entries(struct trace_buffer *buf,
3084                   unsigned long *total, unsigned long *entries)
3085 {
3086         unsigned long count;
3087         int cpu;
3088
3089         *total = 0;
3090         *entries = 0;
3091
3092         for_each_tracing_cpu(cpu) {
3093                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3094                 /*
3095                  * If this buffer has skipped entries, then we hold all
3096                  * entries for the trace and we need to ignore the
3097                  * ones before the time stamp.
3098                  */
3099                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3100                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3101                         /* total is the same as the entries */
3102                         *total += count;
3103                 } else
3104                         *total += count +
3105                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3106                 *entries += count;
3107         }
3108 }
3109
3110 static void print_lat_help_header(struct seq_file *m)
3111 {
3112         seq_puts(m, "#                  _------=> CPU#            \n"
3113                     "#                 / _-----=> irqs-off        \n"
3114                     "#                | / _----=> need-resched    \n"
3115                     "#                || / _---=> hardirq/softirq \n"
3116                     "#                ||| / _--=> preempt-depth   \n"
3117                     "#                |||| /     delay            \n"
3118                     "#  cmd     pid   ||||| time  |   caller      \n"
3119                     "#     \\   /      |||||  \\    |   /         \n");
3120 }
3121
3122 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3123 {
3124         unsigned long total;
3125         unsigned long entries;
3126
3127         get_total_entries(buf, &total, &entries);
3128         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3129                    entries, total, num_online_cpus());
3130         seq_puts(m, "#\n");
3131 }
3132
3133 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3134 {
3135         print_event_info(buf, m);
3136         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3137                     "#              | |       |          |         |\n");
3138 }
3139
3140 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3141 {
3142         print_event_info(buf, m);
3143         seq_puts(m, "#                              _-----=> irqs-off\n"
3144                     "#                             / _----=> need-resched\n"
3145                     "#                            | / _---=> hardirq/softirq\n"
3146                     "#                            || / _--=> preempt-depth\n"
3147                     "#                            ||| /     delay\n"
3148                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3149                     "#              | |       |   ||||       |         |\n");
3150 }
3151
3152 void
3153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3154 {
3155         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3156         struct trace_buffer *buf = iter->trace_buffer;
3157         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3158         struct tracer *type = iter->trace;
3159         unsigned long entries;
3160         unsigned long total;
3161         const char *name = "preemption";
3162
3163         name = type->name;
3164
3165         get_total_entries(buf, &total, &entries);
3166
3167         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3168                    name, UTS_RELEASE);
3169         seq_puts(m, "# -----------------------------------"
3170                  "---------------------------------\n");
3171         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3172                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3173                    nsecs_to_usecs(data->saved_latency),
3174                    entries,
3175                    total,
3176                    buf->cpu,
3177 #if defined(CONFIG_PREEMPT_NONE)
3178                    "server",
3179 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3180                    "desktop",
3181 #elif defined(CONFIG_PREEMPT)
3182                    "preempt",
3183 #else
3184                    "unknown",
3185 #endif
3186                    /* These are reserved for later use */
3187                    0, 0, 0, 0);
3188 #ifdef CONFIG_SMP
3189         seq_printf(m, " #P:%d)\n", num_online_cpus());
3190 #else
3191         seq_puts(m, ")\n");
3192 #endif
3193         seq_puts(m, "#    -----------------\n");
3194         seq_printf(m, "#    | task: %.16s-%d "
3195                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3196                    data->comm, data->pid,
3197                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3198                    data->policy, data->rt_priority);
3199         seq_puts(m, "#    -----------------\n");
3200
3201         if (data->critical_start) {
3202                 seq_puts(m, "#  => started at: ");
3203                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3204                 trace_print_seq(m, &iter->seq);
3205                 seq_puts(m, "\n#  => ended at:   ");
3206                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3207                 trace_print_seq(m, &iter->seq);
3208                 seq_puts(m, "\n#\n");
3209         }
3210
3211         seq_puts(m, "#\n");
3212 }
3213
3214 static void test_cpu_buff_start(struct trace_iterator *iter)
3215 {
3216         struct trace_seq *s = &iter->seq;
3217         struct trace_array *tr = iter->tr;
3218
3219         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3220                 return;
3221
3222         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3223                 return;
3224
3225         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3226                 return;
3227
3228         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3229                 return;
3230
3231         if (iter->started)
3232                 cpumask_set_cpu(iter->cpu, iter->started);
3233
3234         /* Don't print started cpu buffer for the first entry of the trace */
3235         if (iter->idx > 1)
3236                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3237                                 iter->cpu);
3238 }
3239
3240 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3241 {
3242         struct trace_array *tr = iter->tr;
3243         struct trace_seq *s = &iter->seq;
3244         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3245         struct trace_entry *entry;
3246         struct trace_event *event;
3247
3248         entry = iter->ent;
3249
3250         test_cpu_buff_start(iter);
3251
3252         event = ftrace_find_event(entry->type);
3253
3254         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3255                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3256                         trace_print_lat_context(iter);
3257                 else
3258                         trace_print_context(iter);
3259         }
3260
3261         if (trace_seq_has_overflowed(s))
3262                 return TRACE_TYPE_PARTIAL_LINE;
3263
3264         if (event)
3265                 return event->funcs->trace(iter, sym_flags, event);
3266
3267         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3268
3269         return trace_handle_return(s);
3270 }
3271
3272 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3273 {
3274         struct trace_array *tr = iter->tr;
3275         struct trace_seq *s = &iter->seq;
3276         struct trace_entry *entry;
3277         struct trace_event *event;
3278
3279         entry = iter->ent;
3280
3281         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3282                 trace_seq_printf(s, "%d %d %llu ",
3283                                  entry->pid, iter->cpu, iter->ts);
3284
3285         if (trace_seq_has_overflowed(s))
3286                 return TRACE_TYPE_PARTIAL_LINE;
3287
3288         event = ftrace_find_event(entry->type);
3289         if (event)
3290                 return event->funcs->raw(iter, 0, event);
3291
3292         trace_seq_printf(s, "%d ?\n", entry->type);
3293
3294         return trace_handle_return(s);
3295 }
3296
3297 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3298 {
3299         struct trace_array *tr = iter->tr;
3300         struct trace_seq *s = &iter->seq;
3301         unsigned char newline = '\n';
3302         struct trace_entry *entry;
3303         struct trace_event *event;
3304
3305         entry = iter->ent;
3306
3307         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3308                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3309                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3310                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3311                 if (trace_seq_has_overflowed(s))
3312                         return TRACE_TYPE_PARTIAL_LINE;
3313         }
3314
3315         event = ftrace_find_event(entry->type);
3316         if (event) {
3317                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3318                 if (ret != TRACE_TYPE_HANDLED)
3319                         return ret;
3320         }
3321
3322         SEQ_PUT_FIELD(s, newline);
3323
3324         return trace_handle_return(s);
3325 }
3326
3327 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3328 {
3329         struct trace_array *tr = iter->tr;
3330         struct trace_seq *s = &iter->seq;
3331         struct trace_entry *entry;
3332         struct trace_event *event;
3333
3334         entry = iter->ent;
3335
3336         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3337                 SEQ_PUT_FIELD(s, entry->pid);
3338                 SEQ_PUT_FIELD(s, iter->cpu);
3339                 SEQ_PUT_FIELD(s, iter->ts);
3340                 if (trace_seq_has_overflowed(s))
3341                         return TRACE_TYPE_PARTIAL_LINE;
3342         }
3343
3344         event = ftrace_find_event(entry->type);
3345         return event ? event->funcs->binary(iter, 0, event) :
3346                 TRACE_TYPE_HANDLED;
3347 }
3348
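/* Return 1 if there is nothing left to read in the selected CPU buffer(s). */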
3349 int trace_empty(struct trace_iterator *iter)
3350 {
3351         struct ring_buffer_iter *buf_iter;
3352         int cpu;
3353
3354         /* If we are looking at one CPU buffer, only check that one */
3355         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3356                 cpu = iter->cpu_file;
3357                 buf_iter = trace_buffer_iter(iter, cpu);
3358                 if (buf_iter) {
3359                         if (!ring_buffer_iter_empty(buf_iter))
3360                                 return 0;
3361                 } else {
3362                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3363                                 return 0;
3364                 }
3365                 return 1;
3366         }
3367
3368         for_each_tracing_cpu(cpu) {
3369                 buf_iter = trace_buffer_iter(iter, cpu);
3370                 if (buf_iter) {
3371                         if (!ring_buffer_iter_empty(buf_iter))
3372                                 return 0;
3373                 } else {
3374                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3375                                 return 0;
3376                 }
3377         }
3378
3379         return 1;
3380 }
3381
3382 /*  Called with trace_event_read_lock() held. */
3383 enum print_line_t print_trace_line(struct trace_iterator *iter)
3384 {
3385         struct trace_array *tr = iter->tr;
3386         unsigned long trace_flags = tr->trace_flags;
3387         enum print_line_t ret;
3388
3389         if (iter->lost_events) {
3390                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3391                                  iter->cpu, iter->lost_events);
3392                 if (trace_seq_has_overflowed(&iter->seq))
3393                         return TRACE_TYPE_PARTIAL_LINE;
3394         }
3395
3396         if (iter->trace && iter->trace->print_line) {
3397                 ret = iter->trace->print_line(iter);
3398                 if (ret != TRACE_TYPE_UNHANDLED)
3399                         return ret;
3400         }
3401
3402         if (iter->ent->type == TRACE_BPUTS &&
3403                         trace_flags & TRACE_ITER_PRINTK &&
3404                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3405                 return trace_print_bputs_msg_only(iter);
3406
3407         if (iter->ent->type == TRACE_BPRINT &&
3408                         trace_flags & TRACE_ITER_PRINTK &&
3409                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3410                 return trace_print_bprintk_msg_only(iter);
3411
3412         if (iter->ent->type == TRACE_PRINT &&
3413                         trace_flags & TRACE_ITER_PRINTK &&
3414                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3415                 return trace_print_printk_msg_only(iter);
3416
3417         if (trace_flags & TRACE_ITER_BIN)
3418                 return print_bin_fmt(iter);
3419
3420         if (trace_flags & TRACE_ITER_HEX)
3421                 return print_hex_fmt(iter);
3422
3423         if (trace_flags & TRACE_ITER_RAW)
3424                 return print_raw_fmt(iter);
3425
3426         return print_trace_fmt(iter);
3427 }
3428
3429 void trace_latency_header(struct seq_file *m)
3430 {
3431         struct trace_iterator *iter = m->private;
3432         struct trace_array *tr = iter->tr;
3433
3434         /* print nothing if the buffers are empty */
3435         if (trace_empty(iter))
3436                 return;
3437
3438         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3439                 print_trace_header(m, iter);
3440
3441         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3442                 print_lat_help_header(m);
3443 }
3444
3445 void trace_default_header(struct seq_file *m)
3446 {
3447         struct trace_iterator *iter = m->private;
3448         struct trace_array *tr = iter->tr;
3449         unsigned long trace_flags = tr->trace_flags;
3450
3451         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3452                 return;
3453
3454         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3455                 /* print nothing if the buffers are empty */
3456                 if (trace_empty(iter))
3457                         return;
3458                 print_trace_header(m, iter);
3459                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3460                         print_lat_help_header(m);
3461         } else {
3462                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3463                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3464                                 print_func_help_header_irq(iter->trace_buffer, m);
3465                         else
3466                                 print_func_help_header(iter->trace_buffer, m);
3467                 }
3468         }
3469 }
3470
3471 static void test_ftrace_alive(struct seq_file *m)
3472 {
3473         if (!ftrace_is_dead())
3474                 return;
3475         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3476                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3477 }
3478
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 static void show_snapshot_main_help(struct seq_file *m)
3481 {
3482         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3483                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3484                     "#                      Takes a snapshot of the main buffer.\n"
3485                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3486                     "#                      (Doesn't have to be '2'; works with any number that\n"
3487                     "#                       is not a '0' or '1')\n");
3488 }
3489
3490 static void show_snapshot_percpu_help(struct seq_file *m)
3491 {
3492         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3494         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3495                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3496 #else
3497         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3498                     "#                     Must use main snapshot file to allocate.\n");
3499 #endif
3500         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3501                     "#                      (Doesn't have to be '2'; works with any number that\n"
3502                     "#                       is not a '0' or '1')\n");
3503 }
3504
3505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3506 {
3507         if (iter->tr->allocated_snapshot)
3508                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3509         else
3510                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3511
3512         seq_puts(m, "# Snapshot commands:\n");
3513         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3514                 show_snapshot_main_help(m);
3515         else
3516                 show_snapshot_percpu_help(m);
3517 }
3518 #else
3519 /* Should never be called */
3520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3521 #endif
3522
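/*
 * seq_file ->show() for the "trace" file: print the header block when
 * there is no current entry, flush leftover output from a previous
 * overflow, or format a single trace line.
 */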
3523 static int s_show(struct seq_file *m, void *v)
3524 {
3525         struct trace_iterator *iter = v;
3526         int ret;
3527
3528         if (iter->ent == NULL) {
3529                 if (iter->tr) {
3530                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3531                         seq_puts(m, "#\n");
3532                         test_ftrace_alive(m);
3533                 }
3534                 if (iter->snapshot && trace_empty(iter))
3535                         print_snapshot_help(m, iter);
3536                 else if (iter->trace && iter->trace->print_header)
3537                         iter->trace->print_header(m);
3538                 else
3539                         trace_default_header(m);
3540
3541         } else if (iter->leftover) {
3542                 /*
3543                  * If we filled the seq_file buffer earlier, we
3544                  * want to just show it now.
3545                  */
3546                 ret = trace_print_seq(m, &iter->seq);
3547
3548                 /* ret should this time be zero, but you never know */
3549                 iter->leftover = ret;
3550
3551         } else {
3552                 print_trace_line(iter);
3553                 ret = trace_print_seq(m, &iter->seq);
3554                 /*
3555                  * If we overflow the seq_file buffer, then it will
3556                  * ask us for this data again at start up.
3557                  * Use that instead.
3558                  *  ret is 0 if seq_file write succeeded.
3559                  *        -1 otherwise.
3560                  */
3561                 iter->leftover = ret;
3562         }
3563
3564         return 0;
3565 }
3566
3567 /*
3568  * Should be used after trace_array_get(); trace_types_lock
3569  * ensures that i_cdev was already initialized.
3570  */
3571 static inline int tracing_get_cpu(struct inode *inode)
3572 {
3573         if (inode->i_cdev) /* See trace_create_cpu_file() */
3574                 return (long)inode->i_cdev - 1;
3575         return RING_BUFFER_ALL_CPUS;
3576 }
3577
3578 static const struct seq_operations tracer_seq_ops = {
3579         .start          = s_start,
3580         .next           = s_next,
3581         .stop           = s_stop,
3582         .show           = s_show,
3583 };
3584
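/*
 * Build the trace_iterator used to read "trace" (or "snapshot"): copy the
 * current tracer, pick the buffer to read from, stop tracing unless this
 * is a snapshot, and prepare a ring buffer iterator for each CPU.
 */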
3585 static struct trace_iterator *
3586 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3587 {
3588         struct trace_array *tr = inode->i_private;
3589         struct trace_iterator *iter;
3590         int cpu;
3591
3592         if (tracing_disabled)
3593                 return ERR_PTR(-ENODEV);
3594
3595         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3596         if (!iter)
3597                 return ERR_PTR(-ENOMEM);
3598
3599         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3600                                     GFP_KERNEL);
3601         if (!iter->buffer_iter)
3602                 goto release;
3603
3604         /*
3605          * We make a copy of the current tracer to avoid concurrent
3606          * changes to it while we are reading.
3607          */
3608         mutex_lock(&trace_types_lock);
3609         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3610         if (!iter->trace)
3611                 goto fail;
3612
3613         *iter->trace = *tr->current_trace;
3614
3615         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3616                 goto fail;
3617
3618         iter->tr = tr;
3619
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621         /* Currently only the top directory has a snapshot */
3622         if (tr->current_trace->print_max || snapshot)
3623                 iter->trace_buffer = &tr->max_buffer;
3624         else
3625 #endif
3626                 iter->trace_buffer = &tr->trace_buffer;
3627         iter->snapshot = snapshot;
3628         iter->pos = -1;
3629         iter->cpu_file = tracing_get_cpu(inode);
3630         mutex_init(&iter->mutex);
3631
3632         /* Notify the tracer early; before we stop tracing. */
3633         if (iter->trace && iter->trace->open)
3634                 iter->trace->open(iter);
3635
3636         /* Annotate start of buffers if we had overruns */
3637         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3638                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3639
3640         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3641         if (trace_clocks[tr->clock_id].in_ns)
3642                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3643
3644         /* stop the trace while dumping if we are not opening "snapshot" */
3645         if (!iter->snapshot)
3646                 tracing_stop_tr(tr);
3647
3648         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3649                 for_each_tracing_cpu(cpu) {
3650                         iter->buffer_iter[cpu] =
3651                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3652                 }
3653                 ring_buffer_read_prepare_sync();
3654                 for_each_tracing_cpu(cpu) {
3655                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3656                         tracing_iter_reset(iter, cpu);
3657                 }
3658         } else {
3659                 cpu = iter->cpu_file;
3660                 iter->buffer_iter[cpu] =
3661                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3662                 ring_buffer_read_prepare_sync();
3663                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3664                 tracing_iter_reset(iter, cpu);
3665         }
3666
3667         mutex_unlock(&trace_types_lock);
3668
3669         return iter;
3670
3671  fail:
3672         mutex_unlock(&trace_types_lock);
3673         kfree(iter->trace);
3674         kfree(iter->buffer_iter);
3675 release:
3676         seq_release_private(inode, file);
3677         return ERR_PTR(-ENOMEM);
3678 }
3679
3680 int tracing_open_generic(struct inode *inode, struct file *filp)
3681 {
3682         if (tracing_disabled)
3683                 return -ENODEV;
3684
3685         filp->private_data = inode->i_private;
3686         return 0;
3687 }
3688
3689 bool tracing_is_disabled(void)
3690 {
3691         return tracing_disabled ? true : false;
3692 }
3693
3694 /*
3695  * Open and update trace_array ref count.
3696  * Must have the current trace_array passed to it.
3697  */
3698 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3699 {
3700         struct trace_array *tr = inode->i_private;
3701
3702         if (tracing_disabled)
3703                 return -ENODEV;
3704
3705         if (trace_array_get(tr) < 0)
3706                 return -ENODEV;
3707
3708         filp->private_data = inode->i_private;
3709
3710         return 0;
3711 }
3712
3713 static int tracing_release(struct inode *inode, struct file *file)
3714 {
3715         struct trace_array *tr = inode->i_private;
3716         struct seq_file *m = file->private_data;
3717         struct trace_iterator *iter;
3718         int cpu;
3719
3720         if (!(file->f_mode & FMODE_READ)) {
3721                 trace_array_put(tr);
3722                 return 0;
3723         }
3724
3725         /* Writes do not use seq_file */
3726         iter = m->private;
3727         mutex_lock(&trace_types_lock);
3728
3729         for_each_tracing_cpu(cpu) {
3730                 if (iter->buffer_iter[cpu])
3731                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3732         }
3733
3734         if (iter->trace && iter->trace->close)
3735                 iter->trace->close(iter);
3736
3737         if (!iter->snapshot)
3738                 /* reenable tracing if it was previously enabled */
3739                 tracing_start_tr(tr);
3740
3741         __trace_array_put(tr);
3742
3743         mutex_unlock(&trace_types_lock);
3744
3745         mutex_destroy(&iter->mutex);
3746         free_cpumask_var(iter->started);
3747         kfree(iter->trace);
3748         kfree(iter->buffer_iter);
3749         seq_release_private(inode, file);
3750
3751         return 0;
3752 }
3753
3754 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3755 {
3756         struct trace_array *tr = inode->i_private;
3757
3758         trace_array_put(tr);
3759         return 0;
3760 }
3761
3762 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3763 {
3764         struct trace_array *tr = inode->i_private;
3765
3766         trace_array_put(tr);
3767
3768         return single_release(inode, file);
3769 }
3770
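/*
 * Open handler for "trace": opening for write with O_TRUNC erases the
 * buffer(s); opening for read sets up a full iterator via __tracing_open().
 */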
3771 static int tracing_open(struct inode *inode, struct file *file)
3772 {
3773         struct trace_array *tr = inode->i_private;
3774         struct trace_iterator *iter;
3775         int ret = 0;
3776
3777         if (trace_array_get(tr) < 0)
3778                 return -ENODEV;
3779
3780         /* If this file was open for write, then erase contents */
3781         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3782                 int cpu = tracing_get_cpu(inode);
3783
3784                 if (cpu == RING_BUFFER_ALL_CPUS)
3785                         tracing_reset_online_cpus(&tr->trace_buffer);
3786                 else
3787                         tracing_reset(&tr->trace_buffer, cpu);
3788         }
3789
3790         if (file->f_mode & FMODE_READ) {
3791                 iter = __tracing_open(inode, file, false);
3792                 if (IS_ERR(iter))
3793                         ret = PTR_ERR(iter);
3794                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3795                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3796         }
3797
3798         if (ret < 0)
3799                 trace_array_put(tr);
3800
3801         return ret;
3802 }
3803
3804 /*
3805  * Some tracers are not suitable for instance buffers.
3806  * A tracer is always available for the global array (toplevel)
3807  * or if it explicitly allows instances (->allow_instances).
3808  */
3809 static bool
3810 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3811 {
3812         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3813 }
3814
3815 /* Find the next tracer that this trace array may use */
3816 static struct tracer *
3817 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3818 {
3819         while (t && !trace_ok_for_array(t, tr))
3820                 t = t->next;
3821
3822         return t;
3823 }
3824
3825 static void *
3826 t_next(struct seq_file *m, void *v, loff_t *pos)
3827 {
3828         struct trace_array *tr = m->private;
3829         struct tracer *t = v;
3830
3831         (*pos)++;
3832
3833         if (t)
3834                 t = get_tracer_for_array(tr, t->next);
3835
3836         return t;
3837 }
3838
3839 static void *t_start(struct seq_file *m, loff_t *pos)
3840 {
3841         struct trace_array *tr = m->private;
3842         struct tracer *t;
3843         loff_t l = 0;
3844
3845         mutex_lock(&trace_types_lock);
3846
3847         t = get_tracer_for_array(tr, trace_types);
3848         for (; t && l < *pos; t = t_next(m, t, &l))
3849                 ;
3850
3851         return t;
3852 }
3853
3854 static void t_stop(struct seq_file *m, void *p)
3855 {
3856         mutex_unlock(&trace_types_lock);
3857 }
3858
3859 static int t_show(struct seq_file *m, void *v)
3860 {
3861         struct tracer *t = v;
3862
3863         if (!t)
3864                 return 0;
3865
3866         seq_puts(m, t->name);
3867         if (t->next)
3868                 seq_putc(m, ' ');
3869         else
3870                 seq_putc(m, '\n');
3871
3872         return 0;
3873 }
3874
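/*
 * seq_file operations for "available_tracers": walk trace_types and list
 * every tracer this trace array may use, separated by spaces.
 */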
3875 static const struct seq_operations show_traces_seq_ops = {
3876         .start          = t_start,
3877         .next           = t_next,
3878         .stop           = t_stop,
3879         .show           = t_show,
3880 };
3881
3882 static int show_traces_open(struct inode *inode, struct file *file)
3883 {
3884         struct trace_array *tr = inode->i_private;
3885         struct seq_file *m;
3886         int ret;
3887
3888         if (tracing_disabled)
3889                 return -ENODEV;
3890
3891         ret = seq_open(file, &show_traces_seq_ops);
3892         if (ret)
3893                 return ret;
3894
3895         m = file->private_data;
3896         m->private = tr;
3897
3898         return 0;
3899 }
3900
3901 static ssize_t
3902 tracing_write_stub(struct file *filp, const char __user *ubuf,
3903                    size_t count, loff_t *ppos)
3904 {
3905         return count;
3906 }
3907
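/*
 * Seek handler shared by tracing files: readers go through seq_lseek(),
 * while write-only opens are simply pinned to offset zero.
 */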
3908 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3909 {
3910         int ret;
3911
3912         if (file->f_mode & FMODE_READ)
3913                 ret = seq_lseek(file, offset, whence);
3914         else
3915                 file->f_pos = ret = 0;
3916
3917         return ret;
3918 }
3919
3920 static const struct file_operations tracing_fops = {
3921         .open           = tracing_open,
3922         .read           = seq_read,
3923         .write          = tracing_write_stub,
3924         .llseek         = tracing_lseek,
3925         .release        = tracing_release,
3926 };
3927
3928 static const struct file_operations show_traces_fops = {
3929         .open           = show_traces_open,
3930         .read           = seq_read,
3931         .release        = seq_release,
3932         .llseek         = seq_lseek,
3933 };
3934
3935 /*
3936  * The tracer itself will not take this lock, but we still want
3937  * to provide a consistent cpumask to user-space:
3938  */
3939 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3940
3941 /*
3942  * Temporary storage for the character representation of the
3943  * CPU bitmask (and one more byte for the newline):
3944  */
3945 static char mask_str[NR_CPUS + 1];
3946
3947 static ssize_t
3948 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3949                      size_t count, loff_t *ppos)
3950 {
3951         struct trace_array *tr = file_inode(filp)->i_private;
3952         int len;
3953
3954         mutex_lock(&tracing_cpumask_update_lock);
3955
3956         len = snprintf(mask_str, count, "%*pb\n",
3957                        cpumask_pr_args(tr->tracing_cpumask));
3958         if (len >= count) {
3959                 count = -EINVAL;
3960                 goto out_err;
3961         }
3962         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3963
3964 out_err:
3965         mutex_unlock(&tracing_cpumask_update_lock);
3966
3967         return count;
3968 }
3969
3970 static ssize_t
3971 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3972                       size_t count, loff_t *ppos)
3973 {
3974         struct trace_array *tr = file_inode(filp)->i_private;
3975         cpumask_var_t tracing_cpumask_new;
3976         int err, cpu;
3977
3978         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3979                 return -ENOMEM;
3980
3981         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3982         if (err)
3983                 goto err_unlock;
3984
3985         mutex_lock(&tracing_cpumask_update_lock);
3986
3987         local_irq_disable();
3988         arch_spin_lock(&tr->max_lock);
3989         for_each_tracing_cpu(cpu) {
3990                 /*
3991                  * Increase/decrease the disabled counter if we are
3992                  * about to flip a bit in the cpumask:
3993                  */
3994                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3995                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3996                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3997                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3998                 }
3999                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4000                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4001                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4002                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4003                 }
4004         }
4005         arch_spin_unlock(&tr->max_lock);
4006         local_irq_enable();
4007
4008         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4009
4010         mutex_unlock(&tracing_cpumask_update_lock);
4011         free_cpumask_var(tracing_cpumask_new);
4012
4013         return count;
4014
4015 err_unlock:
4016         free_cpumask_var(tracing_cpumask_new);
4017
4018         return err;
4019 }
4020
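/*
 * File operations for "tracing_cpumask".  Example usage, assuming tracefs
 * is mounted at /sys/kernel/tracing:
 *
 *   echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace only CPUs 0 and 1
 *   cat /sys/kernel/tracing/tracing_cpumask        # show the current hex mask
 */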
4021 static const struct file_operations tracing_cpumask_fops = {
4022         .open           = tracing_open_generic_tr,
4023         .read           = tracing_cpumask_read,
4024         .write          = tracing_cpumask_write,
4025         .release        = tracing_release_generic_tr,
4026         .llseek         = generic_file_llseek,
4027 };
4028
4029 static int tracing_trace_options_show(struct seq_file *m, void *v)
4030 {
4031         struct tracer_opt *trace_opts;
4032         struct trace_array *tr = m->private;
4033         u32 tracer_flags;
4034         int i;
4035
4036         mutex_lock(&trace_types_lock);
4037         tracer_flags = tr->current_trace->flags->val;
4038         trace_opts = tr->current_trace->flags->opts;
4039
4040         for (i = 0; trace_options[i]; i++) {
4041                 if (tr->trace_flags & (1 << i))
4042                         seq_printf(m, "%s\n", trace_options[i]);
4043                 else
4044                         seq_printf(m, "no%s\n", trace_options[i]);
4045         }
4046
4047         for (i = 0; trace_opts[i].name; i++) {
4048                 if (tracer_flags & trace_opts[i].bit)
4049                         seq_printf(m, "%s\n", trace_opts[i].name);
4050                 else
4051                         seq_printf(m, "no%s\n", trace_opts[i].name);
4052         }
4053         mutex_unlock(&trace_types_lock);
4054
4055         return 0;
4056 }
4057
4058 static int __set_tracer_option(struct trace_array *tr,
4059                                struct tracer_flags *tracer_flags,
4060                                struct tracer_opt *opts, int neg)
4061 {
4062         struct tracer *trace = tracer_flags->trace;
4063         int ret;
4064
4065         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4066         if (ret)
4067                 return ret;
4068
4069         if (neg)
4070                 tracer_flags->val &= ~opts->bit;
4071         else
4072                 tracer_flags->val |= opts->bit;
4073         return 0;
4074 }
4075
4076 /* Try to assign a tracer specific option */
4077 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4078 {
4079         struct tracer *trace = tr->current_trace;
4080         struct tracer_flags *tracer_flags = trace->flags;
4081         struct tracer_opt *opts = NULL;
4082         int i;
4083
4084         for (i = 0; tracer_flags->opts[i].name; i++) {
4085                 opts = &tracer_flags->opts[i];
4086
4087                 if (strcmp(cmp, opts->name) == 0)
4088                         return __set_tracer_option(tr, trace->flags, opts, neg);
4089         }
4090
4091         return -EINVAL;
4092 }
4093
4094 /* Some tracers require overwrite to stay enabled */
4095 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4096 {
4097         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4098                 return -1;
4099
4100         return 0;
4101 }
4102
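/*
 * Set or clear one TRACE_ITER_* flag on @tr.  The current tracer may veto
 * the change, and flags with side effects (cmdline recording, fork
 * following, overwrite mode, trace_printk) are propagated here.
 */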
4103 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4104 {
4105         /* do nothing if flag is already set */
4106         if (!!(tr->trace_flags & mask) == !!enabled)
4107                 return 0;
4108
4109         /* Give the tracer a chance to approve the change */
4110         if (tr->current_trace->flag_changed)
4111                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4112                         return -EINVAL;
4113
4114         if (enabled)
4115                 tr->trace_flags |= mask;
4116         else
4117                 tr->trace_flags &= ~mask;
4118
4119         if (mask == TRACE_ITER_RECORD_CMD)
4120                 trace_event_enable_cmd_record(enabled);
4121
4122         if (mask == TRACE_ITER_EVENT_FORK)
4123                 trace_event_follow_fork(tr, enabled);
4124
4125         if (mask == TRACE_ITER_OVERWRITE) {
4126                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4127 #ifdef CONFIG_TRACER_MAX_TRACE
4128                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4129 #endif
4130         }
4131
4132         if (mask == TRACE_ITER_PRINTK) {
4133                 trace_printk_start_stop_comm(enabled);
4134                 trace_printk_control(enabled);
4135         }
4136
4137         return 0;
4138 }
4139
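/*
 * Apply a single option token, e.g. "overwrite" or "nooverwrite": the core
 * trace flags are tried first, then the tracer-specific options.
 */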
4140 static int trace_set_options(struct trace_array *tr, char *option)
4141 {
4142         char *cmp;
4143         int neg = 0;
4144         int ret = -ENODEV;
4145         int i;
4146         size_t orig_len = strlen(option);
4147
4148         cmp = strstrip(option);
4149
4150         if (strncmp(cmp, "no", 2) == 0) {
4151                 neg = 1;
4152                 cmp += 2;
4153         }
4154
4155         mutex_lock(&trace_types_lock);
4156
4157         for (i = 0; trace_options[i]; i++) {
4158                 if (strcmp(cmp, trace_options[i]) == 0) {
4159                         ret = set_tracer_flag(tr, 1 << i, !neg);
4160                         break;
4161                 }
4162         }
4163
4164         /* If no option could be set, test the specific tracer options */
4165         if (!trace_options[i])
4166                 ret = set_tracer_option(tr, cmp, neg);
4167
4168         mutex_unlock(&trace_types_lock);
4169
4170         /*
4171          * If the first trailing whitespace is replaced with '\0' by strstrip,
4172          * turn it back into a space.
4173          */
4174         if (orig_len > strlen(option))
4175                 option[strlen(option)] = ' ';
4176
4177         return ret;
4178 }
4179
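/*
 * Walk the comma-separated option list saved from the boot command line in
 * trace_boot_options_buf and apply each entry to the global trace array.
 */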
4180 static void __init apply_trace_boot_options(void)
4181 {
4182         char *buf = trace_boot_options_buf;
4183         char *option;
4184
4185         while (true) {
4186                 option = strsep(&buf, ",");
4187
4188                 if (!option)
4189                         break;
4190
4191                 if (*option)
4192                         trace_set_options(&global_trace, option);
4193
4194                 /* Put back the comma to allow this to be called again */
4195                 if (buf)
4196                         *(buf - 1) = ',';
4197         }
4198 }
4199
4200 static ssize_t
4201 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4202                         size_t cnt, loff_t *ppos)
4203 {
4204         struct seq_file *m = filp->private_data;
4205         struct trace_array *tr = m->private;
4206         char buf[64];
4207         int ret;
4208
4209         if (cnt >= sizeof(buf))
4210                 return -EINVAL;
4211
4212         if (copy_from_user(buf, ubuf, cnt))
4213                 return -EFAULT;
4214
4215         buf[cnt] = 0;
4216
4217         ret = trace_set_options(tr, buf);
4218         if (ret < 0)
4219                 return ret;
4220
4221         *ppos += cnt;
4222
4223         return cnt;
4224 }
4225
4226 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4227 {
4228         struct trace_array *tr = inode->i_private;
4229         int ret;
4230
4231         if (tracing_disabled)
4232                 return -ENODEV;
4233
4234         if (trace_array_get(tr) < 0)
4235                 return -ENODEV;
4236
4237         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4238         if (ret < 0)
4239                 trace_array_put(tr);
4240
4241         return ret;
4242 }
4243
4244 static const struct file_operations tracing_iter_fops = {
4245         .open           = tracing_trace_options_open,
4246         .read           = seq_read,
4247         .llseek         = seq_lseek,
4248         .release        = tracing_single_release_tr,
4249         .write          = tracing_trace_options_write,
4250 };
4251
4252 static const char readme_msg[] =
4253         "tracing mini-HOWTO:\n\n"
4254         "# echo 0 > tracing_on : quick way to disable tracing\n"
4255         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4256         " Important files:\n"
4257         "  trace\t\t\t- The static contents of the buffer\n"
4258         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4259         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4260         "  current_tracer\t- function and latency tracers\n"
4261         "  available_tracers\t- list of configured tracers for current_tracer\n"
4262         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4263         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4264         "  trace_clock\t\t- change the clock used to order events\n"
4265         "       local:   Per cpu clock but may not be synced across CPUs\n"
4266         "      global:   Synced across CPUs but slows tracing down.\n"
4267         "     counter:   Not a clock, but just an increment\n"
4268         "      uptime:   Jiffy counter from time of boot\n"
4269         "        perf:   Same clock that perf events use\n"
4270 #ifdef CONFIG_X86_64
4271         "     x86-tsc:   TSC cycle counter\n"
4272 #endif
4273         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4274         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4275         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4276         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4277         "\t\t\t  Remove sub-buffer with rmdir\n"
4278         "  trace_options\t\t- Set format or modify how tracing happens\n"
4279         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4280         "\t\t\t  option name\n"
4281         "  saved_cmdlines_size\t- echo the number of comm-pid entries to cache into here\n"
4282 #ifdef CONFIG_DYNAMIC_FTRACE
4283         "\n  available_filter_functions - list of functions that can be filtered on\n"
4284         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4285         "\t\t\t  functions\n"
4286         "\t     accepts: func_full_name or glob-matching-pattern\n"
4287         "\t     modules: Can select a group via module\n"
4288         "\t      Format: :mod:<module-name>\n"
4289         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4290         "\t    triggers: a command to perform when function is hit\n"
4291         "\t      Format: <function>:<trigger>[:count]\n"
4292         "\t     trigger: traceon, traceoff\n"
4293         "\t\t      enable_event:<system>:<event>\n"
4294         "\t\t      disable_event:<system>:<event>\n"
4295 #ifdef CONFIG_STACKTRACE
4296         "\t\t      stacktrace\n"
4297 #endif
4298 #ifdef CONFIG_TRACER_SNAPSHOT
4299         "\t\t      snapshot\n"
4300 #endif
4301         "\t\t      dump\n"
4302         "\t\t      cpudump\n"
4303         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4304         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4305         "\t     The first one will disable tracing every time do_fault is hit\n"
4306         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4307         "\t       The first time do_trap is hit and it disables tracing, the\n"
4308         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4309         "\t       the counter will not decrement. It only decrements when the\n"
4310         "\t       trigger did work\n"
4311         "\t     To remove trigger without count:\n"
4312         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4313         "\t     To remove trigger with a count:\n"
4314         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4315         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4316         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4317         "\t    modules: Can select a group via module command :mod:\n"
4318         "\t    Does not accept triggers\n"
4319 #endif /* CONFIG_DYNAMIC_FTRACE */
4320 #ifdef CONFIG_FUNCTION_TRACER
4321         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4322         "\t\t    (function)\n"
4323 #endif
4324 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4325         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4326         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4327         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4328 #endif
4329 #ifdef CONFIG_TRACER_SNAPSHOT
4330         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4331         "\t\t\t  snapshot buffer. Read the contents for more\n"
4332         "\t\t\t  information\n"
4333 #endif
4334 #ifdef CONFIG_STACK_TRACER
4335         "  stack_trace\t\t- Shows the max stack trace when active\n"
4336         "  stack_max_size\t- Shows current max stack size that was traced\n"
4337         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4338         "\t\t\t  new trace)\n"
4339 #ifdef CONFIG_DYNAMIC_FTRACE
4340         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4341         "\t\t\t  traces\n"
4342 #endif
4343 #endif /* CONFIG_STACK_TRACER */
4344 #ifdef CONFIG_KPROBE_EVENT
4345         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4346         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4347 #endif
4348 #ifdef CONFIG_UPROBE_EVENT
4349         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4350         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4351 #endif
4352 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4353         "\t  accepts: event-definitions (one definition per line)\n"
4354         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4355         "\t           -:[<group>/]<event>\n"
4356 #ifdef CONFIG_KPROBE_EVENT
4357         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4358 #endif
4359 #ifdef CONFIG_UPROBE_EVENT
4360         "\t    place: <path>:<offset>\n"
4361 #endif
4362         "\t     args: <name>=fetcharg[:type]\n"
4363         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4364         "\t           $stack<index>, $stack, $retval, $comm\n"
4365         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4366         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4367 #endif
4368         "  events/\t\t- Directory containing all trace event subsystems:\n"
4369         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4370         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4371         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4372         "\t\t\t  events\n"
4373         "      filter\t\t- If set, only events passing filter are traced\n"
4374         "  events/<system>/<event>/\t- Directory containing control files for\n"
4375         "\t\t\t  <event>:\n"
4376         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4377         "      filter\t\t- If set, only events passing filter are traced\n"
4378         "      trigger\t\t- If set, a command to perform when event is hit\n"
4379         "\t    Format: <trigger>[:count][if <filter>]\n"
4380         "\t   trigger: traceon, traceoff\n"
4381         "\t            enable_event:<system>:<event>\n"
4382         "\t            disable_event:<system>:<event>\n"
4383 #ifdef CONFIG_HIST_TRIGGERS
4384         "\t            enable_hist:<system>:<event>\n"
4385         "\t            disable_hist:<system>:<event>\n"
4386 #endif
4387 #ifdef CONFIG_STACKTRACE
4388         "\t\t    stacktrace\n"
4389 #endif
4390 #ifdef CONFIG_TRACER_SNAPSHOT
4391         "\t\t    snapshot\n"
4392 #endif
4393 #ifdef CONFIG_HIST_TRIGGERS
4394         "\t\t    hist (see below)\n"
4395 #endif
4396         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4397         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4398         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4399         "\t                  events/block/block_unplug/trigger\n"
4400         "\t   The first disables tracing every time block_unplug is hit.\n"
4401         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4402         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4403         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
4404         "\t   Like function triggers, the counter is only decremented if it\n"
4405         "\t    enabled or disabled tracing.\n"
4406         "\t   To remove a trigger without a count:\n"
4407         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4408         "\t   To remove a trigger with a count:\n"
4409         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4410         "\t   Filters can be ignored when removing a trigger.\n"
4411 #ifdef CONFIG_HIST_TRIGGERS
4412         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4413         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4414         "\t            [:values=<field1[,field2,...]>]\n"
4415         "\t            [:sort=<field1[,field2,...]>]\n"
4416         "\t            [:size=#entries]\n"
4417         "\t            [:pause][:continue][:clear]\n"
4418         "\t            [:name=histname1]\n"
4419         "\t            [if <filter>]\n\n"
4420         "\t    When a matching event is hit, an entry is added to a hash\n"
4421         "\t    table using the key(s) and value(s) named, and the value of a\n"
4422         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4423         "\t    correspond to fields in the event's format description.  Keys\n"
4424         "\t    can be any field, or the special string 'stacktrace'.\n"
4425         "\t    Compound keys consisting of up to two fields can be specified\n"
4426         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4427         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4428         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4429         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4430         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4431         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4432         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4433         "\t    its histogram data will be shared with other triggers of the\n"
4434         "\t    same name, and trigger hits will update this common data.\n\n"
4435         "\t    Reading the 'hist' file for the event will dump the hash\n"
4436         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4437         "\t    triggers attached to an event, there will be a table for each\n"
4438         "\t    trigger in the output.  The table displayed for a named\n"
4439         "\t    trigger will be the same as any other instance having the\n"
4440         "\t    same name.  The default format used to display a given field\n"
4441         "\t    can be modified by appending any of the following modifiers\n"
4442         "\t    to the field name, as applicable:\n\n"
4443         "\t            .hex        display a number as a hex value\n"
4444         "\t            .sym        display an address as a symbol\n"
4445         "\t            .sym-offset display an address as a symbol and offset\n"
4446         "\t            .execname   display a common_pid as a program name\n"
4447         "\t            .syscall    display a syscall id as a syscall name\n"
4448         "\t            .log2       display log2 value rather than raw number\n\n"
4449         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4450         "\t    trigger or to start a hist trigger but not log any events\n"
4451         "\t    until told to do so.  'continue' can be used to start or\n"
4452         "\t    restart a paused hist trigger.\n\n"
4453         "\t    The 'clear' parameter will clear the contents of a running\n"
4454         "\t    hist trigger and leave its current paused/active state\n"
4455         "\t    unchanged.\n\n"
4456         "\t    The enable_hist and disable_hist triggers can be used to\n"
4457         "\t    have one event conditionally start and stop another event's\n"
4458         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4459         "\t    the enable_event and disable_event triggers.\n"
4460 #endif
4461 ;
4462
4463 static ssize_t
4464 tracing_readme_read(struct file *filp, char __user *ubuf,
4465                        size_t cnt, loff_t *ppos)
4466 {
4467         return simple_read_from_buffer(ubuf, cnt, ppos,
4468                                         readme_msg, strlen(readme_msg));
4469 }
4470
4471 static const struct file_operations tracing_readme_fops = {
4472         .open           = tracing_open_generic,
4473         .read           = tracing_readme_read,
4474         .llseek         = generic_file_llseek,
4475 };
4476
4477 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4478 {
4479         unsigned int *ptr = v;
4480
4481         if (*pos || m->count)
4482                 ptr++;
4483
4484         (*pos)++;
4485
4486         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4487              ptr++) {
4488                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4489                         continue;
4490
4491                 return ptr;
4492         }
4493
4494         return NULL;
4495 }
4496
4497 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4498 {
4499         void *v;
4500         loff_t l = 0;
4501
4502         preempt_disable();
4503         arch_spin_lock(&trace_cmdline_lock);
4504
4505         v = &savedcmd->map_cmdline_to_pid[0];
4506         while (l <= *pos) {
4507                 v = saved_cmdlines_next(m, v, &l);
4508                 if (!v)
4509                         return NULL;
4510         }
4511
4512         return v;
4513 }
4514
4515 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4516 {
4517         arch_spin_unlock(&trace_cmdline_lock);
4518         preempt_enable();
4519 }
4520
4521 static int saved_cmdlines_show(struct seq_file *m, void *v)
4522 {
4523         char buf[TASK_COMM_LEN];
4524         unsigned int *pid = v;
4525
4526         __trace_find_cmdline(*pid, buf);
4527         seq_printf(m, "%d %s\n", *pid, buf);
4528         return 0;
4529 }
4530
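/*
 * seq_file operations for "saved_cmdlines": walk the cached pid->comm map
 * under trace_cmdline_lock and print one "pid comm" pair per line.
 */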
4531 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4532         .start          = saved_cmdlines_start,
4533         .next           = saved_cmdlines_next,
4534         .stop           = saved_cmdlines_stop,
4535         .show           = saved_cmdlines_show,
4536 };
4537
4538 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4539 {
4540         if (tracing_disabled)
4541                 return -ENODEV;
4542
4543         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4544 }
4545
4546 static const struct file_operations tracing_saved_cmdlines_fops = {
4547         .open           = tracing_saved_cmdlines_open,
4548         .read           = seq_read,
4549         .llseek         = seq_lseek,
4550         .release        = seq_release,
4551 };
4552
4553 static ssize_t
4554 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4555                                  size_t cnt, loff_t *ppos)
4556 {
4557         char buf[64];
4558         int r;
4559
4560         arch_spin_lock(&trace_cmdline_lock);
4561         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4562         arch_spin_unlock(&trace_cmdline_lock);
4563
4564         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4565 }
4566
4567 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4568 {
4569         kfree(s->saved_cmdlines);
4570         kfree(s->map_cmdline_to_pid);
4571         kfree(s);
4572 }
4573
4574 static int tracing_resize_saved_cmdlines(unsigned int val)
4575 {
4576         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4577
4578         s = kmalloc(sizeof(*s), GFP_KERNEL);
4579         if (!s)
4580                 return -ENOMEM;
4581
4582         if (allocate_cmdlines_buffer(val, s) < 0) {
4583                 kfree(s);
4584                 return -ENOMEM;
4585         }
4586
4587         arch_spin_lock(&trace_cmdline_lock);
4588         savedcmd_temp = savedcmd;
4589         savedcmd = s;
4590         arch_spin_unlock(&trace_cmdline_lock);
4591         free_saved_cmdlines_buffer(savedcmd_temp);
4592
4593         return 0;
4594 }
4595
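/*
 * Write handler for "saved_cmdlines_size"; accepts 1..PID_MAX_DEFAULT.
 * Example, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */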
4596 static ssize_t
4597 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4598                                   size_t cnt, loff_t *ppos)
4599 {
4600         unsigned long val;
4601         int ret;
4602
4603         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4604         if (ret)
4605                 return ret;
4606
4607         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4608         if (!val || val > PID_MAX_DEFAULT)
4609                 return -EINVAL;
4610
4611         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4612         if (ret < 0)
4613                 return ret;
4614
4615         *ppos += cnt;
4616
4617         return cnt;
4618 }
4619
4620 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4621         .open           = tracing_open_generic,
4622         .read           = tracing_saved_cmdlines_size_read,
4623         .write          = tracing_saved_cmdlines_size_write,
4624 };
4625
4626 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
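/*
 * If @ptr landed on a tail marker (no enum_string), follow the link to the
 * next map array and step past its head item; return NULL at the end of
 * the chain.
 */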
4627 static union trace_enum_map_item *
4628 update_enum_map(union trace_enum_map_item *ptr)
4629 {
4630         if (!ptr->map.enum_string) {
4631                 if (ptr->tail.next) {
4632                         ptr = ptr->tail.next;
4633                         /* Set ptr to the next real item (skip head) */
4634                         ptr++;
4635                 } else
4636                         return NULL;
4637         }
4638         return ptr;
4639 }
4640
4641 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4642 {
4643         union trace_enum_map_item *ptr = v;
4644
4645         /*
4646          * Paranoid! If ptr points to end, we don't want to increment past it.
4647          * This really should never happen.
4648          */
4649         ptr = update_enum_map(ptr);
4650         if (WARN_ON_ONCE(!ptr))
4651                 return NULL;
4652
4653         ptr++;
4654
4655         (*pos)++;
4656
4657         ptr = update_enum_map(ptr);
4658
4659         return ptr;
4660 }
4661
4662 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4663 {
4664         union trace_enum_map_item *v;
4665         loff_t l = 0;
4666
4667         mutex_lock(&trace_enum_mutex);
4668
4669         v = trace_enum_maps;
4670         if (v)
4671                 v++;
4672
4673         while (v && l < *pos) {
4674                 v = enum_map_next(m, v, &l);
4675         }
4676
4677         return v;
4678 }
4679
4680 static void enum_map_stop(struct seq_file *m, void *v)
4681 {
4682         mutex_unlock(&trace_enum_mutex);
4683 }
4684
4685 static int enum_map_show(struct seq_file *m, void *v)
4686 {
4687         union trace_enum_map_item *ptr = v;
4688
4689         seq_printf(m, "%s %ld (%s)\n",
4690                    ptr->map.enum_string, ptr->map.enum_value,
4691                    ptr->map.system);
4692
4693         return 0;
4694 }
4695
4696 static const struct seq_operations tracing_enum_map_seq_ops = {
4697         .start          = enum_map_start,
4698         .next           = enum_map_next,
4699         .stop           = enum_map_stop,
4700         .show           = enum_map_show,
4701 };
4702
4703 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4704 {
4705         if (tracing_disabled)
4706                 return -ENODEV;
4707
4708         return seq_open(filp, &tracing_enum_map_seq_ops);
4709 }
4710
4711 static const struct file_operations tracing_enum_map_fops = {
4712         .open           = tracing_enum_map_open,
4713         .read           = seq_read,
4714         .llseek         = seq_lseek,
4715         .release        = seq_release,
4716 };
4717
4718 static inline union trace_enum_map_item *
4719 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4720 {
4721         /* Return tail of array given the head */
4722         return ptr + ptr->head.length + 1;
4723 }
4724
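/*
 * Append @mod's enum maps, bracketed by head and tail bookkeeping items,
 * to the trace_enum_maps chain that backs the "enum_map" file.
 */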
4725 static void
4726 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4727                            int len)
4728 {
4729         struct trace_enum_map **stop;
4730         struct trace_enum_map **map;
4731         union trace_enum_map_item *map_array;
4732         union trace_enum_map_item *ptr;
4733
4734         stop = start + len;
4735
4736         /*
4737          * The trace_enum_maps list contains the maps plus a head and tail item,
4738          * where the head holds the module and the length of the array, and the
4739          * tail holds a pointer to the next list.
4740          */
4741         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4742         if (!map_array) {
4743                 pr_warn("Unable to allocate trace enum mapping\n");
4744                 return;
4745         }
4746
4747         mutex_lock(&trace_enum_mutex);
4748
4749         if (!trace_enum_maps)
4750                 trace_enum_maps = map_array;
4751         else {
4752                 ptr = trace_enum_maps;
4753                 for (;;) {
4754                         ptr = trace_enum_jmp_to_tail(ptr);
4755                         if (!ptr->tail.next)
4756                                 break;
4757                         ptr = ptr->tail.next;
4758
4759                 }
4760                 ptr->tail.next = map_array;
4761         }
4762         map_array->head.mod = mod;
4763         map_array->head.length = len;
4764         map_array++;
4765
4766         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4767                 map_array->map = **map;
4768                 map_array++;
4769         }
4770         memset(map_array, 0, sizeof(*map_array));
4771
4772         mutex_unlock(&trace_enum_mutex);
4773 }
4774
4775 static void trace_create_enum_file(struct dentry *d_tracer)
4776 {
4777         trace_create_file("enum_map", 0444, d_tracer,
4778                           NULL, &tracing_enum_map_fops);
4779 }
4780
4781 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4782 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4783 static inline void trace_insert_enum_map_file(struct module *mod,
4784                               struct trace_enum_map **start, int len) { }
4785 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4786
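/*
 * Called with an enum map section (from the kernel or a module): let the
 * trace event code update its enum values and, when the enum_map file is
 * configured in, also publish the maps through trace_insert_enum_map_file().
 */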
4787 static void trace_insert_enum_map(struct module *mod,
4788                                   struct trace_enum_map **start, int len)
4789 {
4790         struct trace_enum_map **map;
4791
4792         if (len <= 0)
4793                 return;
4794
4795         map = start;
4796
4797         trace_event_enum_update(map, len);
4798
4799         trace_insert_enum_map_file(mod, start, len);
4800 }
4801
4802 static ssize_t
4803 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4804                        size_t cnt, loff_t *ppos)
4805 {
4806         struct trace_array *tr = filp->private_data;
4807         char buf[MAX_TRACER_SIZE+2];
4808         int r;
4809
4810         mutex_lock(&trace_types_lock);
4811         r = sprintf(buf, "%s\n", tr->current_trace->name);
4812         mutex_unlock(&trace_types_lock);
4813
4814         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4815 }
4816
4817 int tracer_init(struct tracer *t, struct trace_array *tr)
4818 {
4819         tracing_reset_online_cpus(&tr->trace_buffer);
4820         return t->init(tr);
4821 }
4822
4823 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4824 {
4825         int cpu;
4826
4827         for_each_tracing_cpu(cpu)
4828                 per_cpu_ptr(buf->data, cpu)->entries = val;
4829 }
4830
4831 #ifdef CONFIG_TRACER_MAX_TRACE
4832 /* resize @trace_buf's per-cpu entries to match @size_buf's per-cpu entries */
4833 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4834                                         struct trace_buffer *size_buf, int cpu_id)
4835 {
4836         int cpu, ret = 0;
4837
4838         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4839                 for_each_tracing_cpu(cpu) {
4840                         ret = ring_buffer_resize(trace_buf->buffer,
4841                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4842                         if (ret < 0)
4843                                 break;
4844                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4845                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4846                 }
4847         } else {
4848                 ret = ring_buffer_resize(trace_buf->buffer,
4849                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4850                 if (ret == 0)
4851                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4852                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4853         }
4854
4855         return ret;
4856 }
4857 #endif /* CONFIG_TRACER_MAX_TRACE */
4858
4859 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4860                                         unsigned long size, int cpu)
4861 {
4862         int ret;
4863
4864         /*
4865          * If kernel or user changes the size of the ring buffer
4866          * we use the size that was given, and we can forget about
4867          * expanding it later.
4868          */
4869         ring_buffer_expanded = true;
4870
4871         /* May be called before buffers are initialized */
4872         if (!tr->trace_buffer.buffer)
4873                 return 0;
4874
4875         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4876         if (ret < 0)
4877                 return ret;
4878
4879 #ifdef CONFIG_TRACER_MAX_TRACE
4880         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4881             !tr->current_trace->use_max_tr)
4882                 goto out;
4883
4884         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4885         if (ret < 0) {
4886                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4887                                                      &tr->trace_buffer, cpu);
4888                 if (r < 0) {
4889                         /*
4890                          * AARGH! We are left with a max buffer of a
4891                          * different size!
4892                          * The max buffer is our "snapshot" buffer.
4893                          * When a tracer needs a snapshot (one of the
4894                          * latency tracers), it swaps the max buffer
4895                          * with the saved snapshot. We succeeded in
4896                          * updating the size of the main buffer, but
4897                          * failed to update the size of the max buffer.
4898                          * Then, when we tried to reset the main buffer
4899                          * to its original size, that failed too. This
4900                          * is very unlikely to happen, but if it does,
4901                          * warn and kill all tracing.
4902                          */
4903                         WARN_ON(1);
4904                         tracing_disabled = 1;
4905                 }
4906                 return ret;
4907         }
4908
4909         if (cpu == RING_BUFFER_ALL_CPUS)
4910                 set_buffer_entries(&tr->max_buffer, size);
4911         else
4912                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4913
4914  out:
4915 #endif /* CONFIG_TRACER_MAX_TRACE */
4916
4917         if (cpu == RING_BUFFER_ALL_CPUS)
4918                 set_buffer_entries(&tr->trace_buffer, size);
4919         else
4920                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4921
4922         return ret;
4923 }
4924
4925 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4926                                           unsigned long size, int cpu_id)
4927 {
4928         int ret = size;
4929
4930         mutex_lock(&trace_types_lock);
4931
4932         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4933                 /* make sure this cpu is enabled in the mask */
4934                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4935                         ret = -EINVAL;
4936                         goto out;
4937                 }
4938         }
4939
4940         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4941         if (ret < 0)
4942                 ret = -ENOMEM;
4943
4944 out:
4945         mutex_unlock(&trace_types_lock);
4946
4947         return ret;
4948 }
4949
4950
4951 /**
4952  * tracing_update_buffers - used by tracing facility to expand ring buffers
4953  *
4954  * To save memory on systems where tracing is configured in but never
4955  * used, the ring buffers start out at a minimum size. Once a user
4956  * starts to use the tracing facility, they need to grow to their
4957  * default size.
4958  *
4959  * This function is to be called when a tracer is about to be used.
4960  */
4961 int tracing_update_buffers(void)
4962 {
4963         int ret = 0;
4964
4965         mutex_lock(&trace_types_lock);
4966         if (!ring_buffer_expanded)
4967                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4968                                                 RING_BUFFER_ALL_CPUS);
4969         mutex_unlock(&trace_types_lock);
4970
4971         return ret;
4972 }
4973
4974 struct trace_option_dentry;
4975
4976 static void
4977 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4978
4979 /*
4980  * Used to clear out the tracer before deletion of an instance.
4981  * Must have trace_types_lock held.
4982  */
4983 static void tracing_set_nop(struct trace_array *tr)
4984 {
4985         if (tr->current_trace == &nop_trace)
4986                 return;
4987
4988         tr->current_trace->enabled--;
4989
4990         if (tr->current_trace->reset)
4991                 tr->current_trace->reset(tr);
4992
4993         tr->current_trace = &nop_trace;
4994 }
4995
4996 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4997 {
4998         /* Only enable if the directory has been created already. */
4999         if (!tr->dir)
5000                 return;
5001
5002         create_trace_option_files(tr, t);
5003 }
5004
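/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, look the tracer up in trace_types,
 * refuse the change while trace_pipe readers hold a reference, tear down
 * the current tracer, allocate or free the snapshot buffer as the new
 * tracer requires, and finally run the new tracer's init callback.
 */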
5005 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5006 {
5007         struct tracer *t;
5008 #ifdef CONFIG_TRACER_MAX_TRACE
5009         bool had_max_tr;
5010 #endif
5011         int ret = 0;
5012
5013         mutex_lock(&trace_types_lock);
5014
5015         if (!ring_buffer_expanded) {
5016                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5017                                                 RING_BUFFER_ALL_CPUS);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022
5023         for (t = trace_types; t; t = t->next) {
5024                 if (strcmp(t->name, buf) == 0)
5025                         break;
5026         }
5027         if (!t) {
5028                 ret = -EINVAL;
5029                 goto out;
5030         }
5031         if (t == tr->current_trace)
5032                 goto out;
5033
5034         /* Some tracers are only allowed for the top level buffer */
5035         if (!trace_ok_for_array(t, tr)) {
5036                 ret = -EINVAL;
5037                 goto out;
5038         }
5039
5040         /* If trace pipe files are being read, we can't change the tracer */
5041         if (tr->current_trace->ref) {
5042                 ret = -EBUSY;
5043                 goto out;
5044         }
5045
5046         trace_branch_disable();
5047
5048         tr->current_trace->enabled--;
5049
5050         if (tr->current_trace->reset)
5051                 tr->current_trace->reset(tr);
5052
5053         /* Current trace needs to be nop_trace before synchronize_sched */
5054         tr->current_trace = &nop_trace;
5055
5056 #ifdef CONFIG_TRACER_MAX_TRACE
5057         had_max_tr = tr->allocated_snapshot;
5058
5059         if (had_max_tr && !t->use_max_tr) {
5060                 /*
5061                  * We need to make sure that the update_max_tr sees that
5062                  * current_trace changed to nop_trace to keep it from
5063                  * swapping the buffers after we resize it.
5064                  * update_max_tr() is called with interrupts disabled,
5065                  * so a synchronize_sched() is sufficient.
5066                  */
5067                 synchronize_sched();
5068                 free_snapshot(tr);
5069         }
5070 #endif
5071
5072 #ifdef CONFIG_TRACER_MAX_TRACE
5073         if (t->use_max_tr && !had_max_tr) {
5074                 ret = alloc_snapshot(tr);
5075                 if (ret < 0)
5076                         goto out;
5077         }
5078 #endif
5079
5080         if (t->init) {
5081                 ret = tracer_init(t, tr);
5082                 if (ret)
5083                         goto out;
5084         }
5085
5086         tr->current_trace = t;
5087         tr->current_trace->enabled++;
5088         trace_branch_enable(tr);
5089  out:
5090         mutex_unlock(&trace_types_lock);
5091
5092         return ret;
5093 }
5094
5095 static ssize_t
5096 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5097                         size_t cnt, loff_t *ppos)
5098 {
5099         struct trace_array *tr = filp->private_data;
5100         char buf[MAX_TRACER_SIZE+1];
5101         int i;
5102         size_t ret;
5103         int err;
5104
5105         ret = cnt;
5106
5107         if (cnt > MAX_TRACER_SIZE)
5108                 cnt = MAX_TRACER_SIZE;
5109
5110         if (copy_from_user(buf, ubuf, cnt))
5111                 return -EFAULT;
5112
5113         buf[cnt] = 0;
5114
5115         /* strip trailing whitespace. */
5116         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5117                 buf[i] = 0;
5118
5119         err = tracing_set_tracer(tr, buf);
5120         if (err)
5121                 return err;
5122
5123         *ppos += ret;
5124
5125         return ret;
5126 }
5127
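/*
 * Helpers for latency-style values that are stored in nanoseconds but
 * exposed to user space in microseconds: reads convert with
 * nsecs_to_usecs() (printing -1 for the "unset" value), writes multiply
 * the user value by 1000.
 */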
5128 static ssize_t
5129 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5130                    size_t cnt, loff_t *ppos)
5131 {
5132         char buf[64];
5133         int r;
5134
5135         r = snprintf(buf, sizeof(buf), "%ld\n",
5136                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5137         if (r > sizeof(buf))
5138                 r = sizeof(buf);
5139         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5140 }
5141
5142 static ssize_t
5143 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5144                     size_t cnt, loff_t *ppos)
5145 {
5146         unsigned long val;
5147         int ret;
5148
5149         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5150         if (ret)
5151                 return ret;
5152
5153         *ptr = val * 1000;
5154
5155         return cnt;
5156 }
5157
5158 static ssize_t
5159 tracing_thresh_read(struct file *filp, char __user *ubuf,
5160                     size_t cnt, loff_t *ppos)
5161 {
5162         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5163 }
5164
5165 static ssize_t
5166 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5167                      size_t cnt, loff_t *ppos)
5168 {
5169         struct trace_array *tr = filp->private_data;
5170         int ret;
5171
5172         mutex_lock(&trace_types_lock);
5173         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5174         if (ret < 0)
5175                 goto out;
5176
5177         if (tr->current_trace->update_thresh) {
5178                 ret = tr->current_trace->update_thresh(tr);
5179                 if (ret < 0)
5180                         goto out;
5181         }
5182
5183         ret = cnt;
5184 out:
5185         mutex_unlock(&trace_types_lock);
5186
5187         return ret;
5188 }
5189
5190 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5191
5192 static ssize_t
5193 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5194                      size_t cnt, loff_t *ppos)
5195 {
5196         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5197 }
5198
5199 static ssize_t
5200 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5201                       size_t cnt, loff_t *ppos)
5202 {
5203         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5204 }
5205
5206 #endif
5207
5208 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5209 {
5210         struct trace_array *tr = inode->i_private;
5211         struct trace_iterator *iter;
5212         int ret = 0;
5213
5214         if (tracing_disabled)
5215                 return -ENODEV;
5216
5217         if (trace_array_get(tr) < 0)
5218                 return -ENODEV;
5219
5220         mutex_lock(&trace_types_lock);
5221
5222         /* create a buffer to store the information to pass to userspace */
5223         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5224         if (!iter) {
5225                 ret = -ENOMEM;
5226                 __trace_array_put(tr);
5227                 goto out;
5228         }
5229
5230         trace_seq_init(&iter->seq);
5231         iter->trace = tr->current_trace;
5232
5233         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5234                 ret = -ENOMEM;
5235                 goto fail;
5236         }
5237
5238         /* trace pipe does not show start of buffer */
5239         cpumask_setall(iter->started);
5240
5241         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5242                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5243
5244         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5245         if (trace_clocks[tr->clock_id].in_ns)
5246                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5247
5248         iter->tr = tr;
5249         iter->trace_buffer = &tr->trace_buffer;
5250         iter->cpu_file = tracing_get_cpu(inode);
5251         mutex_init(&iter->mutex);
5252         filp->private_data = iter;
5253
5254         if (iter->trace->pipe_open)
5255                 iter->trace->pipe_open(iter);
5256
5257         nonseekable_open(inode, filp);
5258
5259         tr->current_trace->ref++;
5260 out:
5261         mutex_unlock(&trace_types_lock);
5262         return ret;
5263
5264 fail:
5265         kfree(iter->trace);
5266         kfree(iter);
5267         __trace_array_put(tr);
5268         mutex_unlock(&trace_types_lock);
5269         return ret;
5270 }
5271
5272 static int tracing_release_pipe(struct inode *inode, struct file *file)
5273 {
5274         struct trace_iterator *iter = file->private_data;
5275         struct trace_array *tr = inode->i_private;
5276
5277         mutex_lock(&trace_types_lock);
5278
5279         tr->current_trace->ref--;
5280
5281         if (iter->trace->pipe_close)
5282                 iter->trace->pipe_close(iter);
5283
5284         mutex_unlock(&trace_types_lock);
5285
5286         free_cpumask_var(iter->started);
5287         mutex_destroy(&iter->mutex);
5288         kfree(iter);
5289
5290         trace_array_put(tr);
5291
5292         return 0;
5293 }
5294
5295 static unsigned int
5296 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5297 {
5298         struct trace_array *tr = iter->tr;
5299
5300         /* Iterators are static; they should be either filled or empty */
5301         if (trace_buffer_iter(iter, iter->cpu_file))
5302                 return POLLIN | POLLRDNORM;
5303
5304         if (tr->trace_flags & TRACE_ITER_BLOCK)
5305                 /*
5306                  * Always select as readable when in blocking mode
5307                  */
5308                 return POLLIN | POLLRDNORM;
5309         else
5310                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5311                                              filp, poll_table);
5312 }
5313
5314 static unsigned int
5315 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5316 {
5317         struct trace_iterator *iter = filp->private_data;
5318
5319         return trace_poll(iter, filp, poll_table);
5320 }
5321
5322 /* Must be called with iter->mutex held. */
5323 static int tracing_wait_pipe(struct file *filp)
5324 {
5325         struct trace_iterator *iter = filp->private_data;
5326         int ret;
5327
5328         while (trace_empty(iter)) {
5329
5330                 if ((filp->f_flags & O_NONBLOCK)) {
5331                         return -EAGAIN;
5332                 }
5333
5334                 /*
5335                  * We block until we have read something and tracing is
5336                  * disabled. If tracing is disabled but we have never read
5337                  * anything, we still block. This allows a user to cat this
5338                  * file, and then enable tracing. But after we have read
5339                  * something, we give an EOF when tracing is disabled again.
5340                  *
5341                  * iter->pos will be 0 if we haven't read anything.
5342                  */
5343                 if (!tracing_is_on() && iter->pos)
5344                         break;
5345
5346                 mutex_unlock(&iter->mutex);
5347
5348                 ret = wait_on_pipe(iter, false);
5349
5350                 mutex_lock(&iter->mutex);
5351
5352                 if (ret)
5353                         return ret;
5354         }
5355
5356         return 1;
5357 }
5358
5359 /*
5360  * Consumer reader.
5361  */
5362 static ssize_t
5363 tracing_read_pipe(struct file *filp, char __user *ubuf,
5364                   size_t cnt, loff_t *ppos)
5365 {
5366         struct trace_iterator *iter = filp->private_data;
5367         ssize_t sret;
5368
5369         /*
5370          * Avoid more than one consumer on a single file descriptor.
5371          * This is just a matter of trace coherency; the ring buffer itself
5372          * is protected.
5373          */
5374         mutex_lock(&iter->mutex);
5375
5376         /* return any leftover data */
5377         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5378         if (sret != -EBUSY)
5379                 goto out;
5380
5381         trace_seq_init(&iter->seq);
5382
5383         if (iter->trace->read) {
5384                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5385                 if (sret)
5386                         goto out;
5387         }
5388
5389 waitagain:
5390         sret = tracing_wait_pipe(filp);
5391         if (sret <= 0)
5392                 goto out;
5393
5394         /* stop when tracing is finished */
5395         if (trace_empty(iter)) {
5396                 sret = 0;
5397                 goto out;
5398         }
5399
5400         if (cnt >= PAGE_SIZE)
5401                 cnt = PAGE_SIZE - 1;
5402
5403         /* reset all but tr, trace, and overruns */
5404         memset(&iter->seq, 0,
5405                sizeof(struct trace_iterator) -
5406                offsetof(struct trace_iterator, seq));
5407         cpumask_clear(iter->started);
5408         iter->pos = -1;
5409
5410         trace_event_read_lock();
5411         trace_access_lock(iter->cpu_file);
5412         while (trace_find_next_entry_inc(iter) != NULL) {
5413                 enum print_line_t ret;
5414                 int save_len = iter->seq.seq.len;
5415
5416                 ret = print_trace_line(iter);
5417                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5418                         /* don't print partial lines */
5419                         iter->seq.seq.len = save_len;
5420                         break;
5421                 }
5422                 if (ret != TRACE_TYPE_NO_CONSUME)
5423                         trace_consume(iter);
5424
5425                 if (trace_seq_used(&iter->seq) >= cnt)
5426                         break;
5427
5428                 /*
5429                  * The full flag means we reached the end of the trace_seq
5430                  * buffer and should have left via the partial-line check
5431                  * above; a trace_seq_* function is not being used properly.
5432                  */
5433                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5434                           iter->ent->type);
5435         }
5436         trace_access_unlock(iter->cpu_file);
5437         trace_event_read_unlock();
5438
5439         /* Now copy what we have to the user */
5440         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5441         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5442                 trace_seq_init(&iter->seq);
5443
5444         /*
5445          * If there was nothing to send to user, in spite of consuming trace
5446          * entries, go back to wait for more entries.
5447          */
5448         if (sret == -EBUSY)
5449                 goto waitagain;
5450
5451 out:
5452         mutex_unlock(&iter->mutex);
5453
5454         return sret;
5455 }
5456
5457 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5458                                      unsigned int idx)
5459 {
5460         __free_page(spd->pages[idx]);
5461 }
5462
5463 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5464         .can_merge              = 0,
5465         .confirm                = generic_pipe_buf_confirm,
5466         .release                = generic_pipe_buf_release,
5467         .steal                  = generic_pipe_buf_steal,
5468         .get                    = generic_pipe_buf_get,
5469 };
5470
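/*
 * Fill iter->seq with as many complete trace lines as fit in the page
 * sized seq buffer (and in the remaining @rem bytes of the splice
 * request), consuming entries as they are formatted. Returns how many
 * bytes of the request are still left to fill.
 */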
5471 static size_t
5472 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5473 {
5474         size_t count;
5475         int save_len;
5476         int ret;
5477
5478         /* Seq buffer is page-sized, exactly what we need. */
5479         for (;;) {
5480                 save_len = iter->seq.seq.len;
5481                 ret = print_trace_line(iter);
5482
5483                 if (trace_seq_has_overflowed(&iter->seq)) {
5484                         iter->seq.seq.len = save_len;
5485                         break;
5486                 }
5487
5488                 /*
5489                  * This should not be hit, because a partial line should
5490                  * only be returned when iter->seq overflowed, which is
5491                  * handled above. But check it anyway to be safe.
5492                  */
5493                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5494                         iter->seq.seq.len = save_len;
5495                         break;
5496                 }
5497
5498                 count = trace_seq_used(&iter->seq) - save_len;
5499                 if (rem < count) {
5500                         rem = 0;
5501                         iter->seq.seq.len = save_len;
5502                         break;
5503                 }
5504
5505                 if (ret != TRACE_TYPE_NO_CONSUME)
5506                         trace_consume(iter);
5507                 rem -= count;
5508                 if (!trace_find_next_entry_inc(iter))   {
5509                         rem = 0;
5510                         iter->ent = NULL;
5511                         break;
5512                 }
5513         }
5514
5515         return rem;
5516 }
5517
5518 static ssize_t tracing_splice_read_pipe(struct file *filp,
5519                                         loff_t *ppos,
5520                                         struct pipe_inode_info *pipe,
5521                                         size_t len,
5522                                         unsigned int flags)
5523 {
5524         struct page *pages_def[PIPE_DEF_BUFFERS];
5525         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5526         struct trace_iterator *iter = filp->private_data;
5527         struct splice_pipe_desc spd = {
5528                 .pages          = pages_def,
5529                 .partial        = partial_def,
5530                 .nr_pages       = 0, /* This gets updated below. */
5531                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5532                 .flags          = flags,
5533                 .ops            = &tracing_pipe_buf_ops,
5534                 .spd_release    = tracing_spd_release_pipe,
5535         };
5536         ssize_t ret;
5537         size_t rem;
5538         unsigned int i;
5539
5540         if (splice_grow_spd(pipe, &spd))
5541                 return -ENOMEM;
5542
5543         mutex_lock(&iter->mutex);
5544
5545         if (iter->trace->splice_read) {
5546                 ret = iter->trace->splice_read(iter, filp,
5547                                                ppos, pipe, len, flags);
5548                 if (ret)
5549                         goto out_err;
5550         }
5551
5552         ret = tracing_wait_pipe(filp);
5553         if (ret <= 0)
5554                 goto out_err;
5555
5556         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5557                 ret = -EFAULT;
5558                 goto out_err;
5559         }
5560
5561         trace_event_read_lock();
5562         trace_access_lock(iter->cpu_file);
5563
5564         /* Fill as many pages as possible. */
5565         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5566                 spd.pages[i] = alloc_page(GFP_KERNEL);
5567                 if (!spd.pages[i])
5568                         break;
5569
5570                 rem = tracing_fill_pipe_page(rem, iter);
5571
5572                 /* Copy the data into the page, so we can start over. */
5573                 ret = trace_seq_to_buffer(&iter->seq,
5574                                           page_address(spd.pages[i]),
5575                                           trace_seq_used(&iter->seq));
5576                 if (ret < 0) {
5577                         __free_page(spd.pages[i]);
5578                         break;
5579                 }
5580                 spd.partial[i].offset = 0;
5581                 spd.partial[i].len = trace_seq_used(&iter->seq);
5582
5583                 trace_seq_init(&iter->seq);
5584         }
5585
5586         trace_access_unlock(iter->cpu_file);
5587         trace_event_read_unlock();
5588         mutex_unlock(&iter->mutex);
5589
5590         spd.nr_pages = i;
5591
5592         if (i)
5593                 ret = splice_to_pipe(pipe, &spd);
5594         else
5595                 ret = 0;
5596 out:
5597         splice_shrink_spd(&spd);
5598         return ret;
5599
5600 out_err:
5601         mutex_unlock(&iter->mutex);
5602         goto out;
5603 }
5604
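/*
 * Report the ring buffer size in KB. For the "all CPUs" file this prints
 * a single value when every per-cpu buffer is the same size (along with
 * the would-be expanded size while the buffer is still at its boot-time
 * minimum), or "X" when the per-cpu sizes differ.
 */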
5605 static ssize_t
5606 tracing_entries_read(struct file *filp, char __user *ubuf,
5607                      size_t cnt, loff_t *ppos)
5608 {
5609         struct inode *inode = file_inode(filp);
5610         struct trace_array *tr = inode->i_private;
5611         int cpu = tracing_get_cpu(inode);
5612         char buf[64];
5613         int r = 0;
5614         ssize_t ret;
5615
5616         mutex_lock(&trace_types_lock);
5617
5618         if (cpu == RING_BUFFER_ALL_CPUS) {
5619                 int cpu, buf_size_same;
5620                 unsigned long size;
5621
5622                 size = 0;
5623                 buf_size_same = 1;
5624                 /* check if all cpu buffer sizes are the same */
5625                 for_each_tracing_cpu(cpu) {
5626                         /* fill in the size from first enabled cpu */
5627                         if (size == 0)
5628                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5629                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5630                                 buf_size_same = 0;
5631                                 break;
5632                         }
5633                 }
5634
5635                 if (buf_size_same) {
5636                         if (!ring_buffer_expanded)
5637                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5638                                             size >> 10,
5639                                             trace_buf_size >> 10);
5640                         else
5641                                 r = sprintf(buf, "%lu\n", size >> 10);
5642                 } else
5643                         r = sprintf(buf, "X\n");
5644         } else
5645                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5646
5647         mutex_unlock(&trace_types_lock);
5648
5649         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5650         return ret;
5651 }
5652
5653 static ssize_t
5654 tracing_entries_write(struct file *filp, const char __user *ubuf,
5655                       size_t cnt, loff_t *ppos)
5656 {
5657         struct inode *inode = file_inode(filp);
5658         struct trace_array *tr = inode->i_private;
5659         unsigned long val;
5660         int ret;
5661
5662         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5663         if (ret)
5664                 return ret;
5665
5666         /* must have at least 1 entry */
5667         if (!val)
5668                 return -EINVAL;
5669
5670         /* value is in KB */
5671         val <<= 10;
5672         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5673         if (ret < 0)
5674                 return ret;
5675
5676         *ppos += cnt;
5677
5678         return cnt;
5679 }
5680
5681 static ssize_t
5682 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5683                                 size_t cnt, loff_t *ppos)
5684 {
5685         struct trace_array *tr = filp->private_data;
5686         char buf[64];
5687         int r, cpu;
5688         unsigned long size = 0, expanded_size = 0;
5689
5690         mutex_lock(&trace_types_lock);
5691         for_each_tracing_cpu(cpu) {
5692                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5693                 if (!ring_buffer_expanded)
5694                         expanded_size += trace_buf_size >> 10;
5695         }
5696         if (ring_buffer_expanded)
5697                 r = sprintf(buf, "%lu\n", size);
5698         else
5699                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5700         mutex_unlock(&trace_types_lock);
5701
5702         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5703 }
5704
5705 static ssize_t
5706 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5707                           size_t cnt, loff_t *ppos)
5708 {
5709         /*
5710          * There is no need to read what the user has written; this function
5711          * exists just so that using "echo" on this file does not return an error.
5712          */
5713
5714         *ppos += cnt;
5715
5716         return cnt;
5717 }
5718
5719 static int
5720 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5721 {
5722         struct trace_array *tr = inode->i_private;
5723
5724         /* disable tracing? */
5725         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5726                 tracer_tracing_off(tr);
5727         /* resize the ring buffer to 0 */
5728         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5729
5730         trace_array_put(tr);
5731
5732         return 0;
5733 }
5734
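/*
 * Write handler for the user-space trace marker (typically exposed as
 * "trace_marker" in tracefs, though the file is created elsewhere): copy
 * up to TRACE_BUF_SIZE bytes from user space into a TRACE_PRINT event,
 * substituting "<faulted>" if the copy faults, and make sure the text is
 * newline terminated.
 */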
5735 static ssize_t
5736 tracing_mark_write(struct file *filp, const char __user *ubuf,
5737                                         size_t cnt, loff_t *fpos)
5738 {
5739         struct trace_array *tr = filp->private_data;
5740         struct ring_buffer_event *event;
5741         struct ring_buffer *buffer;
5742         struct print_entry *entry;
5743         unsigned long irq_flags;
5744         const char faulted[] = "<faulted>";
5745         ssize_t written;
5746         int size;
5747         int len;
5748
5749 /* Used in tracing_mark_raw_write() as well */
5750 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5751
5752         if (tracing_disabled)
5753                 return -EINVAL;
5754
5755         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5756                 return -EINVAL;
5757
5758         if (cnt > TRACE_BUF_SIZE)
5759                 cnt = TRACE_BUF_SIZE;
5760
5761         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5762
5763         local_save_flags(irq_flags);
5764         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5765
5766         /* If less than "<faulted>", then make sure we can still add that */
5767         if (cnt < FAULTED_SIZE)
5768                 size += FAULTED_SIZE - cnt;
5769
5770         buffer = tr->trace_buffer.buffer;
5771         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5772                                             irq_flags, preempt_count());
5773         if (unlikely(!event))
5774                 /* Ring buffer disabled, return as if not open for write */
5775                 return -EBADF;
5776
5777         entry = ring_buffer_event_data(event);
5778         entry->ip = _THIS_IP_;
5779
5780         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5781         if (len) {
5782                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5783                 cnt = FAULTED_SIZE;
5784                 written = -EFAULT;
5785         } else
5786                 written = cnt;
5787         len = cnt;
5788
5789         if (entry->buf[cnt - 1] != '\n') {
5790                 entry->buf[cnt] = '\n';
5791                 entry->buf[cnt + 1] = '\0';
5792         } else
5793                 entry->buf[cnt] = '\0';
5794
5795         __buffer_unlock_commit(buffer, event);
5796
5797         if (written > 0)
5798                 *fpos += written;
5799
5800         return written;
5801 }
5802
5803 /* Limit it for now to 3K (including tag) */
5804 #define RAW_DATA_MAX_SIZE (1024*3)
5805
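/*
 * Binary variant of the marker write above (typically exposed as
 * "trace_marker_raw"): the payload must begin with an unsigned int tag
 * id and is stored verbatim in a TRACE_RAW_DATA event, with the id set
 * to -1 and the text "<faulted>" substituted if the copy faults.
 */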
5806 static ssize_t
5807 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5808                                         size_t cnt, loff_t *fpos)
5809 {
5810         struct trace_array *tr = filp->private_data;
5811         struct ring_buffer_event *event;
5812         struct ring_buffer *buffer;
5813         struct raw_data_entry *entry;
5814         const char faulted[] = "<faulted>";
5815         unsigned long irq_flags;
5816         ssize_t written;
5817         int size;
5818         int len;
5819
5820 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5821
5822         if (tracing_disabled)
5823                 return -EINVAL;
5824
5825         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5826                 return -EINVAL;
5827
5828         /* The marker must at least have a tag id */
5829         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5830                 return -EINVAL;
5831
5832         if (cnt > TRACE_BUF_SIZE)
5833                 cnt = TRACE_BUF_SIZE;
5834
5835         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5836
5837         local_save_flags(irq_flags);
5838         size = sizeof(*entry) + cnt;
5839         if (cnt < FAULT_SIZE_ID)
5840                 size += FAULT_SIZE_ID - cnt;
5841
5842         buffer = tr->trace_buffer.buffer;
5843         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5844                                             irq_flags, preempt_count());
5845         if (!event)
5846                 /* Ring buffer disabled, return as if not open for write */
5847                 return -EBADF;
5848
5849         entry = ring_buffer_event_data(event);
5850
5851         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5852         if (len) {
5853                 entry->id = -1;
5854                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5855                 written = -EFAULT;
5856         } else
5857                 written = cnt;
5858
5859         __buffer_unlock_commit(buffer, event);
5860
5861         if (written > 0)
5862                 *fpos += written;
5863
5864         return written;
5865 }
5866
5867 static int tracing_clock_show(struct seq_file *m, void *v)
5868 {
5869         struct trace_array *tr = m->private;
5870         int i;
5871
5872         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5873                 seq_printf(m,
5874                         "%s%s%s%s", i ? " " : "",
5875                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5876                         i == tr->clock_id ? "]" : "");
5877         seq_putc(m, '\n');
5878
5879         return 0;
5880 }
5881
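/*
 * Select the trace clock named @clockstr from trace_clocks[] and apply it
 * to @tr's ring buffer (and, for the global trace array, the max/snapshot
 * buffer). The buffers are reset afterwards, since timestamps taken with
 * different clocks are not comparable.
 */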
5882 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5883 {
5884         int i;
5885
5886         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5887                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5888                         break;
5889         }
5890         if (i == ARRAY_SIZE(trace_clocks))
5891                 return -EINVAL;
5892
5893         mutex_lock(&trace_types_lock);
5894
5895         tr->clock_id = i;
5896
5897         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5898
5899         /*
5900          * New clock may not be consistent with the previous clock.
5901          * Reset the buffer so that it doesn't have incomparable timestamps.
5902          */
5903         tracing_reset_online_cpus(&tr->trace_buffer);
5904
5905 #ifdef CONFIG_TRACER_MAX_TRACE
5906         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5907                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5908         tracing_reset_online_cpus(&tr->max_buffer);
5909 #endif
5910
5911         mutex_unlock(&trace_types_lock);
5912
5913         return 0;
5914 }
5915
5916 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5917                                    size_t cnt, loff_t *fpos)
5918 {
5919         struct seq_file *m = filp->private_data;
5920         struct trace_array *tr = m->private;
5921         char buf[64];
5922         const char *clockstr;
5923         int ret;
5924
5925         if (cnt >= sizeof(buf))
5926                 return -EINVAL;
5927
5928         if (copy_from_user(buf, ubuf, cnt))
5929                 return -EFAULT;
5930
5931         buf[cnt] = 0;
5932
5933         clockstr = strstrip(buf);
5934
5935         ret = tracing_set_clock(tr, clockstr);
5936         if (ret)
5937                 return ret;
5938
5939         *fpos += cnt;
5940
5941         return cnt;
5942 }
5943
5944 static int tracing_clock_open(struct inode *inode, struct file *file)
5945 {
5946         struct trace_array *tr = inode->i_private;
5947         int ret;
5948
5949         if (tracing_disabled)
5950                 return -ENODEV;
5951
5952         if (trace_array_get(tr))
5953                 return -ENODEV;
5954
5955         ret = single_open(file, tracing_clock_show, inode->i_private);
5956         if (ret < 0)
5957                 trace_array_put(tr);
5958
5959         return ret;
5960 }
5961
5962 struct ftrace_buffer_info {
5963         struct trace_iterator   iter;
5964         void                    *spare;
5965         unsigned int            read;
5966 };
5967
5968 #ifdef CONFIG_TRACER_SNAPSHOT
5969 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5970 {
5971         struct trace_array *tr = inode->i_private;
5972         struct trace_iterator *iter;
5973         struct seq_file *m;
5974         int ret = 0;
5975
5976         if (trace_array_get(tr) < 0)
5977                 return -ENODEV;
5978
5979         if (file->f_mode & FMODE_READ) {
5980                 iter = __tracing_open(inode, file, true);
5981                 if (IS_ERR(iter))
5982                         ret = PTR_ERR(iter);
5983         } else {
5984                 /* Writes still need the seq_file to hold the private data */
5985                 ret = -ENOMEM;
5986                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5987                 if (!m)
5988                         goto out;
5989                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5990                 if (!iter) {
5991                         kfree(m);
5992                         goto out;
5993                 }
5994                 ret = 0;
5995
5996                 iter->tr = tr;
5997                 iter->trace_buffer = &tr->max_buffer;
5998                 iter->cpu_file = tracing_get_cpu(inode);
5999                 m->private = iter;
6000                 file->private_data = m;
6001         }
6002 out:
6003         if (ret < 0)
6004                 trace_array_put(tr);
6005
6006         return ret;
6007 }
6008
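/*
 * Write handler for the snapshot file: writing 0 frees the snapshot
 * buffer, writing 1 allocates it if needed and swaps in the current
 * contents of the trace buffer (per-cpu swaps only when the ring buffer
 * supports them), and any other value clears the snapshot buffer if it
 * is allocated. Refused while the current tracer itself uses the
 * max/snapshot buffer.
 */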
6009 static ssize_t
6010 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6011                        loff_t *ppos)
6012 {
6013         struct seq_file *m = filp->private_data;
6014         struct trace_iterator *iter = m->private;
6015         struct trace_array *tr = iter->tr;
6016         unsigned long val;
6017         int ret;
6018
6019         ret = tracing_update_buffers();
6020         if (ret < 0)
6021                 return ret;
6022
6023         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6024         if (ret)
6025                 return ret;
6026
6027         mutex_lock(&trace_types_lock);
6028
6029         if (tr->current_trace->use_max_tr) {
6030                 ret = -EBUSY;
6031                 goto out;
6032         }
6033
6034         switch (val) {
6035         case 0:
6036                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6037                         ret = -EINVAL;
6038                         break;
6039                 }
6040                 if (tr->allocated_snapshot)
6041                         free_snapshot(tr);
6042                 break;
6043         case 1:
6044 /* Only allow per-cpu swap if the ring buffer supports it */
6045 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6046                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6047                         ret = -EINVAL;
6048                         break;
6049                 }
6050 #endif
6051                 if (!tr->allocated_snapshot) {
6052                         ret = alloc_snapshot(tr);
6053                         if (ret < 0)
6054                                 break;
6055                 }
6056                 local_irq_disable();
6057                 /* Now, we're going to swap */
6058                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6059                         update_max_tr(tr, current, smp_processor_id());
6060                 else
6061                         update_max_tr_single(tr, current, iter->cpu_file);
6062                 local_irq_enable();
6063                 break;
6064         default:
6065                 if (tr->allocated_snapshot) {
6066                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6067                                 tracing_reset_online_cpus(&tr->max_buffer);
6068                         else
6069                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6070                 }
6071                 break;
6072         }
6073
6074         if (ret >= 0) {
6075                 *ppos += cnt;
6076                 ret = cnt;
6077         }
6078 out:
6079         mutex_unlock(&trace_types_lock);
6080         return ret;
6081 }
6082
6083 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6084 {
6085         struct seq_file *m = file->private_data;
6086         int ret;
6087
6088         ret = tracing_release(inode, file);
6089
6090         if (file->f_mode & FMODE_READ)
6091                 return ret;
6092
6093         /* If write only, the seq_file is just a stub */
6094         if (m)
6095                 kfree(m->private);
6096         kfree(m);
6097
6098         return 0;
6099 }
6100
6101 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6102 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6103                                     size_t count, loff_t *ppos);
6104 static int tracing_buffers_release(struct inode *inode, struct file *file);
6105 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6106                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6107
6108 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6109 {
6110         struct ftrace_buffer_info *info;
6111         int ret;
6112
6113         ret = tracing_buffers_open(inode, filp);
6114         if (ret < 0)
6115                 return ret;
6116
6117         info = filp->private_data;
6118
6119         if (info->iter.trace->use_max_tr) {
6120                 tracing_buffers_release(inode, filp);
6121                 return -EBUSY;
6122         }
6123
6124         info->iter.snapshot = true;
6125         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6126
6127         return ret;
6128 }
6129
6130 #endif /* CONFIG_TRACER_SNAPSHOT */
6131
6132
6133 static const struct file_operations tracing_thresh_fops = {
6134         .open           = tracing_open_generic,
6135         .read           = tracing_thresh_read,
6136         .write          = tracing_thresh_write,
6137         .llseek         = generic_file_llseek,
6138 };
6139
6140 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6141 static const struct file_operations tracing_max_lat_fops = {
6142         .open           = tracing_open_generic,
6143         .read           = tracing_max_lat_read,
6144         .write          = tracing_max_lat_write,
6145         .llseek         = generic_file_llseek,
6146 };
6147 #endif
6148
6149 static const struct file_operations set_tracer_fops = {
6150         .open           = tracing_open_generic,
6151         .read           = tracing_set_trace_read,
6152         .write          = tracing_set_trace_write,
6153         .llseek         = generic_file_llseek,
6154 };
6155
6156 static const struct file_operations tracing_pipe_fops = {
6157         .open           = tracing_open_pipe,
6158         .poll           = tracing_poll_pipe,
6159         .read           = tracing_read_pipe,
6160         .splice_read    = tracing_splice_read_pipe,
6161         .release        = tracing_release_pipe,
6162         .llseek         = no_llseek,
6163 };
6164
6165 static const struct file_operations tracing_entries_fops = {
6166         .open           = tracing_open_generic_tr,
6167         .read           = tracing_entries_read,
6168         .write          = tracing_entries_write,
6169         .llseek         = generic_file_llseek,
6170         .release        = tracing_release_generic_tr,
6171 };
6172
6173 static const struct file_operations tracing_total_entries_fops = {
6174         .open           = tracing_open_generic_tr,
6175         .read           = tracing_total_entries_read,
6176         .llseek         = generic_file_llseek,
6177         .release        = tracing_release_generic_tr,
6178 };
6179
6180 static const struct file_operations tracing_free_buffer_fops = {
6181         .open           = tracing_open_generic_tr,
6182         .write          = tracing_free_buffer_write,
6183         .release        = tracing_free_buffer_release,
6184 };
6185
6186 static const struct file_operations tracing_mark_fops = {
6187         .open           = tracing_open_generic_tr,
6188         .write          = tracing_mark_write,
6189         .llseek         = generic_file_llseek,
6190         .release        = tracing_release_generic_tr,
6191 };
6192
6193 static const struct file_operations tracing_mark_raw_fops = {
6194         .open           = tracing_open_generic_tr,
6195         .write          = tracing_mark_raw_write,
6196         .llseek         = generic_file_llseek,
6197         .release        = tracing_release_generic_tr,
6198 };
6199
6200 static const struct file_operations trace_clock_fops = {
6201         .open           = tracing_clock_open,
6202         .read           = seq_read,
6203         .llseek         = seq_lseek,
6204         .release        = tracing_single_release_tr,
6205         .write          = tracing_clock_write,
6206 };
6207
6208 #ifdef CONFIG_TRACER_SNAPSHOT
6209 static const struct file_operations snapshot_fops = {
6210         .open           = tracing_snapshot_open,
6211         .read           = seq_read,
6212         .write          = tracing_snapshot_write,
6213         .llseek         = tracing_lseek,
6214         .release        = tracing_snapshot_release,
6215 };
6216
6217 static const struct file_operations snapshot_raw_fops = {
6218         .open           = snapshot_raw_open,
6219         .read           = tracing_buffers_read,
6220         .release        = tracing_buffers_release,
6221         .splice_read    = tracing_buffers_splice_read,
6222         .llseek         = no_llseek,
6223 };
6224
6225 #endif /* CONFIG_TRACER_SNAPSHOT */
6226
6227 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6228 {
6229         struct trace_array *tr = inode->i_private;
6230         struct ftrace_buffer_info *info;
6231         int ret;
6232
6233         if (tracing_disabled)
6234                 return -ENODEV;
6235
6236         if (trace_array_get(tr) < 0)
6237                 return -ENODEV;
6238
6239         info = kzalloc(sizeof(*info), GFP_KERNEL);
6240         if (!info) {
6241                 trace_array_put(tr);
6242                 return -ENOMEM;
6243         }
6244
6245         mutex_lock(&trace_types_lock);
6246
6247         info->iter.tr           = tr;
6248         info->iter.cpu_file     = tracing_get_cpu(inode);
6249         info->iter.trace        = tr->current_trace;
6250         info->iter.trace_buffer = &tr->trace_buffer;
6251         info->spare             = NULL;
6252         /* Force reading ring buffer for first read */
6253         info->read              = (unsigned int)-1;
6254
6255         filp->private_data = info;
6256
6257         tr->current_trace->ref++;
6258
6259         mutex_unlock(&trace_types_lock);
6260
6261         ret = nonseekable_open(inode, filp);
6262         if (ret < 0)
6263                 trace_array_put(tr);
6264
6265         return ret;
6266 }
6267
6268 static unsigned int
6269 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6270 {
6271         struct ftrace_buffer_info *info = filp->private_data;
6272         struct trace_iterator *iter = &info->iter;
6273
6274         return trace_poll(iter, filp, poll_table);
6275 }
6276
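/*
 * Read whole pages of binary ring buffer data: pull a page into the
 * preallocated spare page with ring_buffer_read_page(), then copy it out
 * to user space, waiting (unless O_NONBLOCK) while the buffer is empty.
 */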
6277 static ssize_t
6278 tracing_buffers_read(struct file *filp, char __user *ubuf,
6279                      size_t count, loff_t *ppos)
6280 {
6281         struct ftrace_buffer_info *info = filp->private_data;
6282         struct trace_iterator *iter = &info->iter;
6283         ssize_t ret;
6284         ssize_t size;
6285
6286         if (!count)
6287                 return 0;
6288
6289 #ifdef CONFIG_TRACER_MAX_TRACE
6290         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6291                 return -EBUSY;
6292 #endif
6293
6294         if (!info->spare)
6295                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6296                                                           iter->cpu_file);
6297         if (!info->spare)
6298                 return -ENOMEM;
6299
6300         /* Do we have previous read data to read? */
6301         if (info->read < PAGE_SIZE)
6302                 goto read;
6303
6304  again:
6305         trace_access_lock(iter->cpu_file);
6306         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6307                                     &info->spare,
6308                                     count,
6309                                     iter->cpu_file, 0);
6310         trace_access_unlock(iter->cpu_file);
6311
6312         if (ret < 0) {
6313                 if (trace_empty(iter)) {
6314                         if ((filp->f_flags & O_NONBLOCK))
6315                                 return -EAGAIN;
6316
6317                         ret = wait_on_pipe(iter, false);
6318                         if (ret)
6319                                 return ret;
6320
6321                         goto again;
6322                 }
6323                 return 0;
6324         }
6325
6326         info->read = 0;
6327  read:
6328         size = PAGE_SIZE - info->read;
6329         if (size > count)
6330                 size = count;
6331
6332         ret = copy_to_user(ubuf, info->spare + info->read, size);
6333         if (ret == size)
6334                 return -EFAULT;
6335
6336         size -= ret;
6337
6338         *ppos += size;
6339         info->read += size;
6340
6341         return size;
6342 }
6343
6344 static int tracing_buffers_release(struct inode *inode, struct file *file)
6345 {
6346         struct ftrace_buffer_info *info = file->private_data;
6347         struct trace_iterator *iter = &info->iter;
6348
6349         mutex_lock(&trace_types_lock);
6350
6351         iter->tr->current_trace->ref--;
6352
6353         __trace_array_put(iter->tr);
6354
6355         if (info->spare)
6356                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6357         kfree(info);
6358
6359         mutex_unlock(&trace_types_lock);
6360
6361         return 0;
6362 }
6363
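/*
 * A buffer_ref tracks one ring buffer page handed out to a pipe via
 * splice; the page is returned to the ring buffer once the last pipe
 * buffer referencing it has been released.
 */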
6364 struct buffer_ref {
6365         struct ring_buffer      *buffer;
6366         void                    *page;
6367         int                     ref;
6368 };
6369
6370 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6371                                     struct pipe_buffer *buf)
6372 {
6373         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6374
6375         if (--ref->ref)
6376                 return;
6377
6378         ring_buffer_free_read_page(ref->buffer, ref->page);
6379         kfree(ref);
6380         buf->private = 0;
6381 }
6382
6383 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6384                                 struct pipe_buffer *buf)
6385 {
6386         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6387
6388         ref->ref++;
6389 }
6390
6391 /* Pipe buffer operations for a buffer. */
6392 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6393         .can_merge              = 0,
6394         .confirm                = generic_pipe_buf_confirm,
6395         .release                = buffer_pipe_buf_release,
6396         .steal                  = generic_pipe_buf_steal,
6397         .get                    = buffer_pipe_buf_get,
6398 };
6399
6400 /*
6401  * Callback from splice_to_pipe(): release any pages left at the end of
6402  * the spd in case we errored out while filling the pipe.
6403  */
6404 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6405 {
6406         struct buffer_ref *ref =
6407                 (struct buffer_ref *)spd->partial[i].private;
6408
6409         if (--ref->ref)
6410                 return;
6411
6412         ring_buffer_free_read_page(ref->buffer, ref->page);
6413         kfree(ref);
6414         spd->partial[i].private = 0;
6415 }
6416
6417 static ssize_t
6418 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6419                             struct pipe_inode_info *pipe, size_t len,
6420                             unsigned int flags)
6421 {
6422         struct ftrace_buffer_info *info = file->private_data;
6423         struct trace_iterator *iter = &info->iter;
6424         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6425         struct page *pages_def[PIPE_DEF_BUFFERS];
6426         struct splice_pipe_desc spd = {
6427                 .pages          = pages_def,
6428                 .partial        = partial_def,
6429                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6430                 .flags          = flags,
6431                 .ops            = &buffer_pipe_buf_ops,
6432                 .spd_release    = buffer_spd_release,
6433         };
6434         struct buffer_ref *ref;
6435         int entries, size, i;
6436         ssize_t ret = 0;
6437
6438 #ifdef CONFIG_TRACER_MAX_TRACE
6439         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6440                 return -EBUSY;
6441 #endif
6442
6443         if (*ppos & (PAGE_SIZE - 1))
6444                 return -EINVAL;
6445
6446         if (len & (PAGE_SIZE - 1)) {
6447                 if (len < PAGE_SIZE)
6448                         return -EINVAL;
6449                 len &= PAGE_MASK;
6450         }
6451
6452         if (splice_grow_spd(pipe, &spd))
6453                 return -ENOMEM;
6454
6455  again:
6456         trace_access_lock(iter->cpu_file);
6457         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6458
6459         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6460                 struct page *page;
6461                 int r;
6462
6463                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6464                 if (!ref) {
6465                         ret = -ENOMEM;
6466                         break;
6467                 }
6468
6469                 ref->ref = 1;
6470                 ref->buffer = iter->trace_buffer->buffer;
6471                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6472                 if (!ref->page) {
6473                         ret = -ENOMEM;
6474                         kfree(ref);
6475                         break;
6476                 }
6477
6478                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6479                                           len, iter->cpu_file, 1);
6480                 if (r < 0) {
6481                         ring_buffer_free_read_page(ref->buffer, ref->page);
6482                         kfree(ref);
6483                         break;
6484                 }
6485
6486                 /*
6487                  * Zero out any leftover data; this page is going to
6488                  * user land.
6489                  */
6490                 size = ring_buffer_page_len(ref->page);
6491                 if (size < PAGE_SIZE)
6492                         memset(ref->page + size, 0, PAGE_SIZE - size);
6493
6494                 page = virt_to_page(ref->page);
6495
6496                 spd.pages[i] = page;
6497                 spd.partial[i].len = PAGE_SIZE;
6498                 spd.partial[i].offset = 0;
6499                 spd.partial[i].private = (unsigned long)ref;
6500                 spd.nr_pages++;
6501                 *ppos += PAGE_SIZE;
6502
6503                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6504         }
6505
6506         trace_access_unlock(iter->cpu_file);
6507         spd.nr_pages = i;
6508
6509         /* did we read anything? */
6510         if (!spd.nr_pages) {
6511                 if (ret)
6512                         goto out;
6513
6514                 ret = -EAGAIN;
6515                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6516                         goto out;
6517
6518                 ret = wait_on_pipe(iter, true);
6519                 if (ret)
6520                         goto out;
6521
6522                 goto again;
6523         }
6524
6525         ret = splice_to_pipe(pipe, &spd);
6526 out:
6527         splice_shrink_spd(&spd);
6528
6529         return ret;
6530 }
6531
6532 static const struct file_operations tracing_buffers_fops = {
6533         .open           = tracing_buffers_open,
6534         .read           = tracing_buffers_read,
6535         .poll           = tracing_buffers_poll,
6536         .release        = tracing_buffers_release,
6537         .splice_read    = tracing_buffers_splice_read,
6538         .llseek         = no_llseek,
6539 };
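/*
 * Illustrative user-space sketch (not part of this file, and only a
 * best-effort example): roughly how the splice_read path above is consumed
 * to pull raw ring-buffer pages out of trace_pipe_raw without extra copies.
 * The tracefs mount point and the cpu0 path are assumptions; tracefs may
 * also live under /sys/kernel/debug/tracing.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int pfd[2];
 *		char page[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY | O_NONBLOCK);
 *
 *		if (fd < 0 || pipe(pfd) < 0)
 *			return 1;
 *
 *		// len must be page aligned and at least one page, matching
 *		// the checks in tracing_buffers_splice_read() above.
 *		n = splice(fd, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *		if (n > 0) {
 *			n = read(pfd[0], page, sizeof(page));
 *			printf("got %zd raw ring-buffer bytes\n", n);
 *		}
 *		close(pfd[0]);
 *		close(pfd[1]);
 *		close(fd);
 *		return 0;
 *	}
 */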
6540
6541 static ssize_t
6542 tracing_stats_read(struct file *filp, char __user *ubuf,
6543                    size_t count, loff_t *ppos)
6544 {
6545         struct inode *inode = file_inode(filp);
6546         struct trace_array *tr = inode->i_private;
6547         struct trace_buffer *trace_buf = &tr->trace_buffer;
6548         int cpu = tracing_get_cpu(inode);
6549         struct trace_seq *s;
6550         unsigned long cnt;
6551         unsigned long long t;
6552         unsigned long usec_rem;
6553
6554         s = kmalloc(sizeof(*s), GFP_KERNEL);
6555         if (!s)
6556                 return -ENOMEM;
6557
6558         trace_seq_init(s);
6559
6560         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6561         trace_seq_printf(s, "entries: %ld\n", cnt);
6562
6563         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6564         trace_seq_printf(s, "overrun: %ld\n", cnt);
6565
6566         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6567         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6568
6569         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6570         trace_seq_printf(s, "bytes: %ld\n", cnt);
6571
6572         if (trace_clocks[tr->clock_id].in_ns) {
6573                 /* local or global for trace_clock */
6574                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6575                 usec_rem = do_div(t, USEC_PER_SEC);
6576                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6577                                                                 t, usec_rem);
6578
6579                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6580                 usec_rem = do_div(t, USEC_PER_SEC);
6581                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6582         } else {
6583                 /* counter or tsc mode for trace_clock */
6584                 trace_seq_printf(s, "oldest event ts: %llu\n",
6585                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6586
6587                 trace_seq_printf(s, "now ts: %llu\n",
6588                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6589         }
6590
6591         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6592         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6593
6594         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6595         trace_seq_printf(s, "read events: %ld\n", cnt);
6596
6597         count = simple_read_from_buffer(ubuf, count, ppos,
6598                                         s->buffer, trace_seq_used(s));
6599
6600         kfree(s);
6601
6602         return count;
6603 }
6604
6605 static const struct file_operations tracing_stats_fops = {
6606         .open           = tracing_open_generic_tr,
6607         .read           = tracing_stats_read,
6608         .llseek         = generic_file_llseek,
6609         .release        = tracing_release_generic_tr,
6610 };
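/*
 * Illustrative user-space sketch (not part of this file): dumping the
 * per-CPU statistics file that tracing_stats_read() above fills in.
 * The tracefs mount point is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[1024];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/stats", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		// Prints the "entries:", "overrun:", "commit overrun:", ...
 *		// lines built by the trace_seq_printf() calls above.
 *		while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
 *			buf[n] = '\0';
 *			fputs(buf, stdout);
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */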
6611
6612 #ifdef CONFIG_DYNAMIC_FTRACE
6613
6614 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6615 {
6616         return 0;
6617 }
6618
6619 static ssize_t
6620 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6621                   size_t cnt, loff_t *ppos)
6622 {
6623         static char ftrace_dyn_info_buffer[1024];
6624         static DEFINE_MUTEX(dyn_info_mutex);
6625         unsigned long *p = filp->private_data;
6626         char *buf = ftrace_dyn_info_buffer;
6627         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6628         int r;
6629
6630         mutex_lock(&dyn_info_mutex);
6631         r = sprintf(buf, "%ld ", *p);
6632
6633         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6634         buf[r++] = '\n';
6635
6636         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6637
6638         mutex_unlock(&dyn_info_mutex);
6639
6640         return r;
6641 }
6642
6643 static const struct file_operations tracing_dyn_info_fops = {
6644         .open           = tracing_open_generic,
6645         .read           = tracing_read_dyn_info,
6646         .llseek         = generic_file_llseek,
6647 };
6648 #endif /* CONFIG_DYNAMIC_FTRACE */
6649
6650 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6651 static void
6652 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6653 {
6654         tracing_snapshot();
6655 }
6656
6657 static void
6658 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6659 {
6660         unsigned long *count = (long *)data;
6661
6662         if (!*count)
6663                 return;
6664
6665         if (*count != -1)
6666                 (*count)--;
6667
6668         tracing_snapshot();
6669 }
6670
6671 static int
6672 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6673                       struct ftrace_probe_ops *ops, void *data)
6674 {
6675         long count = (long)data;
6676
6677         seq_printf(m, "%ps:", (void *)ip);
6678
6679         seq_puts(m, "snapshot");
6680
6681         if (count == -1)
6682                 seq_puts(m, ":unlimited\n");
6683         else
6684                 seq_printf(m, ":count=%ld\n", count);
6685
6686         return 0;
6687 }
6688
6689 static struct ftrace_probe_ops snapshot_probe_ops = {
6690         .func                   = ftrace_snapshot,
6691         .print                  = ftrace_snapshot_print,
6692 };
6693
6694 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6695         .func                   = ftrace_count_snapshot,
6696         .print                  = ftrace_snapshot_print,
6697 };
6698
6699 static int
6700 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6701                                char *glob, char *cmd, char *param, int enable)
6702 {
6703         struct ftrace_probe_ops *ops;
6704         void *count = (void *)-1;
6705         char *number;
6706         int ret;
6707
6708         /* hash funcs only work with set_ftrace_filter */
6709         if (!enable)
6710                 return -EINVAL;
6711
6712         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6713
6714         if (glob[0] == '!') {
6715                 unregister_ftrace_function_probe_func(glob+1, ops);
6716                 return 0;
6717         }
6718
6719         if (!param)
6720                 goto out_reg;
6721
6722         number = strsep(&param, ":");
6723
6724         if (!strlen(number))
6725                 goto out_reg;
6726
6727         /*
6728          * We use the callback data field (which is a pointer)
6729          * as our counter.
6730          */
6731         ret = kstrtoul(number, 0, (unsigned long *)&count);
6732         if (ret)
6733                 return ret;
6734
6735  out_reg:
6736         ret = register_ftrace_function_probe(glob, ops, count);
6737
6738         if (ret >= 0)
6739                 alloc_snapshot(&global_trace);
6740
6741         return ret < 0 ? ret : 0;
6742 }
6743
6744 static struct ftrace_func_command ftrace_snapshot_cmd = {
6745         .name                   = "snapshot",
6746         .func                   = ftrace_trace_snapshot_callback,
6747 };
6748
6749 static __init int register_snapshot_cmd(void)
6750 {
6751         return register_ftrace_command(&ftrace_snapshot_cmd);
6752 }
6753 #else
6754 static inline __init int register_snapshot_cmd(void) { return 0; }
6755 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
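/*
 * Illustrative user-space sketch (not part of this file): arming the
 * "snapshot" function command that ftrace_trace_snapshot_callback() above
 * parses.  The function name (kfree) and the tracefs mount point are
 * assumptions; the format is "<function>:snapshot[:<count>]", and a
 * leading '!' removes the probe again.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *cmd = "kfree:snapshot:1";
 *		int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *		int ret = 0;
 *
 *		if (fd < 0)
 *			return 1;
 *		// Takes one snapshot the next time kfree() is traced.
 *		if (write(fd, cmd, strlen(cmd)) < 0)
 *			ret = 1;
 *		close(fd);
 *		return ret;
 *	}
 */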
6756
6757 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6758 {
6759         if (WARN_ON(!tr->dir))
6760                 return ERR_PTR(-ENODEV);
6761
6762         /* Top directory uses NULL as the parent */
6763         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6764                 return NULL;
6765
6766         /* All sub buffers have a descriptor */
6767         return tr->dir;
6768 }
6769
6770 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6771 {
6772         struct dentry *d_tracer;
6773
6774         if (tr->percpu_dir)
6775                 return tr->percpu_dir;
6776
6777         d_tracer = tracing_get_dentry(tr);
6778         if (IS_ERR(d_tracer))
6779                 return NULL;
6780
6781         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6782
6783         WARN_ONCE(!tr->percpu_dir,
6784                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6785
6786         return tr->percpu_dir;
6787 }
6788
6789 static struct dentry *
6790 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6791                       void *data, long cpu, const struct file_operations *fops)
6792 {
6793         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6794
6795         if (ret) /* See tracing_get_cpu() */
6796                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6797         return ret;
6798 }
6799
6800 static void
6801 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6802 {
6803         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6804         struct dentry *d_cpu;
6805         char cpu_dir[30]; /* 30 characters should be more than enough */
6806
6807         if (!d_percpu)
6808                 return;
6809
6810         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6811         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6812         if (!d_cpu) {
6813                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6814                 return;
6815         }
6816
6817         /* per cpu trace_pipe */
6818         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6819                                 tr, cpu, &tracing_pipe_fops);
6820
6821         /* per cpu trace */
6822         trace_create_cpu_file("trace", 0644, d_cpu,
6823                                 tr, cpu, &tracing_fops);
6824
6825         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6826                                 tr, cpu, &tracing_buffers_fops);
6827
6828         trace_create_cpu_file("stats", 0444, d_cpu,
6829                                 tr, cpu, &tracing_stats_fops);
6830
6831         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6832                                 tr, cpu, &tracing_entries_fops);
6833
6834 #ifdef CONFIG_TRACER_SNAPSHOT
6835         trace_create_cpu_file("snapshot", 0644, d_cpu,
6836                                 tr, cpu, &snapshot_fops);
6837
6838         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6839                                 tr, cpu, &snapshot_raw_fops);
6840 #endif
6841 }
6842
6843 #ifdef CONFIG_FTRACE_SELFTEST
6844 /* Let selftest have access to static functions in this file */
6845 #include "trace_selftest.c"
6846 #endif
6847
6848 static ssize_t
6849 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6850                         loff_t *ppos)
6851 {
6852         struct trace_option_dentry *topt = filp->private_data;
6853         char *buf;
6854
6855         if (topt->flags->val & topt->opt->bit)
6856                 buf = "1\n";
6857         else
6858                 buf = "0\n";
6859
6860         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6861 }
6862
6863 static ssize_t
6864 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6865                          loff_t *ppos)
6866 {
6867         struct trace_option_dentry *topt = filp->private_data;
6868         unsigned long val;
6869         int ret;
6870
6871         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6872         if (ret)
6873                 return ret;
6874
6875         if (val != 0 && val != 1)
6876                 return -EINVAL;
6877
6878         if (!!(topt->flags->val & topt->opt->bit) != val) {
6879                 mutex_lock(&trace_types_lock);
6880                 ret = __set_tracer_option(topt->tr, topt->flags,
6881                                           topt->opt, !val);
6882                 mutex_unlock(&trace_types_lock);
6883                 if (ret)
6884                         return ret;
6885         }
6886
6887         *ppos += cnt;
6888
6889         return cnt;
6890 }
6891
6892
6893 static const struct file_operations trace_options_fops = {
6894         .open = tracing_open_generic,
6895         .read = trace_options_read,
6896         .write = trace_options_write,
6897         .llseek = generic_file_llseek,
6898 };
6899
6900 /*
6901  * In order to pass in both the trace_array descriptor as well as the index
6902  * to the flag that the trace option file represents, the trace_array
6903  * has a character array of trace_flags_index[], which holds the index
6904  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6905  * The address of this character array is passed to the flag option file
6906  * read/write callbacks.
6907  *
6908  * In order to extract both the index and the trace_array descriptor,
6909  * get_tr_index() uses the following algorithm.
6910  *
6911  *   idx = *ptr;
6912  *
6913  * This works because ptr is the address of index[idx], and the value
6914  * stored there is idx itself (remember index[1] == 1).
6915  *
6916  * Then, to get the trace_array descriptor, subtracting that index
6917  * from ptr gets us back to the start of the index array:
6918  *
6919  *   ptr - idx == &index[0]
6920  *
6921  * Then a simple container_of() from that pointer gets us to the
6922  * trace_array descriptor.
6923  */
6924 static void get_tr_index(void *data, struct trace_array **ptr,
6925                          unsigned int *pindex)
6926 {
6927         *pindex = *(unsigned char *)data;
6928
6929         *ptr = container_of(data - *pindex, struct trace_array,
6930                             trace_flags_index);
6931 }
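/*
 * Worked example of the pointer trick described above (illustrative
 * user-space sketch, not part of this file): the struct and field names
 * here are stand-ins for trace_array and trace_flags_index[].
 *
 *	#include <stddef.h>
 *	#include <stdio.h>
 *
 *	struct mock_tr {
 *		int other_fields;
 *		unsigned char flags_index[8];	// flags_index[i] == i
 *	};
 *
 *	#define container_of(ptr, type, member) \
 *		((type *)((char *)(ptr) - offsetof(type, member)))
 *
 *	int main(void)
 *	{
 *		struct mock_tr tr = { .flags_index = { 0, 1, 2, 3, 4, 5, 6, 7 } };
 *		// What the option file's private_data would hold:
 *		void *data = &tr.flags_index[5];
 *		unsigned int idx = *(unsigned char *)data;	// == 5
 *		struct mock_tr *owner =
 *			container_of((unsigned char *)data - idx,
 *				     struct mock_tr, flags_index);
 *
 *		printf("idx=%u owner==&tr? %d\n", idx, owner == &tr);
 *		return 0;
 *	}
 */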
6932
6933 static ssize_t
6934 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6935                         loff_t *ppos)
6936 {
6937         void *tr_index = filp->private_data;
6938         struct trace_array *tr;
6939         unsigned int index;
6940         char *buf;
6941
6942         get_tr_index(tr_index, &tr, &index);
6943
6944         if (tr->trace_flags & (1 << index))
6945                 buf = "1\n";
6946         else
6947                 buf = "0\n";
6948
6949         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6950 }
6951
6952 static ssize_t
6953 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6954                          loff_t *ppos)
6955 {
6956         void *tr_index = filp->private_data;
6957         struct trace_array *tr;
6958         unsigned int index;
6959         unsigned long val;
6960         int ret;
6961
6962         get_tr_index(tr_index, &tr, &index);
6963
6964         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6965         if (ret)
6966                 return ret;
6967
6968         if (val != 0 && val != 1)
6969                 return -EINVAL;
6970
6971         mutex_lock(&trace_types_lock);
6972         ret = set_tracer_flag(tr, 1 << index, val);
6973         mutex_unlock(&trace_types_lock);
6974
6975         if (ret < 0)
6976                 return ret;
6977
6978         *ppos += cnt;
6979
6980         return cnt;
6981 }
6982
6983 static const struct file_operations trace_options_core_fops = {
6984         .open = tracing_open_generic,
6985         .read = trace_options_core_read,
6986         .write = trace_options_core_write,
6987         .llseek = generic_file_llseek,
6988 };
6989
6990 struct dentry *trace_create_file(const char *name,
6991                                  umode_t mode,
6992                                  struct dentry *parent,
6993                                  void *data,
6994                                  const struct file_operations *fops)
6995 {
6996         struct dentry *ret;
6997
6998         ret = tracefs_create_file(name, mode, parent, data, fops);
6999         if (!ret)
7000                 pr_warn("Could not create tracefs '%s' entry\n", name);
7001
7002         return ret;
7003 }
7004
7005
7006 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7007 {
7008         struct dentry *d_tracer;
7009
7010         if (tr->options)
7011                 return tr->options;
7012
7013         d_tracer = tracing_get_dentry(tr);
7014         if (IS_ERR(d_tracer))
7015                 return NULL;
7016
7017         tr->options = tracefs_create_dir("options", d_tracer);
7018         if (!tr->options) {
7019                 pr_warn("Could not create tracefs directory 'options'\n");
7020                 return NULL;
7021         }
7022
7023         return tr->options;
7024 }
7025
7026 static void
7027 create_trace_option_file(struct trace_array *tr,
7028                          struct trace_option_dentry *topt,
7029                          struct tracer_flags *flags,
7030                          struct tracer_opt *opt)
7031 {
7032         struct dentry *t_options;
7033
7034         t_options = trace_options_init_dentry(tr);
7035         if (!t_options)
7036                 return;
7037
7038         topt->flags = flags;
7039         topt->opt = opt;
7040         topt->tr = tr;
7041
7042         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7043                                     &trace_options_fops);
7044
7045 }
7046
7047 static void
7048 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7049 {
7050         struct trace_option_dentry *topts;
7051         struct trace_options *tr_topts;
7052         struct tracer_flags *flags;
7053         struct tracer_opt *opts;
7054         int cnt;
7055         int i;
7056
7057         if (!tracer)
7058                 return;
7059
7060         flags = tracer->flags;
7061
7062         if (!flags || !flags->opts)
7063                 return;
7064
7065         /*
7066          * If this is an instance, only create flags for tracers
7067          * the instance may have.
7068          */
7069         if (!trace_ok_for_array(tracer, tr))
7070                 return;
7071
7072         for (i = 0; i < tr->nr_topts; i++) {
7073                 /* Make sure there are no duplicate flags. */
7074                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7075                         return;
7076         }
7077
7078         opts = flags->opts;
7079
7080         for (cnt = 0; opts[cnt].name; cnt++)
7081                 ;
7082
7083         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7084         if (!topts)
7085                 return;
7086
7087         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7088                             GFP_KERNEL);
7089         if (!tr_topts) {
7090                 kfree(topts);
7091                 return;
7092         }
7093
7094         tr->topts = tr_topts;
7095         tr->topts[tr->nr_topts].tracer = tracer;
7096         tr->topts[tr->nr_topts].topts = topts;
7097         tr->nr_topts++;
7098
7099         for (cnt = 0; opts[cnt].name; cnt++) {
7100                 create_trace_option_file(tr, &topts[cnt], flags,
7101                                          &opts[cnt]);
7102                 WARN_ONCE(topts[cnt].entry == NULL,
7103                           "Failed to create trace option: %s",
7104                           opts[cnt].name);
7105         }
7106 }
7107
7108 static struct dentry *
7109 create_trace_option_core_file(struct trace_array *tr,
7110                               const char *option, long index)
7111 {
7112         struct dentry *t_options;
7113
7114         t_options = trace_options_init_dentry(tr);
7115         if (!t_options)
7116                 return NULL;
7117
7118         return trace_create_file(option, 0644, t_options,
7119                                  (void *)&tr->trace_flags_index[index],
7120                                  &trace_options_core_fops);
7121 }
7122
7123 static void create_trace_options_dir(struct trace_array *tr)
7124 {
7125         struct dentry *t_options;
7126         bool top_level = tr == &global_trace;
7127         int i;
7128
7129         t_options = trace_options_init_dentry(tr);
7130         if (!t_options)
7131                 return;
7132
7133         for (i = 0; trace_options[i]; i++) {
7134                 if (top_level ||
7135                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7136                         create_trace_option_core_file(tr, trace_options[i], i);
7137         }
7138 }
7139
7140 static ssize_t
7141 rb_simple_read(struct file *filp, char __user *ubuf,
7142                size_t cnt, loff_t *ppos)
7143 {
7144         struct trace_array *tr = filp->private_data;
7145         char buf[64];
7146         int r;
7147
7148         r = tracer_tracing_is_on(tr);
7149         r = sprintf(buf, "%d\n", r);
7150
7151         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7152 }
7153
7154 static ssize_t
7155 rb_simple_write(struct file *filp, const char __user *ubuf,
7156                 size_t cnt, loff_t *ppos)
7157 {
7158         struct trace_array *tr = filp->private_data;
7159         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7160         unsigned long val;
7161         int ret;
7162
7163         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7164         if (ret)
7165                 return ret;
7166
7167         if (buffer) {
7168                 mutex_lock(&trace_types_lock);
7169                 if (val) {
7170                         tracer_tracing_on(tr);
7171                         if (tr->current_trace->start)
7172                                 tr->current_trace->start(tr);
7173                 } else {
7174                         tracer_tracing_off(tr);
7175                         if (tr->current_trace->stop)
7176                                 tr->current_trace->stop(tr);
7177                 }
7178                 mutex_unlock(&trace_types_lock);
7179         }
7180
7181         (*ppos)++;
7182
7183         return cnt;
7184 }
7185
7186 static const struct file_operations rb_simple_fops = {
7187         .open           = tracing_open_generic_tr,
7188         .read           = rb_simple_read,
7189         .write          = rb_simple_write,
7190         .release        = tracing_release_generic_tr,
7191         .llseek         = default_llseek,
7192 };
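/*
 * Illustrative user-space sketch (not part of this file): toggling the
 * ring buffer through the tracing_on file served by rb_simple_read() and
 * rb_simple_write() above.  The tracefs mount point is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int set_tracing_on(int enable)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *		int ret = -1;
 *
 *		if (fd < 0)
 *			return -1;
 *		// rb_simple_write() accepts "0" (off) or "1" (on).
 *		if (write(fd, enable ? "1" : "0", 1) == 1)
 *			ret = 0;
 *		close(fd);
 *		return ret;
 *	}
 *
 *	int main(void)
 *	{
 *		if (set_tracing_on(0))
 *			perror("tracing_on");
 *		return 0;
 *	}
 */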
7193
7194 struct dentry *trace_instance_dir;
7195
7196 static void
7197 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7198
7199 static int
7200 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7201 {
7202         enum ring_buffer_flags rb_flags;
7203
7204         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7205
7206         buf->tr = tr;
7207
7208         buf->buffer = ring_buffer_alloc(size, rb_flags);
7209         if (!buf->buffer)
7210                 return -ENOMEM;
7211
7212         buf->data = alloc_percpu(struct trace_array_cpu);
7213         if (!buf->data) {
7214                 ring_buffer_free(buf->buffer);
7215                 return -ENOMEM;
7216         }
7217
7218         /* Allocate the first page for all buffers */
7219         set_buffer_entries(&tr->trace_buffer,
7220                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7221
7222         return 0;
7223 }
7224
7225 static int allocate_trace_buffers(struct trace_array *tr, int size)
7226 {
7227         int ret;
7228
7229         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7230         if (ret)
7231                 return ret;
7232
7233 #ifdef CONFIG_TRACER_MAX_TRACE
7234         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7235                                     allocate_snapshot ? size : 1);
7236         if (WARN_ON(ret)) {
7237                 ring_buffer_free(tr->trace_buffer.buffer);
7238                 free_percpu(tr->trace_buffer.data);
7239                 return -ENOMEM;
7240         }
7241         tr->allocated_snapshot = allocate_snapshot;
7242
7243         /*
7244          * Only the top level trace array gets its snapshot allocated
7245          * from the kernel command line.
7246          */
7247         allocate_snapshot = false;
7248 #endif
7249         return 0;
7250 }
7251
7252 static void free_trace_buffer(struct trace_buffer *buf)
7253 {
7254         if (buf->buffer) {
7255                 ring_buffer_free(buf->buffer);
7256                 buf->buffer = NULL;
7257                 free_percpu(buf->data);
7258                 buf->data = NULL;
7259         }
7260 }
7261
7262 static void free_trace_buffers(struct trace_array *tr)
7263 {
7264         if (!tr)
7265                 return;
7266
7267         free_trace_buffer(&tr->trace_buffer);
7268
7269 #ifdef CONFIG_TRACER_MAX_TRACE
7270         free_trace_buffer(&tr->max_buffer);
7271 #endif
7272 }
7273
7274 static void init_trace_flags_index(struct trace_array *tr)
7275 {
7276         int i;
7277
7278         /* Used by the trace options files */
7279         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7280                 tr->trace_flags_index[i] = i;
7281 }
7282
7283 static void __update_tracer_options(struct trace_array *tr)
7284 {
7285         struct tracer *t;
7286
7287         for (t = trace_types; t; t = t->next)
7288                 add_tracer_options(tr, t);
7289 }
7290
7291 static void update_tracer_options(struct trace_array *tr)
7292 {
7293         mutex_lock(&trace_types_lock);
7294         __update_tracer_options(tr);
7295         mutex_unlock(&trace_types_lock);
7296 }
7297
7298 static int instance_mkdir(const char *name)
7299 {
7300         struct trace_array *tr;
7301         int ret;
7302
7303         mutex_lock(&trace_types_lock);
7304
7305         ret = -EEXIST;
7306         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7307                 if (tr->name && strcmp(tr->name, name) == 0)
7308                         goto out_unlock;
7309         }
7310
7311         ret = -ENOMEM;
7312         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7313         if (!tr)
7314                 goto out_unlock;
7315
7316         tr->name = kstrdup(name, GFP_KERNEL);
7317         if (!tr->name)
7318                 goto out_free_tr;
7319
7320         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7321                 goto out_free_tr;
7322
7323         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7324
7325         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7326
7327         raw_spin_lock_init(&tr->start_lock);
7328
7329         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7330
7331         tr->current_trace = &nop_trace;
7332
7333         INIT_LIST_HEAD(&tr->systems);
7334         INIT_LIST_HEAD(&tr->events);
7335
7336         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7337                 goto out_free_tr;
7338
7339         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7340         if (!tr->dir)
7341                 goto out_free_tr;
7342
7343         ret = event_trace_add_tracer(tr->dir, tr);
7344         if (ret) {
7345                 tracefs_remove_recursive(tr->dir);
7346                 goto out_free_tr;
7347         }
7348
7349         init_tracer_tracefs(tr, tr->dir);
7350         init_trace_flags_index(tr);
7351         __update_tracer_options(tr);
7352
7353         list_add(&tr->list, &ftrace_trace_arrays);
7354
7355         mutex_unlock(&trace_types_lock);
7356
7357         return 0;
7358
7359  out_free_tr:
7360         free_trace_buffers(tr);
7361         free_cpumask_var(tr->tracing_cpumask);
7362         kfree(tr->name);
7363         kfree(tr);
7364
7365  out_unlock:
7366         mutex_unlock(&trace_types_lock);
7367
7368         return ret;
7369
7370 }
7371
7372 static int instance_rmdir(const char *name)
7373 {
7374         struct trace_array *tr;
7375         int found = 0;
7376         int ret;
7377         int i;
7378
7379         mutex_lock(&trace_types_lock);
7380
7381         ret = -ENODEV;
7382         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7383                 if (tr->name && strcmp(tr->name, name) == 0) {
7384                         found = 1;
7385                         break;
7386                 }
7387         }
7388         if (!found)
7389                 goto out_unlock;
7390
7391         ret = -EBUSY;
7392         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7393                 goto out_unlock;
7394
7395         list_del(&tr->list);
7396
7397         /* Disable all the flags that were enabled coming in */
7398         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7399                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7400                         set_tracer_flag(tr, 1 << i, 0);
7401         }
7402
7403         tracing_set_nop(tr);
7404         event_trace_del_tracer(tr);
7405         ftrace_destroy_function_files(tr);
7406         tracefs_remove_recursive(tr->dir);
7407         free_trace_buffers(tr);
7408
7409         for (i = 0; i < tr->nr_topts; i++) {
7410                 kfree(tr->topts[i].topts);
7411         }
7412         kfree(tr->topts);
7413
7414         kfree(tr->name);
7415         kfree(tr);
7416
7417         ret = 0;
7418
7419  out_unlock:
7420         mutex_unlock(&trace_types_lock);
7421
7422         return ret;
7423 }
7424
7425 static __init void create_trace_instances(struct dentry *d_tracer)
7426 {
7427         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7428                                                          instance_mkdir,
7429                                                          instance_rmdir);
7430         if (WARN_ON(!trace_instance_dir))
7431                 return;
7432 }
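/*
 * Illustrative user-space sketch (not part of this file): creating and
 * removing a tracing instance, which ends up in instance_mkdir() and
 * instance_rmdir() above.  The mount point and the instance name "demo"
 * are assumptions.
 *
 *	#include <errno.h>
 *	#include <stdio.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *dir = "/sys/kernel/tracing/instances/demo";
 *
 *		// Allocates a fresh trace_array with its own buffers.
 *		if (mkdir(dir, 0755) && errno != EEXIST) {
 *			perror("mkdir");
 *			return 1;
 *		}
 *
 *		// ... use the instance's trace, trace_pipe, events/ ...
 *
 *		// Tears the instance down; fails with EBUSY while in use.
 *		if (rmdir(dir))
 *			perror("rmdir");
 *		return 0;
 *	}
 */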
7433
7434 static void
7435 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7436 {
7437         int cpu;
7438
7439         trace_create_file("available_tracers", 0444, d_tracer,
7440                         tr, &show_traces_fops);
7441
7442         trace_create_file("current_tracer", 0644, d_tracer,
7443                         tr, &set_tracer_fops);
7444
7445         trace_create_file("tracing_cpumask", 0644, d_tracer,
7446                           tr, &tracing_cpumask_fops);
7447
7448         trace_create_file("trace_options", 0644, d_tracer,
7449                           tr, &tracing_iter_fops);
7450
7451         trace_create_file("trace", 0644, d_tracer,
7452                           tr, &tracing_fops);
7453
7454         trace_create_file("trace_pipe", 0444, d_tracer,
7455                           tr, &tracing_pipe_fops);
7456
7457         trace_create_file("buffer_size_kb", 0644, d_tracer,
7458                           tr, &tracing_entries_fops);
7459
7460         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7461                           tr, &tracing_total_entries_fops);
7462
7463         trace_create_file("free_buffer", 0200, d_tracer,
7464                           tr, &tracing_free_buffer_fops);
7465
7466         trace_create_file("trace_marker", 0220, d_tracer,
7467                           tr, &tracing_mark_fops);
7468
7469         trace_create_file("trace_marker_raw", 0220, d_tracer,
7470                           tr, &tracing_mark_raw_fops);
7471
7472         trace_create_file("trace_clock", 0644, d_tracer, tr,
7473                           &trace_clock_fops);
7474
7475         trace_create_file("tracing_on", 0644, d_tracer,
7476                           tr, &rb_simple_fops);
7477
7478         create_trace_options_dir(tr);
7479
7480 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7481         trace_create_file("tracing_max_latency", 0644, d_tracer,
7482                         &tr->max_latency, &tracing_max_lat_fops);
7483 #endif
7484
7485         if (ftrace_create_function_files(tr, d_tracer))
7486                 WARN(1, "Could not allocate function filter files");
7487
7488 #ifdef CONFIG_TRACER_SNAPSHOT
7489         trace_create_file("snapshot", 0644, d_tracer,
7490                           tr, &snapshot_fops);
7491 #endif
7492
7493         for_each_tracing_cpu(cpu)
7494                 tracing_init_tracefs_percpu(tr, cpu);
7495
7496         ftrace_init_tracefs(tr, d_tracer);
7497 }
7498
7499 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7500 {
7501         struct vfsmount *mnt;
7502         struct file_system_type *type;
7503
7504         /*
7505          * To maintain backward compatibility for tools that mount
7506          * debugfs to get to the tracing facility, tracefs is automatically
7507          * mounted to the debugfs/tracing directory.
7508          */
7509         type = get_fs_type("tracefs");
7510         if (!type)
7511                 return NULL;
7512         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7513         put_filesystem(type);
7514         if (IS_ERR(mnt))
7515                 return NULL;
7516         mntget(mnt);
7517
7518         return mnt;
7519 }
7520
7521 /**
7522  * tracing_init_dentry - initialize top level trace array
7523  *
7524  * This is called when creating files or directories in the tracing
7525  * directory. It is called via fs_initcall() by the boot-up code
7526  * and expects to return the dentry of the top level tracing directory.
7527  */
7528 struct dentry *tracing_init_dentry(void)
7529 {
7530         struct trace_array *tr = &global_trace;
7531
7532         /* The top level trace array uses NULL as parent */
7533         if (tr->dir)
7534                 return NULL;
7535
7536         if (WARN_ON(!tracefs_initialized()) ||
7537                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7538                  WARN_ON(!debugfs_initialized())))
7539                 return ERR_PTR(-ENODEV);
7540
7541         /*
7542          * As there may still be users that expect the tracing
7543          * files to exist in debugfs/tracing, we must automount
7544          * the tracefs file system there, so older tools still
7545          * work with the newer kernel.
7546          */
7547         tr->dir = debugfs_create_automount("tracing", NULL,
7548                                            trace_automount, NULL);
7549         if (!tr->dir) {
7550                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7551                 return ERR_PTR(-ENOMEM);
7552         }
7553
7554         return NULL;
7555 }
7556
7557 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7558 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7559
7560 static void __init trace_enum_init(void)
7561 {
7562         int len;
7563
7564         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7565         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7566 }
7567
7568 #ifdef CONFIG_MODULES
7569 static void trace_module_add_enums(struct module *mod)
7570 {
7571         if (!mod->num_trace_enums)
7572                 return;
7573
7574         /*
7575          * Modules with bad taint do not have events created, do
7576          * not bother with enums either.
7577          */
7578         if (trace_module_has_bad_taint(mod))
7579                 return;
7580
7581         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7582 }
7583
7584 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7585 static void trace_module_remove_enums(struct module *mod)
7586 {
7587         union trace_enum_map_item *map;
7588         union trace_enum_map_item **last = &trace_enum_maps;
7589
7590         if (!mod->num_trace_enums)
7591                 return;
7592
7593         mutex_lock(&trace_enum_mutex);
7594
7595         map = trace_enum_maps;
7596
7597         while (map) {
7598                 if (map->head.mod == mod)
7599                         break;
7600                 map = trace_enum_jmp_to_tail(map);
7601                 last = &map->tail.next;
7602                 map = map->tail.next;
7603         }
7604         if (!map)
7605                 goto out;
7606
7607         *last = trace_enum_jmp_to_tail(map)->tail.next;
7608         kfree(map);
7609  out:
7610         mutex_unlock(&trace_enum_mutex);
7611 }
7612 #else
7613 static inline void trace_module_remove_enums(struct module *mod) { }
7614 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7615
7616 static int trace_module_notify(struct notifier_block *self,
7617                                unsigned long val, void *data)
7618 {
7619         struct module *mod = data;
7620
7621         switch (val) {
7622         case MODULE_STATE_COMING:
7623                 trace_module_add_enums(mod);
7624                 break;
7625         case MODULE_STATE_GOING:
7626                 trace_module_remove_enums(mod);
7627                 break;
7628         }
7629
7630         return 0;
7631 }
7632
7633 static struct notifier_block trace_module_nb = {
7634         .notifier_call = trace_module_notify,
7635         .priority = 0,
7636 };
7637 #endif /* CONFIG_MODULES */
7638
7639 static __init int tracer_init_tracefs(void)
7640 {
7641         struct dentry *d_tracer;
7642
7643         trace_access_lock_init();
7644
7645         d_tracer = tracing_init_dentry();
7646         if (IS_ERR(d_tracer))
7647                 return 0;
7648
7649         init_tracer_tracefs(&global_trace, d_tracer);
7650         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7651
7652         trace_create_file("tracing_thresh", 0644, d_tracer,
7653                         &global_trace, &tracing_thresh_fops);
7654
7655         trace_create_file("README", 0444, d_tracer,
7656                         NULL, &tracing_readme_fops);
7657
7658         trace_create_file("saved_cmdlines", 0444, d_tracer,
7659                         NULL, &tracing_saved_cmdlines_fops);
7660
7661         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7662                           NULL, &tracing_saved_cmdlines_size_fops);
7663
7664         trace_enum_init();
7665
7666         trace_create_enum_file(d_tracer);
7667
7668 #ifdef CONFIG_MODULES
7669         register_module_notifier(&trace_module_nb);
7670 #endif
7671
7672 #ifdef CONFIG_DYNAMIC_FTRACE
7673         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7674                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7675 #endif
7676
7677         create_trace_instances(d_tracer);
7678
7679         update_tracer_options(&global_trace);
7680
7681         return 0;
7682 }
7683
7684 static int trace_panic_handler(struct notifier_block *this,
7685                                unsigned long event, void *unused)
7686 {
7687         if (ftrace_dump_on_oops)
7688                 ftrace_dump(ftrace_dump_on_oops);
7689         return NOTIFY_OK;
7690 }
7691
7692 static struct notifier_block trace_panic_notifier = {
7693         .notifier_call  = trace_panic_handler,
7694         .next           = NULL,
7695         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7696 };
7697
7698 static int trace_die_handler(struct notifier_block *self,
7699                              unsigned long val,
7700                              void *data)
7701 {
7702         switch (val) {
7703         case DIE_OOPS:
7704                 if (ftrace_dump_on_oops)
7705                         ftrace_dump(ftrace_dump_on_oops);
7706                 break;
7707         default:
7708                 break;
7709         }
7710         return NOTIFY_OK;
7711 }
7712
7713 static struct notifier_block trace_die_notifier = {
7714         .notifier_call = trace_die_handler,
7715         .priority = 200
7716 };
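/*
 * Illustrative user-space sketch (not part of this file): enabling the
 * dump-on-oops behaviour that the panic/die notifiers above act on, via
 * the kernel.ftrace_dump_on_oops sysctl (it can also be set with the
 * ftrace_dump_on_oops boot parameter).
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);
 *
 *		if (fd < 0) {
 *			perror("open");
 *			return 1;
 *		}
 *		// "1" dumps all CPU buffers on oops, "2" only the oopsing CPU.
 *		if (write(fd, "1", 1) != 1)
 *			perror("write");
 *		close(fd);
 *		return 0;
 *	}
 */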
7717
7718 /*
7719  * printk is set to a max of 1024; we really don't need it that big.
7720  * Nothing should be printing 1000 characters anyway.
7721  */
7722 #define TRACE_MAX_PRINT         1000
7723
7724 /*
7725  * Define here KERN_TRACE so that we have one place to modify
7726  * it if we decide to change what log level the ftrace dump
7727  * should be at.
7728  */
7729 #define KERN_TRACE              KERN_EMERG
7730
7731 void
7732 trace_printk_seq(struct trace_seq *s)
7733 {
7734         /* Probably should print a warning here. */
7735         if (s->seq.len >= TRACE_MAX_PRINT)
7736                 s->seq.len = TRACE_MAX_PRINT;
7737
7738         /*
7739          * More paranoid code. Although the buffer size is set to
7740          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7741          * an extra layer of protection.
7742          */
7743         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7744                 s->seq.len = s->seq.size - 1;
7745
7746         /* Should already be zero terminated, but we are paranoid. */
7747         s->buffer[s->seq.len] = 0;
7748
7749         printk(KERN_TRACE "%s", s->buffer);
7750
7751         trace_seq_init(s);
7752 }
7753
7754 void trace_init_global_iter(struct trace_iterator *iter)
7755 {
7756         iter->tr = &global_trace;
7757         iter->trace = iter->tr->current_trace;
7758         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7759         iter->trace_buffer = &global_trace.trace_buffer;
7760
7761         if (iter->trace && iter->trace->open)
7762                 iter->trace->open(iter);
7763
7764         /* Annotate start of buffers if we had overruns */
7765         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7766                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7767
7768         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7769         if (trace_clocks[iter->tr->clock_id].in_ns)
7770                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7771 }
7772
7773 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7774 {
7775         /* use static because iter can be a bit big for the stack */
7776         static struct trace_iterator iter;
7777         static atomic_t dump_running;
7778         struct trace_array *tr = &global_trace;
7779         unsigned int old_userobj;
7780         unsigned long flags;
7781         int cnt = 0, cpu;
7782
7783         /* Only allow one dump user at a time. */
7784         if (atomic_inc_return(&dump_running) != 1) {
7785                 atomic_dec(&dump_running);
7786                 return;
7787         }
7788
7789         /*
7790          * Always turn off tracing when we dump.
7791          * We don't need to show trace output of what happens
7792          * between multiple crashes.
7793          *
7794          * If the user does a sysrq-z, then they can re-enable
7795          * tracing with echo 1 > tracing_on.
7796          */
7797         tracing_off();
7798
7799         local_irq_save(flags);
7800
7801         /* Simulate the iterator */
7802         trace_init_global_iter(&iter);
7803
7804         for_each_tracing_cpu(cpu) {
7805                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7806         }
7807
7808         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7809
7810         /* don't look at user memory in panic mode */
7811         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7812
7813         switch (oops_dump_mode) {
7814         case DUMP_ALL:
7815                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7816                 break;
7817         case DUMP_ORIG:
7818                 iter.cpu_file = raw_smp_processor_id();
7819                 break;
7820         case DUMP_NONE:
7821                 goto out_enable;
7822         default:
7823                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7824                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7825         }
7826
7827         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7828
7829         /* Did function tracer already get disabled? */
7830         if (ftrace_is_dead()) {
7831                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7832                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7833         }
7834
7835         /*
7836          * We need to stop all tracing on all CPUs to read
7837          * the next buffer. This is a bit expensive, but it is
7838          * not done often. We fill in everything we can read,
7839          * and then release the locks again.
7840          */
7841
7842         while (!trace_empty(&iter)) {
7843
7844                 if (!cnt)
7845                         printk(KERN_TRACE "---------------------------------\n");
7846
7847                 cnt++;
7848
7849                 /* reset all but tr, trace, and overruns */
7850                 memset(&iter.seq, 0,
7851                        sizeof(struct trace_iterator) -
7852                        offsetof(struct trace_iterator, seq));
7853                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7854                 iter.pos = -1;
7855
7856                 if (trace_find_next_entry_inc(&iter) != NULL) {
7857                         int ret;
7858
7859                         ret = print_trace_line(&iter);
7860                         if (ret != TRACE_TYPE_NO_CONSUME)
7861                                 trace_consume(&iter);
7862                 }
7863                 touch_nmi_watchdog();
7864
7865                 trace_printk_seq(&iter.seq);
7866         }
7867
7868         if (!cnt)
7869                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7870         else
7871                 printk(KERN_TRACE "---------------------------------\n");
7872
7873  out_enable:
7874         tr->trace_flags |= old_userobj;
7875
7876         for_each_tracing_cpu(cpu) {
7877                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7878         }
7879         atomic_dec(&dump_running);
7880         local_irq_restore(flags);
7881 }
7882 EXPORT_SYMBOL_GPL(ftrace_dump);
7883
7884 __init static int tracer_alloc_buffers(void)
7885 {
7886         int ring_buf_size;
7887         int ret = -ENOMEM;
7888
7889         /*
7890          * Make sure we don't accidentally add more trace options
7891          * than we have bits for.
7892          */
7893         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7894
7895         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7896                 goto out;
7897
7898         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7899                 goto out_free_buffer_mask;
7900
7901         /* Only allocate trace_printk buffers if a trace_printk exists */
7902         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7903                 /* Must be called before global_trace.buffer is allocated */
7904                 trace_printk_init_buffers();
7905
7906         /* To save memory, keep the ring buffer size to its minimum */
7907         if (ring_buffer_expanded)
7908                 ring_buf_size = trace_buf_size;
7909         else
7910                 ring_buf_size = 1;
7911
7912         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7913         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7914
7915         raw_spin_lock_init(&global_trace.start_lock);
7916
7917         /*
7918          * The prepare callback allocates some memory for the ring buffer. We
7919          * don't free the buffer if the CPU goes down. If we were to free
7920          * the buffer, then the user would lose any trace that was in the
7921          * buffer. The memory will be removed once the "instance" is removed.
7922          */
7923         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
7924                                       "trace/RB:prepare", trace_rb_cpu_prepare,
7925                                       NULL);
7926         if (ret < 0)
7927                 goto out_free_cpumask;
7928         /* Used for event triggers */
7929         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7930         if (!temp_buffer)
7931                 goto out_rm_hp_state;
7932
7933         if (trace_create_savedcmd() < 0)
7934                 goto out_free_temp_buffer;
7935
7936         /* TODO: make the number of buffers hot pluggable with CPUs */
7937         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7938                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7939                 WARN_ON(1);
7940                 goto out_free_savedcmd;
7941         }
7942
7943         if (global_trace.buffer_disabled)
7944                 tracing_off();
7945
7946         if (trace_boot_clock) {
7947                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7948                 if (ret < 0)
7949                         pr_warn("Trace clock %s not defined, going back to default\n",
7950                                 trace_boot_clock);
7951         }
7952
7953         /*
7954          * register_tracer() might reference current_trace, so it
7955          * needs to be set before we register anything. This is
7956          * just a bootstrap of current_trace anyway.
7957          */
7958         global_trace.current_trace = &nop_trace;
7959
7960         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7961
7962         ftrace_init_global_array_ops(&global_trace);
7963
7964         init_trace_flags_index(&global_trace);
7965
7966         register_tracer(&nop_trace);
7967
7968         /* All seems OK, enable tracing */
7969         tracing_disabled = 0;
7970
7971         atomic_notifier_chain_register(&panic_notifier_list,
7972                                        &trace_panic_notifier);
7973
7974         register_die_notifier(&trace_die_notifier);
7975
7976         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7977
7978         INIT_LIST_HEAD(&global_trace.systems);
7979         INIT_LIST_HEAD(&global_trace.events);
7980         list_add(&global_trace.list, &ftrace_trace_arrays);
7981
7982         apply_trace_boot_options();
7983
7984         register_snapshot_cmd();
7985
7986         return 0;
7987
7988 out_free_savedcmd:
7989         free_saved_cmdlines_buffer(savedcmd);
7990 out_free_temp_buffer:
7991         ring_buffer_free(temp_buffer);
7992 out_rm_hp_state:
7993         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
7994 out_free_cpumask:
7995         free_cpumask_var(global_trace.tracing_cpumask);
7996 out_free_buffer_mask:
7997         free_cpumask_var(tracing_buffer_mask);
7998 out:
7999         return ret;
8000 }
8001
8002 void __init trace_init(void)
8003 {
8004         if (tracepoint_printk) {
8005                 tracepoint_print_iter =
8006                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8007                 if (WARN_ON(!tracepoint_print_iter))
8008                         tracepoint_printk = 0;
8009                 else
8010                         static_key_enable(&tracepoint_printk_key.key);
8011         }
8012         tracer_alloc_buffers();
8013         trace_event_init();
8014 }
8015
8016 __init static int clear_boot_tracer(void)
8017 {
8018         /*
8019          * The default bootup tracer name lives in an init section that
8020          * will soon be freed. This function runs as a late initcall; if
8021          * the boot tracer was not registered by now, clear the pointer
8022          * to prevent a later registration from accessing the buffer
8023          * that is about to be freed.
8024          */
8025         if (!default_bootup_tracer)
8026                 return 0;
8027
8028         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8029                default_bootup_tracer);
8030         default_bootup_tracer = NULL;
8031
8032         return 0;
8033 }
8034
8035 fs_initcall(tracer_init_tracefs);
8036 late_initcall(clear_boot_tracer);