1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although some concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurs.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1, and is set back to zero only when the
97  * initialization of the tracer succeeds. That is the only place
98  * that clears it.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set 1 if you want to dump the buffers of all CPUs.
117  * Set 2 if you want to dump the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
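/*
 * Illustrative layout of one saved array (a sketch based on the comment
 * above, not an additional data structure):
 *
 *	[0]        head  (head.length = N, head.mod = owning module or NULL)
 *	[1 .. N]   map   (the N saved trace_eval_map entries)
 *	[N + 1]    tail  (tail.next points to the next saved array, or NULL)
 */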
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
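/* For example, ns2usecs(1500) returns 2: the +500 rounds to the nearest microsecond. */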
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
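/*
 * Look up @this_tr in the list of trace arrays and, if found, take a
 * reference on it. Returns 0 on success or -ENODEV if the trace array
 * is not on the list.
 */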
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
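/*
 * If the event has a filter attached and the record does not match it,
 * discard the reserved ring buffer event and return 1; otherwise return 0
 * and let the caller commit the event.
 */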
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid is already +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
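/*
 * Worked example (illustrative): if only pid 0 is set in the pid_list,
 * trace_pid_start() returns (void *)1 (pid 0 shifted by +1), and the
 * following trace_pid_next() returns NULL, ending the iteration.
 */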
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
472
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always create a new array. The write is an all or nothing
494          * operation: a new array is built whenever the user adds new
495          * pids, and if the operation fails, the current list is left
496          * unmodified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list)
500                 return -ENOMEM;
501
502         pid_list->pid_max = READ_ONCE(pid_max);
503
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
513
514         if (filtered_pids) {
515                 /* copy the current bits to the new max */
516                 for_each_set_bit(pid, filtered_pids->pids,
517                                  filtered_pids->pid_max) {
518                         set_bit(pid, pid_list->pids);
519                         nr_pids++;
520                 }
521         }
522
523         while (cnt > 0) {
524
525                 pos = 0;
526
527                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528                 if (ret < 0 || !trace_parser_loaded(&parser))
529                         break;
530
531                 read += ret;
532                 ubuf += ret;
533                 cnt -= ret;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value of 16384 entries.
613  * If a dump on oops happens, it is much appreciated not to have
614  * to wait for all that output. In any case, this is configurable
615  * at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes
637  * to consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different CPU
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
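/*
 * Usage sketch (illustrative): a reader of a single CPU buffer brackets its
 * accesses with
 *
 *	trace_access_lock(cpu);
 *	... read or consume events from that CPU's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while a reader that needs all buffers passes RING_BUFFER_ALL_CPUS and
 * thereby excludes every per-cpu reader.
 */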
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
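/*
 * Reserve space for an event on the ring buffer and initialize the common
 * trace_entry fields (type, flags, preempt count) via trace_event_setup().
 * Returns NULL if the reservation fails.
 */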
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 int tracing_alloc_snapshot_instance(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer. Instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = tracing_alloc_snapshot_instance(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
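/*
 * Usage sketch (illustrative only, following the kernel-doc above): a caller
 * that may sleep can rely on tracing_snapshot_alloc() alone:
 *
 *	tracing_snapshot_alloc();
 *
 * A caller that cannot sleep should allocate up front and then only trigger
 * the swap when its condition hits:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (condition)
 *		tracing_snapshot();
 */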
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races of where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 bool tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
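/*
 * For example, an entry such as C(PRINT_PARENT, "print-parent") in
 * TRACE_FLAGS (see trace.h) expands here to just "print-parent", so the
 * string lands at the same index as the TRACE_ITER_PRINT_PARENT bit.
 * (Illustrative; the full list lives in the TRACE_FLAGS definition.)
 */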
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
1172
1173 bool trace_clock_in_ns(struct trace_array *tr)
1174 {
1175         if (trace_clocks[tr->clock_id].in_ns)
1176                 return true;
1177
1178         return false;
1179 }
1180
1181 /*
1182  * trace_parser_get_init - gets the buffer for trace parser
1183  */
1184 int trace_parser_get_init(struct trace_parser *parser, int size)
1185 {
1186         memset(parser, 0, sizeof(*parser));
1187
1188         parser->buffer = kmalloc(size, GFP_KERNEL);
1189         if (!parser->buffer)
1190                 return 1;
1191
1192         parser->size = size;
1193         return 0;
1194 }
1195
1196 /*
1197  * trace_parser_put - frees the buffer for trace parser
1198  */
1199 void trace_parser_put(struct trace_parser *parser)
1200 {
1201         kfree(parser->buffer);
1202         parser->buffer = NULL;
1203 }
1204
1205 /*
1206  * trace_get_user - reads the user input string separated by space
1207  * (matched by isspace(ch))
1208  *
1209  * For each string found the 'struct trace_parser' is updated,
1210  * and the function returns.
1211  *
1212  * Returns number of bytes read.
1213  *
1214  * See kernel/trace/trace.h for 'struct trace_parser' details.
1215  */
1216 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1217         size_t cnt, loff_t *ppos)
1218 {
1219         char ch;
1220         size_t read = 0;
1221         ssize_t ret;
1222
1223         if (!*ppos)
1224                 trace_parser_clear(parser);
1225
1226         ret = get_user(ch, ubuf++);
1227         if (ret)
1228                 goto out;
1229
1230         read++;
1231         cnt--;
1232
1233         /*
1234          * The parser is not finished with the last write,
1235          * continue reading the user input without skipping spaces.
1236          */
1237         if (!parser->cont) {
1238                 /* skip white space */
1239                 while (cnt && isspace(ch)) {
1240                         ret = get_user(ch, ubuf++);
1241                         if (ret)
1242                                 goto out;
1243                         read++;
1244                         cnt--;
1245                 }
1246
1247                 parser->idx = 0;
1248
1249                 /* only spaces were written */
1250                 if (isspace(ch) || !ch) {
1251                         *ppos += read;
1252                         ret = read;
1253                         goto out;
1254                 }
1255         }
1256
1257         /* read the non-space input */
1258         while (cnt && !isspace(ch) && ch) {
1259                 if (parser->idx < parser->size - 1)
1260                         parser->buffer[parser->idx++] = ch;
1261                 else {
1262                         ret = -EINVAL;
1263                         goto out;
1264                 }
1265                 ret = get_user(ch, ubuf++);
1266                 if (ret)
1267                         goto out;
1268                 read++;
1269                 cnt--;
1270         }
1271
1272         /* We either got finished input or we have to wait for another call. */
1273         if (isspace(ch) || !ch) {
1274                 parser->buffer[parser->idx] = 0;
1275                 parser->cont = false;
1276         } else if (parser->idx < parser->size - 1) {
1277                 parser->cont = true;
1278                 parser->buffer[parser->idx++] = ch;
1279                 /* Make sure the parsed string always terminates with '\0'. */
1280                 parser->buffer[parser->idx] = 0;
1281         } else {
1282                 ret = -EINVAL;
1283                 goto out;
1284         }
1285
1286         *ppos += read;
1287         ret = read;
1288
1289 out:
1290         return ret;
1291 }
1292
1293 /* TODO add a seq_buf_to_buffer() */
1294 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1295 {
1296         int len;
1297
1298         if (trace_seq_used(s) <= s->seq.readpos)
1299                 return -EBUSY;
1300
1301         len = trace_seq_used(s) - s->seq.readpos;
1302         if (cnt > len)
1303                 cnt = len;
1304         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1305
1306         s->seq.readpos += cnt;
1307         return cnt;
1308 }
1309
1310 unsigned long __read_mostly     tracing_thresh;
1311
1312 #ifdef CONFIG_TRACER_MAX_TRACE
1313 /*
1314  * Copy the new maximum trace into the separate maximum-trace
1315  * structure. (this way the maximum trace is permanently saved,
1316  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1317  */
1318 static void
1319 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1320 {
1321         struct trace_buffer *trace_buf = &tr->trace_buffer;
1322         struct trace_buffer *max_buf = &tr->max_buffer;
1323         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1324         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1325
1326         max_buf->cpu = cpu;
1327         max_buf->time_start = data->preempt_timestamp;
1328
1329         max_data->saved_latency = tr->max_latency;
1330         max_data->critical_start = data->critical_start;
1331         max_data->critical_end = data->critical_end;
1332
1333         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1334         max_data->pid = tsk->pid;
1335         /*
1336          * If tsk == current, then use current_uid(), as that does not use
1337          * RCU. The irq tracer can be called out of RCU scope.
1338          */
1339         if (tsk == current)
1340                 max_data->uid = current_uid();
1341         else
1342                 max_data->uid = task_uid(tsk);
1343
1344         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1345         max_data->policy = tsk->policy;
1346         max_data->rt_priority = tsk->rt_priority;
1347
1348         /* record this task's comm */
1349         tracing_record_cmdline(tsk);
1350 }
1351
1352 /**
1353  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1354  * @tr: tracer
1355  * @tsk: the task with the latency
1356  * @cpu: The cpu that initiated the trace.
1357  *
1358  * Flip the buffers between the @tr and the max_tr and record information
1359  * about which task was the cause of this latency.
1360  */
1361 void
1362 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1363 {
1364         if (tr->stop_count)
1365                 return;
1366
1367         WARN_ON_ONCE(!irqs_disabled());
1368
1369         if (!tr->allocated_snapshot) {
1370                 /* Only the nop tracer should hit this when disabling */
1371                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1372                 return;
1373         }
1374
1375         arch_spin_lock(&tr->max_lock);
1376
1377         /* Inherit the recordable setting from trace_buffer */
1378         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1379                 ring_buffer_record_on(tr->max_buffer.buffer);
1380         else
1381                 ring_buffer_record_off(tr->max_buffer.buffer);
1382
1383         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1384
1385         __update_max_tr(tr, tsk, cpu);
1386         arch_spin_unlock(&tr->max_lock);
1387 }
1388
1389 /**
1390  * update_max_tr_single - only copy one trace over, and reset the rest
1391  * @tr: tracer
1392  * @tsk: task with the latency
1393  * @cpu: the cpu of the buffer to copy.
1394  *
1395  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1396  */
1397 void
1398 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1399 {
1400         int ret;
1401
1402         if (tr->stop_count)
1403                 return;
1404
1405         WARN_ON_ONCE(!irqs_disabled());
1406         if (!tr->allocated_snapshot) {
1407                 /* Only the nop tracer should hit this when disabling */
1408                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1409                 return;
1410         }
1411
1412         arch_spin_lock(&tr->max_lock);
1413
1414         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1415
1416         if (ret == -EBUSY) {
1417                 /*
1418                  * We failed to swap the buffer due to a commit taking
1419                  * place on this CPU. We fail to record, but we reset
1420                  * the max trace buffer (no one writes directly to it)
1421                  * and flag that it failed.
1422                  */
1423                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1424                         "Failed to swap buffers due to commit in progress\n");
1425         }
1426
1427         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1428
1429         __update_max_tr(tr, tsk, cpu);
1430         arch_spin_unlock(&tr->max_lock);
1431 }
1432 #endif /* CONFIG_TRACER_MAX_TRACE */
1433
1434 static int wait_on_pipe(struct trace_iterator *iter, int full)
1435 {
1436         /* Iterators are static, they should be filled or empty */
1437         if (trace_buffer_iter(iter, iter->cpu_file))
1438                 return 0;
1439
1440         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1441                                 full);
1442 }
1443
1444 #ifdef CONFIG_FTRACE_STARTUP_TEST
1445 static bool selftests_can_run;
1446
1447 struct trace_selftests {
1448         struct list_head                list;
1449         struct tracer                   *type;
1450 };
1451
1452 static LIST_HEAD(postponed_selftests);
1453
1454 static int save_selftest(struct tracer *type)
1455 {
1456         struct trace_selftests *selftest;
1457
1458         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1459         if (!selftest)
1460                 return -ENOMEM;
1461
1462         selftest->type = type;
1463         list_add(&selftest->list, &postponed_selftests);
1464         return 0;
1465 }
1466
1467 static int run_tracer_selftest(struct tracer *type)
1468 {
1469         struct trace_array *tr = &global_trace;
1470         struct tracer *saved_tracer = tr->current_trace;
1471         int ret;
1472
1473         if (!type->selftest || tracing_selftest_disabled)
1474                 return 0;
1475
1476         /*
1477          * If a tracer registers early in boot up (before scheduling is
1478          * initialized and such), then do not run its selftests yet.
1479          * Instead, run it a little later in the boot process.
1480          */
1481         if (!selftests_can_run)
1482                 return save_selftest(type);
1483
1484         /*
1485          * Run a selftest on this tracer.
1486          * Here we reset the trace buffer, and set the current
1487          * tracer to be this tracer. The tracer can then run some
1488          * internal tracing to verify that everything is in order.
1489          * If we fail, we do not register this tracer.
1490          */
1491         tracing_reset_online_cpus(&tr->trace_buffer);
1492
1493         tr->current_trace = type;
1494
1495 #ifdef CONFIG_TRACER_MAX_TRACE
1496         if (type->use_max_tr) {
1497                 /* If we expanded the buffers, make sure the max is expanded too */
1498                 if (ring_buffer_expanded)
1499                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1500                                            RING_BUFFER_ALL_CPUS);
1501                 tr->allocated_snapshot = true;
1502         }
1503 #endif
1504
1505         /* the test is responsible for initializing and enabling */
1506         pr_info("Testing tracer %s: ", type->name);
1507         ret = type->selftest(type, tr);
1508         /* the test is responsible for resetting too */
1509         tr->current_trace = saved_tracer;
1510         if (ret) {
1511                 printk(KERN_CONT "FAILED!\n");
1512                 /* Add the warning after printing 'FAILED' */
1513                 WARN_ON(1);
1514                 return -1;
1515         }
1516         /* Only reset on passing, to avoid touching corrupted buffers */
1517         tracing_reset_online_cpus(&tr->trace_buffer);
1518
1519 #ifdef CONFIG_TRACER_MAX_TRACE
1520         if (type->use_max_tr) {
1521                 tr->allocated_snapshot = false;
1522
1523                 /* Shrink the max buffer again */
1524                 if (ring_buffer_expanded)
1525                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1526                                            RING_BUFFER_ALL_CPUS);
1527         }
1528 #endif
1529
1530         printk(KERN_CONT "PASSED\n");
1531         return 0;
1532 }
1533
1534 static __init int init_trace_selftests(void)
1535 {
1536         struct trace_selftests *p, *n;
1537         struct tracer *t, **last;
1538         int ret;
1539
1540         selftests_can_run = true;
1541
1542         mutex_lock(&trace_types_lock);
1543
1544         if (list_empty(&postponed_selftests))
1545                 goto out;
1546
1547         pr_info("Running postponed tracer tests:\n");
1548
1549         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1550                 ret = run_tracer_selftest(p->type);
1551                 /* If the test fails, then warn and remove from available_tracers */
1552                 if (ret < 0) {
1553                         WARN(1, "tracer: %s failed selftest, disabling\n",
1554                              p->type->name);
1555                         last = &trace_types;
1556                         for (t = trace_types; t; t = t->next) {
1557                                 if (t == p->type) {
1558                                         *last = t->next;
1559                                         break;
1560                                 }
1561                                 last = &t->next;
1562                         }
1563                 }
1564                 list_del(&p->list);
1565                 kfree(p);
1566         }
1567
1568  out:
1569         mutex_unlock(&trace_types_lock);
1570
1571         return 0;
1572 }
1573 core_initcall(init_trace_selftests);
1574 #else
1575 static inline int run_tracer_selftest(struct tracer *type)
1576 {
1577         return 0;
1578 }
1579 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1580
1581 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1582
1583 static void __init apply_trace_boot_options(void);
1584
1585 /**
1586  * register_tracer - register a tracer with the ftrace system.
1587  * @type: the plugin for the tracer
1588  *
1589  * Register a new plugin tracer.
1590  */
1591 int __init register_tracer(struct tracer *type)
1592 {
1593         struct tracer *t;
1594         int ret = 0;
1595
1596         if (!type->name) {
1597                 pr_info("Tracer must have a name\n");
1598                 return -1;
1599         }
1600
1601         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1602                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1603                 return -1;
1604         }
1605
1606         mutex_lock(&trace_types_lock);
1607
1608         tracing_selftest_running = true;
1609
1610         for (t = trace_types; t; t = t->next) {
1611                 if (strcmp(type->name, t->name) == 0) {
1612                         /* already found */
1613                         pr_info("Tracer %s already registered\n",
1614                                 type->name);
1615                         ret = -1;
1616                         goto out;
1617                 }
1618         }
1619
1620         if (!type->set_flag)
1621                 type->set_flag = &dummy_set_flag;
1622         if (!type->flags) {
1623                 /* allocate a dummy tracer_flags */
1624                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1625                 if (!type->flags) {
1626                         ret = -ENOMEM;
1627                         goto out;
1628                 }
1629                 type->flags->val = 0;
1630                 type->flags->opts = dummy_tracer_opt;
1631         } else
1632                 if (!type->flags->opts)
1633                         type->flags->opts = dummy_tracer_opt;
1634
1635         /* store the tracer for __set_tracer_option */
1636         type->flags->trace = type;
1637
1638         ret = run_tracer_selftest(type);
1639         if (ret < 0)
1640                 goto out;
1641
1642         type->next = trace_types;
1643         trace_types = type;
1644         add_tracer_options(&global_trace, type);
1645
1646  out:
1647         tracing_selftest_running = false;
1648         mutex_unlock(&trace_types_lock);
1649
1650         if (ret || !default_bootup_tracer)
1651                 goto out_unlock;
1652
1653         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1654                 goto out_unlock;
1655
1656         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1657         /* Do we want this tracer to start on bootup? */
1658         tracing_set_tracer(&global_trace, type->name);
1659         default_bootup_tracer = NULL;
1660
1661         apply_trace_boot_options();
1662
1663         /* disable other selftests, since the running tracer would break them. */
1664         tracing_selftest_disabled = true;
1665 #ifdef CONFIG_FTRACE_STARTUP_TEST
1666         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1667                type->name);
1668 #endif
1669
1670  out_unlock:
1671         return ret;
1672 }
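
/*
 * A minimal sketch of how a tracer plugin hooks into the above, assuming
 * a hypothetical "nop_like" tracer whose callbacks do nothing (the names
 * and the initcall are illustrative only):
 *
 *	static int nop_like_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void nop_like_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer nop_like_tracer __read_mostly = {
 *		.name	= "nop_like",
 *		.init	= nop_like_init,
 *		.reset	= nop_like_reset,
 *	};
 *
 *	static int __init init_nop_like(void)
 *	{
 *		return register_tracer(&nop_like_tracer);
 *	}
 *	core_initcall(init_nop_like);
 *
 * Once registered, the tracer shows up in the "available_tracers" file
 * and can be selected through "current_tracer".
 */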
1673
1674 void tracing_reset(struct trace_buffer *buf, int cpu)
1675 {
1676         struct ring_buffer *buffer = buf->buffer;
1677
1678         if (!buffer)
1679                 return;
1680
1681         ring_buffer_record_disable(buffer);
1682
1683         /* Make sure all commits have finished */
1684         synchronize_rcu();
1685         ring_buffer_reset_cpu(buffer, cpu);
1686
1687         ring_buffer_record_enable(buffer);
1688 }
1689
1690 void tracing_reset_online_cpus(struct trace_buffer *buf)
1691 {
1692         struct ring_buffer *buffer = buf->buffer;
1693         int cpu;
1694
1695         if (!buffer)
1696                 return;
1697
1698         ring_buffer_record_disable(buffer);
1699
1700         /* Make sure all commits have finished */
1701         synchronize_rcu();
1702
1703         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1704
1705         for_each_online_cpu(cpu)
1706                 ring_buffer_reset_cpu(buffer, cpu);
1707
1708         ring_buffer_record_enable(buffer);
1709 }
1710
1711 /* Must have trace_types_lock held */
1712 void tracing_reset_all_online_cpus(void)
1713 {
1714         struct trace_array *tr;
1715
1716         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1717                 if (!tr->clear_trace)
1718                         continue;
1719                 tr->clear_trace = false;
1720                 tracing_reset_online_cpus(&tr->trace_buffer);
1721 #ifdef CONFIG_TRACER_MAX_TRACE
1722                 tracing_reset_online_cpus(&tr->max_buffer);
1723 #endif
1724         }
1725 }
1726
1727 static int *tgid_map;
1728
1729 #define SAVED_CMDLINES_DEFAULT 128
1730 #define NO_CMDLINE_MAP UINT_MAX
1731 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1732 struct saved_cmdlines_buffer {
1733         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1734         unsigned *map_cmdline_to_pid;
1735         unsigned cmdline_num;
1736         int cmdline_idx;
1737         char *saved_cmdlines;
1738 };
1739 static struct saved_cmdlines_buffer *savedcmd;
1740
1741 /* temporarily disable recording */
1742 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1743
1744 static inline char *get_saved_cmdlines(int idx)
1745 {
1746         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1747 }
1748
1749 static inline void set_cmdline(int idx, const char *cmdline)
1750 {
1751         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1752 }
1753
1754 static int allocate_cmdlines_buffer(unsigned int val,
1755                                     struct saved_cmdlines_buffer *s)
1756 {
1757         s->map_cmdline_to_pid = kmalloc_array(val,
1758                                               sizeof(*s->map_cmdline_to_pid),
1759                                               GFP_KERNEL);
1760         if (!s->map_cmdline_to_pid)
1761                 return -ENOMEM;
1762
1763         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1764         if (!s->saved_cmdlines) {
1765                 kfree(s->map_cmdline_to_pid);
1766                 return -ENOMEM;
1767         }
1768
1769         s->cmdline_idx = 0;
1770         s->cmdline_num = val;
1771         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1772                sizeof(s->map_pid_to_cmdline));
1773         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1774                val * sizeof(*s->map_cmdline_to_pid));
1775
1776         return 0;
1777 }
1778
1779 static int trace_create_savedcmd(void)
1780 {
1781         int ret;
1782
1783         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1784         if (!savedcmd)
1785                 return -ENOMEM;
1786
1787         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1788         if (ret < 0) {
1789                 kfree(savedcmd);
1790                 savedcmd = NULL;
1791                 return -ENOMEM;
1792         }
1793
1794         return 0;
1795 }
1796
1797 int is_tracing_stopped(void)
1798 {
1799         return global_trace.stop_count;
1800 }
1801
1802 /**
1803  * tracing_start - quick start of the tracer
1804  *
1805  * If tracing is enabled but was stopped by tracing_stop,
1806  * this will start the tracer back up.
1807  */
1808 void tracing_start(void)
1809 {
1810         struct ring_buffer *buffer;
1811         unsigned long flags;
1812
1813         if (tracing_disabled)
1814                 return;
1815
1816         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1817         if (--global_trace.stop_count) {
1818                 if (global_trace.stop_count < 0) {
1819                         /* Someone screwed up their debugging */
1820                         WARN_ON_ONCE(1);
1821                         global_trace.stop_count = 0;
1822                 }
1823                 goto out;
1824         }
1825
1826         /* Prevent the buffers from switching */
1827         arch_spin_lock(&global_trace.max_lock);
1828
1829         buffer = global_trace.trace_buffer.buffer;
1830         if (buffer)
1831                 ring_buffer_record_enable(buffer);
1832
1833 #ifdef CONFIG_TRACER_MAX_TRACE
1834         buffer = global_trace.max_buffer.buffer;
1835         if (buffer)
1836                 ring_buffer_record_enable(buffer);
1837 #endif
1838
1839         arch_spin_unlock(&global_trace.max_lock);
1840
1841  out:
1842         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1843 }
1844
1845 static void tracing_start_tr(struct trace_array *tr)
1846 {
1847         struct ring_buffer *buffer;
1848         unsigned long flags;
1849
1850         if (tracing_disabled)
1851                 return;
1852
1853         /* If global, we need to also start the max tracer */
1854         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1855                 return tracing_start();
1856
1857         raw_spin_lock_irqsave(&tr->start_lock, flags);
1858
1859         if (--tr->stop_count) {
1860                 if (tr->stop_count < 0) {
1861                         /* Someone screwed up their debugging */
1862                         WARN_ON_ONCE(1);
1863                         tr->stop_count = 0;
1864                 }
1865                 goto out;
1866         }
1867
1868         buffer = tr->trace_buffer.buffer;
1869         if (buffer)
1870                 ring_buffer_record_enable(buffer);
1871
1872  out:
1873         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1874 }
1875
1876 /**
1877  * tracing_stop - quick stop of the tracer
1878  *
1879  * Light weight way to stop tracing. Use in conjunction with
1880  * tracing_start.
1881  */
1882 void tracing_stop(void)
1883 {
1884         struct ring_buffer *buffer;
1885         unsigned long flags;
1886
1887         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1888         if (global_trace.stop_count++)
1889                 goto out;
1890
1891         /* Prevent the buffers from switching */
1892         arch_spin_lock(&global_trace.max_lock);
1893
1894         buffer = global_trace.trace_buffer.buffer;
1895         if (buffer)
1896                 ring_buffer_record_disable(buffer);
1897
1898 #ifdef CONFIG_TRACER_MAX_TRACE
1899         buffer = global_trace.max_buffer.buffer;
1900         if (buffer)
1901                 ring_buffer_record_disable(buffer);
1902 #endif
1903
1904         arch_spin_unlock(&global_trace.max_lock);
1905
1906  out:
1907         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1908 }
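
/*
 * A small usage sketch: a debugging site that wants to freeze the ring
 * buffers while it looks at them (the inspection step is hypothetical)
 * can bracket the section with these calls:
 *
 *	tracing_stop();
 *	[ inspect or dump the now quiescent buffers ]
 *	tracing_start();
 *
 * Because stop_count is a counter, such pairs nest safely with other
 * stop/start users.
 */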
1909
1910 static void tracing_stop_tr(struct trace_array *tr)
1911 {
1912         struct ring_buffer *buffer;
1913         unsigned long flags;
1914
1915         /* If global, we need to also stop the max tracer */
1916         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1917                 return tracing_stop();
1918
1919         raw_spin_lock_irqsave(&tr->start_lock, flags);
1920         if (tr->stop_count++)
1921                 goto out;
1922
1923         buffer = tr->trace_buffer.buffer;
1924         if (buffer)
1925                 ring_buffer_record_disable(buffer);
1926
1927  out:
1928         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1929 }
1930
1931 static int trace_save_cmdline(struct task_struct *tsk)
1932 {
1933         unsigned pid, idx;
1934
1935         /* treat recording of idle task as a success */
1936         if (!tsk->pid)
1937                 return 1;
1938
1939         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1940                 return 0;
1941
1942         /*
1943          * It's not the end of the world if we don't get
1944          * the lock, but we also don't want to spin
1945          * nor do we want to disable interrupts,
1946          * so if we miss here, then better luck next time.
1947          */
1948         if (!arch_spin_trylock(&trace_cmdline_lock))
1949                 return 0;
1950
1951         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1952         if (idx == NO_CMDLINE_MAP) {
1953                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1954
1955                 /*
1956                  * Check whether the cmdline buffer at idx has a pid
1957                  * mapped. We are going to overwrite that entry so we
1958                  * need to clear the map_pid_to_cmdline. Otherwise we
1959                  * would read the new comm for the old pid.
1960                  */
1961                 pid = savedcmd->map_cmdline_to_pid[idx];
1962                 if (pid != NO_CMDLINE_MAP)
1963                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1964
1965                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1966                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1967
1968                 savedcmd->cmdline_idx = idx;
1969         }
1970
1971         set_cmdline(idx, tsk->comm);
1972
1973         arch_spin_unlock(&trace_cmdline_lock);
1974
1975         return 1;
1976 }
1977
1978 static void __trace_find_cmdline(int pid, char comm[])
1979 {
1980         unsigned map;
1981
1982         if (!pid) {
1983                 strcpy(comm, "<idle>");
1984                 return;
1985         }
1986
1987         if (WARN_ON_ONCE(pid < 0)) {
1988                 strcpy(comm, "<XXX>");
1989                 return;
1990         }
1991
1992         if (pid > PID_MAX_DEFAULT) {
1993                 strcpy(comm, "<...>");
1994                 return;
1995         }
1996
1997         map = savedcmd->map_pid_to_cmdline[pid];
1998         if (map != NO_CMDLINE_MAP)
1999                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2000         else
2001                 strcpy(comm, "<...>");
2002 }
2003
2004 void trace_find_cmdline(int pid, char comm[])
2005 {
2006         preempt_disable();
2007         arch_spin_lock(&trace_cmdline_lock);
2008
2009         __trace_find_cmdline(pid, comm);
2010
2011         arch_spin_unlock(&trace_cmdline_lock);
2012         preempt_enable();
2013 }
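
/*
 * A typical lookup, sketched from the point of view of an output handler
 * that wants the comm for the pid saved in an entry (the variable names
 * here are illustrative):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 *
 * If the pid has been evicted from the saved_cmdlines cache, comm simply
 * reads "<...>".
 */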
2014
2015 int trace_find_tgid(int pid)
2016 {
2017         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2018                 return 0;
2019
2020         return tgid_map[pid];
2021 }
2022
2023 static int trace_save_tgid(struct task_struct *tsk)
2024 {
2025         /* treat recording of idle task as a success */
2026         if (!tsk->pid)
2027                 return 1;
2028
2029         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2030                 return 0;
2031
2032         tgid_map[tsk->pid] = tsk->tgid;
2033         return 1;
2034 }
2035
2036 static bool tracing_record_taskinfo_skip(int flags)
2037 {
2038         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2039                 return true;
2040         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2041                 return true;
2042         if (!__this_cpu_read(trace_taskinfo_save))
2043                 return true;
2044         return false;
2045 }
2046
2047 /**
2048  * tracing_record_taskinfo - record the task info of a task
2049  *
2050  * @task:  task to record
2051  * @flags: TRACE_RECORD_CMDLINE for recording comm
2052  *         TRACE_RECORD_TGID for recording tgid
2053  */
2054 void tracing_record_taskinfo(struct task_struct *task, int flags)
2055 {
2056         bool done;
2057
2058         if (tracing_record_taskinfo_skip(flags))
2059                 return;
2060
2061         /*
2062          * Record as much task information as possible. If some fail, continue
2063          * to try to record the others.
2064          */
2065         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2066         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2067
2068         /* If recording any information failed, retry again soon. */
2069         if (!done)
2070                 return;
2071
2072         __this_cpu_write(trace_taskinfo_save, false);
2073 }
2074
2075 /**
2076  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2077  *
2078  * @prev:  previous task during sched_switch
2079  * @next:  next task during sched_switch
2080  * @flags: TRACE_RECORD_CMDLINE for recording comm
2081  *         TRACE_RECORD_TGID for recording tgid
2082  */
2083 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2084                                           struct task_struct *next, int flags)
2085 {
2086         bool done;
2087
2088         if (tracing_record_taskinfo_skip(flags))
2089                 return;
2090
2091         /*
2092          * Record as much task information as possible. If some fail, continue
2093          * to try to record the others.
2094          */
2095         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2096         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2097         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2098         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2099
2100         /* If recording any information failed, retry again soon. */
2101         if (!done)
2102                 return;
2103
2104         __this_cpu_write(trace_taskinfo_save, false);
2105 }
2106
2107 /* Helpers to record a specific task information */
2108 void tracing_record_cmdline(struct task_struct *task)
2109 {
2110         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2111 }
2112
2113 void tracing_record_tgid(struct task_struct *task)
2114 {
2115         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2116 }
2117
2118 /*
2119  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2120  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2121  * simplifies those functions and keeps them in sync.
2122  */
2123 enum print_line_t trace_handle_return(struct trace_seq *s)
2124 {
2125         return trace_seq_has_overflowed(s) ?
2126                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_handle_return);
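
/*
 * The intended caller is an event's print handler; a sketch with
 * hypothetical names:
 *
 *	static enum print_line_t foo_event_print(struct trace_iterator *iter,
 *						 int flags,
 *						 struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo event hit\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 *
 * This keeps the overflow check in one place instead of open coding the
 * trace_seq_has_overflowed() test in every handler.
 */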
2129
2130 void
2131 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2132                              int pc)
2133 {
2134         struct task_struct *tsk = current;
2135
2136         entry->preempt_count            = pc & 0xff;
2137         entry->pid                      = (tsk) ? tsk->pid : 0;
2138         entry->flags =
2139 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2140                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2141 #else
2142                 TRACE_FLAG_IRQS_NOSUPPORT |
2143 #endif
2144                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2145                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2146                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2147                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2148                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2149 }
2150 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2151
2152 struct ring_buffer_event *
2153 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2154                           int type,
2155                           unsigned long len,
2156                           unsigned long flags, int pc)
2157 {
2158         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2159 }
2160
2161 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2162 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2163 static int trace_buffered_event_ref;
2164
2165 /**
2166  * trace_buffered_event_enable - enable buffering events
2167  *
2168  * When events are being filtered, it is quicker to use a temporary
2169  * buffer to write the event data into if there's a likely chance
2170  * that it will not be committed. Discarding an event that was
2171  * reserved directly in the ring buffer is slower than committing it,
2172  * and much slower than copying it out of a temporary buffer when it
2173  * does get committed.
2174  *
2175  * When an event is to be filtered, per cpu buffers are allocated to
2176  * write the event data into. If the event is filtered and discarded,
2177  * it is simply dropped; otherwise the entire data is committed in one shot.
2178  */
2179 void trace_buffered_event_enable(void)
2180 {
2181         struct ring_buffer_event *event;
2182         struct page *page;
2183         int cpu;
2184
2185         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2186
2187         if (trace_buffered_event_ref++)
2188                 return;
2189
2190         for_each_tracing_cpu(cpu) {
2191                 page = alloc_pages_node(cpu_to_node(cpu),
2192                                         GFP_KERNEL | __GFP_NORETRY, 0);
2193                 if (!page)
2194                         goto failed;
2195
2196                 event = page_address(page);
2197                 memset(event, 0, sizeof(*event));
2198
2199                 per_cpu(trace_buffered_event, cpu) = event;
2200
2201                 preempt_disable();
2202                 if (cpu == smp_processor_id() &&
2203                     this_cpu_read(trace_buffered_event) !=
2204                     per_cpu(trace_buffered_event, cpu))
2205                         WARN_ON_ONCE(1);
2206                 preempt_enable();
2207         }
2208
2209         return;
2210  failed:
2211         trace_buffered_event_disable();
2212 }
2213
2214 static void enable_trace_buffered_event(void *data)
2215 {
2216         /* Probably not needed, but do it anyway */
2217         smp_rmb();
2218         this_cpu_dec(trace_buffered_event_cnt);
2219 }
2220
2221 static void disable_trace_buffered_event(void *data)
2222 {
2223         this_cpu_inc(trace_buffered_event_cnt);
2224 }
2225
2226 /**
2227  * trace_buffered_event_disable - disable buffering events
2228  *
2229  * When a filter is removed, it is faster to not use the buffered
2230  * events, and to commit directly into the ring buffer. Free up
2231  * the temp buffers when there are no more users. This requires
2232  * special synchronization with current events.
2233  */
2234 void trace_buffered_event_disable(void)
2235 {
2236         int cpu;
2237
2238         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2239
2240         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2241                 return;
2242
2243         if (--trace_buffered_event_ref)
2244                 return;
2245
2246         preempt_disable();
2247         /* For each CPU, set the buffer as used. */
2248         smp_call_function_many(tracing_buffer_mask,
2249                                disable_trace_buffered_event, NULL, 1);
2250         preempt_enable();
2251
2252         /* Wait for all current users to finish */
2253         synchronize_rcu();
2254
2255         for_each_tracing_cpu(cpu) {
2256                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2257                 per_cpu(trace_buffered_event, cpu) = NULL;
2258         }
2259         /*
2260          * Make sure trace_buffered_event is NULL before clearing
2261          * trace_buffered_event_cnt.
2262          */
2263         smp_wmb();
2264
2265         preempt_disable();
2266         /* Do the work on each cpu */
2267         smp_call_function_many(tracing_buffer_mask,
2268                                enable_trace_buffered_event, NULL, 1);
2269         preempt_enable();
2270 }
2271
2272 static struct ring_buffer *temp_buffer;
2273
2274 struct ring_buffer_event *
2275 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2276                           struct trace_event_file *trace_file,
2277                           int type, unsigned long len,
2278                           unsigned long flags, int pc)
2279 {
2280         struct ring_buffer_event *entry;
2281         int val;
2282
2283         *current_rb = trace_file->tr->trace_buffer.buffer;
2284
2285         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2286              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2287             (entry = this_cpu_read(trace_buffered_event))) {
2288                 /* Try to use the per cpu buffer first */
2289                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2290                 if (val == 1) {
2291                         trace_event_setup(entry, type, flags, pc);
2292                         entry->array[0] = len;
2293                         return entry;
2294                 }
2295                 this_cpu_dec(trace_buffered_event_cnt);
2296         }
2297
2298         entry = __trace_buffer_lock_reserve(*current_rb,
2299                                             type, len, flags, pc);
2300         /*
2301          * If tracing is off, but we have triggers enabled,
2302          * we still need to look at the event data. Use the temp_buffer
2303          * to store the trace event for the trigger to use. It's recursion
2304          * safe and will not be recorded anywhere.
2305          */
2306         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2307                 *current_rb = temp_buffer;
2308                 entry = __trace_buffer_lock_reserve(*current_rb,
2309                                                     type, len, flags, pc);
2310         }
2311         return entry;
2312 }
2313 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2314
2315 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2316 static DEFINE_MUTEX(tracepoint_printk_mutex);
2317
2318 static void output_printk(struct trace_event_buffer *fbuffer)
2319 {
2320         struct trace_event_call *event_call;
2321         struct trace_event *event;
2322         unsigned long flags;
2323         struct trace_iterator *iter = tracepoint_print_iter;
2324
2325         /* We should never get here if iter is NULL */
2326         if (WARN_ON_ONCE(!iter))
2327                 return;
2328
2329         event_call = fbuffer->trace_file->event_call;
2330         if (!event_call || !event_call->event.funcs ||
2331             !event_call->event.funcs->trace)
2332                 return;
2333
2334         event = &fbuffer->trace_file->event_call->event;
2335
2336         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2337         trace_seq_init(&iter->seq);
2338         iter->ent = fbuffer->entry;
2339         event_call->event.funcs->trace(iter, 0, event);
2340         trace_seq_putc(&iter->seq, 0);
2341         printk("%s", iter->seq.buffer);
2342
2343         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2344 }
2345
2346 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2347                              void __user *buffer, size_t *lenp,
2348                              loff_t *ppos)
2349 {
2350         int save_tracepoint_printk;
2351         int ret;
2352
2353         mutex_lock(&tracepoint_printk_mutex);
2354         save_tracepoint_printk = tracepoint_printk;
2355
2356         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2357
2358         /*
2359          * This will force an early exit, as tracepoint_printk
2360          * is always zero when tracepoint_print_iter is not allocated.
2361          */
2362         if (!tracepoint_print_iter)
2363                 tracepoint_printk = 0;
2364
2365         if (save_tracepoint_printk == tracepoint_printk)
2366                 goto out;
2367
2368         if (tracepoint_printk)
2369                 static_key_enable(&tracepoint_printk_key.key);
2370         else
2371                 static_key_disable(&tracepoint_printk_key.key);
2372
2373  out:
2374         mutex_unlock(&tracepoint_printk_mutex);
2375
2376         return ret;
2377 }
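
/*
 * This is the handler behind the "tracepoint_printk" sysctl: writing 1 to
 * /proc/sys/kernel/tracepoint_printk at run time enables the static key
 * that makes trace_event_buffer_commit() below also print each event via
 * output_printk(), and writing 0 disables it again (assuming the iterator
 * was allocated at boot via the "tp_printk" command line option).
 */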
2378
2379 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2380 {
2381         if (static_key_false(&tracepoint_printk_key.key))
2382                 output_printk(fbuffer);
2383
2384         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2385                                     fbuffer->event, fbuffer->entry,
2386                                     fbuffer->flags, fbuffer->pc);
2387 }
2388 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2389
2390 /*
2391  * Skip 3:
2392  *
2393  *   trace_buffer_unlock_commit_regs()
2394  *   trace_event_buffer_commit()
2395  *   trace_event_raw_event_xxx()
2396  */
2397 # define STACK_SKIP 3
2398
2399 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2400                                      struct ring_buffer *buffer,
2401                                      struct ring_buffer_event *event,
2402                                      unsigned long flags, int pc,
2403                                      struct pt_regs *regs)
2404 {
2405         __buffer_unlock_commit(buffer, event);
2406
2407         /*
2408          * If regs is not set, then skip the necessary functions.
2409          * Note, we can still get here via blktrace, wakeup tracer
2410          * and mmiotrace, but that's ok if they lose a function or
2411          * two. They are not that meaningful.
2412          */
2413         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2414         ftrace_trace_userstack(buffer, flags, pc);
2415 }
2416
2417 /*
2418  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2419  */
2420 void
2421 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2422                                    struct ring_buffer_event *event)
2423 {
2424         __buffer_unlock_commit(buffer, event);
2425 }
2426
2427 static void
2428 trace_process_export(struct trace_export *export,
2429                struct ring_buffer_event *event)
2430 {
2431         struct trace_entry *entry;
2432         unsigned int size = 0;
2433
2434         entry = ring_buffer_event_data(event);
2435         size = ring_buffer_event_length(event);
2436         export->write(export, entry, size);
2437 }
2438
2439 static DEFINE_MUTEX(ftrace_export_lock);
2440
2441 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2442
2443 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2444
2445 static inline void ftrace_exports_enable(void)
2446 {
2447         static_branch_enable(&ftrace_exports_enabled);
2448 }
2449
2450 static inline void ftrace_exports_disable(void)
2451 {
2452         static_branch_disable(&ftrace_exports_enabled);
2453 }
2454
2455 static void ftrace_exports(struct ring_buffer_event *event)
2456 {
2457         struct trace_export *export;
2458
2459         preempt_disable_notrace();
2460
2461         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2462         while (export) {
2463                 trace_process_export(export, event);
2464                 export = rcu_dereference_raw_notrace(export->next);
2465         }
2466
2467         preempt_enable_notrace();
2468 }
2469
2470 static inline void
2471 add_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473         rcu_assign_pointer(export->next, *list);
2474         /*
2475          * We are adding export to the list, but another
2476          * CPU might be walking that list. We need to make sure
2477          * the export->next pointer is valid before another CPU sees
2478          * the export pointer included in the list.
2479          */
2480         rcu_assign_pointer(*list, export);
2481 }
2482
2483 static inline int
2484 rm_trace_export(struct trace_export **list, struct trace_export *export)
2485 {
2486         struct trace_export **p;
2487
2488         for (p = list; *p != NULL; p = &(*p)->next)
2489                 if (*p == export)
2490                         break;
2491
2492         if (*p != export)
2493                 return -1;
2494
2495         rcu_assign_pointer(*p, (*p)->next);
2496
2497         return 0;
2498 }
2499
2500 static inline void
2501 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2502 {
2503         if (*list == NULL)
2504                 ftrace_exports_enable();
2505
2506         add_trace_export(list, export);
2507 }
2508
2509 static inline int
2510 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2511 {
2512         int ret;
2513
2514         ret = rm_trace_export(list, export);
2515         if (*list == NULL)
2516                 ftrace_exports_disable();
2517
2518         return ret;
2519 }
2520
2521 int register_ftrace_export(struct trace_export *export)
2522 {
2523         if (WARN_ON_ONCE(!export->write))
2524                 return -1;
2525
2526         mutex_lock(&ftrace_export_lock);
2527
2528         add_ftrace_export(&ftrace_exports_list, export);
2529
2530         mutex_unlock(&ftrace_export_lock);
2531
2532         return 0;
2533 }
2534 EXPORT_SYMBOL_GPL(register_ftrace_export);
2535
2536 int unregister_ftrace_export(struct trace_export *export)
2537 {
2538         int ret;
2539
2540         mutex_lock(&ftrace_export_lock);
2541
2542         ret = rm_ftrace_export(&ftrace_exports_list, export);
2543
2544         mutex_unlock(&ftrace_export_lock);
2545
2546         return ret;
2547 }
2548 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
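
/*
 * A minimal trace_export consumer, sketched with hypothetical names; only
 * the ->write() signature and the register/unregister calls are taken
 * from the code above:
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes of trace data\n", size);
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 *
 * Every function trace event committed while the export is registered is
 * handed to ->write() as a raw entry plus its length.
 */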
2549
2550 void
2551 trace_function(struct trace_array *tr,
2552                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2553                int pc)
2554 {
2555         struct trace_event_call *call = &event_function;
2556         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2557         struct ring_buffer_event *event;
2558         struct ftrace_entry *entry;
2559
2560         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2561                                             flags, pc);
2562         if (!event)
2563                 return;
2564         entry   = ring_buffer_event_data(event);
2565         entry->ip                       = ip;
2566         entry->parent_ip                = parent_ip;
2567
2568         if (!call_filter_check_discard(call, entry, buffer, event)) {
2569                 if (static_branch_unlikely(&ftrace_exports_enabled))
2570                         ftrace_exports(event);
2571                 __buffer_unlock_commit(buffer, event);
2572         }
2573 }
2574
2575 #ifdef CONFIG_STACKTRACE
2576
2577 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2578 struct ftrace_stack {
2579         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2580 };
2581
2582 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2583 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2584
2585 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2586                                  unsigned long flags,
2587                                  int skip, int pc, struct pt_regs *regs)
2588 {
2589         struct trace_event_call *call = &event_kernel_stack;
2590         struct ring_buffer_event *event;
2591         struct stack_entry *entry;
2592         struct stack_trace trace;
2593         int use_stack;
2594         int size = FTRACE_STACK_ENTRIES;
2595
2596         trace.nr_entries        = 0;
2597         trace.skip              = skip;
2598
2599         /*
2600          * Add one, for this function and the call to save_stack_trace().
2601          * If regs is set, then these functions will not be in the way.
2602          */
2603 #ifndef CONFIG_UNWINDER_ORC
2604         if (!regs)
2605                 trace.skip++;
2606 #endif
2607
2608         /*
2609          * Since events can happen in NMIs there's no safe way to
2610          * use the per cpu ftrace_stack. We reserve it and if an interrupt
2611          * or NMI comes in, it will just have to use the default
2612          * FTRACE_STACK_ENTRIES sized stack stored in the event itself.
2613          */
2614         preempt_disable_notrace();
2615
2616         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2617         /*
2618          * We don't need any atomic variables, just a barrier.
2619          * If an interrupt comes in, we don't care, because it would
2620          * have exited and put the counter back to what we want.
2621          * We just need a barrier to keep gcc from moving things
2622          * around.
2623          */
2624         barrier();
2625         if (use_stack == 1) {
2626                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2627                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2628
2629                 if (regs)
2630                         save_stack_trace_regs(regs, &trace);
2631                 else
2632                         save_stack_trace(&trace);
2633
2634                 if (trace.nr_entries > size)
2635                         size = trace.nr_entries;
2636         } else
2637                 /* From now on, use_stack is a boolean */
2638                 use_stack = 0;
2639
2640         size *= sizeof(unsigned long);
2641
2642         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2643                                             sizeof(*entry) + size, flags, pc);
2644         if (!event)
2645                 goto out;
2646         entry = ring_buffer_event_data(event);
2647
2648         memset(&entry->caller, 0, size);
2649
2650         if (use_stack)
2651                 memcpy(&entry->caller, trace.entries,
2652                        trace.nr_entries * sizeof(unsigned long));
2653         else {
2654                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2655                 trace.entries           = entry->caller;
2656                 if (regs)
2657                         save_stack_trace_regs(regs, &trace);
2658                 else
2659                         save_stack_trace(&trace);
2660         }
2661
2662         entry->size = trace.nr_entries;
2663
2664         if (!call_filter_check_discard(call, entry, buffer, event))
2665                 __buffer_unlock_commit(buffer, event);
2666
2667  out:
2668         /* Again, don't let gcc optimize things here */
2669         barrier();
2670         __this_cpu_dec(ftrace_stack_reserve);
2671         preempt_enable_notrace();
2672
2673 }
2674
2675 static inline void ftrace_trace_stack(struct trace_array *tr,
2676                                       struct ring_buffer *buffer,
2677                                       unsigned long flags,
2678                                       int skip, int pc, struct pt_regs *regs)
2679 {
2680         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2681                 return;
2682
2683         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2684 }
2685
2686 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2687                    int pc)
2688 {
2689         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2690
2691         if (rcu_is_watching()) {
2692                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2693                 return;
2694         }
2695
2696         /*
2697          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2698          * but if the above rcu_is_watching() failed, then the NMI
2699          * triggered someplace critical, and rcu_irq_enter() should
2700          * not be called from NMI.
2701          */
2702         if (unlikely(in_nmi()))
2703                 return;
2704
2705         rcu_irq_enter_irqson();
2706         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707         rcu_irq_exit_irqson();
2708 }
2709
2710 /**
2711  * trace_dump_stack - record a stack back trace in the trace buffer
2712  * @skip: Number of functions to skip (helper handlers)
2713  */
2714 void trace_dump_stack(int skip)
2715 {
2716         unsigned long flags;
2717
2718         if (tracing_disabled || tracing_selftest_running)
2719                 return;
2720
2721         local_save_flags(flags);
2722
2723 #ifndef CONFIG_UNWINDER_ORC
2724         /* Skip 1 to skip this function. */
2725         skip++;
2726 #endif
2727         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2728                              flags, skip, preempt_count(), NULL);
2729 }
2730 EXPORT_SYMBOL_GPL(trace_dump_stack);
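
/*
 * A one-line usage sketch: a driver chasing a rare code path can drop
 *
 *	trace_dump_stack(0);
 *
 * into that path to record the current backtrace in the ring buffer
 * instead of flooding the printk log with dump_stack().
 */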
2731
2732 static DEFINE_PER_CPU(int, user_stack_count);
2733
2734 void
2735 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2736 {
2737         struct trace_event_call *call = &event_user_stack;
2738         struct ring_buffer_event *event;
2739         struct userstack_entry *entry;
2740         struct stack_trace trace;
2741
2742         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2743                 return;
2744
2745         /*
2746          * NMIs cannot handle page faults, even with fixups.
2747          * Saving the user stack can (and often does) fault.
2748          */
2749         if (unlikely(in_nmi()))
2750                 return;
2751
2752         /*
2753          * prevent recursion, since the user stack tracing may
2754          * trigger other kernel events.
2755          */
2756         preempt_disable();
2757         if (__this_cpu_read(user_stack_count))
2758                 goto out;
2759
2760         __this_cpu_inc(user_stack_count);
2761
2762         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2763                                             sizeof(*entry), flags, pc);
2764         if (!event)
2765                 goto out_drop_count;
2766         entry   = ring_buffer_event_data(event);
2767
2768         entry->tgid             = current->tgid;
2769         memset(&entry->caller, 0, sizeof(entry->caller));
2770
2771         trace.nr_entries        = 0;
2772         trace.max_entries       = FTRACE_STACK_ENTRIES;
2773         trace.skip              = 0;
2774         trace.entries           = entry->caller;
2775
2776         save_stack_trace_user(&trace);
2777         if (!call_filter_check_discard(call, entry, buffer, event))
2778                 __buffer_unlock_commit(buffer, event);
2779
2780  out_drop_count:
2781         __this_cpu_dec(user_stack_count);
2782  out:
2783         preempt_enable();
2784 }
2785
2786 #ifdef UNUSED
2787 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2788 {
2789         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2790 }
2791 #endif /* UNUSED */
2792
2793 #endif /* CONFIG_STACKTRACE */
2794
2795 /* created for use with alloc_percpu */
2796 struct trace_buffer_struct {
2797         int nesting;
2798         char buffer[4][TRACE_BUF_SIZE];
2799 };
2800
2801 static struct trace_buffer_struct *trace_percpu_buffer;
2802
2803 /*
2804  * This allows for lockless recording.  If we're nested too deeply, then
2805  * this returns NULL.
2806  */
2807 static char *get_trace_buf(void)
2808 {
2809         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2810
2811         if (!buffer || buffer->nesting >= 4)
2812                 return NULL;
2813
2814         buffer->nesting++;
2815
2816         /* Interrupts must see nesting incremented before we use the buffer */
2817         barrier();
2818         return &buffer->buffer[buffer->nesting][0];
2819 }
2820
2821 static void put_trace_buf(void)
2822 {
2823         /* Don't let the decrement of nesting leak before this */
2824         barrier();
2825         this_cpu_dec(trace_percpu_buffer->nesting);
2826 }
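
/*
 * The expected calling pattern, as used by trace_vbprintk() and
 * __trace_array_vprintk() below (the formatting step is illustrative):
 *
 *	char *tbuffer = get_trace_buf();
 *
 *	if (!tbuffer)
 *		return 0;	(nested more than four levels deep, give up)
 *	[ format at most TRACE_BUF_SIZE bytes into tbuffer ]
 *	put_trace_buf();
 *
 * The four nesting slots per CPU are enough for the normal, softirq, irq
 * and NMI contexts that can stack on one CPU.
 */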
2827
2828 static int alloc_percpu_trace_buffer(void)
2829 {
2830         struct trace_buffer_struct *buffers;
2831
2832         buffers = alloc_percpu(struct trace_buffer_struct);
2833         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2834                 return -ENOMEM;
2835
2836         trace_percpu_buffer = buffers;
2837         return 0;
2838 }
2839
2840 static int buffers_allocated;
2841
2842 void trace_printk_init_buffers(void)
2843 {
2844         if (buffers_allocated)
2845                 return;
2846
2847         if (alloc_percpu_trace_buffer())
2848                 return;
2849
2850         /* trace_printk() is for debug use only. Don't use it in production. */
2851
2852         pr_warn("\n");
2853         pr_warn("**********************************************************\n");
2854         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2855         pr_warn("**                                                      **\n");
2856         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2857         pr_warn("**                                                      **\n");
2858         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2859         pr_warn("** unsafe for production use.                           **\n");
2860         pr_warn("**                                                      **\n");
2861         pr_warn("** If you see this message and you are not debugging    **\n");
2862         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2863         pr_warn("**                                                      **\n");
2864         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2865         pr_warn("**********************************************************\n");
2866
2867         /* Expand the buffers to set size */
2868         tracing_update_buffers();
2869
2870         buffers_allocated = 1;
2871
2872         /*
2873          * trace_printk_init_buffers() can be called by modules.
2874          * If that happens, then we need to start cmdline recording
2875          * directly here. If the global_trace.trace_buffer.buffer is
2876          * already allocated here, then this was called by module code.
2877          */
2878         if (global_trace.trace_buffer.buffer)
2879                 tracing_start_cmdline_record();
2880 }
2881
2882 void trace_printk_start_comm(void)
2883 {
2884         /* Start tracing comms if trace printk is set */
2885         if (!buffers_allocated)
2886                 return;
2887         tracing_start_cmdline_record();
2888 }
2889
2890 static void trace_printk_start_stop_comm(int enabled)
2891 {
2892         if (!buffers_allocated)
2893                 return;
2894
2895         if (enabled)
2896                 tracing_start_cmdline_record();
2897         else
2898                 tracing_stop_cmdline_record();
2899 }
2900
2901 /**
2902  * trace_vbprintk - write binary msg to tracing buffer
2903  *
2904  */
2905 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2906 {
2907         struct trace_event_call *call = &event_bprint;
2908         struct ring_buffer_event *event;
2909         struct ring_buffer *buffer;
2910         struct trace_array *tr = &global_trace;
2911         struct bprint_entry *entry;
2912         unsigned long flags;
2913         char *tbuffer;
2914         int len = 0, size, pc;
2915
2916         if (unlikely(tracing_selftest_running || tracing_disabled))
2917                 return 0;
2918
2919         /* Don't pollute graph traces with trace_vprintk internals */
2920         pause_graph_tracing();
2921
2922         pc = preempt_count();
2923         preempt_disable_notrace();
2924
2925         tbuffer = get_trace_buf();
2926         if (!tbuffer) {
2927                 len = 0;
2928                 goto out_nobuffer;
2929         }
2930
2931         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2932
2933         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2934                 goto out;
2935
2936         local_save_flags(flags);
2937         size = sizeof(*entry) + sizeof(u32) * len;
2938         buffer = tr->trace_buffer.buffer;
2939         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2940                                             flags, pc);
2941         if (!event)
2942                 goto out;
2943         entry = ring_buffer_event_data(event);
2944         entry->ip                       = ip;
2945         entry->fmt                      = fmt;
2946
2947         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2948         if (!call_filter_check_discard(call, entry, buffer, event)) {
2949                 __buffer_unlock_commit(buffer, event);
2950                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2951         }
2952
2953 out:
2954         put_trace_buf();
2955
2956 out_nobuffer:
2957         preempt_enable_notrace();
2958         unpause_graph_tracing();
2959
2960         return len;
2961 }
2962 EXPORT_SYMBOL_GPL(trace_vbprintk);
2963
2964 __printf(3, 0)
2965 static int
2966 __trace_array_vprintk(struct ring_buffer *buffer,
2967                       unsigned long ip, const char *fmt, va_list args)
2968 {
2969         struct trace_event_call *call = &event_print;
2970         struct ring_buffer_event *event;
2971         int len = 0, size, pc;
2972         struct print_entry *entry;
2973         unsigned long flags;
2974         char *tbuffer;
2975
2976         if (tracing_disabled || tracing_selftest_running)
2977                 return 0;
2978
2979         /* Don't pollute graph traces with trace_vprintk internals */
2980         pause_graph_tracing();
2981
2982         pc = preempt_count();
2983         preempt_disable_notrace();
2984
2985
2986         tbuffer = get_trace_buf();
2987         if (!tbuffer) {
2988                 len = 0;
2989                 goto out_nobuffer;
2990         }
2991
2992         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2993
2994         local_save_flags(flags);
2995         size = sizeof(*entry) + len + 1;
2996         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2997                                             flags, pc);
2998         if (!event)
2999                 goto out;
3000         entry = ring_buffer_event_data(event);
3001         entry->ip = ip;
3002
3003         memcpy(&entry->buf, tbuffer, len + 1);
3004         if (!call_filter_check_discard(call, entry, buffer, event)) {
3005                 __buffer_unlock_commit(buffer, event);
3006                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3007         }
3008
3009 out:
3010         put_trace_buf();
3011
3012 out_nobuffer:
3013         preempt_enable_notrace();
3014         unpause_graph_tracing();
3015
3016         return len;
3017 }
3018
3019 __printf(3, 0)
3020 int trace_array_vprintk(struct trace_array *tr,
3021                         unsigned long ip, const char *fmt, va_list args)
3022 {
3023         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3024 }
3025
3026 __printf(3, 0)
3027 int trace_array_printk(struct trace_array *tr,
3028                        unsigned long ip, const char *fmt, ...)
3029 {
3030         int ret;
3031         va_list ap;
3032
3033         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3034                 return 0;
3035
3036         va_start(ap, fmt);
3037         ret = trace_array_vprintk(tr, ip, fmt, ap);
3038         va_end(ap);
3039         return ret;
3040 }
3041
3042 __printf(3, 4)
3043 int trace_array_printk_buf(struct ring_buffer *buffer,
3044                            unsigned long ip, const char *fmt, ...)
3045 {
3046         int ret;
3047         va_list ap;
3048
3049         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3050                 return 0;
3051
3052         va_start(ap, fmt);
3053         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3054         va_end(ap);
3055         return ret;
3056 }
3057
3058 __printf(2, 0)
3059 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3060 {
3061         return trace_array_vprintk(&global_trace, ip, fmt, args);
3062 }
3063 EXPORT_SYMBOL_GPL(trace_vprintk);
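
/*
 * These are the workers behind the trace_printk() family; a typical
 * debugging call site (the arguments here are hypothetical) looks like:
 *
 *	trace_printk("processing %s, state=%d\n", req->name, req->state);
 *
 * The text ends up in the ring buffer, readable from the "trace" and
 * "trace_pipe" files, rather than in the printk log.
 */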
3064
3065 static void trace_iterator_increment(struct trace_iterator *iter)
3066 {
3067         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3068
3069         iter->idx++;
3070         if (buf_iter)
3071                 ring_buffer_read(buf_iter, NULL);
3072 }
3073
3074 static struct trace_entry *
3075 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3076                 unsigned long *lost_events)
3077 {
3078         struct ring_buffer_event *event;
3079         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3080
3081         if (buf_iter)
3082                 event = ring_buffer_iter_peek(buf_iter, ts);
3083         else
3084                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3085                                          lost_events);
3086
3087         if (event) {
3088                 iter->ent_size = ring_buffer_event_length(event);
3089                 return ring_buffer_event_data(event);
3090         }
3091         iter->ent_size = 0;
3092         return NULL;
3093 }
3094
3095 static struct trace_entry *
3096 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3097                   unsigned long *missing_events, u64 *ent_ts)
3098 {
3099         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3100         struct trace_entry *ent, *next = NULL;
3101         unsigned long lost_events = 0, next_lost = 0;
3102         int cpu_file = iter->cpu_file;
3103         u64 next_ts = 0, ts;
3104         int next_cpu = -1;
3105         int next_size = 0;
3106         int cpu;
3107
3108         /*
3109          * If we are in a per_cpu trace file, don't bother iterating over
3110          * all cpus; peek directly at the requested one.
3111          */
3112         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3113                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3114                         return NULL;
3115                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3116                 if (ent_cpu)
3117                         *ent_cpu = cpu_file;
3118
3119                 return ent;
3120         }
3121
3122         for_each_tracing_cpu(cpu) {
3123
3124                 if (ring_buffer_empty_cpu(buffer, cpu))
3125                         continue;
3126
3127                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3128
3129                 /*
3130                  * Pick the entry with the smallest timestamp:
3131                  */
3132                 if (ent && (!next || ts < next_ts)) {
3133                         next = ent;
3134                         next_cpu = cpu;
3135                         next_ts = ts;
3136                         next_lost = lost_events;
3137                         next_size = iter->ent_size;
3138                 }
3139         }
3140
3141         iter->ent_size = next_size;
3142
3143         if (ent_cpu)
3144                 *ent_cpu = next_cpu;
3145
3146         if (ent_ts)
3147                 *ent_ts = next_ts;
3148
3149         if (missing_events)
3150                 *missing_events = next_lost;
3151
3152         return next;
3153 }
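
/*
 * In effect the per-cpu buffers are merge sorted on the fly: if cpu0's
 * next entry has a timestamp of 2000 and cpu1's has 1500, the cpu1 entry
 * is returned first and cpu0's entry is considered again on the next call.
 */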
3154
3155 /* Find the next real entry, without updating the iterator itself */
3156 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3157                                           int *ent_cpu, u64 *ent_ts)
3158 {
3159         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3160 }
3161
3162 /* Find the next real entry, and increment the iterator to the next entry */
3163 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3164 {
3165         iter->ent = __find_next_entry(iter, &iter->cpu,
3166                                       &iter->lost_events, &iter->ts);
3167
3168         if (iter->ent)
3169                 trace_iterator_increment(iter);
3170
3171         return iter->ent ? iter : NULL;
3172 }
3173
3174 static void trace_consume(struct trace_iterator *iter)
3175 {
3176         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3177                             &iter->lost_events);
3178 }
3179
3180 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3181 {
3182         struct trace_iterator *iter = m->private;
3183         int i = (int)*pos;
3184         void *ent;
3185
3186         WARN_ON_ONCE(iter->leftover);
3187
3188         (*pos)++;
3189
3190         /* can't go backwards */
3191         if (iter->idx > i)
3192                 return NULL;
3193
3194         if (iter->idx < 0)
3195                 ent = trace_find_next_entry_inc(iter);
3196         else
3197                 ent = iter;
3198
3199         while (ent && iter->idx < i)
3200                 ent = trace_find_next_entry_inc(iter);
3201
3202         iter->pos = *pos;
3203
3204         return ent;
3205 }
3206
3207 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3208 {
3209         struct ring_buffer_event *event;
3210         struct ring_buffer_iter *buf_iter;
3211         unsigned long entries = 0;
3212         u64 ts;
3213
3214         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3215
3216         buf_iter = trace_buffer_iter(iter, cpu);
3217         if (!buf_iter)
3218                 return;
3219
3220         ring_buffer_iter_reset(buf_iter);
3221
3222         /*
3223          * With the max latency tracers, it is possible that a reset
3224          * never took place on a cpu. This is evident by the timestamp
3225          * being before the start of the buffer.
3226          */
3227         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3228                 if (ts >= iter->trace_buffer->time_start)
3229                         break;
3230                 entries++;
3231                 ring_buffer_read(buf_iter, NULL);
3232         }
3233
3234         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3235 }
3236
3237 /*
3238  * The current tracer is copied to avoid holding a global lock
3239  * all around.
3240  */
3241 static void *s_start(struct seq_file *m, loff_t *pos)
3242 {
3243         struct trace_iterator *iter = m->private;
3244         struct trace_array *tr = iter->tr;
3245         int cpu_file = iter->cpu_file;
3246         void *p = NULL;
3247         loff_t l = 0;
3248         int cpu;
3249
3250         /*
3251          * Copy the tracer to avoid using a global lock all around.
3252          * iter->trace is a copy of current_trace, so the name pointers
3253          * may be compared instead of calling strcmp(), as iter->trace->name
3254          * will point to the same string as current_trace->name.
3255          */
3256         mutex_lock(&trace_types_lock);
3257         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3258                 *iter->trace = *tr->current_trace;
3259         mutex_unlock(&trace_types_lock);
3260
3261 #ifdef CONFIG_TRACER_MAX_TRACE
3262         if (iter->snapshot && iter->trace->use_max_tr)
3263                 return ERR_PTR(-EBUSY);
3264 #endif
3265
3266         if (!iter->snapshot)
3267                 atomic_inc(&trace_record_taskinfo_disabled);
3268
3269         if (*pos != iter->pos) {
3270                 iter->ent = NULL;
3271                 iter->cpu = 0;
3272                 iter->idx = -1;
3273
3274                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3275                         for_each_tracing_cpu(cpu)
3276                                 tracing_iter_reset(iter, cpu);
3277                 } else
3278                         tracing_iter_reset(iter, cpu_file);
3279
3280                 iter->leftover = 0;
3281                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3282                         ;
3283
3284         } else {
3285                 /*
3286                  * If we overflowed the seq_file before, then we want
3287                  * to just reuse the trace_seq buffer again.
3288                  */
3289                 if (iter->leftover)
3290                         p = iter;
3291                 else {
3292                         l = *pos - 1;
3293                         p = s_next(m, p, &l);
3294                 }
3295         }
3296
3297         trace_event_read_lock();
3298         trace_access_lock(cpu_file);
3299         return p;
3300 }
3301
3302 static void s_stop(struct seq_file *m, void *p)
3303 {
3304         struct trace_iterator *iter = m->private;
3305
3306 #ifdef CONFIG_TRACER_MAX_TRACE
3307         if (iter->snapshot && iter->trace->use_max_tr)
3308                 return;
3309 #endif
3310
3311         if (!iter->snapshot)
3312                 atomic_dec(&trace_record_taskinfo_disabled);
3313
3314         trace_access_unlock(iter->cpu_file);
3315         trace_event_read_unlock();
3316 }
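
/*
 * Informal sketch of how seq_file drives the "trace" file with the
 * callbacks above (a reader's summary, not literal control flow):
 *
 *   open("trace")      -> __tracing_open() builds the iterator
 *   each read() chunk  -> s_start() -> s_show() / s_next() ... -> s_stop()
 *
 * s_start() either walks forward again to the requested *pos or, when the
 * previous s_show() overflowed the seq_file buffer, simply hands back the
 * iterator so the leftover trace_seq contents get flushed first.
 */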
3317
3318 static void
3319 get_total_entries(struct trace_buffer *buf,
3320                   unsigned long *total, unsigned long *entries)
3321 {
3322         unsigned long count;
3323         int cpu;
3324
3325         *total = 0;
3326         *entries = 0;
3327
3328         for_each_tracing_cpu(cpu) {
3329                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3330                 /*
3331                  * If this buffer has skipped entries, then we hold all
3332                  * entries for the trace and we need to ignore the
3333                  * ones before the time stamp.
3334                  */
3335                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3336                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3337                         /* total is the same as the entries */
3338                         *total += count;
3339                 } else
3340                         *total += count +
3341                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3342                 *entries += count;
3343         }
3344 }
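
/*
 * Worked example of the accounting above (numbers are illustrative):
 * if a CPU's buffer reports 100 entries and tracing_iter_reset() marked
 * 20 of them as skipped (time-stamped before time_start), that CPU adds
 * 80 to both *entries and *total.  A CPU with no skipped entries and
 * 5 overruns instead adds its full count to *entries and count + 5 to
 * *total, i.e. events written even though some were already overwritten.
 */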
3345
3346 static void print_lat_help_header(struct seq_file *m)
3347 {
3348         seq_puts(m, "#                  _------=> CPU#            \n"
3349                     "#                 / _-----=> irqs-off        \n"
3350                     "#                | / _----=> need-resched    \n"
3351                     "#                || / _---=> hardirq/softirq \n"
3352                     "#                ||| / _--=> preempt-depth   \n"
3353                     "#                |||| /     delay            \n"
3354                     "#  cmd     pid   ||||| time  |   caller      \n"
3355                     "#     \\   /      |||||  \\    |   /         \n");
3356 }
3357
3358 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3359 {
3360         unsigned long total;
3361         unsigned long entries;
3362
3363         get_total_entries(buf, &total, &entries);
3364         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3365                    entries, total, num_online_cpus());
3366         seq_puts(m, "#\n");
3367 }
3368
3369 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3370                                    unsigned int flags)
3371 {
3372         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3373
3374         print_event_info(buf, m);
3375
3376         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3377         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3378 }
3379
3380 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3381                                        unsigned int flags)
3382 {
3383         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3384         const char tgid_space[] = "          ";
3385         const char space[] = "  ";
3386
3387         print_event_info(buf, m);
3388
3389         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3390                    tgid ? tgid_space : space);
3391         seq_printf(m, "#                          %s / _----=> need-resched\n",
3392                    tgid ? tgid_space : space);
3393         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3394                    tgid ? tgid_space : space);
3395         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3396                    tgid ? tgid_space : space);
3397         seq_printf(m, "#                          %s||| /     delay\n",
3398                    tgid ? tgid_space : space);
3399         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3400                    tgid ? "   TGID   " : space);
3401         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3402                    tgid ? "     |    " : space);
3403 }
3404
3405 void
3406 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3407 {
3408         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3409         struct trace_buffer *buf = iter->trace_buffer;
3410         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3411         struct tracer *type = iter->trace;
3412         unsigned long entries;
3413         unsigned long total;
3414         const char *name;
3415
3416         name = type->name;
3417
3418         get_total_entries(buf, &total, &entries);
3419
3420         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3421                    name, UTS_RELEASE);
3422         seq_puts(m, "# -----------------------------------"
3423                  "---------------------------------\n");
3424         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3425                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3426                    nsecs_to_usecs(data->saved_latency),
3427                    entries,
3428                    total,
3429                    buf->cpu,
3430 #if defined(CONFIG_PREEMPT_NONE)
3431                    "server",
3432 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3433                    "desktop",
3434 #elif defined(CONFIG_PREEMPT)
3435                    "preempt",
3436 #else
3437                    "unknown",
3438 #endif
3439                    /* These are reserved for later use */
3440                    0, 0, 0, 0);
3441 #ifdef CONFIG_SMP
3442         seq_printf(m, " #P:%d)\n", num_online_cpus());
3443 #else
3444         seq_puts(m, ")\n");
3445 #endif
3446         seq_puts(m, "#    -----------------\n");
3447         seq_printf(m, "#    | task: %.16s-%d "
3448                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3449                    data->comm, data->pid,
3450                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3451                    data->policy, data->rt_priority);
3452         seq_puts(m, "#    -----------------\n");
3453
3454         if (data->critical_start) {
3455                 seq_puts(m, "#  => started at: ");
3456                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3457                 trace_print_seq(m, &iter->seq);
3458                 seq_puts(m, "\n#  => ended at:   ");
3459                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3460                 trace_print_seq(m, &iter->seq);
3461                 seq_puts(m, "\n#\n");
3462         }
3463
3464         seq_puts(m, "#\n");
3465 }
3466
3467 static void test_cpu_buff_start(struct trace_iterator *iter)
3468 {
3469         struct trace_seq *s = &iter->seq;
3470         struct trace_array *tr = iter->tr;
3471
3472         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3473                 return;
3474
3475         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3476                 return;
3477
3478         if (cpumask_available(iter->started) &&
3479             cpumask_test_cpu(iter->cpu, iter->started))
3480                 return;
3481
3482         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3483                 return;
3484
3485         if (cpumask_available(iter->started))
3486                 cpumask_set_cpu(iter->cpu, iter->started);
3487
3488         /* Don't print started cpu buffer for the first entry of the trace */
3489         if (iter->idx > 1)
3490                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3491                                 iter->cpu);
3492 }
3493
3494 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3495 {
3496         struct trace_array *tr = iter->tr;
3497         struct trace_seq *s = &iter->seq;
3498         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3499         struct trace_entry *entry;
3500         struct trace_event *event;
3501
3502         entry = iter->ent;
3503
3504         test_cpu_buff_start(iter);
3505
3506         event = ftrace_find_event(entry->type);
3507
3508         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3509                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3510                         trace_print_lat_context(iter);
3511                 else
3512                         trace_print_context(iter);
3513         }
3514
3515         if (trace_seq_has_overflowed(s))
3516                 return TRACE_TYPE_PARTIAL_LINE;
3517
3518         if (event)
3519                 return event->funcs->trace(iter, sym_flags, event);
3520
3521         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3522
3523         return trace_handle_return(s);
3524 }
3525
3526 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3527 {
3528         struct trace_array *tr = iter->tr;
3529         struct trace_seq *s = &iter->seq;
3530         struct trace_entry *entry;
3531         struct trace_event *event;
3532
3533         entry = iter->ent;
3534
3535         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3536                 trace_seq_printf(s, "%d %d %llu ",
3537                                  entry->pid, iter->cpu, iter->ts);
3538
3539         if (trace_seq_has_overflowed(s))
3540                 return TRACE_TYPE_PARTIAL_LINE;
3541
3542         event = ftrace_find_event(entry->type);
3543         if (event)
3544                 return event->funcs->raw(iter, 0, event);
3545
3546         trace_seq_printf(s, "%d ?\n", entry->type);
3547
3548         return trace_handle_return(s);
3549 }
3550
3551 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3552 {
3553         struct trace_array *tr = iter->tr;
3554         struct trace_seq *s = &iter->seq;
3555         unsigned char newline = '\n';
3556         struct trace_entry *entry;
3557         struct trace_event *event;
3558
3559         entry = iter->ent;
3560
3561         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3562                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3563                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3564                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3565                 if (trace_seq_has_overflowed(s))
3566                         return TRACE_TYPE_PARTIAL_LINE;
3567         }
3568
3569         event = ftrace_find_event(entry->type);
3570         if (event) {
3571                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3572                 if (ret != TRACE_TYPE_HANDLED)
3573                         return ret;
3574         }
3575
3576         SEQ_PUT_FIELD(s, newline);
3577
3578         return trace_handle_return(s);
3579 }
3580
3581 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3582 {
3583         struct trace_array *tr = iter->tr;
3584         struct trace_seq *s = &iter->seq;
3585         struct trace_entry *entry;
3586         struct trace_event *event;
3587
3588         entry = iter->ent;
3589
3590         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3591                 SEQ_PUT_FIELD(s, entry->pid);
3592                 SEQ_PUT_FIELD(s, iter->cpu);
3593                 SEQ_PUT_FIELD(s, iter->ts);
3594                 if (trace_seq_has_overflowed(s))
3595                         return TRACE_TYPE_PARTIAL_LINE;
3596         }
3597
3598         event = ftrace_find_event(entry->type);
3599         return event ? event->funcs->binary(iter, 0, event) :
3600                 TRACE_TYPE_HANDLED;
3601 }
3602
3603 int trace_empty(struct trace_iterator *iter)
3604 {
3605         struct ring_buffer_iter *buf_iter;
3606         int cpu;
3607
3608         /* If we are looking at one CPU buffer, only check that one */
3609         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3610                 cpu = iter->cpu_file;
3611                 buf_iter = trace_buffer_iter(iter, cpu);
3612                 if (buf_iter) {
3613                         if (!ring_buffer_iter_empty(buf_iter))
3614                                 return 0;
3615                 } else {
3616                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617                                 return 0;
3618                 }
3619                 return 1;
3620         }
3621
3622         for_each_tracing_cpu(cpu) {
3623                 buf_iter = trace_buffer_iter(iter, cpu);
3624                 if (buf_iter) {
3625                         if (!ring_buffer_iter_empty(buf_iter))
3626                                 return 0;
3627                 } else {
3628                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3629                                 return 0;
3630                 }
3631         }
3632
3633         return 1;
3634 }
3635
3636 /*  Called with trace_event_read_lock() held. */
3637 enum print_line_t print_trace_line(struct trace_iterator *iter)
3638 {
3639         struct trace_array *tr = iter->tr;
3640         unsigned long trace_flags = tr->trace_flags;
3641         enum print_line_t ret;
3642
3643         if (iter->lost_events) {
3644                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3645                                  iter->cpu, iter->lost_events);
3646                 if (trace_seq_has_overflowed(&iter->seq))
3647                         return TRACE_TYPE_PARTIAL_LINE;
3648         }
3649
3650         if (iter->trace && iter->trace->print_line) {
3651                 ret = iter->trace->print_line(iter);
3652                 if (ret != TRACE_TYPE_UNHANDLED)
3653                         return ret;
3654         }
3655
3656         if (iter->ent->type == TRACE_BPUTS &&
3657                         trace_flags & TRACE_ITER_PRINTK &&
3658                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3659                 return trace_print_bputs_msg_only(iter);
3660
3661         if (iter->ent->type == TRACE_BPRINT &&
3662                         trace_flags & TRACE_ITER_PRINTK &&
3663                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3664                 return trace_print_bprintk_msg_only(iter);
3665
3666         if (iter->ent->type == TRACE_PRINT &&
3667                         trace_flags & TRACE_ITER_PRINTK &&
3668                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3669                 return trace_print_printk_msg_only(iter);
3670
3671         if (trace_flags & TRACE_ITER_BIN)
3672                 return print_bin_fmt(iter);
3673
3674         if (trace_flags & TRACE_ITER_HEX)
3675                 return print_hex_fmt(iter);
3676
3677         if (trace_flags & TRACE_ITER_RAW)
3678                 return print_raw_fmt(iter);
3679
3680         return print_trace_fmt(iter);
3681 }
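
/*
 * Selection order above, roughly: a tracer's own print_line() callback,
 * then the printk msg-only shortcuts, then the bin/hex/raw options, and
 * finally the default formatted output.  For example (assuming tracefs
 * is mounted and writable):
 *
 *   echo hex > trace_options      # events rendered via print_hex_fmt()
 *   echo nohex > trace_options    # back to the default formatting
 */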
3682
3683 void trace_latency_header(struct seq_file *m)
3684 {
3685         struct trace_iterator *iter = m->private;
3686         struct trace_array *tr = iter->tr;
3687
3688         /* print nothing if the buffers are empty */
3689         if (trace_empty(iter))
3690                 return;
3691
3692         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3693                 print_trace_header(m, iter);
3694
3695         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3696                 print_lat_help_header(m);
3697 }
3698
3699 void trace_default_header(struct seq_file *m)
3700 {
3701         struct trace_iterator *iter = m->private;
3702         struct trace_array *tr = iter->tr;
3703         unsigned long trace_flags = tr->trace_flags;
3704
3705         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3706                 return;
3707
3708         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3709                 /* print nothing if the buffers are empty */
3710                 if (trace_empty(iter))
3711                         return;
3712                 print_trace_header(m, iter);
3713                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3714                         print_lat_help_header(m);
3715         } else {
3716                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3717                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3718                                 print_func_help_header_irq(iter->trace_buffer,
3719                                                            m, trace_flags);
3720                         else
3721                                 print_func_help_header(iter->trace_buffer, m,
3722                                                        trace_flags);
3723                 }
3724         }
3725 }
3726
3727 static void test_ftrace_alive(struct seq_file *m)
3728 {
3729         if (!ftrace_is_dead())
3730                 return;
3731         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3732                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3733 }
3734
3735 #ifdef CONFIG_TRACER_MAX_TRACE
3736 static void show_snapshot_main_help(struct seq_file *m)
3737 {
3738         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3739                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740                     "#                      Takes a snapshot of the main buffer.\n"
3741                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3742                     "#                      (Doesn't have to be '2', works with any number that\n"
3743                     "#                       is not a '0' or '1')\n");
3744 }
3745
3746 static void show_snapshot_percpu_help(struct seq_file *m)
3747 {
3748         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3749 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3750         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3751                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3752 #else
3753         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3754                     "#                     Must use main snapshot file to allocate.\n");
3755 #endif
3756         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3757                     "#                      (Doesn't have to be '2', works with any number that\n"
3758                     "#                       is not a '0' or '1')\n");
3759 }
3760
3761 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3762 {
3763         if (iter->tr->allocated_snapshot)
3764                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3765         else
3766                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3767
3768         seq_puts(m, "# Snapshot commands:\n");
3769         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3770                 show_snapshot_main_help(m);
3771         else
3772                 show_snapshot_percpu_help(m);
3773 }
3774 #else
3775 /* Should never be called */
3776 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3777 #endif
3778
3779 static int s_show(struct seq_file *m, void *v)
3780 {
3781         struct trace_iterator *iter = v;
3782         int ret;
3783
3784         if (iter->ent == NULL) {
3785                 if (iter->tr) {
3786                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3787                         seq_puts(m, "#\n");
3788                         test_ftrace_alive(m);
3789                 }
3790                 if (iter->snapshot && trace_empty(iter))
3791                         print_snapshot_help(m, iter);
3792                 else if (iter->trace && iter->trace->print_header)
3793                         iter->trace->print_header(m);
3794                 else
3795                         trace_default_header(m);
3796
3797         } else if (iter->leftover) {
3798                 /*
3799                  * If we filled the seq_file buffer earlier, we
3800                  * want to just show it now.
3801                  */
3802                 ret = trace_print_seq(m, &iter->seq);
3803
3804                 /* ret should this time be zero, but you never know */
3805                 iter->leftover = ret;
3806
3807         } else {
3808                 print_trace_line(iter);
3809                 ret = trace_print_seq(m, &iter->seq);
3810                 /*
3811                  * If we overflow the seq_file buffer, then it will
3812                  * ask us for this data again at start up.
3813                  * Use that instead.
3814                  *  ret is 0 if seq_file write succeeded.
3815                  *        -1 otherwise.
3816                  */
3817                 iter->leftover = ret;
3818         }
3819
3820         return 0;
3821 }
3822
3823 /*
3824  * Should be used after trace_array_get(); trace_types_lock
3825  * ensures that i_cdev was already initialized.
3826  */
3827 static inline int tracing_get_cpu(struct inode *inode)
3828 {
3829         if (inode->i_cdev) /* See trace_create_cpu_file() */
3830                 return (long)inode->i_cdev - 1;
3831         return RING_BUFFER_ALL_CPUS;
3832 }
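
/*
 * The decoding above relies on the per-CPU trace files stashing
 * (cpu + 1) in i_cdev at creation time (see trace_create_cpu_file()),
 * which leaves a NULL i_cdev free to mean "all CPUs".  E.g. the inode
 * behind per_cpu/cpu2/trace decodes back to cpu == 2.
 */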
3833
3834 static const struct seq_operations tracer_seq_ops = {
3835         .start          = s_start,
3836         .next           = s_next,
3837         .stop           = s_stop,
3838         .show           = s_show,
3839 };
3840
3841 static struct trace_iterator *
3842 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3843 {
3844         struct trace_array *tr = inode->i_private;
3845         struct trace_iterator *iter;
3846         int cpu;
3847
3848         if (tracing_disabled)
3849                 return ERR_PTR(-ENODEV);
3850
3851         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3852         if (!iter)
3853                 return ERR_PTR(-ENOMEM);
3854
3855         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3856                                     GFP_KERNEL);
3857         if (!iter->buffer_iter)
3858                 goto release;
3859
3860         /*
3861          * We make a copy of the current tracer to avoid concurrent
3862          * changes on it while we are reading.
3863          */
3864         mutex_lock(&trace_types_lock);
3865         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3866         if (!iter->trace)
3867                 goto fail;
3868
3869         *iter->trace = *tr->current_trace;
3870
3871         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3872                 goto fail;
3873
3874         iter->tr = tr;
3875
3876 #ifdef CONFIG_TRACER_MAX_TRACE
3877         /* Currently only the top directory has a snapshot */
3878         if (tr->current_trace->print_max || snapshot)
3879                 iter->trace_buffer = &tr->max_buffer;
3880         else
3881 #endif
3882                 iter->trace_buffer = &tr->trace_buffer;
3883         iter->snapshot = snapshot;
3884         iter->pos = -1;
3885         iter->cpu_file = tracing_get_cpu(inode);
3886         mutex_init(&iter->mutex);
3887
3888         /* Notify the tracer early; before we stop tracing. */
3889         if (iter->trace && iter->trace->open)
3890                 iter->trace->open(iter);
3891
3892         /* Annotate start of buffers if we had overruns */
3893         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3894                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3895
3896         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3897         if (trace_clocks[tr->clock_id].in_ns)
3898                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3899
3900         /* stop the trace while dumping if we are not opening "snapshot" */
3901         if (!iter->snapshot)
3902                 tracing_stop_tr(tr);
3903
3904         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3905                 for_each_tracing_cpu(cpu) {
3906                         iter->buffer_iter[cpu] =
3907                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3908                 }
3909                 ring_buffer_read_prepare_sync();
3910                 for_each_tracing_cpu(cpu) {
3911                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3912                         tracing_iter_reset(iter, cpu);
3913                 }
3914         } else {
3915                 cpu = iter->cpu_file;
3916                 iter->buffer_iter[cpu] =
3917                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3918                 ring_buffer_read_prepare_sync();
3919                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3920                 tracing_iter_reset(iter, cpu);
3921         }
3922
3923         mutex_unlock(&trace_types_lock);
3924
3925         return iter;
3926
3927  fail:
3928         mutex_unlock(&trace_types_lock);
3929         kfree(iter->trace);
3930         kfree(iter->buffer_iter);
3931 release:
3932         seq_release_private(inode, file);
3933         return ERR_PTR(-ENOMEM);
3934 }
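
/*
 * Behavioural note (informal): opening "trace" for reading pauses
 * recording via tracing_stop_tr() until release, while opening with
 * snapshot == true reads tr->max_buffer and leaves live tracing alone.
 * Roughly:
 *
 *   cat trace      # recording paused while the file is held open
 *   cat snapshot   # reads the snapshot buffer, live tracing continues
 */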
3935
3936 int tracing_open_generic(struct inode *inode, struct file *filp)
3937 {
3938         if (tracing_disabled)
3939                 return -ENODEV;
3940
3941         filp->private_data = inode->i_private;
3942         return 0;
3943 }
3944
3945 bool tracing_is_disabled(void)
3946 {
3947         return tracing_disabled ? true : false;
3948 }
3949
3950 /*
3951  * Open and update trace_array ref count.
3952  * Must have the current trace_array passed to it.
3953  */
3954 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3955 {
3956         struct trace_array *tr = inode->i_private;
3957
3958         if (tracing_disabled)
3959                 return -ENODEV;
3960
3961         if (trace_array_get(tr) < 0)
3962                 return -ENODEV;
3963
3964         filp->private_data = inode->i_private;
3965
3966         return 0;
3967 }
3968
3969 static int tracing_release(struct inode *inode, struct file *file)
3970 {
3971         struct trace_array *tr = inode->i_private;
3972         struct seq_file *m = file->private_data;
3973         struct trace_iterator *iter;
3974         int cpu;
3975
3976         if (!(file->f_mode & FMODE_READ)) {
3977                 trace_array_put(tr);
3978                 return 0;
3979         }
3980
3981         /* Writes do not use seq_file */
3982         iter = m->private;
3983         mutex_lock(&trace_types_lock);
3984
3985         for_each_tracing_cpu(cpu) {
3986                 if (iter->buffer_iter[cpu])
3987                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3988         }
3989
3990         if (iter->trace && iter->trace->close)
3991                 iter->trace->close(iter);
3992
3993         if (!iter->snapshot)
3994                 /* reenable tracing if it was previously enabled */
3995                 tracing_start_tr(tr);
3996
3997         __trace_array_put(tr);
3998
3999         mutex_unlock(&trace_types_lock);
4000
4001         mutex_destroy(&iter->mutex);
4002         free_cpumask_var(iter->started);
4003         kfree(iter->trace);
4004         kfree(iter->buffer_iter);
4005         seq_release_private(inode, file);
4006
4007         return 0;
4008 }
4009
4010 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4011 {
4012         struct trace_array *tr = inode->i_private;
4013
4014         trace_array_put(tr);
4015         return 0;
4016 }
4017
4018 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4019 {
4020         struct trace_array *tr = inode->i_private;
4021
4022         trace_array_put(tr);
4023
4024         return single_release(inode, file);
4025 }
4026
4027 static int tracing_open(struct inode *inode, struct file *file)
4028 {
4029         struct trace_array *tr = inode->i_private;
4030         struct trace_iterator *iter;
4031         int ret = 0;
4032
4033         if (trace_array_get(tr) < 0)
4034                 return -ENODEV;
4035
4036         /* If this file was open for write, then erase contents */
4037         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4038                 int cpu = tracing_get_cpu(inode);
4039                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4040
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042                 if (tr->current_trace->print_max)
4043                         trace_buf = &tr->max_buffer;
4044 #endif
4045
4046                 if (cpu == RING_BUFFER_ALL_CPUS)
4047                         tracing_reset_online_cpus(trace_buf);
4048                 else
4049                         tracing_reset(trace_buf, cpu);
4050         }
4051
4052         if (file->f_mode & FMODE_READ) {
4053                 iter = __tracing_open(inode, file, false);
4054                 if (IS_ERR(iter))
4055                         ret = PTR_ERR(iter);
4056                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4057                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4058         }
4059
4060         if (ret < 0)
4061                 trace_array_put(tr);
4062
4063         return ret;
4064 }
4065
4066 /*
4067  * Some tracers are not suitable for instance buffers.
4068  * A tracer is always available for the global array (toplevel)
4069  * or if it explicitly states that it is.
4070  */
4071 static bool
4072 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4073 {
4074         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4075 }
4076
4077 /* Find the next tracer that this trace array may use */
4078 static struct tracer *
4079 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4080 {
4081         while (t && !trace_ok_for_array(t, tr))
4082                 t = t->next;
4083
4084         return t;
4085 }
4086
4087 static void *
4088 t_next(struct seq_file *m, void *v, loff_t *pos)
4089 {
4090         struct trace_array *tr = m->private;
4091         struct tracer *t = v;
4092
4093         (*pos)++;
4094
4095         if (t)
4096                 t = get_tracer_for_array(tr, t->next);
4097
4098         return t;
4099 }
4100
4101 static void *t_start(struct seq_file *m, loff_t *pos)
4102 {
4103         struct trace_array *tr = m->private;
4104         struct tracer *t;
4105         loff_t l = 0;
4106
4107         mutex_lock(&trace_types_lock);
4108
4109         t = get_tracer_for_array(tr, trace_types);
4110         for (; t && l < *pos; t = t_next(m, t, &l))
4111                         ;
4112
4113         return t;
4114 }
4115
4116 static void t_stop(struct seq_file *m, void *p)
4117 {
4118         mutex_unlock(&trace_types_lock);
4119 }
4120
4121 static int t_show(struct seq_file *m, void *v)
4122 {
4123         struct tracer *t = v;
4124
4125         if (!t)
4126                 return 0;
4127
4128         seq_puts(m, t->name);
4129         if (t->next)
4130                 seq_putc(m, ' ');
4131         else
4132                 seq_putc(m, '\n');
4133
4134         return 0;
4135 }
4136
4137 static const struct seq_operations show_traces_seq_ops = {
4138         .start          = t_start,
4139         .next           = t_next,
4140         .stop           = t_stop,
4141         .show           = t_show,
4142 };
4143
4144 static int show_traces_open(struct inode *inode, struct file *file)
4145 {
4146         struct trace_array *tr = inode->i_private;
4147         struct seq_file *m;
4148         int ret;
4149
4150         if (tracing_disabled)
4151                 return -ENODEV;
4152
4153         ret = seq_open(file, &show_traces_seq_ops);
4154         if (ret)
4155                 return ret;
4156
4157         m = file->private_data;
4158         m->private = tr;
4159
4160         return 0;
4161 }
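
/*
 * This seq_file backs the "available_tracers" file; reading it lists
 * every tracer this instance may use on a single line, e.g. (exact
 * output depends on the kernel configuration):
 *
 *   # cat available_tracers
 *   function_graph function nop
 */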
4162
4163 static ssize_t
4164 tracing_write_stub(struct file *filp, const char __user *ubuf,
4165                    size_t count, loff_t *ppos)
4166 {
4167         return count;
4168 }
4169
4170 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4171 {
4172         int ret;
4173
4174         if (file->f_mode & FMODE_READ)
4175                 ret = seq_lseek(file, offset, whence);
4176         else
4177                 file->f_pos = ret = 0;
4178
4179         return ret;
4180 }
4181
4182 static const struct file_operations tracing_fops = {
4183         .open           = tracing_open,
4184         .read           = seq_read,
4185         .write          = tracing_write_stub,
4186         .llseek         = tracing_lseek,
4187         .release        = tracing_release,
4188 };
4189
4190 static const struct file_operations show_traces_fops = {
4191         .open           = show_traces_open,
4192         .read           = seq_read,
4193         .release        = seq_release,
4194         .llseek         = seq_lseek,
4195 };
4196
4197 static ssize_t
4198 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4199                      size_t count, loff_t *ppos)
4200 {
4201         struct trace_array *tr = file_inode(filp)->i_private;
4202         char *mask_str;
4203         int len;
4204
4205         len = snprintf(NULL, 0, "%*pb\n",
4206                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4207         mask_str = kmalloc(len, GFP_KERNEL);
4208         if (!mask_str)
4209                 return -ENOMEM;
4210
4211         len = snprintf(mask_str, len, "%*pb\n",
4212                        cpumask_pr_args(tr->tracing_cpumask));
4213         if (len >= count) {
4214                 count = -EINVAL;
4215                 goto out_err;
4216         }
4217         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4218
4219 out_err:
4220         kfree(mask_str);
4221
4222         return count;
4223 }
4224
4225 static ssize_t
4226 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4227                       size_t count, loff_t *ppos)
4228 {
4229         struct trace_array *tr = file_inode(filp)->i_private;
4230         cpumask_var_t tracing_cpumask_new;
4231         int err, cpu;
4232
4233         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4234                 return -ENOMEM;
4235
4236         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4237         if (err)
4238                 goto err_unlock;
4239
4240         local_irq_disable();
4241         arch_spin_lock(&tr->max_lock);
4242         for_each_tracing_cpu(cpu) {
4243                 /*
4244                  * Increase/decrease the disabled counter if we are
4245                  * about to flip a bit in the cpumask:
4246                  */
4247                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4248                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4249                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4250                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4251                 }
4252                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4253                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4254                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4255                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4256                 }
4257         }
4258         arch_spin_unlock(&tr->max_lock);
4259         local_irq_enable();
4260
4261         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4262         free_cpumask_var(tracing_cpumask_new);
4263
4264         return count;
4265
4266 err_unlock:
4267         free_cpumask_var(tracing_cpumask_new);
4268
4269         return err;
4270 }
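
/*
 * Usage sketch (the value is only an example): the mask is parsed as the
 * usual hex cpumask string, so on a 4-CPU system
 *
 *   echo 3 > tracing_cpumask
 *
 * keeps tracing on CPUs 0-1 and quiesces CPUs 2-3 through the disabled
 * counter / ring_buffer_record_disable_cpu() path above.
 */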
4271
4272 static const struct file_operations tracing_cpumask_fops = {
4273         .open           = tracing_open_generic_tr,
4274         .read           = tracing_cpumask_read,
4275         .write          = tracing_cpumask_write,
4276         .release        = tracing_release_generic_tr,
4277         .llseek         = generic_file_llseek,
4278 };
4279
4280 static int tracing_trace_options_show(struct seq_file *m, void *v)
4281 {
4282         struct tracer_opt *trace_opts;
4283         struct trace_array *tr = m->private;
4284         u32 tracer_flags;
4285         int i;
4286
4287         mutex_lock(&trace_types_lock);
4288         tracer_flags = tr->current_trace->flags->val;
4289         trace_opts = tr->current_trace->flags->opts;
4290
4291         for (i = 0; trace_options[i]; i++) {
4292                 if (tr->trace_flags & (1 << i))
4293                         seq_printf(m, "%s\n", trace_options[i]);
4294                 else
4295                         seq_printf(m, "no%s\n", trace_options[i]);
4296         }
4297
4298         for (i = 0; trace_opts[i].name; i++) {
4299                 if (tracer_flags & trace_opts[i].bit)
4300                         seq_printf(m, "%s\n", trace_opts[i].name);
4301                 else
4302                         seq_printf(m, "no%s\n", trace_opts[i].name);
4303         }
4304         mutex_unlock(&trace_types_lock);
4305
4306         return 0;
4307 }
4308
4309 static int __set_tracer_option(struct trace_array *tr,
4310                                struct tracer_flags *tracer_flags,
4311                                struct tracer_opt *opts, int neg)
4312 {
4313         struct tracer *trace = tracer_flags->trace;
4314         int ret;
4315
4316         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4317         if (ret)
4318                 return ret;
4319
4320         if (neg)
4321                 tracer_flags->val &= ~opts->bit;
4322         else
4323                 tracer_flags->val |= opts->bit;
4324         return 0;
4325 }
4326
4327 /* Try to assign a tracer specific option */
4328 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4329 {
4330         struct tracer *trace = tr->current_trace;
4331         struct tracer_flags *tracer_flags = trace->flags;
4332         struct tracer_opt *opts = NULL;
4333         int i;
4334
4335         for (i = 0; tracer_flags->opts[i].name; i++) {
4336                 opts = &tracer_flags->opts[i];
4337
4338                 if (strcmp(cmp, opts->name) == 0)
4339                         return __set_tracer_option(tr, trace->flags, opts, neg);
4340         }
4341
4342         return -EINVAL;
4343 }
4344
4345 /* Some tracers require overwrite to stay enabled */
4346 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4347 {
4348         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4349                 return -1;
4350
4351         return 0;
4352 }
4353
4354 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4355 {
4356         /* do nothing if flag is already set */
4357         if (!!(tr->trace_flags & mask) == !!enabled)
4358                 return 0;
4359
4360         /* Give the tracer a chance to approve the change */
4361         if (tr->current_trace->flag_changed)
4362                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4363                         return -EINVAL;
4364
4365         if (enabled)
4366                 tr->trace_flags |= mask;
4367         else
4368                 tr->trace_flags &= ~mask;
4369
4370         if (mask == TRACE_ITER_RECORD_CMD)
4371                 trace_event_enable_cmd_record(enabled);
4372
4373         if (mask == TRACE_ITER_RECORD_TGID) {
4374                 if (!tgid_map)
4375                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4376                                            sizeof(*tgid_map),
4377                                            GFP_KERNEL);
4378                 if (!tgid_map) {
4379                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4380                         return -ENOMEM;
4381                 }
4382
4383                 trace_event_enable_tgid_record(enabled);
4384         }
4385
4386         if (mask == TRACE_ITER_EVENT_FORK)
4387                 trace_event_follow_fork(tr, enabled);
4388
4389         if (mask == TRACE_ITER_FUNC_FORK)
4390                 ftrace_pid_follow_fork(tr, enabled);
4391
4392         if (mask == TRACE_ITER_OVERWRITE) {
4393                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4394 #ifdef CONFIG_TRACER_MAX_TRACE
4395                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4396 #endif
4397         }
4398
4399         if (mask == TRACE_ITER_PRINTK) {
4400                 trace_printk_start_stop_comm(enabled);
4401                 trace_printk_control(enabled);
4402         }
4403
4404         return 0;
4405 }
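
/*
 * Besides flipping the bit, several flags have the side effects handled
 * above.  Illustrative examples (not an exhaustive list):
 *
 *   echo 1 > options/record-tgid   # allocates tgid_map if not yet allocated
 *   echo 0 > options/overwrite     # a full ring buffer stops recording
 *                                  # instead of overwriting the oldest events
 */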
4406
4407 static int trace_set_options(struct trace_array *tr, char *option)
4408 {
4409         char *cmp;
4410         int neg = 0;
4411         int ret;
4412         size_t orig_len = strlen(option);
4413         int len;
4414
4415         cmp = strstrip(option);
4416
4417         len = str_has_prefix(cmp, "no");
4418         if (len)
4419                 neg = 1;
4420
4421         cmp += len;
4422
4423         mutex_lock(&trace_types_lock);
4424
4425         ret = match_string(trace_options, -1, cmp);
4426         /* If no option could be set, test the specific tracer options */
4427         if (ret < 0)
4428                 ret = set_tracer_option(tr, cmp, neg);
4429         else
4430                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4431
4432         mutex_unlock(&trace_types_lock);
4433
4434         /*
4435          * If the first trailing whitespace is replaced with '\0' by strstrip,
4436          * turn it back into a space.
4437          */
4438         if (orig_len > strlen(option))
4439                 option[strlen(option)] = ' ';
4440
4441         return ret;
4442 }
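
/*
 * For example (illustrative): writing "noprint-parent" through the
 * trace_options file arrives here with cmp == "print-parent" and
 * neg == 1 and matches the core trace_options[] table, whereas a
 * tracer-specific name such as "funcgraph-duration" (a function_graph
 * option) misses there and falls through to set_tracer_option().
 */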
4443
4444 static void __init apply_trace_boot_options(void)
4445 {
4446         char *buf = trace_boot_options_buf;
4447         char *option;
4448
4449         while (true) {
4450                 option = strsep(&buf, ",");
4451
4452                 if (!option)
4453                         break;
4454
4455                 if (*option)
4456                         trace_set_options(&global_trace, option);
4457
4458                 /* Put back the comma to allow this to be called again */
4459                 if (buf)
4460                         *(buf - 1) = ',';
4461         }
4462 }
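
/*
 * Boot-time example (illustrative): booting with
 *
 *   trace_options=sym-offset,noirq-info
 *
 * leaves that comma-separated list in trace_boot_options_buf, and the
 * loop above applies one option at a time, restoring the commas so the
 * same buffer can be parsed again later.
 */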
4463
4464 static ssize_t
4465 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4466                         size_t cnt, loff_t *ppos)
4467 {
4468         struct seq_file *m = filp->private_data;
4469         struct trace_array *tr = m->private;
4470         char buf[64];
4471         int ret;
4472
4473         if (cnt >= sizeof(buf))
4474                 return -EINVAL;
4475
4476         if (copy_from_user(buf, ubuf, cnt))
4477                 return -EFAULT;
4478
4479         buf[cnt] = 0;
4480
4481         ret = trace_set_options(tr, buf);
4482         if (ret < 0)
4483                 return ret;
4484
4485         *ppos += cnt;
4486
4487         return cnt;
4488 }
4489
4490 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4491 {
4492         struct trace_array *tr = inode->i_private;
4493         int ret;
4494
4495         if (tracing_disabled)
4496                 return -ENODEV;
4497
4498         if (trace_array_get(tr) < 0)
4499                 return -ENODEV;
4500
4501         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4502         if (ret < 0)
4503                 trace_array_put(tr);
4504
4505         return ret;
4506 }
4507
4508 static const struct file_operations tracing_iter_fops = {
4509         .open           = tracing_trace_options_open,
4510         .read           = seq_read,
4511         .llseek         = seq_lseek,
4512         .release        = tracing_single_release_tr,
4513         .write          = tracing_trace_options_write,
4514 };
4515
4516 static const char readme_msg[] =
4517         "tracing mini-HOWTO:\n\n"
4518         "# echo 0 > tracing_on : quick way to disable tracing\n"
4519         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4520         " Important files:\n"
4521         "  trace\t\t\t- The static contents of the buffer\n"
4522         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4523         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4524         "  current_tracer\t- function and latency tracers\n"
4525         "  available_tracers\t- list of configured tracers for current_tracer\n"
4526         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4527         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4528         "  trace_clock\t\t- change the clock used to order events\n"
4529         "       local:   Per cpu clock but may not be synced across CPUs\n"
4530         "      global:   Synced across CPUs but slows tracing down.\n"
4531         "     counter:   Not a clock, but just an increment\n"
4532         "      uptime:   Jiffy counter from time of boot\n"
4533         "        perf:   Same clock that perf events use\n"
4534 #ifdef CONFIG_X86_64
4535         "     x86-tsc:   TSC cycle counter\n"
4536 #endif
4537         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4538         "       delta:   Delta difference against a buffer-wide timestamp\n"
4539         "    absolute:   Absolute (standalone) timestamp\n"
4540         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4541         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4542         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4543         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4544         "\t\t\t  Remove sub-buffer with rmdir\n"
4545         "  trace_options\t\t- Set format or modify how tracing happens\n"
4546         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4547         "\t\t\t  option name\n"
4548         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4549 #ifdef CONFIG_DYNAMIC_FTRACE
4550         "\n  available_filter_functions - list of functions that can be filtered on\n"
4551         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4552         "\t\t\t  functions\n"
4553         "\t     accepts: func_full_name or glob-matching-pattern\n"
4554         "\t     modules: Can select a group via module\n"
4555         "\t      Format: :mod:<module-name>\n"
4556         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4557         "\t    triggers: a command to perform when function is hit\n"
4558         "\t      Format: <function>:<trigger>[:count]\n"
4559         "\t     trigger: traceon, traceoff\n"
4560         "\t\t      enable_event:<system>:<event>\n"
4561         "\t\t      disable_event:<system>:<event>\n"
4562 #ifdef CONFIG_STACKTRACE
4563         "\t\t      stacktrace\n"
4564 #endif
4565 #ifdef CONFIG_TRACER_SNAPSHOT
4566         "\t\t      snapshot\n"
4567 #endif
4568         "\t\t      dump\n"
4569         "\t\t      cpudump\n"
4570         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4571         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4572         "\t     The first one will disable tracing every time do_fault is hit\n"
4573         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4574         "\t       The first time do trap is hit and it disables tracing, the\n"
4575         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4576         "\t       the counter will not decrement. It only decrements when the\n"
4577         "\t       trigger did work\n"
4578         "\t     To remove trigger without count:\n"
4579         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4580         "\t     To remove trigger with a count:\n"
4581         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4582         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4583         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4584         "\t    modules: Can select a group via module command :mod:\n"
4585         "\t    Does not accept triggers\n"
4586 #endif /* CONFIG_DYNAMIC_FTRACE */
4587 #ifdef CONFIG_FUNCTION_TRACER
4588         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4589         "\t\t    (function)\n"
4590 #endif
4591 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4592         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4593         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4594         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4595 #endif
4596 #ifdef CONFIG_TRACER_SNAPSHOT
4597         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4598         "\t\t\t  snapshot buffer. Read the contents for more\n"
4599         "\t\t\t  information\n"
4600 #endif
4601 #ifdef CONFIG_STACK_TRACER
4602         "  stack_trace\t\t- Shows the max stack trace when active\n"
4603         "  stack_max_size\t- Shows current max stack size that was traced\n"
4604         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4605         "\t\t\t  new trace)\n"
4606 #ifdef CONFIG_DYNAMIC_FTRACE
4607         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4608         "\t\t\t  traces\n"
4609 #endif
4610 #endif /* CONFIG_STACK_TRACER */
4611 #ifdef CONFIG_DYNAMIC_EVENTS
4612         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4613         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4614 #endif
4615 #ifdef CONFIG_KPROBE_EVENTS
4616         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4617         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4618 #endif
4619 #ifdef CONFIG_UPROBE_EVENTS
4620         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4621         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4622 #endif
4623 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4624         "\t  accepts: event-definitions (one definition per line)\n"
4625         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4626         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4627 #ifdef CONFIG_HIST_TRIGGERS
4628         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4629 #endif
4630         "\t           -:[<group>/]<event>\n"
4631 #ifdef CONFIG_KPROBE_EVENTS
4632         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4633   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4634 #endif
4635 #ifdef CONFIG_UPROBE_EVENTS
4636   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4637 #endif
4638         "\t     args: <name>=fetcharg[:type]\n"
4639         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4640 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4641         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4642 #else
4643         "\t           $stack<index>, $stack, $retval, $comm\n"
4644 #endif
4645         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4646         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4647         "\t           <type>\\[<array-size>\\]\n"
4648 #ifdef CONFIG_HIST_TRIGGERS
4649         "\t    field: <stype> <name>;\n"
4650         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4651         "\t           [unsigned] char/int/long\n"
4652 #endif
4653 #endif
4654         "  events/\t\t- Directory containing all trace event subsystems:\n"
4655         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4656         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4657         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4658         "\t\t\t  events\n"
4659         "      filter\t\t- If set, only events passing filter are traced\n"
4660         "  events/<system>/<event>/\t- Directory containing control files for\n"
4661         "\t\t\t  <event>:\n"
4662         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4663         "      filter\t\t- If set, only events passing filter are traced\n"
4664         "      trigger\t\t- If set, a command to perform when event is hit\n"
4665         "\t    Format: <trigger>[:count][if <filter>]\n"
4666         "\t   trigger: traceon, traceoff\n"
4667         "\t            enable_event:<system>:<event>\n"
4668         "\t            disable_event:<system>:<event>\n"
4669 #ifdef CONFIG_HIST_TRIGGERS
4670         "\t            enable_hist:<system>:<event>\n"
4671         "\t            disable_hist:<system>:<event>\n"
4672 #endif
4673 #ifdef CONFIG_STACKTRACE
4674         "\t\t    stacktrace\n"
4675 #endif
4676 #ifdef CONFIG_TRACER_SNAPSHOT
4677         "\t\t    snapshot\n"
4678 #endif
4679 #ifdef CONFIG_HIST_TRIGGERS
4680         "\t\t    hist (see below)\n"
4681 #endif
4682         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4683         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4684         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4685         "\t                  events/block/block_unplug/trigger\n"
4686         "\t   The first disables tracing every time block_unplug is hit.\n"
4687         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4688         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4689         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4690         "\t   Like function triggers, the counter is only decremented if it\n"
4691         "\t    enabled or disabled tracing.\n"
4692         "\t   To remove a trigger without a count:\n"
4693         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4694         "\t   To remove a trigger with a count:\n"
4695         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4696         "\t   Filters can be ignored when removing a trigger.\n"
4697 #ifdef CONFIG_HIST_TRIGGERS
4698         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4699         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4700         "\t            [:values=<field1[,field2,...]>]\n"
4701         "\t            [:sort=<field1[,field2,...]>]\n"
4702         "\t            [:size=#entries]\n"
4703         "\t            [:pause][:continue][:clear]\n"
4704         "\t            [:name=histname1]\n"
4705         "\t            [if <filter>]\n\n"
4706         "\t    When a matching event is hit, an entry is added to a hash\n"
4707         "\t    table using the key(s) and value(s) named, and the value of a\n"
4708         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4709         "\t    correspond to fields in the event's format description.  Keys\n"
4710         "\t    can be any field, or the special string 'stacktrace'.\n"
4711         "\t    Compound keys consisting of up to two fields can be specified\n"
4712         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4713         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4714         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4715         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4716         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4717         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4718         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4719         "\t    its histogram data will be shared with other triggers of the\n"
4720         "\t    same name, and trigger hits will update this common data.\n\n"
4721         "\t    Reading the 'hist' file for the event will dump the hash\n"
4722         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4723         "\t    triggers attached to an event, there will be a table for each\n"
4724         "\t    trigger in the output.  The table displayed for a named\n"
4725         "\t    trigger will be the same as any other instance having the\n"
4726         "\t    same name.  The default format used to display a given field\n"
4727         "\t    can be modified by appending any of the following modifiers\n"
4728         "\t    to the field name, as applicable:\n\n"
4729         "\t            .hex        display a number as a hex value\n"
4730         "\t            .sym        display an address as a symbol\n"
4731         "\t            .sym-offset display an address as a symbol and offset\n"
4732         "\t            .execname   display a common_pid as a program name\n"
4733         "\t            .syscall    display a syscall id as a syscall name\n"
4734         "\t            .log2       display log2 value rather than raw number\n"
4735         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4736         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4737         "\t    trigger or to start a hist trigger but not log any events\n"
4738         "\t    until told to do so.  'continue' can be used to start or\n"
4739         "\t    restart a paused hist trigger.\n\n"
4740         "\t    The 'clear' parameter will clear the contents of a running\n"
4741         "\t    hist trigger and leave its current paused/active state\n"
4742         "\t    unchanged.\n\n"
4743         "\t    The enable_hist and disable_hist triggers can be used to\n"
4744         "\t    have one event conditionally start and stop another event's\n"
4745         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4746         "\t    the enable_event and disable_event triggers.\n"
4747 #endif
4748 ;
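/*
 * Illustrative example of the hist trigger syntax documented above, run
 * from the tracefs mount point (commonly /sys/kernel/tracing).  The event
 * and field names below are only examples:
 *
 *   echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *           > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 */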
4749
4750 static ssize_t
4751 tracing_readme_read(struct file *filp, char __user *ubuf,
4752                        size_t cnt, loff_t *ppos)
4753 {
4754         return simple_read_from_buffer(ubuf, cnt, ppos,
4755                                         readme_msg, strlen(readme_msg));
4756 }
4757
4758 static const struct file_operations tracing_readme_fops = {
4759         .open           = tracing_open_generic,
4760         .read           = tracing_readme_read,
4761         .llseek         = generic_file_llseek,
4762 };
4763
4764 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4765 {
4766         int *ptr = v;
4767
4768         if (*pos || m->count)
4769                 ptr++;
4770
4771         (*pos)++;
4772
4773         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4774                 if (trace_find_tgid(*ptr))
4775                         return ptr;
4776         }
4777
4778         return NULL;
4779 }
4780
4781 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4782 {
4783         void *v;
4784         loff_t l = 0;
4785
4786         if (!tgid_map)
4787                 return NULL;
4788
4789         v = &tgid_map[0];
4790         while (l <= *pos) {
4791                 v = saved_tgids_next(m, v, &l);
4792                 if (!v)
4793                         return NULL;
4794         }
4795
4796         return v;
4797 }
4798
4799 static void saved_tgids_stop(struct seq_file *m, void *v)
4800 {
4801 }
4802
4803 static int saved_tgids_show(struct seq_file *m, void *v)
4804 {
4805         int pid = (int *)v - tgid_map;
4806
4807         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4808         return 0;
4809 }
4810
4811 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4812         .start          = saved_tgids_start,
4813         .stop           = saved_tgids_stop,
4814         .next           = saved_tgids_next,
4815         .show           = saved_tgids_show,
4816 };
4817
4818 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4819 {
4820         if (tracing_disabled)
4821                 return -ENODEV;
4822
4823         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4824 }
4825
4826
4827 static const struct file_operations tracing_saved_tgids_fops = {
4828         .open           = tracing_saved_tgids_open,
4829         .read           = seq_read,
4830         .llseek         = seq_lseek,
4831         .release        = seq_release,
4832 };
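/*
 * Usage sketch for the "saved_tgids" file (paths relative to the tracefs
 * mount point; the map is only filled while the record-tgid option is set):
 *
 *   echo 1 > options/record-tgid
 *   cat saved_tgids            # one "<pid> <tgid>" pair per line
 */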
4833
4834 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4835 {
4836         unsigned int *ptr = v;
4837
4838         if (*pos || m->count)
4839                 ptr++;
4840
4841         (*pos)++;
4842
4843         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4844              ptr++) {
4845                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4846                         continue;
4847
4848                 return ptr;
4849         }
4850
4851         return NULL;
4852 }
4853
4854 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4855 {
4856         void *v;
4857         loff_t l = 0;
4858
4859         preempt_disable();
4860         arch_spin_lock(&trace_cmdline_lock);
4861
4862         v = &savedcmd->map_cmdline_to_pid[0];
4863         while (l <= *pos) {
4864                 v = saved_cmdlines_next(m, v, &l);
4865                 if (!v)
4866                         return NULL;
4867         }
4868
4869         return v;
4870 }
4871
4872 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4873 {
4874         arch_spin_unlock(&trace_cmdline_lock);
4875         preempt_enable();
4876 }
4877
4878 static int saved_cmdlines_show(struct seq_file *m, void *v)
4879 {
4880         char buf[TASK_COMM_LEN];
4881         unsigned int *pid = v;
4882
4883         __trace_find_cmdline(*pid, buf);
4884         seq_printf(m, "%d %s\n", *pid, buf);
4885         return 0;
4886 }
4887
4888 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4889         .start          = saved_cmdlines_start,
4890         .next           = saved_cmdlines_next,
4891         .stop           = saved_cmdlines_stop,
4892         .show           = saved_cmdlines_show,
4893 };
4894
4895 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4896 {
4897         if (tracing_disabled)
4898                 return -ENODEV;
4899
4900         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4901 }
4902
4903 static const struct file_operations tracing_saved_cmdlines_fops = {
4904         .open           = tracing_saved_cmdlines_open,
4905         .read           = seq_read,
4906         .llseek         = seq_lseek,
4907         .release        = seq_release,
4908 };
4909
4910 static ssize_t
4911 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4912                                  size_t cnt, loff_t *ppos)
4913 {
4914         char buf[64];
4915         int r;
4916
4917         arch_spin_lock(&trace_cmdline_lock);
4918         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4919         arch_spin_unlock(&trace_cmdline_lock);
4920
4921         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4922 }
4923
4924 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4925 {
4926         kfree(s->saved_cmdlines);
4927         kfree(s->map_cmdline_to_pid);
4928         kfree(s);
4929 }
4930
4931 static int tracing_resize_saved_cmdlines(unsigned int val)
4932 {
4933         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4934
4935         s = kmalloc(sizeof(*s), GFP_KERNEL);
4936         if (!s)
4937                 return -ENOMEM;
4938
4939         if (allocate_cmdlines_buffer(val, s) < 0) {
4940                 kfree(s);
4941                 return -ENOMEM;
4942         }
4943
4944         arch_spin_lock(&trace_cmdline_lock);
4945         savedcmd_temp = savedcmd;
4946         savedcmd = s;
4947         arch_spin_unlock(&trace_cmdline_lock);
4948         free_saved_cmdlines_buffer(savedcmd_temp);
4949
4950         return 0;
4951 }
4952
4953 static ssize_t
4954 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4955                                   size_t cnt, loff_t *ppos)
4956 {
4957         unsigned long val;
4958         int ret;
4959
4960         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4961         if (ret)
4962                 return ret;
4963
4964         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4965         if (!val || val > PID_MAX_DEFAULT)
4966                 return -EINVAL;
4967
4968         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4969         if (ret < 0)
4970                 return ret;
4971
4972         *ppos += cnt;
4973
4974         return cnt;
4975 }
4976
4977 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4978         .open           = tracing_open_generic,
4979         .read           = tracing_saved_cmdlines_size_read,
4980         .write          = tracing_saved_cmdlines_size_write,
4981 };
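/*
 * Usage sketch for the saved_cmdlines files (paths relative to the
 * tracefs mount point):
 *
 *   cat saved_cmdlines         # one "<pid> <comm>" pair per line
 *   cat saved_cmdlines_size    # current number of cached entries
 *   echo 4096 > saved_cmdlines_size
 */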
4982
4983 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4984 static union trace_eval_map_item *
4985 update_eval_map(union trace_eval_map_item *ptr)
4986 {
4987         if (!ptr->map.eval_string) {
4988                 if (ptr->tail.next) {
4989                         ptr = ptr->tail.next;
4990                         /* Set ptr to the next real item (skip head) */
4991                         ptr++;
4992                 } else
4993                         return NULL;
4994         }
4995         return ptr;
4996 }
4997
4998 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4999 {
5000         union trace_eval_map_item *ptr = v;
5001
5002         /*
5003          * Paranoid! If ptr points to end, we don't want to increment past it.
5004          * This really should never happen.
5005          */
5006         ptr = update_eval_map(ptr);
5007         if (WARN_ON_ONCE(!ptr))
5008                 return NULL;
5009
5010         ptr++;
5011
5012         (*pos)++;
5013
5014         ptr = update_eval_map(ptr);
5015
5016         return ptr;
5017 }
5018
5019 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5020 {
5021         union trace_eval_map_item *v;
5022         loff_t l = 0;
5023
5024         mutex_lock(&trace_eval_mutex);
5025
5026         v = trace_eval_maps;
5027         if (v)
5028                 v++;
5029
5030         while (v && l < *pos) {
5031                 v = eval_map_next(m, v, &l);
5032         }
5033
5034         return v;
5035 }
5036
5037 static void eval_map_stop(struct seq_file *m, void *v)
5038 {
5039         mutex_unlock(&trace_eval_mutex);
5040 }
5041
5042 static int eval_map_show(struct seq_file *m, void *v)
5043 {
5044         union trace_eval_map_item *ptr = v;
5045
5046         seq_printf(m, "%s %ld (%s)\n",
5047                    ptr->map.eval_string, ptr->map.eval_value,
5048                    ptr->map.system);
5049
5050         return 0;
5051 }
5052
5053 static const struct seq_operations tracing_eval_map_seq_ops = {
5054         .start          = eval_map_start,
5055         .next           = eval_map_next,
5056         .stop           = eval_map_stop,
5057         .show           = eval_map_show,
5058 };
5059
5060 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5061 {
5062         if (tracing_disabled)
5063                 return -ENODEV;
5064
5065         return seq_open(filp, &tracing_eval_map_seq_ops);
5066 }
5067
5068 static const struct file_operations tracing_eval_map_fops = {
5069         .open           = tracing_eval_map_open,
5070         .read           = seq_read,
5071         .llseek         = seq_lseek,
5072         .release        = seq_release,
5073 };
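/*
 * Reading the "eval_map" file dumps every registered eval (enum/sizeof)
 * mapping, one per line, in the format printed by eval_map_show():
 *
 *   cat eval_map               # "<eval string> <value> (<system>)"
 */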
5074
5075 static inline union trace_eval_map_item *
5076 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5077 {
5078         /* Return tail of array given the head */
5079         return ptr + ptr->head.length + 1;
5080 }
5081
5082 static void
5083 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5084                            int len)
5085 {
5086         struct trace_eval_map **stop;
5087         struct trace_eval_map **map;
5088         union trace_eval_map_item *map_array;
5089         union trace_eval_map_item *ptr;
5090
5091         stop = start + len;
5092
5093         /*
5094          * The trace_eval_maps contains the map plus a head and tail item,
5095          * where the head holds the module and length of array, and the
5096          * tail holds a pointer to the next list.
5097          */
5098         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5099         if (!map_array) {
5100                 pr_warn("Unable to allocate trace eval mapping\n");
5101                 return;
5102         }
5103
5104         mutex_lock(&trace_eval_mutex);
5105
5106         if (!trace_eval_maps)
5107                 trace_eval_maps = map_array;
5108         else {
5109                 ptr = trace_eval_maps;
5110                 for (;;) {
5111                         ptr = trace_eval_jmp_to_tail(ptr);
5112                         if (!ptr->tail.next)
5113                                 break;
5114                         ptr = ptr->tail.next;
5115
5116                 }
5117                 ptr->tail.next = map_array;
5118         }
5119         map_array->head.mod = mod;
5120         map_array->head.length = len;
5121         map_array++;
5122
5123         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5124                 map_array->map = **map;
5125                 map_array++;
5126         }
5127         memset(map_array, 0, sizeof(*map_array));
5128
5129         mutex_unlock(&trace_eval_mutex);
5130 }
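/*
 * Conceptual layout of the array built above: a head and a tail item
 * bracket the 'len' copied maps, and the zeroed tail links to the next
 * module's array (if any):
 *
 *   [ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ tail: next ]
 */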
5131
5132 static void trace_create_eval_file(struct dentry *d_tracer)
5133 {
5134         trace_create_file("eval_map", 0444, d_tracer,
5135                           NULL, &tracing_eval_map_fops);
5136 }
5137
5138 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5139 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5140 static inline void trace_insert_eval_map_file(struct module *mod,
5141                               struct trace_eval_map **start, int len) { }
5142 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5143
5144 static void trace_insert_eval_map(struct module *mod,
5145                                   struct trace_eval_map **start, int len)
5146 {
5147         struct trace_eval_map **map;
5148
5149         if (len <= 0)
5150                 return;
5151
5152         map = start;
5153
5154         trace_event_eval_update(map, len);
5155
5156         trace_insert_eval_map_file(mod, start, len);
5157 }
5158
5159 static ssize_t
5160 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5161                        size_t cnt, loff_t *ppos)
5162 {
5163         struct trace_array *tr = filp->private_data;
5164         char buf[MAX_TRACER_SIZE+2];
5165         int r;
5166
5167         mutex_lock(&trace_types_lock);
5168         r = sprintf(buf, "%s\n", tr->current_trace->name);
5169         mutex_unlock(&trace_types_lock);
5170
5171         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5172 }
5173
5174 int tracer_init(struct tracer *t, struct trace_array *tr)
5175 {
5176         tracing_reset_online_cpus(&tr->trace_buffer);
5177         return t->init(tr);
5178 }
5179
5180 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5181 {
5182         int cpu;
5183
5184         for_each_tracing_cpu(cpu)
5185                 per_cpu_ptr(buf->data, cpu)->entries = val;
5186 }
5187
5188 #ifdef CONFIG_TRACER_MAX_TRACE
5189 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5190 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5191                                         struct trace_buffer *size_buf, int cpu_id)
5192 {
5193         int cpu, ret = 0;
5194
5195         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5196                 for_each_tracing_cpu(cpu) {
5197                         ret = ring_buffer_resize(trace_buf->buffer,
5198                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5199                         if (ret < 0)
5200                                 break;
5201                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5202                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5203                 }
5204         } else {
5205                 ret = ring_buffer_resize(trace_buf->buffer,
5206                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5207                 if (ret == 0)
5208                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5209                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5210         }
5211
5212         return ret;
5213 }
5214 #endif /* CONFIG_TRACER_MAX_TRACE */
5215
5216 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5217                                         unsigned long size, int cpu)
5218 {
5219         int ret;
5220
5221         /*
5222          * If kernel or user changes the size of the ring buffer
5223          * we use the size that was given, and we can forget about
5224          * expanding it later.
5225          */
5226         ring_buffer_expanded = true;
5227
5228         /* May be called before buffers are initialized */
5229         if (!tr->trace_buffer.buffer)
5230                 return 0;
5231
5232         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5233         if (ret < 0)
5234                 return ret;
5235
5236 #ifdef CONFIG_TRACER_MAX_TRACE
5237         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5238             !tr->current_trace->use_max_tr)
5239                 goto out;
5240
5241         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5242         if (ret < 0) {
5243                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5244                                                      &tr->trace_buffer, cpu);
5245                 if (r < 0) {
5246                         /*
5247                          * AARGH! We are left with a max buffer of a
5248                          * different size!
5249                          * The max buffer is our "snapshot" buffer.
5250                          * When a tracer needs a snapshot (one of the
5251                          * latency tracers), it swaps the max buffer
5252                          * with the saved snapshot. We succeeded in
5253                          * updating the size of the main buffer, but failed
5254                          * to update the size of the max buffer. But when we
5255                          * tried to reset the main buffer to its original
5256                          * size, we failed there too. This is very unlikely
5257                          * to happen, but if it does, warn and kill all
5258                          * tracing.
5259                          */
5260                         WARN_ON(1);
5261                         tracing_disabled = 1;
5262                 }
5263                 return ret;
5264         }
5265
5266         if (cpu == RING_BUFFER_ALL_CPUS)
5267                 set_buffer_entries(&tr->max_buffer, size);
5268         else
5269                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5270
5271  out:
5272 #endif /* CONFIG_TRACER_MAX_TRACE */
5273
5274         if (cpu == RING_BUFFER_ALL_CPUS)
5275                 set_buffer_entries(&tr->trace_buffer, size);
5276         else
5277                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5278
5279         return ret;
5280 }
5281
5282 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5283                                           unsigned long size, int cpu_id)
5284 {
5285         int ret = size;
5286
5287         mutex_lock(&trace_types_lock);
5288
5289         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5290                 /* make sure, this cpu is enabled in the mask */
5291                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5292                         ret = -EINVAL;
5293                         goto out;
5294                 }
5295         }
5296
5297         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5298         if (ret < 0)
5299                 ret = -ENOMEM;
5300
5301 out:
5302         mutex_unlock(&trace_types_lock);
5303
5304         return ret;
5305 }
5306
5307
5308 /**
5309  * tracing_update_buffers - used by tracing facility to expand ring buffers
5310  *
5311  * To save on memory when tracing is never used on a system with it
5312  * configured in, the ring buffers are set to a minimum size. But once
5313  * a user starts to use the tracing facility, then they need to grow
5314  * to their default size.
5315  *
5316  * This function is to be called when a tracer is about to be used.
5317  */
5318 int tracing_update_buffers(void)
5319 {
5320         int ret = 0;
5321
5322         mutex_lock(&trace_types_lock);
5323         if (!ring_buffer_expanded)
5324                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5325                                                 RING_BUFFER_ALL_CPUS);
5326         mutex_unlock(&trace_types_lock);
5327
5328         return ret;
5329 }
5330
5331 struct trace_option_dentry;
5332
5333 static void
5334 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5335
5336 /*
5337  * Used to clear out the tracer before deletion of an instance.
5338  * Must have trace_types_lock held.
5339  */
5340 static void tracing_set_nop(struct trace_array *tr)
5341 {
5342         if (tr->current_trace == &nop_trace)
5343                 return;
5344
5345         tr->current_trace->enabled--;
5346
5347         if (tr->current_trace->reset)
5348                 tr->current_trace->reset(tr);
5349
5350         tr->current_trace = &nop_trace;
5351 }
5352
5353 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5354 {
5355         /* Only enable if the directory has been created already. */
5356         if (!tr->dir)
5357                 return;
5358
5359         create_trace_option_files(tr, t);
5360 }
5361
5362 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5363 {
5364         struct tracer *t;
5365 #ifdef CONFIG_TRACER_MAX_TRACE
5366         bool had_max_tr;
5367 #endif
5368         int ret = 0;
5369
5370         mutex_lock(&trace_types_lock);
5371
5372         if (!ring_buffer_expanded) {
5373                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5374                                                 RING_BUFFER_ALL_CPUS);
5375                 if (ret < 0)
5376                         goto out;
5377                 ret = 0;
5378         }
5379
5380         for (t = trace_types; t; t = t->next) {
5381                 if (strcmp(t->name, buf) == 0)
5382                         break;
5383         }
5384         if (!t) {
5385                 ret = -EINVAL;
5386                 goto out;
5387         }
5388         if (t == tr->current_trace)
5389                 goto out;
5390
5391         /* Some tracers won't work on kernel command line */
5392         if (system_state < SYSTEM_RUNNING && t->noboot) {
5393                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5394                         t->name);
5395                 goto out;
5396         }
5397
5398         /* Some tracers are only allowed for the top level buffer */
5399         if (!trace_ok_for_array(t, tr)) {
5400                 ret = -EINVAL;
5401                 goto out;
5402         }
5403
5404         /* If trace pipe files are being read, we can't change the tracer */
5405         if (tr->current_trace->ref) {
5406                 ret = -EBUSY;
5407                 goto out;
5408         }
5409
5410         trace_branch_disable();
5411
5412         tr->current_trace->enabled--;
5413
5414         if (tr->current_trace->reset)
5415                 tr->current_trace->reset(tr);
5416
5417         /* Current trace needs to be nop_trace before synchronize_rcu */
5418         tr->current_trace = &nop_trace;
5419
5420 #ifdef CONFIG_TRACER_MAX_TRACE
5421         had_max_tr = tr->allocated_snapshot;
5422
5423         if (had_max_tr && !t->use_max_tr) {
5424                 /*
5425                  * We need to make sure that the update_max_tr sees that
5426                  * current_trace changed to nop_trace to keep it from
5427                  * swapping the buffers after we resize it.
5428                  * The update_max_tr is called with interrupts disabled,
5429                  * so a synchronize_rcu() is sufficient.
5430                  */
5431                 synchronize_rcu();
5432                 free_snapshot(tr);
5433         }
5434 #endif
5435
5436 #ifdef CONFIG_TRACER_MAX_TRACE
5437         if (t->use_max_tr && !had_max_tr) {
5438                 ret = tracing_alloc_snapshot_instance(tr);
5439                 if (ret < 0)
5440                         goto out;
5441         }
5442 #endif
5443
5444         if (t->init) {
5445                 ret = tracer_init(t, tr);
5446                 if (ret)
5447                         goto out;
5448         }
5449
5450         tr->current_trace = t;
5451         tr->current_trace->enabled++;
5452         trace_branch_enable(tr);
5453  out:
5454         mutex_unlock(&trace_types_lock);
5455
5456         return ret;
5457 }
5458
5459 static ssize_t
5460 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5461                         size_t cnt, loff_t *ppos)
5462 {
5463         struct trace_array *tr = filp->private_data;
5464         char buf[MAX_TRACER_SIZE+1];
5465         int i;
5466         size_t ret;
5467         int err;
5468
5469         ret = cnt;
5470
5471         if (cnt > MAX_TRACER_SIZE)
5472                 cnt = MAX_TRACER_SIZE;
5473
5474         if (copy_from_user(buf, ubuf, cnt))
5475                 return -EFAULT;
5476
5477         buf[cnt] = 0;
5478
5479         /* strip ending whitespace. */
5480         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5481                 buf[i] = 0;
5482
5483         err = tracing_set_tracer(tr, buf);
5484         if (err)
5485                 return err;
5486
5487         *ppos += ret;
5488
5489         return ret;
5490 }
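/*
 * Usage sketch for the "current_tracer" file (paths relative to the
 * tracefs mount point; the "function" tracer assumes
 * CONFIG_FUNCTION_TRACER is enabled):
 *
 *   cat current_tracer         # name of the active tracer
 *   echo function > current_tracer
 *   echo nop > current_tracer  # switch back to no tracer
 */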
5491
5492 static ssize_t
5493 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5494                    size_t cnt, loff_t *ppos)
5495 {
5496         char buf[64];
5497         int r;
5498
5499         r = snprintf(buf, sizeof(buf), "%ld\n",
5500                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5501         if (r > sizeof(buf))
5502                 r = sizeof(buf);
5503         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5504 }
5505
5506 static ssize_t
5507 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5508                     size_t cnt, loff_t *ppos)
5509 {
5510         unsigned long val;
5511         int ret;
5512
5513         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5514         if (ret)
5515                 return ret;
5516
5517         *ptr = val * 1000;
5518
5519         return cnt;
5520 }
5521
5522 static ssize_t
5523 tracing_thresh_read(struct file *filp, char __user *ubuf,
5524                     size_t cnt, loff_t *ppos)
5525 {
5526         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5527 }
5528
5529 static ssize_t
5530 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5531                      size_t cnt, loff_t *ppos)
5532 {
5533         struct trace_array *tr = filp->private_data;
5534         int ret;
5535
5536         mutex_lock(&trace_types_lock);
5537         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5538         if (ret < 0)
5539                 goto out;
5540
5541         if (tr->current_trace->update_thresh) {
5542                 ret = tr->current_trace->update_thresh(tr);
5543                 if (ret < 0)
5544                         goto out;
5545         }
5546
5547         ret = cnt;
5548 out:
5549         mutex_unlock(&trace_types_lock);
5550
5551         return ret;
5552 }
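/*
 * Usage sketch for the "tracing_thresh" file: values are read and written
 * in microseconds and converted to nanoseconds internally; 0 means no
 * threshold:
 *
 *   echo 100 > tracing_thresh
 *   cat tracing_thresh
 */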
5553
5554 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5555
5556 static ssize_t
5557 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5558                      size_t cnt, loff_t *ppos)
5559 {
5560         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5561 }
5562
5563 static ssize_t
5564 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5565                       size_t cnt, loff_t *ppos)
5566 {
5567         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5568 }
5569
5570 #endif
5571
5572 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5573 {
5574         struct trace_array *tr = inode->i_private;
5575         struct trace_iterator *iter;
5576         int ret = 0;
5577
5578         if (tracing_disabled)
5579                 return -ENODEV;
5580
5581         if (trace_array_get(tr) < 0)
5582                 return -ENODEV;
5583
5584         mutex_lock(&trace_types_lock);
5585
5586         /* create a buffer to store the information to pass to userspace */
5587         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5588         if (!iter) {
5589                 ret = -ENOMEM;
5590                 __trace_array_put(tr);
5591                 goto out;
5592         }
5593
5594         trace_seq_init(&iter->seq);
5595         iter->trace = tr->current_trace;
5596
5597         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5598                 ret = -ENOMEM;
5599                 goto fail;
5600         }
5601
5602         /* trace pipe does not show start of buffer */
5603         cpumask_setall(iter->started);
5604
5605         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5606                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5607
5608         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5609         if (trace_clocks[tr->clock_id].in_ns)
5610                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5611
5612         iter->tr = tr;
5613         iter->trace_buffer = &tr->trace_buffer;
5614         iter->cpu_file = tracing_get_cpu(inode);
5615         mutex_init(&iter->mutex);
5616         filp->private_data = iter;
5617
5618         if (iter->trace->pipe_open)
5619                 iter->trace->pipe_open(iter);
5620
5621         nonseekable_open(inode, filp);
5622
5623         tr->current_trace->ref++;
5624 out:
5625         mutex_unlock(&trace_types_lock);
5626         return ret;
5627
5628 fail:
5629         /* iter->trace is not a copy; do not free tr->current_trace */
5630         kfree(iter);
5631         __trace_array_put(tr);
5632         mutex_unlock(&trace_types_lock);
5633         return ret;
5634 }
5635
5636 static int tracing_release_pipe(struct inode *inode, struct file *file)
5637 {
5638         struct trace_iterator *iter = file->private_data;
5639         struct trace_array *tr = inode->i_private;
5640
5641         mutex_lock(&trace_types_lock);
5642
5643         tr->current_trace->ref--;
5644
5645         if (iter->trace->pipe_close)
5646                 iter->trace->pipe_close(iter);
5647
5648         mutex_unlock(&trace_types_lock);
5649
5650         free_cpumask_var(iter->started);
5651         mutex_destroy(&iter->mutex);
5652         kfree(iter);
5653
5654         trace_array_put(tr);
5655
5656         return 0;
5657 }
5658
5659 static __poll_t
5660 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5661 {
5662         struct trace_array *tr = iter->tr;
5663
5664         /* Iterators are static, they should be filled or empty */
5665         if (trace_buffer_iter(iter, iter->cpu_file))
5666                 return EPOLLIN | EPOLLRDNORM;
5667
5668         if (tr->trace_flags & TRACE_ITER_BLOCK)
5669                 /*
5670                  * Always select as readable when in blocking mode
5671                  */
5672                 return EPOLLIN | EPOLLRDNORM;
5673         else
5674                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5675                                              filp, poll_table);
5676 }
5677
5678 static __poll_t
5679 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5680 {
5681         struct trace_iterator *iter = filp->private_data;
5682
5683         return trace_poll(iter, filp, poll_table);
5684 }
5685
5686 /* Must be called with iter->mutex held. */
5687 static int tracing_wait_pipe(struct file *filp)
5688 {
5689         struct trace_iterator *iter = filp->private_data;
5690         int ret;
5691
5692         while (trace_empty(iter)) {
5693
5694                 if ((filp->f_flags & O_NONBLOCK)) {
5695                         return -EAGAIN;
5696                 }
5697
5698                 /*
5699                  * We block until we read something and tracing is disabled.
5700                  * We still block if tracing is disabled, but we have never
5701                  * read anything. This allows a user to cat this file, and
5702                  * then enable tracing. But after we have read something,
5703                  * we give an EOF when tracing is again disabled.
5704                  *
5705                  * iter->pos will be 0 if we haven't read anything.
5706                  */
5707                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5708                         break;
5709
5710                 mutex_unlock(&iter->mutex);
5711
5712                 ret = wait_on_pipe(iter, 0);
5713
5714                 mutex_lock(&iter->mutex);
5715
5716                 if (ret)
5717                         return ret;
5718         }
5719
5720         return 1;
5721 }
5722
5723 /*
5724  * Consumer reader.
5725  */
5726 static ssize_t
5727 tracing_read_pipe(struct file *filp, char __user *ubuf,
5728                   size_t cnt, loff_t *ppos)
5729 {
5730         struct trace_iterator *iter = filp->private_data;
5731         ssize_t sret;
5732
5733         /*
5734          * Avoid more than one consumer on a single file descriptor.
5735          * This is just a matter of trace coherency; the ring buffer itself
5736          * is protected.
5737          */
5738         mutex_lock(&iter->mutex);
5739
5740         /* return any leftover data */
5741         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5742         if (sret != -EBUSY)
5743                 goto out;
5744
5745         trace_seq_init(&iter->seq);
5746
5747         if (iter->trace->read) {
5748                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5749                 if (sret)
5750                         goto out;
5751         }
5752
5753 waitagain:
5754         sret = tracing_wait_pipe(filp);
5755         if (sret <= 0)
5756                 goto out;
5757
5758         /* stop when tracing is finished */
5759         if (trace_empty(iter)) {
5760                 sret = 0;
5761                 goto out;
5762         }
5763
5764         if (cnt >= PAGE_SIZE)
5765                 cnt = PAGE_SIZE - 1;
5766
5767         /* reset all but tr, trace, and overruns */
5768         memset(&iter->seq, 0,
5769                sizeof(struct trace_iterator) -
5770                offsetof(struct trace_iterator, seq));
5771         cpumask_clear(iter->started);
5772         iter->pos = -1;
5773
5774         trace_event_read_lock();
5775         trace_access_lock(iter->cpu_file);
5776         while (trace_find_next_entry_inc(iter) != NULL) {
5777                 enum print_line_t ret;
5778                 int save_len = iter->seq.seq.len;
5779
5780                 ret = print_trace_line(iter);
5781                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5782                         /* don't print partial lines */
5783                         iter->seq.seq.len = save_len;
5784                         break;
5785                 }
5786                 if (ret != TRACE_TYPE_NO_CONSUME)
5787                         trace_consume(iter);
5788
5789                 if (trace_seq_used(&iter->seq) >= cnt)
5790                         break;
5791
5792                 /*
5793                  * Setting the full flag means we reached the trace_seq buffer
5794                  * size and we should have left via the partial-output condition above.
5795                  * One of the trace_seq_* functions is not used properly.
5796                  */
5797                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5798                           iter->ent->type);
5799         }
5800         trace_access_unlock(iter->cpu_file);
5801         trace_event_read_unlock();
5802
5803         /* Now copy what we have to the user */
5804         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5805         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5806                 trace_seq_init(&iter->seq);
5807
5808         /*
5809          * If there was nothing to send to user, in spite of consuming trace
5810          * entries, go back to wait for more entries.
5811          */
5812         if (sret == -EBUSY)
5813                 goto waitagain;
5814
5815 out:
5816         mutex_unlock(&iter->mutex);
5817
5818         return sret;
5819 }
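/*
 * Usage sketch for the "trace_pipe" file: reads consume entries and block
 * while the buffer is empty (unless the file is opened with O_NONBLOCK),
 * and iter->mutex keeps concurrent readers of one fd coherent:
 *
 *   cat trace_pipe             # streams and removes entries as they arrive
 */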
5820
5821 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5822                                      unsigned int idx)
5823 {
5824         __free_page(spd->pages[idx]);
5825 }
5826
5827 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5828         .can_merge              = 0,
5829         .confirm                = generic_pipe_buf_confirm,
5830         .release                = generic_pipe_buf_release,
5831         .steal                  = generic_pipe_buf_steal,
5832         .get                    = generic_pipe_buf_get,
5833 };
5834
5835 static size_t
5836 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5837 {
5838         size_t count;
5839         int save_len;
5840         int ret;
5841
5842         /* Seq buffer is page-sized, exactly what we need. */
5843         for (;;) {
5844                 save_len = iter->seq.seq.len;
5845                 ret = print_trace_line(iter);
5846
5847                 if (trace_seq_has_overflowed(&iter->seq)) {
5848                         iter->seq.seq.len = save_len;
5849                         break;
5850                 }
5851
5852                 /*
5853                  * This should not be hit, because it should only
5854                  * be set if the iter->seq overflowed. But check it
5855                  * anyway to be safe.
5856                  */
5857                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5858                         iter->seq.seq.len = save_len;
5859                         break;
5860                 }
5861
5862                 count = trace_seq_used(&iter->seq) - save_len;
5863                 if (rem < count) {
5864                         rem = 0;
5865                         iter->seq.seq.len = save_len;
5866                         break;
5867                 }
5868
5869                 if (ret != TRACE_TYPE_NO_CONSUME)
5870                         trace_consume(iter);
5871                 rem -= count;
5872                 if (!trace_find_next_entry_inc(iter))   {
5873                         rem = 0;
5874                         iter->ent = NULL;
5875                         break;
5876                 }
5877         }
5878
5879         return rem;
5880 }
5881
5882 static ssize_t tracing_splice_read_pipe(struct file *filp,
5883                                         loff_t *ppos,
5884                                         struct pipe_inode_info *pipe,
5885                                         size_t len,
5886                                         unsigned int flags)
5887 {
5888         struct page *pages_def[PIPE_DEF_BUFFERS];
5889         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5890         struct trace_iterator *iter = filp->private_data;
5891         struct splice_pipe_desc spd = {
5892                 .pages          = pages_def,
5893                 .partial        = partial_def,
5894                 .nr_pages       = 0, /* This gets updated below. */
5895                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5896                 .ops            = &tracing_pipe_buf_ops,
5897                 .spd_release    = tracing_spd_release_pipe,
5898         };
5899         ssize_t ret;
5900         size_t rem;
5901         unsigned int i;
5902
5903         if (splice_grow_spd(pipe, &spd))
5904                 return -ENOMEM;
5905
5906         mutex_lock(&iter->mutex);
5907
5908         if (iter->trace->splice_read) {
5909                 ret = iter->trace->splice_read(iter, filp,
5910                                                ppos, pipe, len, flags);
5911                 if (ret)
5912                         goto out_err;
5913         }
5914
5915         ret = tracing_wait_pipe(filp);
5916         if (ret <= 0)
5917                 goto out_err;
5918
5919         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5920                 ret = -EFAULT;
5921                 goto out_err;
5922         }
5923
5924         trace_event_read_lock();
5925         trace_access_lock(iter->cpu_file);
5926
5927         /* Fill as many pages as possible. */
5928         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5929                 spd.pages[i] = alloc_page(GFP_KERNEL);
5930                 if (!spd.pages[i])
5931                         break;
5932
5933                 rem = tracing_fill_pipe_page(rem, iter);
5934
5935                 /* Copy the data into the page, so we can start over. */
5936                 ret = trace_seq_to_buffer(&iter->seq,
5937                                           page_address(spd.pages[i]),
5938                                           trace_seq_used(&iter->seq));
5939                 if (ret < 0) {
5940                         __free_page(spd.pages[i]);
5941                         break;
5942                 }
5943                 spd.partial[i].offset = 0;
5944                 spd.partial[i].len = trace_seq_used(&iter->seq);
5945
5946                 trace_seq_init(&iter->seq);
5947         }
5948
5949         trace_access_unlock(iter->cpu_file);
5950         trace_event_read_unlock();
5951         mutex_unlock(&iter->mutex);
5952
5953         spd.nr_pages = i;
5954
5955         if (i)
5956                 ret = splice_to_pipe(pipe, &spd);
5957         else
5958                 ret = 0;
5959 out:
5960         splice_shrink_spd(&spd);
5961         return ret;
5962
5963 out_err:
5964         mutex_unlock(&iter->mutex);
5965         goto out;
5966 }
5967
5968 static ssize_t
5969 tracing_entries_read(struct file *filp, char __user *ubuf,
5970                      size_t cnt, loff_t *ppos)
5971 {
5972         struct inode *inode = file_inode(filp);
5973         struct trace_array *tr = inode->i_private;
5974         int cpu = tracing_get_cpu(inode);
5975         char buf[64];
5976         int r = 0;
5977         ssize_t ret;
5978
5979         mutex_lock(&trace_types_lock);
5980
5981         if (cpu == RING_BUFFER_ALL_CPUS) {
5982                 int cpu, buf_size_same;
5983                 unsigned long size;
5984
5985                 size = 0;
5986                 buf_size_same = 1;
5987                 /* check if all cpu sizes are same */
5988                 for_each_tracing_cpu(cpu) {
5989                         /* fill in the size from first enabled cpu */
5990                         if (size == 0)
5991                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5992                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5993                                 buf_size_same = 0;
5994                                 break;
5995                         }
5996                 }
5997
5998                 if (buf_size_same) {
5999                         if (!ring_buffer_expanded)
6000                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6001                                             size >> 10,
6002                                             trace_buf_size >> 10);
6003                         else
6004                                 r = sprintf(buf, "%lu\n", size >> 10);
6005                 } else
6006                         r = sprintf(buf, "X\n");
6007         } else
6008                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6009
6010         mutex_unlock(&trace_types_lock);
6011
6012         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6013         return ret;
6014 }
6015
6016 static ssize_t
6017 tracing_entries_write(struct file *filp, const char __user *ubuf,
6018                       size_t cnt, loff_t *ppos)
6019 {
6020         struct inode *inode = file_inode(filp);
6021         struct trace_array *tr = inode->i_private;
6022         unsigned long val;
6023         int ret;
6024
6025         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6026         if (ret)
6027                 return ret;
6028
6029         /* must have at least 1 entry */
6030         if (!val)
6031                 return -EINVAL;
6032
6033         /* value is in KB */
6034         val <<= 10;
6035         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6036         if (ret < 0)
6037                 return ret;
6038
6039         *ppos += cnt;
6040
6041         return cnt;
6042 }
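/*
 * Usage sketch for the "buffer_size_kb" files: the value is in KB per CPU,
 * and the per-CPU file resizes a single CPU's buffer (cpu0 is just an
 * example):
 *
 *   echo 4096 > buffer_size_kb      # roughly 4 MB per CPU
 *   cat buffer_size_kb              # may show "(expanded: N)" before first use
 *   echo 1024 > per_cpu/cpu0/buffer_size_kb
 */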
6043
6044 static ssize_t
6045 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6046                                 size_t cnt, loff_t *ppos)
6047 {
6048         struct trace_array *tr = filp->private_data;
6049         char buf[64];
6050         int r, cpu;
6051         unsigned long size = 0, expanded_size = 0;
6052
6053         mutex_lock(&trace_types_lock);
6054         for_each_tracing_cpu(cpu) {
6055                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6056                 if (!ring_buffer_expanded)
6057                         expanded_size += trace_buf_size >> 10;
6058         }
6059         if (ring_buffer_expanded)
6060                 r = sprintf(buf, "%lu\n", size);
6061         else
6062                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6063         mutex_unlock(&trace_types_lock);
6064
6065         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6066 }
6067
6068 static ssize_t
6069 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6070                           size_t cnt, loff_t *ppos)
6071 {
6072         /*
6073          * There is no need to read what the user has written; this function
6074          * only exists so that "echo" does not return an error.
6075          */
6076
6077         *ppos += cnt;
6078
6079         return cnt;
6080 }
6081
6082 static int
6083 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6084 {
6085         struct trace_array *tr = inode->i_private;
6086
6087         /* disable tracing ? */
6088         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6089                 tracer_tracing_off(tr);
6090         /* resize the ring buffer to 0 */
6091         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6092
6093         trace_array_put(tr);
6094
6095         return 0;
6096 }
6097
6098 static ssize_t
6099 tracing_mark_write(struct file *filp, const char __user *ubuf,
6100                                         size_t cnt, loff_t *fpos)
6101 {
6102         struct trace_array *tr = filp->private_data;
6103         struct ring_buffer_event *event;
6104         enum event_trigger_type tt = ETT_NONE;
6105         struct ring_buffer *buffer;
6106         struct print_entry *entry;
6107         unsigned long irq_flags;
6108         const char faulted[] = "<faulted>";
6109         ssize_t written;
6110         int size;
6111         int len;
6112
6113 /* Used in tracing_mark_raw_write() as well */
6114 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6115
6116         if (tracing_disabled)
6117                 return -EINVAL;
6118
6119         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6120                 return -EINVAL;
6121
6122         if (cnt > TRACE_BUF_SIZE)
6123                 cnt = TRACE_BUF_SIZE;
6124
6125         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6126
6127         local_save_flags(irq_flags);
6128         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6129
6130         /* If less than "<faulted>", then make sure we can still add that */
6131         if (cnt < FAULTED_SIZE)
6132                 size += FAULTED_SIZE - cnt;
6133
6134         buffer = tr->trace_buffer.buffer;
6135         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6136                                             irq_flags, preempt_count());
6137         if (unlikely(!event))
6138                 /* Ring buffer disabled, return as if not open for write */
6139                 return -EBADF;
6140
6141         entry = ring_buffer_event_data(event);
6142         entry->ip = _THIS_IP_;
6143
6144         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6145         if (len) {
6146                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6147                 cnt = FAULTED_SIZE;
6148                 written = -EFAULT;
6149         } else
6150                 written = cnt;
6151         len = cnt;
6152
6153         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6154                 /* do not add \n before testing triggers, but add \0 */
6155                 entry->buf[cnt] = '\0';
6156                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6157         }
6158
6159         if (entry->buf[cnt - 1] != '\n') {
6160                 entry->buf[cnt] = '\n';
6161                 entry->buf[cnt + 1] = '\0';
6162         } else
6163                 entry->buf[cnt] = '\0';
6164
6165         __buffer_unlock_commit(buffer, event);
6166
6167         if (tt)
6168                 event_triggers_post_call(tr->trace_marker_file, tt);
6169
6170         if (written > 0)
6171                 *fpos += written;
6172
6173         return written;
6174 }
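/*
 * Usage sketch for the "trace_marker" file: anything written here is
 * recorded as a TRACE_PRINT entry in the ring buffer (the "markers" trace
 * option must be set):
 *
 *   echo "hello from user space" > trace_marker
 */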
6175
6176 /* Limit it for now to 3K (including tag) */
6177 #define RAW_DATA_MAX_SIZE (1024*3)
6178
6179 static ssize_t
6180 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6181                                         size_t cnt, loff_t *fpos)
6182 {
6183         struct trace_array *tr = filp->private_data;
6184         struct ring_buffer_event *event;
6185         struct ring_buffer *buffer;
6186         struct raw_data_entry *entry;
6187         const char faulted[] = "<faulted>";
6188         unsigned long irq_flags;
6189         ssize_t written;
6190         int size;
6191         int len;
6192
6193 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6194
6195         if (tracing_disabled)
6196                 return -EINVAL;
6197
6198         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6199                 return -EINVAL;
6200
6201         /* The marker must at least have a tag id */
6202         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6203                 return -EINVAL;
6204
6205         if (cnt > TRACE_BUF_SIZE)
6206                 cnt = TRACE_BUF_SIZE;
6207
6208         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6209
6210         local_save_flags(irq_flags);
6211         size = sizeof(*entry) + cnt;
6212         if (cnt < FAULT_SIZE_ID)
6213                 size += FAULT_SIZE_ID - cnt;
6214
6215         buffer = tr->trace_buffer.buffer;
6216         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6217                                             irq_flags, preempt_count());
6218         if (!event)
6219                 /* Ring buffer disabled, return as if not open for write */
6220                 return -EBADF;
6221
6222         entry = ring_buffer_event_data(event);
6223
6224         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6225         if (len) {
6226                 entry->id = -1;
6227                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6228                 written = -EFAULT;
6229         } else
6230                 written = cnt;
6231
6232         __buffer_unlock_commit(buffer, event);
6233
6234         if (written > 0)
6235                 *fpos += written;
6236
6237         return written;
6238 }
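/*
 * Usage sketch for the "trace_marker_raw" file: writes are binary, the
 * first sizeof(int) bytes are a tag id and the rest is opaque payload,
 * capped at RAW_DATA_MAX_SIZE.  A hypothetical user-space writer (id 42
 * is arbitrary):
 *
 *   struct { int id; char data[8]; } rec = { 42, "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));
 */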
6239
6240 static int tracing_clock_show(struct seq_file *m, void *v)
6241 {
6242         struct trace_array *tr = m->private;
6243         int i;
6244
6245         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6246                 seq_printf(m,
6247                         "%s%s%s%s", i ? " " : "",
6248                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6249                         i == tr->clock_id ? "]" : "");
6250         seq_putc(m, '\n');
6251
6252         return 0;
6253 }
6254
6255 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6256 {
6257         int i;
6258
6259         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6260                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6261                         break;
6262         }
6263         if (i == ARRAY_SIZE(trace_clocks))
6264                 return -EINVAL;
6265
6266         mutex_lock(&trace_types_lock);
6267
6268         tr->clock_id = i;
6269
6270         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6271
6272         /*
6273          * New clock may not be consistent with the previous clock.
6274          * Reset the buffer so that it doesn't have incomparable timestamps.
6275          */
6276         tracing_reset_online_cpus(&tr->trace_buffer);
6277
6278 #ifdef CONFIG_TRACER_MAX_TRACE
6279         if (tr->max_buffer.buffer)
6280                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6281         tracing_reset_online_cpus(&tr->max_buffer);
6282 #endif
6283
6284         mutex_unlock(&trace_types_lock);
6285
6286         return 0;
6287 }
6288
6289 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6290                                    size_t cnt, loff_t *fpos)
6291 {
6292         struct seq_file *m = filp->private_data;
6293         struct trace_array *tr = m->private;
6294         char buf[64];
6295         const char *clockstr;
6296         int ret;
6297
6298         if (cnt >= sizeof(buf))
6299                 return -EINVAL;
6300
6301         if (copy_from_user(buf, ubuf, cnt))
6302                 return -EFAULT;
6303
6304         buf[cnt] = 0;
6305
6306         clockstr = strstrip(buf);
6307
6308         ret = tracing_set_clock(tr, clockstr);
6309         if (ret)
6310                 return ret;
6311
6312         *fpos += cnt;
6313
6314         return cnt;
6315 }
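/*
 * Usage sketch for the "trace_clock" file: reading lists the available
 * clocks with the current one in brackets, and writing a clock name
 * switches to it (which also resets the buffer, as noted above):
 *
 *   cat trace_clock
 *   echo mono > trace_clock
 */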
6316
6317 static int tracing_clock_open(struct inode *inode, struct file *file)
6318 {
6319         struct trace_array *tr = inode->i_private;
6320         int ret;
6321
6322         if (tracing_disabled)
6323                 return -ENODEV;
6324
6325         if (trace_array_get(tr))
6326                 return -ENODEV;
6327
6328         ret = single_open(file, tracing_clock_show, inode->i_private);
6329         if (ret < 0)
6330                 trace_array_put(tr);
6331
6332         return ret;
6333 }
6334
6335 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6336 {
6337         struct trace_array *tr = m->private;
6338
6339         mutex_lock(&trace_types_lock);
6340
6341         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6342                 seq_puts(m, "delta [absolute]\n");
6343         else
6344                 seq_puts(m, "[delta] absolute\n");
6345
6346         mutex_unlock(&trace_types_lock);
6347
6348         return 0;
6349 }
6350
6351 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6352 {
6353         struct trace_array *tr = inode->i_private;
6354         int ret;
6355
6356         if (tracing_disabled)
6357                 return -ENODEV;
6358
6359         if (trace_array_get(tr))
6360                 return -ENODEV;
6361
6362         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6363         if (ret < 0)
6364                 trace_array_put(tr);
6365
6366         return ret;
6367 }
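/*
 * Reading the "timestamp_mode" file reports whether the ring buffer is
 * using delta or absolute timestamps, with the active mode in brackets:
 *
 *   cat timestamp_mode         # "[delta] absolute" or "delta [absolute]"
 */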
6368
6369 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6370 {
6371         int ret = 0;
6372
6373         mutex_lock(&trace_types_lock);
6374
6375         if (abs && tr->time_stamp_abs_ref++)
6376                 goto out;
6377
6378         if (!abs) {
6379                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6380                         ret = -EINVAL;
6381                         goto out;
6382                 }
6383
6384                 if (--tr->time_stamp_abs_ref)
6385                         goto out;
6386         }
6387
6388         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6389
6390 #ifdef CONFIG_TRACER_MAX_TRACE
6391         if (tr->max_buffer.buffer)
6392                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6393 #endif
6394  out:
6395         mutex_unlock(&trace_types_lock);
6396
6397         return ret;
6398 }
6399
6400 struct ftrace_buffer_info {
6401         struct trace_iterator   iter;
6402         void                    *spare;
6403         unsigned int            spare_cpu;
6404         unsigned int            read;
6405 };
6406
6407 #ifdef CONFIG_TRACER_SNAPSHOT
6408 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6409 {
6410         struct trace_array *tr = inode->i_private;
6411         struct trace_iterator *iter;
6412         struct seq_file *m;
6413         int ret = 0;
6414
6415         if (trace_array_get(tr) < 0)
6416                 return -ENODEV;
6417
6418         if (file->f_mode & FMODE_READ) {
6419                 iter = __tracing_open(inode, file, true);
6420                 if (IS_ERR(iter))
6421                         ret = PTR_ERR(iter);
6422         } else {
6423                 /* Writes still need the seq_file to hold the private data */
6424                 ret = -ENOMEM;
6425                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6426                 if (!m)
6427                         goto out;
6428                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6429                 if (!iter) {
6430                         kfree(m);
6431                         goto out;
6432                 }
6433                 ret = 0;
6434
6435                 iter->tr = tr;
6436                 iter->trace_buffer = &tr->max_buffer;
6437                 iter->cpu_file = tracing_get_cpu(inode);
6438                 m->private = iter;
6439                 file->private_data = m;
6440         }
6441 out:
6442         if (ret < 0)
6443                 trace_array_put(tr);
6444
6445         return ret;
6446 }
6447
6448 static ssize_t
6449 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6450                        loff_t *ppos)
6451 {
6452         struct seq_file *m = filp->private_data;
6453         struct trace_iterator *iter = m->private;
6454         struct trace_array *tr = iter->tr;
6455         unsigned long val;
6456         int ret;
6457
6458         ret = tracing_update_buffers();
6459         if (ret < 0)
6460                 return ret;
6461
6462         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6463         if (ret)
6464                 return ret;
6465
6466         mutex_lock(&trace_types_lock);
6467
6468         if (tr->current_trace->use_max_tr) {
6469                 ret = -EBUSY;
6470                 goto out;
6471         }
6472
6473         switch (val) {
6474         case 0:
6475                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6476                         ret = -EINVAL;
6477                         break;
6478                 }
6479                 if (tr->allocated_snapshot)
6480                         free_snapshot(tr);
6481                 break;
6482         case 1:
6483 /* Only allow per-cpu swap if the ring buffer supports it */
6484 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6485                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6486                         ret = -EINVAL;
6487                         break;
6488                 }
6489 #endif
6490                 if (!tr->allocated_snapshot) {
6491                         ret = tracing_alloc_snapshot_instance(tr);
6492                         if (ret < 0)
6493                                 break;
6494                 }
6495                 local_irq_disable();
6496                 /* Now, we're going to swap */
6497                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6498                         update_max_tr(tr, current, smp_processor_id());
6499                 else
6500                         update_max_tr_single(tr, current, iter->cpu_file);
6501                 local_irq_enable();
6502                 break;
6503         default:
6504                 if (tr->allocated_snapshot) {
6505                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6506                                 tracing_reset_online_cpus(&tr->max_buffer);
6507                         else
6508                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6509                 }
6510                 break;
6511         }
6512
6513         if (ret >= 0) {
6514                 *ppos += cnt;
6515                 ret = cnt;
6516         }
6517 out:
6518         mutex_unlock(&trace_types_lock);
6519         return ret;
6520 }
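
/*
 * Illustrative userspace sketch (not part of this file) of the values the
 * switch above accepts when they come in through the "snapshot" tracefs
 * file: "1" allocates the snapshot buffer if needed and swaps it with the
 * live buffer, "0" frees it, and any other number only clears it.  The
 * mount point is an assumption for the example.
 */
#if 0 /* userspace example only */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void snapshot_ctl(const char *val)
{
        int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);

        if (fd < 0) {
                perror("open snapshot");
                return;
        }
        if (write(fd, val, 1) < 0)
                perror("write snapshot");
        close(fd);
}

int main(void)
{
        snapshot_ctl("1");      /* take a snapshot (case 1 above) */
        /* ... read the "snapshot" file here to inspect the capture ... */
        snapshot_ctl("0");      /* free the snapshot buffer (case 0 above) */
        return 0;
}
#endif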
6521
6522 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6523 {
6524         struct seq_file *m = file->private_data;
6525         int ret;
6526
6527         ret = tracing_release(inode, file);
6528
6529         if (file->f_mode & FMODE_READ)
6530                 return ret;
6531
6532         /* If write only, the seq_file is just a stub */
6533         if (m)
6534                 kfree(m->private);
6535         kfree(m);
6536
6537         return 0;
6538 }
6539
6540 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6541 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6542                                     size_t count, loff_t *ppos);
6543 static int tracing_buffers_release(struct inode *inode, struct file *file);
6544 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6545                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6546
6547 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6548 {
6549         struct ftrace_buffer_info *info;
6550         int ret;
6551
6552         ret = tracing_buffers_open(inode, filp);
6553         if (ret < 0)
6554                 return ret;
6555
6556         info = filp->private_data;
6557
6558         if (info->iter.trace->use_max_tr) {
6559                 tracing_buffers_release(inode, filp);
6560                 return -EBUSY;
6561         }
6562
6563         info->iter.snapshot = true;
6564         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6565
6566         return ret;
6567 }
6568
6569 #endif /* CONFIG_TRACER_SNAPSHOT */
6570
6571
6572 static const struct file_operations tracing_thresh_fops = {
6573         .open           = tracing_open_generic,
6574         .read           = tracing_thresh_read,
6575         .write          = tracing_thresh_write,
6576         .llseek         = generic_file_llseek,
6577 };
6578
6579 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6580 static const struct file_operations tracing_max_lat_fops = {
6581         .open           = tracing_open_generic,
6582         .read           = tracing_max_lat_read,
6583         .write          = tracing_max_lat_write,
6584         .llseek         = generic_file_llseek,
6585 };
6586 #endif
6587
6588 static const struct file_operations set_tracer_fops = {
6589         .open           = tracing_open_generic,
6590         .read           = tracing_set_trace_read,
6591         .write          = tracing_set_trace_write,
6592         .llseek         = generic_file_llseek,
6593 };
6594
6595 static const struct file_operations tracing_pipe_fops = {
6596         .open           = tracing_open_pipe,
6597         .poll           = tracing_poll_pipe,
6598         .read           = tracing_read_pipe,
6599         .splice_read    = tracing_splice_read_pipe,
6600         .release        = tracing_release_pipe,
6601         .llseek         = no_llseek,
6602 };
6603
6604 static const struct file_operations tracing_entries_fops = {
6605         .open           = tracing_open_generic_tr,
6606         .read           = tracing_entries_read,
6607         .write          = tracing_entries_write,
6608         .llseek         = generic_file_llseek,
6609         .release        = tracing_release_generic_tr,
6610 };
6611
6612 static const struct file_operations tracing_total_entries_fops = {
6613         .open           = tracing_open_generic_tr,
6614         .read           = tracing_total_entries_read,
6615         .llseek         = generic_file_llseek,
6616         .release        = tracing_release_generic_tr,
6617 };
6618
6619 static const struct file_operations tracing_free_buffer_fops = {
6620         .open           = tracing_open_generic_tr,
6621         .write          = tracing_free_buffer_write,
6622         .release        = tracing_free_buffer_release,
6623 };
6624
6625 static const struct file_operations tracing_mark_fops = {
6626         .open           = tracing_open_generic_tr,
6627         .write          = tracing_mark_write,
6628         .llseek         = generic_file_llseek,
6629         .release        = tracing_release_generic_tr,
6630 };
6631
6632 static const struct file_operations tracing_mark_raw_fops = {
6633         .open           = tracing_open_generic_tr,
6634         .write          = tracing_mark_raw_write,
6635         .llseek         = generic_file_llseek,
6636         .release        = tracing_release_generic_tr,
6637 };
6638
6639 static const struct file_operations trace_clock_fops = {
6640         .open           = tracing_clock_open,
6641         .read           = seq_read,
6642         .llseek         = seq_lseek,
6643         .release        = tracing_single_release_tr,
6644         .write          = tracing_clock_write,
6645 };
6646
6647 static const struct file_operations trace_time_stamp_mode_fops = {
6648         .open           = tracing_time_stamp_mode_open,
6649         .read           = seq_read,
6650         .llseek         = seq_lseek,
6651         .release        = tracing_single_release_tr,
6652 };
6653
6654 #ifdef CONFIG_TRACER_SNAPSHOT
6655 static const struct file_operations snapshot_fops = {
6656         .open           = tracing_snapshot_open,
6657         .read           = seq_read,
6658         .write          = tracing_snapshot_write,
6659         .llseek         = tracing_lseek,
6660         .release        = tracing_snapshot_release,
6661 };
6662
6663 static const struct file_operations snapshot_raw_fops = {
6664         .open           = snapshot_raw_open,
6665         .read           = tracing_buffers_read,
6666         .release        = tracing_buffers_release,
6667         .splice_read    = tracing_buffers_splice_read,
6668         .llseek         = no_llseek,
6669 };
6670
6671 #endif /* CONFIG_TRACER_SNAPSHOT */
6672
6673 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6674 {
6675         struct trace_array *tr = inode->i_private;
6676         struct ftrace_buffer_info *info;
6677         int ret;
6678
6679         if (tracing_disabled)
6680                 return -ENODEV;
6681
6682         if (trace_array_get(tr) < 0)
6683                 return -ENODEV;
6684
6685         info = kzalloc(sizeof(*info), GFP_KERNEL);
6686         if (!info) {
6687                 trace_array_put(tr);
6688                 return -ENOMEM;
6689         }
6690
6691         mutex_lock(&trace_types_lock);
6692
6693         info->iter.tr           = tr;
6694         info->iter.cpu_file     = tracing_get_cpu(inode);
6695         info->iter.trace        = tr->current_trace;
6696         info->iter.trace_buffer = &tr->trace_buffer;
6697         info->spare             = NULL;
6698         /* Force reading ring buffer for first read */
6699         info->read              = (unsigned int)-1;
6700
6701         filp->private_data = info;
6702
6703         tr->current_trace->ref++;
6704
6705         mutex_unlock(&trace_types_lock);
6706
6707         ret = nonseekable_open(inode, filp);
6708         if (ret < 0)
6709                 trace_array_put(tr);
6710
6711         return ret;
6712 }
6713
6714 static __poll_t
6715 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6716 {
6717         struct ftrace_buffer_info *info = filp->private_data;
6718         struct trace_iterator *iter = &info->iter;
6719
6720         return trace_poll(iter, filp, poll_table);
6721 }
6722
6723 static ssize_t
6724 tracing_buffers_read(struct file *filp, char __user *ubuf,
6725                      size_t count, loff_t *ppos)
6726 {
6727         struct ftrace_buffer_info *info = filp->private_data;
6728         struct trace_iterator *iter = &info->iter;
6729         ssize_t ret = 0;
6730         ssize_t size;
6731
6732         if (!count)
6733                 return 0;
6734
6735 #ifdef CONFIG_TRACER_MAX_TRACE
6736         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6737                 return -EBUSY;
6738 #endif
6739
6740         if (!info->spare) {
6741                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6742                                                           iter->cpu_file);
6743                 if (IS_ERR(info->spare)) {
6744                         ret = PTR_ERR(info->spare);
6745                         info->spare = NULL;
6746                 } else {
6747                         info->spare_cpu = iter->cpu_file;
6748                 }
6749         }
6750         if (!info->spare)
6751                 return ret;
6752
6753         /* Do we have previous read data to read? */
6754         if (info->read < PAGE_SIZE)
6755                 goto read;
6756
6757  again:
6758         trace_access_lock(iter->cpu_file);
6759         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6760                                     &info->spare,
6761                                     count,
6762                                     iter->cpu_file, 0);
6763         trace_access_unlock(iter->cpu_file);
6764
6765         if (ret < 0) {
6766                 if (trace_empty(iter)) {
6767                         if ((filp->f_flags & O_NONBLOCK))
6768                                 return -EAGAIN;
6769
6770                         ret = wait_on_pipe(iter, 0);
6771                         if (ret)
6772                                 return ret;
6773
6774                         goto again;
6775                 }
6776                 return 0;
6777         }
6778
6779         info->read = 0;
6780  read:
6781         size = PAGE_SIZE - info->read;
6782         if (size > count)
6783                 size = count;
6784
6785         ret = copy_to_user(ubuf, info->spare + info->read, size);
6786         if (ret == size)
6787                 return -EFAULT;
6788
6789         size -= ret;
6790
6791         *ppos += size;
6792         info->read += size;
6793
6794         return size;
6795 }
6796
6797 static int tracing_buffers_release(struct inode *inode, struct file *file)
6798 {
6799         struct ftrace_buffer_info *info = file->private_data;
6800         struct trace_iterator *iter = &info->iter;
6801
6802         mutex_lock(&trace_types_lock);
6803
6804         iter->tr->current_trace->ref--;
6805
6806         __trace_array_put(iter->tr);
6807
6808         if (info->spare)
6809                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6810                                            info->spare_cpu, info->spare);
6811         kfree(info);
6812
6813         mutex_unlock(&trace_types_lock);
6814
6815         return 0;
6816 }
6817
6818 struct buffer_ref {
6819         struct ring_buffer      *buffer;
6820         void                    *page;
6821         int                     cpu;
6822         int                     ref;
6823 };
6824
6825 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6826                                     struct pipe_buffer *buf)
6827 {
6828         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6829
6830         if (--ref->ref)
6831                 return;
6832
6833         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6834         kfree(ref);
6835         buf->private = 0;
6836 }
6837
6838 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6839                                 struct pipe_buffer *buf)
6840 {
6841         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6842
6843         ref->ref++;
6844 }
6845
6846 /* Pipe buffer operations for a buffer. */
6847 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6848         .can_merge              = 0,
6849         .confirm                = generic_pipe_buf_confirm,
6850         .release                = buffer_pipe_buf_release,
6851         .steal                  = generic_pipe_buf_steal,
6852         .get                    = buffer_pipe_buf_get,
6853 };
6854
6855 /*
6856  * Callback from splice_to_pipe(), if we need to release some pages
6857  * at the end of the spd in case we errored out while filling the pipe.
6858  */
6859 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6860 {
6861         struct buffer_ref *ref =
6862                 (struct buffer_ref *)spd->partial[i].private;
6863
6864         if (--ref->ref)
6865                 return;
6866
6867         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6868         kfree(ref);
6869         spd->partial[i].private = 0;
6870 }
6871
6872 static ssize_t
6873 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6874                             struct pipe_inode_info *pipe, size_t len,
6875                             unsigned int flags)
6876 {
6877         struct ftrace_buffer_info *info = file->private_data;
6878         struct trace_iterator *iter = &info->iter;
6879         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6880         struct page *pages_def[PIPE_DEF_BUFFERS];
6881         struct splice_pipe_desc spd = {
6882                 .pages          = pages_def,
6883                 .partial        = partial_def,
6884                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6885                 .ops            = &buffer_pipe_buf_ops,
6886                 .spd_release    = buffer_spd_release,
6887         };
6888         struct buffer_ref *ref;
6889         int entries, i;
6890         ssize_t ret = 0;
6891
6892 #ifdef CONFIG_TRACER_MAX_TRACE
6893         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6894                 return -EBUSY;
6895 #endif
6896
6897         if (*ppos & (PAGE_SIZE - 1))
6898                 return -EINVAL;
6899
6900         if (len & (PAGE_SIZE - 1)) {
6901                 if (len < PAGE_SIZE)
6902                         return -EINVAL;
6903                 len &= PAGE_MASK;
6904         }
6905
6906         if (splice_grow_spd(pipe, &spd))
6907                 return -ENOMEM;
6908
6909  again:
6910         trace_access_lock(iter->cpu_file);
6911         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6912
6913         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6914                 struct page *page;
6915                 int r;
6916
6917                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6918                 if (!ref) {
6919                         ret = -ENOMEM;
6920                         break;
6921                 }
6922
6923                 ref->ref = 1;
6924                 ref->buffer = iter->trace_buffer->buffer;
6925                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6926                 if (IS_ERR(ref->page)) {
6927                         ret = PTR_ERR(ref->page);
6928                         ref->page = NULL;
6929                         kfree(ref);
6930                         break;
6931                 }
6932                 ref->cpu = iter->cpu_file;
6933
6934                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6935                                           len, iter->cpu_file, 1);
6936                 if (r < 0) {
6937                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6938                                                    ref->page);
6939                         kfree(ref);
6940                         break;
6941                 }
6942
6943                 page = virt_to_page(ref->page);
6944
6945                 spd.pages[i] = page;
6946                 spd.partial[i].len = PAGE_SIZE;
6947                 spd.partial[i].offset = 0;
6948                 spd.partial[i].private = (unsigned long)ref;
6949                 spd.nr_pages++;
6950                 *ppos += PAGE_SIZE;
6951
6952                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6953         }
6954
6955         trace_access_unlock(iter->cpu_file);
6956         spd.nr_pages = i;
6957
6958         /* did we read anything? */
6959         if (!spd.nr_pages) {
6960                 if (ret)
6961                         goto out;
6962
6963                 ret = -EAGAIN;
6964                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6965                         goto out;
6966
6967                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
6968                 if (ret)
6969                         goto out;
6970
6971                 goto again;
6972         }
6973
6974         ret = splice_to_pipe(pipe, &spd);
6975 out:
6976         splice_shrink_spd(&spd);
6977
6978         return ret;
6979 }
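
/*
 * Illustrative userspace sketch (not part of this file): this splice path
 * is what lets a reader pull whole ring-buffer pages out of
 * per_cpu/cpuN/trace_pipe_raw without copying them through a read buffer.
 * The mount point, cpu0, the output file name and the 4 KiB page size are
 * assumptions for the example; the offset and length must stay page
 * aligned, as checked above.
 */
#if 0 /* userspace example only */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int in = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
                      O_RDONLY | O_NONBLOCK);
        int out = open("cpu0-raw.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        int pfd[2];
        ssize_t n;

        if (in < 0 || out < 0 || pipe(pfd) < 0) {
                perror("setup");
                return 1;
        }
        /* Move up to 64 pages at a time from the buffer into the pipe. */
        while ((n = splice(in, NULL, pfd[1], NULL, 64 * 4096,
                           SPLICE_F_NONBLOCK)) > 0) {
                /* Then drain the pipe into the output file. */
                while (n > 0) {
                        ssize_t m = splice(pfd[0], NULL, out, NULL, n, 0);

                        if (m <= 0)
                                break;
                        n -= m;
                }
        }
        return 0;
}
#endif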
6980
6981 static const struct file_operations tracing_buffers_fops = {
6982         .open           = tracing_buffers_open,
6983         .read           = tracing_buffers_read,
6984         .poll           = tracing_buffers_poll,
6985         .release        = tracing_buffers_release,
6986         .splice_read    = tracing_buffers_splice_read,
6987         .llseek         = no_llseek,
6988 };
6989
6990 static ssize_t
6991 tracing_stats_read(struct file *filp, char __user *ubuf,
6992                    size_t count, loff_t *ppos)
6993 {
6994         struct inode *inode = file_inode(filp);
6995         struct trace_array *tr = inode->i_private;
6996         struct trace_buffer *trace_buf = &tr->trace_buffer;
6997         int cpu = tracing_get_cpu(inode);
6998         struct trace_seq *s;
6999         unsigned long cnt;
7000         unsigned long long t;
7001         unsigned long usec_rem;
7002
7003         s = kmalloc(sizeof(*s), GFP_KERNEL);
7004         if (!s)
7005                 return -ENOMEM;
7006
7007         trace_seq_init(s);
7008
7009         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7010         trace_seq_printf(s, "entries: %ld\n", cnt);
7011
7012         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7013         trace_seq_printf(s, "overrun: %ld\n", cnt);
7014
7015         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7016         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7017
7018         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7019         trace_seq_printf(s, "bytes: %ld\n", cnt);
7020
7021         if (trace_clocks[tr->clock_id].in_ns) {
7022                 /* local or global for trace_clock */
7023                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7024                 usec_rem = do_div(t, USEC_PER_SEC);
7025                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7026                                                                 t, usec_rem);
7027
7028                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7029                 usec_rem = do_div(t, USEC_PER_SEC);
7030                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7031         } else {
7032                 /* counter or tsc mode for trace_clock */
7033                 trace_seq_printf(s, "oldest event ts: %llu\n",
7034                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7035
7036                 trace_seq_printf(s, "now ts: %llu\n",
7037                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7038         }
7039
7040         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7041         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7042
7043         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7044         trace_seq_printf(s, "read events: %ld\n", cnt);
7045
7046         count = simple_read_from_buffer(ubuf, count, ppos,
7047                                         s->buffer, trace_seq_used(s));
7048
7049         kfree(s);
7050
7051         return count;
7052 }
7053
7054 static const struct file_operations tracing_stats_fops = {
7055         .open           = tracing_open_generic_tr,
7056         .read           = tracing_stats_read,
7057         .llseek         = generic_file_llseek,
7058         .release        = tracing_release_generic_tr,
7059 };
7060
7061 #ifdef CONFIG_DYNAMIC_FTRACE
7062
7063 static ssize_t
7064 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7065                   size_t cnt, loff_t *ppos)
7066 {
7067         unsigned long *p = filp->private_data;
7068         char buf[64]; /* Not too big for a shallow stack */
7069         int r;
7070
7071         r = scnprintf(buf, 63, "%ld", *p);
7072         buf[r++] = '\n';
7073
7074         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7075 }
7076
7077 static const struct file_operations tracing_dyn_info_fops = {
7078         .open           = tracing_open_generic,
7079         .read           = tracing_read_dyn_info,
7080         .llseek         = generic_file_llseek,
7081 };
7082 #endif /* CONFIG_DYNAMIC_FTRACE */
7083
7084 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7085 static void
7086 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7087                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7088                 void *data)
7089 {
7090         tracing_snapshot_instance(tr);
7091 }
7092
7093 static void
7094 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7095                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7096                       void *data)
7097 {
7098         struct ftrace_func_mapper *mapper = data;
7099         long *count = NULL;
7100
7101         if (mapper)
7102                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7103
7104         if (count) {
7105
7106                 if (*count <= 0)
7107                         return;
7108
7109                 (*count)--;
7110         }
7111
7112         tracing_snapshot_instance(tr);
7113 }
7114
7115 static int
7116 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7117                       struct ftrace_probe_ops *ops, void *data)
7118 {
7119         struct ftrace_func_mapper *mapper = data;
7120         long *count = NULL;
7121
7122         seq_printf(m, "%ps:", (void *)ip);
7123
7124         seq_puts(m, "snapshot");
7125
7126         if (mapper)
7127                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7128
7129         if (count)
7130                 seq_printf(m, ":count=%ld\n", *count);
7131         else
7132                 seq_puts(m, ":unlimited\n");
7133
7134         return 0;
7135 }
7136
7137 static int
7138 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7139                      unsigned long ip, void *init_data, void **data)
7140 {
7141         struct ftrace_func_mapper *mapper = *data;
7142
7143         if (!mapper) {
7144                 mapper = allocate_ftrace_func_mapper();
7145                 if (!mapper)
7146                         return -ENOMEM;
7147                 *data = mapper;
7148         }
7149
7150         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7151 }
7152
7153 static void
7154 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7155                      unsigned long ip, void *data)
7156 {
7157         struct ftrace_func_mapper *mapper = data;
7158
7159         if (!ip) {
7160                 if (!mapper)
7161                         return;
7162                 free_ftrace_func_mapper(mapper, NULL);
7163                 return;
7164         }
7165
7166         ftrace_func_mapper_remove_ip(mapper, ip);
7167 }
7168
7169 static struct ftrace_probe_ops snapshot_probe_ops = {
7170         .func                   = ftrace_snapshot,
7171         .print                  = ftrace_snapshot_print,
7172 };
7173
7174 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7175         .func                   = ftrace_count_snapshot,
7176         .print                  = ftrace_snapshot_print,
7177         .init                   = ftrace_snapshot_init,
7178         .free                   = ftrace_snapshot_free,
7179 };
7180
7181 static int
7182 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7183                                char *glob, char *cmd, char *param, int enable)
7184 {
7185         struct ftrace_probe_ops *ops;
7186         void *count = (void *)-1;
7187         char *number;
7188         int ret;
7189
7190         if (!tr)
7191                 return -ENODEV;
7192
7193         /* hash funcs only work with set_ftrace_filter */
7194         if (!enable)
7195                 return -EINVAL;
7196
7197         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7198
7199         if (glob[0] == '!')
7200                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7201
7202         if (!param)
7203                 goto out_reg;
7204
7205         number = strsep(&param, ":");
7206
7207         if (!strlen(number))
7208                 goto out_reg;
7209
7210         /*
7211          * We use the callback data field (which is a pointer)
7212          * as our counter.
7213          */
7214         ret = kstrtoul(number, 0, (unsigned long *)&count);
7215         if (ret)
7216                 return ret;
7217
7218  out_reg:
7219         ret = tracing_alloc_snapshot_instance(tr);
7220         if (ret < 0)
7221                 goto out;
7222
7223         ret = register_ftrace_function_probe(glob, tr, ops, count);
7224
7225  out:
7226         return ret < 0 ? ret : 0;
7227 }
7228
7229 static struct ftrace_func_command ftrace_snapshot_cmd = {
7230         .name                   = "snapshot",
7231         .func                   = ftrace_trace_snapshot_callback,
7232 };
7233
7234 static __init int register_snapshot_cmd(void)
7235 {
7236         return register_ftrace_command(&ftrace_snapshot_cmd);
7237 }
7238 #else
7239 static inline __init int register_snapshot_cmd(void) { return 0; }
7240 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
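
/*
 * Illustrative userspace sketch (not part of this file): the "snapshot"
 * command registered above is driven by writing
 * "<function>:snapshot[:count]" into set_ftrace_filter, which is what
 * ftrace_trace_snapshot_callback() parses into glob/cmd/param (a leading
 * '!' unregisters it).  The mount point and the function name are
 * assumptions for the example.
 */
#if 0 /* userspace example only */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *cmd = "schedule:snapshot:1";
        int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);

        if (fd < 0) {
                perror("open set_ftrace_filter");
                return 1;
        }
        /*
         * The callback above then sees glob="schedule", cmd="snapshot" and
         * param="1", so one snapshot is taken the first time schedule() hits.
         */
        if (write(fd, cmd, strlen(cmd)) < 0)
                perror("write set_ftrace_filter");
        close(fd);
        return 0;
}
#endif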
7241
7242 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7243 {
7244         if (WARN_ON(!tr->dir))
7245                 return ERR_PTR(-ENODEV);
7246
7247         /* Top directory uses NULL as the parent */
7248         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7249                 return NULL;
7250
7251         /* All sub buffers have a descriptor */
7252         return tr->dir;
7253 }
7254
7255 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7256 {
7257         struct dentry *d_tracer;
7258
7259         if (tr->percpu_dir)
7260                 return tr->percpu_dir;
7261
7262         d_tracer = tracing_get_dentry(tr);
7263         if (IS_ERR(d_tracer))
7264                 return NULL;
7265
7266         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7267
7268         WARN_ONCE(!tr->percpu_dir,
7269                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7270
7271         return tr->percpu_dir;
7272 }
7273
7274 static struct dentry *
7275 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7276                       void *data, long cpu, const struct file_operations *fops)
7277 {
7278         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7279
7280         if (ret) /* See tracing_get_cpu() */
7281                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7282         return ret;
7283 }
7284
7285 static void
7286 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7287 {
7288         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7289         struct dentry *d_cpu;
7290         char cpu_dir[30]; /* 30 characters should be more than enough */
7291
7292         if (!d_percpu)
7293                 return;
7294
7295         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7296         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7297         if (!d_cpu) {
7298                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7299                 return;
7300         }
7301
7302         /* per cpu trace_pipe */
7303         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7304                                 tr, cpu, &tracing_pipe_fops);
7305
7306         /* per cpu trace */
7307         trace_create_cpu_file("trace", 0644, d_cpu,
7308                                 tr, cpu, &tracing_fops);
7309
7310         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7311                                 tr, cpu, &tracing_buffers_fops);
7312
7313         trace_create_cpu_file("stats", 0444, d_cpu,
7314                                 tr, cpu, &tracing_stats_fops);
7315
7316         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7317                                 tr, cpu, &tracing_entries_fops);
7318
7319 #ifdef CONFIG_TRACER_SNAPSHOT
7320         trace_create_cpu_file("snapshot", 0644, d_cpu,
7321                                 tr, cpu, &snapshot_fops);
7322
7323         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7324                                 tr, cpu, &snapshot_raw_fops);
7325 #endif
7326 }
7327
7328 #ifdef CONFIG_FTRACE_SELFTEST
7329 /* Let selftest have access to static functions in this file */
7330 #include "trace_selftest.c"
7331 #endif
7332
7333 static ssize_t
7334 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7335                         loff_t *ppos)
7336 {
7337         struct trace_option_dentry *topt = filp->private_data;
7338         char *buf;
7339
7340         if (topt->flags->val & topt->opt->bit)
7341                 buf = "1\n";
7342         else
7343                 buf = "0\n";
7344
7345         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7346 }
7347
7348 static ssize_t
7349 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7350                          loff_t *ppos)
7351 {
7352         struct trace_option_dentry *topt = filp->private_data;
7353         unsigned long val;
7354         int ret;
7355
7356         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7357         if (ret)
7358                 return ret;
7359
7360         if (val != 0 && val != 1)
7361                 return -EINVAL;
7362
7363         if (!!(topt->flags->val & topt->opt->bit) != val) {
7364                 mutex_lock(&trace_types_lock);
7365                 ret = __set_tracer_option(topt->tr, topt->flags,
7366                                           topt->opt, !val);
7367                 mutex_unlock(&trace_types_lock);
7368                 if (ret)
7369                         return ret;
7370         }
7371
7372         *ppos += cnt;
7373
7374         return cnt;
7375 }
7376
7377
7378 static const struct file_operations trace_options_fops = {
7379         .open = tracing_open_generic,
7380         .read = trace_options_read,
7381         .write = trace_options_write,
7382         .llseek = generic_file_llseek,
7383 };
7384
7385 /*
7386  * In order to pass in both the trace_array descriptor as well as the index
7387  * to the flag that the trace option file represents, the trace_array
7388  * has a character array of trace_flags_index[], which holds the index
7389  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7390  * The address of the entry for a given flag in this array is passed to
7391  * that flag's option file read/write callbacks.
7392  *
7393  * In order to extract both the index and the trace_array descriptor,
7394  * get_tr_index() uses the following algorithm.
7395  *
7396  *   idx = *ptr;
7397  *
7398  * This works because the pointer passed in is the address of one index
7399  * entry, and that entry's value is its own position (index[1] == 1).
7400  *
7401  * Then, to get the trace_array descriptor, subtracting that index
7402  * from the pointer gives the start of the index array itself.
7403  *
7404  *   ptr - idx == &index[0]
7405  *
7406  * Then a simple container_of() from that pointer gets us to the
7407  * trace_array descriptor.
7408  */
7409 static void get_tr_index(void *data, struct trace_array **ptr,
7410                          unsigned int *pindex)
7411 {
7412         *pindex = *(unsigned char *)data;
7413
7414         *ptr = container_of(data - *pindex, struct trace_array,
7415                             trace_flags_index);
7416 }
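
/*
 * Standalone illustration (not part of this file) of the pointer arithmetic
 * described above, using an invented struct and a local container_of-style
 * macro so it can be compiled as a plain userspace program.  Every name in
 * it is made up for the demonstration.
 */
#if 0 /* userspace example only */
#include <stddef.h>
#include <stdio.h>

#define demo_container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_array {                     /* stands in for trace_array */
        int payload;
        unsigned char flags_index[8];   /* index[i] == i, as above */
};

int main(void)
{
        struct demo_array d = { .payload = 42 };
        unsigned char *data;
        unsigned int idx;
        struct demo_array *back;

        for (idx = 0; idx < 8; idx++)
                d.flags_index[idx] = idx;

        data = &d.flags_index[3];       /* what a flag file's private_data holds */
        idx = *data;                    /* the flag bit: 3 */
        /* data - idx == &d.flags_index[0], so the macro recovers &d */
        back = demo_container_of(data - idx, struct demo_array, flags_index);
        printf("idx=%u payload=%d same=%d\n", idx, back->payload, back == &d);
        return 0;
}
#endif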
7417
7418 static ssize_t
7419 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7420                         loff_t *ppos)
7421 {
7422         void *tr_index = filp->private_data;
7423         struct trace_array *tr;
7424         unsigned int index;
7425         char *buf;
7426
7427         get_tr_index(tr_index, &tr, &index);
7428
7429         if (tr->trace_flags & (1 << index))
7430                 buf = "1\n";
7431         else
7432                 buf = "0\n";
7433
7434         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7435 }
7436
7437 static ssize_t
7438 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7439                          loff_t *ppos)
7440 {
7441         void *tr_index = filp->private_data;
7442         struct trace_array *tr;
7443         unsigned int index;
7444         unsigned long val;
7445         int ret;
7446
7447         get_tr_index(tr_index, &tr, &index);
7448
7449         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7450         if (ret)
7451                 return ret;
7452
7453         if (val != 0 && val != 1)
7454                 return -EINVAL;
7455
7456         mutex_lock(&trace_types_lock);
7457         ret = set_tracer_flag(tr, 1 << index, val);
7458         mutex_unlock(&trace_types_lock);
7459
7460         if (ret < 0)
7461                 return ret;
7462
7463         *ppos += cnt;
7464
7465         return cnt;
7466 }
7467
7468 static const struct file_operations trace_options_core_fops = {
7469         .open = tracing_open_generic,
7470         .read = trace_options_core_read,
7471         .write = trace_options_core_write,
7472         .llseek = generic_file_llseek,
7473 };
7474
7475 struct dentry *trace_create_file(const char *name,
7476                                  umode_t mode,
7477                                  struct dentry *parent,
7478                                  void *data,
7479                                  const struct file_operations *fops)
7480 {
7481         struct dentry *ret;
7482
7483         ret = tracefs_create_file(name, mode, parent, data, fops);
7484         if (!ret)
7485                 pr_warn("Could not create tracefs '%s' entry\n", name);
7486
7487         return ret;
7488 }
7489
7490
7491 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7492 {
7493         struct dentry *d_tracer;
7494
7495         if (tr->options)
7496                 return tr->options;
7497
7498         d_tracer = tracing_get_dentry(tr);
7499         if (IS_ERR(d_tracer))
7500                 return NULL;
7501
7502         tr->options = tracefs_create_dir("options", d_tracer);
7503         if (!tr->options) {
7504                 pr_warn("Could not create tracefs directory 'options'\n");
7505                 return NULL;
7506         }
7507
7508         return tr->options;
7509 }
7510
7511 static void
7512 create_trace_option_file(struct trace_array *tr,
7513                          struct trace_option_dentry *topt,
7514                          struct tracer_flags *flags,
7515                          struct tracer_opt *opt)
7516 {
7517         struct dentry *t_options;
7518
7519         t_options = trace_options_init_dentry(tr);
7520         if (!t_options)
7521                 return;
7522
7523         topt->flags = flags;
7524         topt->opt = opt;
7525         topt->tr = tr;
7526
7527         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7528                                     &trace_options_fops);
7529
7530 }
7531
7532 static void
7533 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7534 {
7535         struct trace_option_dentry *topts;
7536         struct trace_options *tr_topts;
7537         struct tracer_flags *flags;
7538         struct tracer_opt *opts;
7539         int cnt;
7540         int i;
7541
7542         if (!tracer)
7543                 return;
7544
7545         flags = tracer->flags;
7546
7547         if (!flags || !flags->opts)
7548                 return;
7549
7550         /*
7551          * If this is an instance, only create flags for tracers
7552          * the instance may have.
7553          */
7554         if (!trace_ok_for_array(tracer, tr))
7555                 return;
7556
7557         for (i = 0; i < tr->nr_topts; i++) {
7558                 /* Make sure there are no duplicate flags. */
7559                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7560                         return;
7561         }
7562
7563         opts = flags->opts;
7564
7565         for (cnt = 0; opts[cnt].name; cnt++)
7566                 ;
7567
7568         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7569         if (!topts)
7570                 return;
7571
7572         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7573                             GFP_KERNEL);
7574         if (!tr_topts) {
7575                 kfree(topts);
7576                 return;
7577         }
7578
7579         tr->topts = tr_topts;
7580         tr->topts[tr->nr_topts].tracer = tracer;
7581         tr->topts[tr->nr_topts].topts = topts;
7582         tr->nr_topts++;
7583
7584         for (cnt = 0; opts[cnt].name; cnt++) {
7585                 create_trace_option_file(tr, &topts[cnt], flags,
7586                                          &opts[cnt]);
7587                 WARN_ONCE(topts[cnt].entry == NULL,
7588                           "Failed to create trace option: %s",
7589                           opts[cnt].name);
7590         }
7591 }
7592
7593 static struct dentry *
7594 create_trace_option_core_file(struct trace_array *tr,
7595                               const char *option, long index)
7596 {
7597         struct dentry *t_options;
7598
7599         t_options = trace_options_init_dentry(tr);
7600         if (!t_options)
7601                 return NULL;
7602
7603         return trace_create_file(option, 0644, t_options,
7604                                  (void *)&tr->trace_flags_index[index],
7605                                  &trace_options_core_fops);
7606 }
7607
7608 static void create_trace_options_dir(struct trace_array *tr)
7609 {
7610         struct dentry *t_options;
7611         bool top_level = tr == &global_trace;
7612         int i;
7613
7614         t_options = trace_options_init_dentry(tr);
7615         if (!t_options)
7616                 return;
7617
7618         for (i = 0; trace_options[i]; i++) {
7619                 if (top_level ||
7620                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7621                         create_trace_option_core_file(tr, trace_options[i], i);
7622         }
7623 }
7624
7625 static ssize_t
7626 rb_simple_read(struct file *filp, char __user *ubuf,
7627                size_t cnt, loff_t *ppos)
7628 {
7629         struct trace_array *tr = filp->private_data;
7630         char buf[64];
7631         int r;
7632
7633         r = tracer_tracing_is_on(tr);
7634         r = sprintf(buf, "%d\n", r);
7635
7636         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7637 }
7638
7639 static ssize_t
7640 rb_simple_write(struct file *filp, const char __user *ubuf,
7641                 size_t cnt, loff_t *ppos)
7642 {
7643         struct trace_array *tr = filp->private_data;
7644         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7645         unsigned long val;
7646         int ret;
7647
7648         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7649         if (ret)
7650                 return ret;
7651
7652         if (buffer) {
7653                 mutex_lock(&trace_types_lock);
7654                 if (!!val == tracer_tracing_is_on(tr)) {
7655                         val = 0; /* do nothing */
7656                 } else if (val) {
7657                         tracer_tracing_on(tr);
7658                         if (tr->current_trace->start)
7659                                 tr->current_trace->start(tr);
7660                 } else {
7661                         tracer_tracing_off(tr);
7662                         if (tr->current_trace->stop)
7663                                 tr->current_trace->stop(tr);
7664                 }
7665                 mutex_unlock(&trace_types_lock);
7666         }
7667
7668         (*ppos)++;
7669
7670         return cnt;
7671 }
7672
7673 static const struct file_operations rb_simple_fops = {
7674         .open           = tracing_open_generic_tr,
7675         .read           = rb_simple_read,
7676         .write          = rb_simple_write,
7677         .release        = tracing_release_generic_tr,
7678         .llseek         = default_llseek,
7679 };
7680
7681 static ssize_t
7682 buffer_percent_read(struct file *filp, char __user *ubuf,
7683                     size_t cnt, loff_t *ppos)
7684 {
7685         struct trace_array *tr = filp->private_data;
7686         char buf[64];
7687         int r;
7688
7689         r = tr->buffer_percent;
7690         r = sprintf(buf, "%d\n", r);
7691
7692         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7693 }
7694
7695 static ssize_t
7696 buffer_percent_write(struct file *filp, const char __user *ubuf,
7697                      size_t cnt, loff_t *ppos)
7698 {
7699         struct trace_array *tr = filp->private_data;
7700         unsigned long val;
7701         int ret;
7702
7703         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7704         if (ret)
7705                 return ret;
7706
7707         if (val > 100)
7708                 return -EINVAL;
7709
7710         if (!val)
7711                 val = 1;
7712
7713         tr->buffer_percent = val;
7714
7715         (*ppos)++;
7716
7717         return cnt;
7718 }
7719
7720 static const struct file_operations buffer_percent_fops = {
7721         .open           = tracing_open_generic_tr,
7722         .read           = buffer_percent_read,
7723         .write          = buffer_percent_write,
7724         .release        = tracing_release_generic_tr,
7725         .llseek         = default_llseek,
7726 };
7727
7728 struct dentry *trace_instance_dir;
7729
7730 static void
7731 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7732
7733 static int
7734 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7735 {
7736         enum ring_buffer_flags rb_flags;
7737
7738         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7739
7740         buf->tr = tr;
7741
7742         buf->buffer = ring_buffer_alloc(size, rb_flags);
7743         if (!buf->buffer)
7744                 return -ENOMEM;
7745
7746         buf->data = alloc_percpu(struct trace_array_cpu);
7747         if (!buf->data) {
7748                 ring_buffer_free(buf->buffer);
7749                 buf->buffer = NULL;
7750                 return -ENOMEM;
7751         }
7752
7753         /* Allocate the first page for all buffers */
7754         set_buffer_entries(&tr->trace_buffer,
7755                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7756
7757         return 0;
7758 }
7759
7760 static int allocate_trace_buffers(struct trace_array *tr, int size)
7761 {
7762         int ret;
7763
7764         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7765         if (ret)
7766                 return ret;
7767
7768 #ifdef CONFIG_TRACER_MAX_TRACE
7769         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7770                                     allocate_snapshot ? size : 1);
7771         if (WARN_ON(ret)) {
7772                 ring_buffer_free(tr->trace_buffer.buffer);
7773                 tr->trace_buffer.buffer = NULL;
7774                 free_percpu(tr->trace_buffer.data);
7775                 tr->trace_buffer.data = NULL;
7776                 return -ENOMEM;
7777         }
7778         tr->allocated_snapshot = allocate_snapshot;
7779
7780         /*
7781          * Only the top level trace array gets its snapshot allocated
7782          * from the kernel command line.
7783          */
7784         allocate_snapshot = false;
7785 #endif
7786         return 0;
7787 }
7788
7789 static void free_trace_buffer(struct trace_buffer *buf)
7790 {
7791         if (buf->buffer) {
7792                 ring_buffer_free(buf->buffer);
7793                 buf->buffer = NULL;
7794                 free_percpu(buf->data);
7795                 buf->data = NULL;
7796         }
7797 }
7798
7799 static void free_trace_buffers(struct trace_array *tr)
7800 {
7801         if (!tr)
7802                 return;
7803
7804         free_trace_buffer(&tr->trace_buffer);
7805
7806 #ifdef CONFIG_TRACER_MAX_TRACE
7807         free_trace_buffer(&tr->max_buffer);
7808 #endif
7809 }
7810
7811 static void init_trace_flags_index(struct trace_array *tr)
7812 {
7813         int i;
7814
7815         /* Used by the trace options files */
7816         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7817                 tr->trace_flags_index[i] = i;
7818 }
7819
7820 static void __update_tracer_options(struct trace_array *tr)
7821 {
7822         struct tracer *t;
7823
7824         for (t = trace_types; t; t = t->next)
7825                 add_tracer_options(tr, t);
7826 }
7827
7828 static void update_tracer_options(struct trace_array *tr)
7829 {
7830         mutex_lock(&trace_types_lock);
7831         __update_tracer_options(tr);
7832         mutex_unlock(&trace_types_lock);
7833 }
7834
7835 static int instance_mkdir(const char *name)
7836 {
7837         struct trace_array *tr;
7838         int ret;
7839
7840         mutex_lock(&event_mutex);
7841         mutex_lock(&trace_types_lock);
7842
7843         ret = -EEXIST;
7844         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7845                 if (tr->name && strcmp(tr->name, name) == 0)
7846                         goto out_unlock;
7847         }
7848
7849         ret = -ENOMEM;
7850         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7851         if (!tr)
7852                 goto out_unlock;
7853
7854         tr->name = kstrdup(name, GFP_KERNEL);
7855         if (!tr->name)
7856                 goto out_free_tr;
7857
7858         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7859                 goto out_free_tr;
7860
7861         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7862
7863         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7864
7865         raw_spin_lock_init(&tr->start_lock);
7866
7867         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7868
7869         tr->current_trace = &nop_trace;
7870
7871         INIT_LIST_HEAD(&tr->systems);
7872         INIT_LIST_HEAD(&tr->events);
7873         INIT_LIST_HEAD(&tr->hist_vars);
7874
7875         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7876                 goto out_free_tr;
7877
7878         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7879         if (!tr->dir)
7880                 goto out_free_tr;
7881
7882         ret = event_trace_add_tracer(tr->dir, tr);
7883         if (ret) {
7884                 tracefs_remove_recursive(tr->dir);
7885                 goto out_free_tr;
7886         }
7887
7888         ftrace_init_trace_array(tr);
7889
7890         init_tracer_tracefs(tr, tr->dir);
7891         init_trace_flags_index(tr);
7892         __update_tracer_options(tr);
7893
7894         list_add(&tr->list, &ftrace_trace_arrays);
7895
7896         mutex_unlock(&trace_types_lock);
7897         mutex_unlock(&event_mutex);
7898
7899         return 0;
7900
7901  out_free_tr:
7902         free_trace_buffers(tr);
7903         free_cpumask_var(tr->tracing_cpumask);
7904         kfree(tr->name);
7905         kfree(tr);
7906
7907  out_unlock:
7908         mutex_unlock(&trace_types_lock);
7909         mutex_unlock(&event_mutex);
7910
7911         return ret;
7912
7913 }
7914
7915 static int instance_rmdir(const char *name)
7916 {
7917         struct trace_array *tr;
7918         int found = 0;
7919         int ret;
7920         int i;
7921
7922         mutex_lock(&event_mutex);
7923         mutex_lock(&trace_types_lock);
7924
7925         ret = -ENODEV;
7926         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7927                 if (tr->name && strcmp(tr->name, name) == 0) {
7928                         found = 1;
7929                         break;
7930                 }
7931         }
7932         if (!found)
7933                 goto out_unlock;
7934
7935         ret = -EBUSY;
7936         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7937                 goto out_unlock;
7938
7939         list_del(&tr->list);
7940
7941         /* Disable all the flags that were enabled coming in */
7942         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7943                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7944                         set_tracer_flag(tr, 1 << i, 0);
7945         }
7946
7947         tracing_set_nop(tr);
7948         clear_ftrace_function_probes(tr);
7949         event_trace_del_tracer(tr);
7950         ftrace_clear_pids(tr);
7951         ftrace_destroy_function_files(tr);
7952         tracefs_remove_recursive(tr->dir);
7953         free_trace_buffers(tr);
7954
7955         for (i = 0; i < tr->nr_topts; i++) {
7956                 kfree(tr->topts[i].topts);
7957         }
7958         kfree(tr->topts);
7959
7960         free_cpumask_var(tr->tracing_cpumask);
7961         kfree(tr->name);
7962         kfree(tr);
7963
7964         ret = 0;
7965
7966  out_unlock:
7967         mutex_unlock(&trace_types_lock);
7968         mutex_unlock(&event_mutex);
7969
7970         return ret;
7971 }
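
/*
 * Illustrative userspace sketch (not part of this file): instance_mkdir()
 * and instance_rmdir() above are reached as plain mkdir()/rmdir() calls on
 * the "instances" directory that create_trace_instances() below sets up.
 * The mount point and the instance name are assumptions for the example.
 */
#if 0 /* userspace example only */
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/sys/kernel/tracing/instances/demo";

        if (mkdir(path, 0755) < 0)      /* ends up in instance_mkdir("demo") */
                perror("mkdir instance");
        /* ... the new instance has its own trace, trace_pipe, events/ ... */
        if (rmdir(path) < 0)            /* ends up in instance_rmdir("demo") */
                perror("rmdir instance");
        return 0;
}
#endif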
7972
7973 static __init void create_trace_instances(struct dentry *d_tracer)
7974 {
7975         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7976                                                          instance_mkdir,
7977                                                          instance_rmdir);
7978         if (WARN_ON(!trace_instance_dir))
7979                 return;
7980 }
7981
7982 static void
7983 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7984 {
7985         struct trace_event_file *file;
7986         int cpu;
7987
7988         trace_create_file("available_tracers", 0444, d_tracer,
7989                         tr, &show_traces_fops);
7990
7991         trace_create_file("current_tracer", 0644, d_tracer,
7992                         tr, &set_tracer_fops);
7993
7994         trace_create_file("tracing_cpumask", 0644, d_tracer,
7995                           tr, &tracing_cpumask_fops);
7996
7997         trace_create_file("trace_options", 0644, d_tracer,
7998                           tr, &tracing_iter_fops);
7999
8000         trace_create_file("trace", 0644, d_tracer,
8001                           tr, &tracing_fops);
8002
8003         trace_create_file("trace_pipe", 0444, d_tracer,
8004                           tr, &tracing_pipe_fops);
8005
8006         trace_create_file("buffer_size_kb", 0644, d_tracer,
8007                           tr, &tracing_entries_fops);
8008
8009         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8010                           tr, &tracing_total_entries_fops);
8011
8012         trace_create_file("free_buffer", 0200, d_tracer,
8013                           tr, &tracing_free_buffer_fops);
8014
8015         trace_create_file("trace_marker", 0220, d_tracer,
8016                           tr, &tracing_mark_fops);
8017
8018         file = __find_event_file(tr, "ftrace", "print");
8019         if (file && file->dir)
8020                 trace_create_file("trigger", 0644, file->dir, file,
8021                                   &event_trigger_fops);
8022         tr->trace_marker_file = file;
8023
8024         trace_create_file("trace_marker_raw", 0220, d_tracer,
8025                           tr, &tracing_mark_raw_fops);
8026
8027         trace_create_file("trace_clock", 0644, d_tracer, tr,
8028                           &trace_clock_fops);
8029
8030         trace_create_file("tracing_on", 0644, d_tracer,
8031                           tr, &rb_simple_fops);
8032
8033         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8034                           &trace_time_stamp_mode_fops);
8035
8036         tr->buffer_percent = 50;
8037
8038         trace_create_file("buffer_percent", 0444, d_tracer,
8039                         tr, &buffer_percent_fops);
8040
8041         create_trace_options_dir(tr);
8042
8043 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8044         trace_create_file("tracing_max_latency", 0644, d_tracer,
8045                         &tr->max_latency, &tracing_max_lat_fops);
8046 #endif
8047
8048         if (ftrace_create_function_files(tr, d_tracer))
8049                 WARN(1, "Could not allocate function filter files");
8050
8051 #ifdef CONFIG_TRACER_SNAPSHOT
8052         trace_create_file("snapshot", 0644, d_tracer,
8053                           tr, &snapshot_fops);
8054 #endif
8055
8056         for_each_tracing_cpu(cpu)
8057                 tracing_init_tracefs_percpu(tr, cpu);
8058
8059         ftrace_init_tracefs(tr, d_tracer);
8060 }
8061
8062 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8063 {
8064         struct vfsmount *mnt;
8065         struct file_system_type *type;
8066
8067         /*
8068          * To maintain backward compatibility for tools that mount
8069          * debugfs to get to the tracing facility, tracefs is automatically
8070          * mounted to the debugfs/tracing directory.
8071          */
8072         type = get_fs_type("tracefs");
8073         if (!type)
8074                 return NULL;
8075         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8076         put_filesystem(type);
8077         if (IS_ERR(mnt))
8078                 return NULL;
8079         mntget(mnt);
8080
8081         return mnt;
8082 }
8083
8084 /**
8085  * tracing_init_dentry - initialize top level trace array
8086  *
8087  * This is called when creating files or directories in the tracing
8088  * directory. It is called via fs_initcall() from the boot-up code and
8089  * returns the top-level tracing dentry, or an ERR_PTR on failure.
8090  */
8091 struct dentry *tracing_init_dentry(void)
8092 {
8093         struct trace_array *tr = &global_trace;
8094
8095         /* The top level trace array uses NULL as parent */
8096         if (tr->dir)
8097                 return NULL;
8098
8099         if (WARN_ON(!tracefs_initialized()) ||
8100                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8101                  WARN_ON(!debugfs_initialized())))
8102                 return ERR_PTR(-ENODEV);
8103
8104         /*
8105          * As there may still be users that expect the tracing
8106          * files to exist in debugfs/tracing, we must automount
8107          * the tracefs file system there, so older tools still
8108          * work with the newer kernel.
8109          */
8110         tr->dir = debugfs_create_automount("tracing", NULL,
8111                                            trace_automount, NULL);
8112         if (!tr->dir) {
8113                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8114                 return ERR_PTR(-ENOMEM);
8115         }
8116
8117         return NULL;
8118 }
8119
8120 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8121 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8122
8123 static void __init trace_eval_init(void)
8124 {
8125         int len;
8126
8127         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8128         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8129 }
8130
8131 #ifdef CONFIG_MODULES
8132 static void trace_module_add_evals(struct module *mod)
8133 {
8134         if (!mod->num_trace_evals)
8135                 return;
8136
8137         /*
8138          * Modules with bad taint do not have events created; do
8139          * not bother with their eval maps either.
8140          */
8141         if (trace_module_has_bad_taint(mod))
8142                 return;
8143
8144         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8145 }
8146
8147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8148 static void trace_module_remove_evals(struct module *mod)
8149 {
8150         union trace_eval_map_item *map;
8151         union trace_eval_map_item **last = &trace_eval_maps;
8152
8153         if (!mod->num_trace_evals)
8154                 return;
8155
8156         mutex_lock(&trace_eval_mutex);
8157
8158         map = trace_eval_maps;
8159
8160         while (map) {
8161                 if (map->head.mod == mod)
8162                         break;
8163                 map = trace_eval_jmp_to_tail(map);
8164                 last = &map->tail.next;
8165                 map = map->tail.next;
8166         }
8167         if (!map)
8168                 goto out;
8169
8170         *last = trace_eval_jmp_to_tail(map)->tail.next;
8171         kfree(map);
8172  out:
8173         mutex_unlock(&trace_eval_mutex);
8174 }
8175 #else
8176 static inline void trace_module_remove_evals(struct module *mod) { }
8177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8178
8179 static int trace_module_notify(struct notifier_block *self,
8180                                unsigned long val, void *data)
8181 {
8182         struct module *mod = data;
8183
8184         switch (val) {
8185         case MODULE_STATE_COMING:
8186                 trace_module_add_evals(mod);
8187                 break;
8188         case MODULE_STATE_GOING:
8189                 trace_module_remove_evals(mod);
8190                 break;
8191         }
8192
8193         return 0;
8194 }
8195
8196 static struct notifier_block trace_module_nb = {
8197         .notifier_call = trace_module_notify,
8198         .priority = 0,
8199 };
8200 #endif /* CONFIG_MODULES */
8201
8202 static __init int tracer_init_tracefs(void)
8203 {
8204         struct dentry *d_tracer;
8205
8206         trace_access_lock_init();
8207
8208         d_tracer = tracing_init_dentry();
8209         if (IS_ERR(d_tracer))
8210                 return 0;
8211
8212         event_trace_init();
8213
8214         init_tracer_tracefs(&global_trace, d_tracer);
8215         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8216
8217         trace_create_file("tracing_thresh", 0644, d_tracer,
8218                         &global_trace, &tracing_thresh_fops);
8219
8220         trace_create_file("README", 0444, d_tracer,
8221                         NULL, &tracing_readme_fops);
8222
8223         trace_create_file("saved_cmdlines", 0444, d_tracer,
8224                         NULL, &tracing_saved_cmdlines_fops);
8225
8226         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8227                           NULL, &tracing_saved_cmdlines_size_fops);
8228
8229         trace_create_file("saved_tgids", 0444, d_tracer,
8230                         NULL, &tracing_saved_tgids_fops);
8231
8232         trace_eval_init();
8233
8234         trace_create_eval_file(d_tracer);
8235
8236 #ifdef CONFIG_MODULES
8237         register_module_notifier(&trace_module_nb);
8238 #endif
8239
8240 #ifdef CONFIG_DYNAMIC_FTRACE
8241         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8242                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8243 #endif
8244
8245         create_trace_instances(d_tracer);
8246
8247         update_tracer_options(&global_trace);
8248
8249         return 0;
8250 }
8251
8252 static int trace_panic_handler(struct notifier_block *this,
8253                                unsigned long event, void *unused)
8254 {
8255         if (ftrace_dump_on_oops)
8256                 ftrace_dump(ftrace_dump_on_oops);
8257         return NOTIFY_OK;
8258 }
8259
8260 static struct notifier_block trace_panic_notifier = {
8261         .notifier_call  = trace_panic_handler,
8262         .next           = NULL,
8263         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8264 };
8265
8266 static int trace_die_handler(struct notifier_block *self,
8267                              unsigned long val,
8268                              void *data)
8269 {
8270         switch (val) {
8271         case DIE_OOPS:
8272                 if (ftrace_dump_on_oops)
8273                         ftrace_dump(ftrace_dump_on_oops);
8274                 break;
8275         default:
8276                 break;
8277         }
8278         return NOTIFY_OK;
8279 }
8280
8281 static struct notifier_block trace_die_notifier = {
8282         .notifier_call = trace_die_handler,
8283         .priority = 200
8284 };
8285
8286 /*
8287  * printk is capped at a maximum of 1024 characters; we really don't
8288  * need it that big. Nothing should be printing 1000 characters anyway.
8289  */
8290 #define TRACE_MAX_PRINT         1000
8291
8292 /*
8293  * Define here KERN_TRACE so that we have one place to modify
8294  * it if we decide to change what log level the ftrace dump
8295  * should be at.
8296  */
8297 #define KERN_TRACE              KERN_EMERG
8298
8299 void
8300 trace_printk_seq(struct trace_seq *s)
8301 {
8302         /* Probably should print a warning here. */
8303         if (s->seq.len >= TRACE_MAX_PRINT)
8304                 s->seq.len = TRACE_MAX_PRINT;
8305
8306         /*
8307          * More paranoid code. Although the buffer size is set to
8308          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8309          * an extra layer of protection.
8310          */
8311         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8312                 s->seq.len = s->seq.size - 1;
8313
8314         /* Should be zero-terminated, but we are paranoid. */
8315         s->buffer[s->seq.len] = 0;
8316
8317         printk(KERN_TRACE "%s", s->buffer);
8318
8319         trace_seq_init(s);
8320 }
8321
8322 void trace_init_global_iter(struct trace_iterator *iter)
8323 {
8324         iter->tr = &global_trace;
8325         iter->trace = iter->tr->current_trace;
8326         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8327         iter->trace_buffer = &global_trace.trace_buffer;
8328
8329         if (iter->trace && iter->trace->open)
8330                 iter->trace->open(iter);
8331
8332         /* Annotate start of buffers if we had overruns */
8333         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8334                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8335
8336         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8337         if (trace_clocks[iter->tr->clock_id].in_ns)
8338                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8339 }
8340
8341 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8342 {
8343         /* use static because iter can be a bit big for the stack */
8344         static struct trace_iterator iter;
8345         static atomic_t dump_running;
8346         struct trace_array *tr = &global_trace;
8347         unsigned int old_userobj;
8348         unsigned long flags;
8349         int cnt = 0, cpu;
8350
8351         /* Only allow one dump user at a time. */
8352         if (atomic_inc_return(&dump_running) != 1) {
8353                 atomic_dec(&dump_running);
8354                 return;
8355         }
8356
8357         /*
8358          * Always turn off tracing when we dump.
8359          * We don't need to show trace output of what happens
8360          * between multiple crashes.
8361          *
8362          * If the user does a sysrq-z, then they can re-enable
8363          * tracing with echo 1 > tracing_on.
8364          */
8365         tracing_off();
8366
8367         local_irq_save(flags);
8368         printk_nmi_direct_enter();
8369
8370         /* Simulate the iterator */
8371         trace_init_global_iter(&iter);
8372
8373         for_each_tracing_cpu(cpu) {
8374                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8375         }
8376
8377         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8378
8379         /* don't look at user memory in panic mode */
8380         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8381
8382         switch (oops_dump_mode) {
8383         case DUMP_ALL:
8384                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8385                 break;
8386         case DUMP_ORIG:
8387                 iter.cpu_file = raw_smp_processor_id();
8388                 break;
8389         case DUMP_NONE:
8390                 goto out_enable;
8391         default:
8392                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8393                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8394         }
8395
8396         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8397
8398         /* Did function tracer already get disabled? */
8399         if (ftrace_is_dead()) {
8400                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8401                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8402         }
8403
8404         /*
8405          * We need to stop all tracing on all CPUs to read
8406          * the next buffer. This is a bit expensive, but is
8407          * not done often. We fill all that we can read,
8408          * and then release the locks again.
8409          */
8410
8411         while (!trace_empty(&iter)) {
8412
8413                 if (!cnt)
8414                         printk(KERN_TRACE "---------------------------------\n");
8415
8416                 cnt++;
8417
8418                 /* reset all but tr, trace, and overruns */
8419                 memset(&iter.seq, 0,
8420                        sizeof(struct trace_iterator) -
8421                        offsetof(struct trace_iterator, seq));
8422                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8423                 iter.pos = -1;
8424
8425                 if (trace_find_next_entry_inc(&iter) != NULL) {
8426                         int ret;
8427
8428                         ret = print_trace_line(&iter);
8429                         if (ret != TRACE_TYPE_NO_CONSUME)
8430                                 trace_consume(&iter);
8431                 }
8432                 touch_nmi_watchdog();
8433
8434                 trace_printk_seq(&iter.seq);
8435         }
8436
8437         if (!cnt)
8438                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8439         else
8440                 printk(KERN_TRACE "---------------------------------\n");
8441
8442  out_enable:
8443         tr->trace_flags |= old_userobj;
8444
8445         for_each_tracing_cpu(cpu) {
8446                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8447         }
8448         atomic_dec(&dump_running);
8449         printk_nmi_direct_exit();
8450         local_irq_restore(flags);
8451 }
8452 EXPORT_SYMBOL_GPL(ftrace_dump);
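
/*
 * Illustrative sketch only (not part of this file's build): since
 * ftrace_dump() is exported GPL, a module can dump the ftrace buffers
 * from its own error path.  The driver and function names below are
 * hypothetical.
 */
#if 0
static void mydrv_fatal_error(void)
{
	/* DUMP_ORIG dumps only the CPU that hit the error; DUMP_ALL dumps every CPU. */
	ftrace_dump(DUMP_ORIG);
}
#endif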
8453
8454 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8455 {
8456         char **argv;
8457         int argc, ret;
8458
8459         argc = 0;
8460         ret = 0;
8461         argv = argv_split(GFP_KERNEL, buf, &argc);
8462         if (!argv)
8463                 return -ENOMEM;
8464
8465         if (argc)
8466                 ret = createfn(argc, argv);
8467
8468         argv_free(argv);
8469
8470         return ret;
8471 }
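
/*
 * Illustrative sketch only (not part of this file's build): a minimal
 * createfn of the form trace_run_command() expects.  The function names
 * and the command string are hypothetical; real users such as the
 * kprobe/uprobe event interfaces plug in their own parsers here.
 */
#if 0
static int example_createfn(int argc, char **argv)
{
	if (argc < 1)
		return -EINVAL;

	/* argv[0] is the command, argv[1..argc-1] are its arguments. */
	pr_info("cmd=%s argc=%d\n", argv[0], argc);
	return 0;
}

static int example_run(void)
{
	/* Each call hands one complete command line to the callback. */
	return trace_run_command("p:myprobe do_sys_open", example_createfn);
}
#endif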
8472
8473 #define WRITE_BUFSIZE  4096
8474
8475 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8476                                 size_t count, loff_t *ppos,
8477                                 int (*createfn)(int, char **))
8478 {
8479         char *kbuf, *buf, *tmp;
8480         int ret = 0;
8481         size_t done = 0;
8482         size_t size;
8483
8484         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8485         if (!kbuf)
8486                 return -ENOMEM;
8487
8488         while (done < count) {
8489                 size = count - done;
8490
8491                 if (size >= WRITE_BUFSIZE)
8492                         size = WRITE_BUFSIZE - 1;
8493
8494                 if (copy_from_user(kbuf, buffer + done, size)) {
8495                         ret = -EFAULT;
8496                         goto out;
8497                 }
8498                 kbuf[size] = '\0';
8499                 buf = kbuf;
8500                 do {
8501                         tmp = strchr(buf, '\n');
8502                         if (tmp) {
8503                                 *tmp = '\0';
8504                                 size = tmp - buf + 1;
8505                         } else {
8506                                 size = strlen(buf);
8507                                 if (done + size < count) {
8508                                         if (buf != kbuf)
8509                                                 break;
8510                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8511                                         pr_warn("Line length is too long: Should be less than %d\n",
8512                                                 WRITE_BUFSIZE - 2);
8513                                         ret = -EINVAL;
8514                                         goto out;
8515                                 }
8516                         }
8517                         done += size;
8518
8519                         /* Remove comments */
8520                         tmp = strchr(buf, '#');
8521
8522                         if (tmp)
8523                                 *tmp = '\0';
8524
8525                         ret = trace_run_command(buf, createfn);
8526                         if (ret)
8527                                 goto out;
8528                         buf += size;
8529
8530                 } while (done < count);
8531         }
8532         ret = done;
8533
8534 out:
8535         kfree(kbuf);
8536
8537         return ret;
8538 }
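
/*
 * Illustrative sketch only (not part of this file's build): how a tracefs
 * control file's ->write() handler can delegate line parsing to
 * trace_parse_run_command(), in the style of kprobe_events/uprobe_events.
 * example_write(), example_fops and example_createfn() (from the sketch
 * above) are hypothetical names.
 */
#if 0
static ssize_t example_write(struct file *file, const char __user *buffer,
			     size_t count, loff_t *ppos)
{
	/* Splits the write into '\n'-separated commands, strips '#' comments. */
	return trace_parse_run_command(file, buffer, count, ppos,
				       example_createfn);
}

static const struct file_operations example_fops = {
	.open	= tracing_open_generic,
	.write	= example_write,
	.llseek	= generic_file_llseek,
};
#endif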
8539
8540 __init static int tracer_alloc_buffers(void)
8541 {
8542         int ring_buf_size;
8543         int ret = -ENOMEM;
8544
8545         /*
8546          * Make sure we don't accidentally add more trace options
8547          * than we have bits for.
8548          */
8549         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8550
8551         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8552                 goto out;
8553
8554         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8555                 goto out_free_buffer_mask;
8556
8557         /* Only allocate trace_printk buffers if a trace_printk exists */
8558         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8559                 /* Must be called before global_trace.buffer is allocated */
8560                 trace_printk_init_buffers();
8561
8562         /* To save memory, keep the ring buffer size to its minimum */
8563         if (ring_buffer_expanded)
8564                 ring_buf_size = trace_buf_size;
8565         else
8566                 ring_buf_size = 1;
8567
8568         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8569         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8570
8571         raw_spin_lock_init(&global_trace.start_lock);
8572
8573         /*
8574          * The prepare callback allocates some memory for the ring buffer. We
8575          * don't free the buffer if the CPU goes down. If we were to free
8576          * the buffer, then the user would lose any trace that was in the
8577          * buffer. The memory will be removed once the "instance" is removed.
8578          */
8579         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8580                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8581                                       NULL);
8582         if (ret < 0)
8583                 goto out_free_cpumask;
8584         /* Used for event triggers */
8585         ret = -ENOMEM;
8586         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8587         if (!temp_buffer)
8588                 goto out_rm_hp_state;
8589
8590         if (trace_create_savedcmd() < 0)
8591                 goto out_free_temp_buffer;
8592
8593         /* TODO: make the number of buffers hot-pluggable with CPUs */
8594         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8595                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8596                 WARN_ON(1);
8597                 goto out_free_savedcmd;
8598         }
8599
8600         if (global_trace.buffer_disabled)
8601                 tracing_off();
8602
8603         if (trace_boot_clock) {
8604                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8605                 if (ret < 0)
8606                         pr_warn("Trace clock %s not defined, going back to default\n",
8607                                 trace_boot_clock);
8608         }
8609
8610         /*
8611          * register_tracer() might reference current_trace, so it
8612          * needs to be set before we register anything. This is
8613          * just a bootstrap of current_trace anyway.
8614          */
8615         global_trace.current_trace = &nop_trace;
8616
8617         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8618
8619         ftrace_init_global_array_ops(&global_trace);
8620
8621         init_trace_flags_index(&global_trace);
8622
8623         register_tracer(&nop_trace);
8624
8625         /* Function tracing may start here (via kernel command line) */
8626         init_function_trace();
8627
8628         /* All seems OK, enable tracing */
8629         tracing_disabled = 0;
8630
8631         atomic_notifier_chain_register(&panic_notifier_list,
8632                                        &trace_panic_notifier);
8633
8634         register_die_notifier(&trace_die_notifier);
8635
8636         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8637
8638         INIT_LIST_HEAD(&global_trace.systems);
8639         INIT_LIST_HEAD(&global_trace.events);
8640         INIT_LIST_HEAD(&global_trace.hist_vars);
8641         list_add(&global_trace.list, &ftrace_trace_arrays);
8642
8643         apply_trace_boot_options();
8644
8645         register_snapshot_cmd();
8646
8647         return 0;
8648
8649 out_free_savedcmd:
8650         free_saved_cmdlines_buffer(savedcmd);
8651 out_free_temp_buffer:
8652         ring_buffer_free(temp_buffer);
8653 out_rm_hp_state:
8654         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8655 out_free_cpumask:
8656         free_cpumask_var(global_trace.tracing_cpumask);
8657 out_free_buffer_mask:
8658         free_cpumask_var(tracing_buffer_mask);
8659 out:
8660         return ret;
8661 }
8662
8663 void __init early_trace_init(void)
8664 {
8665         if (tracepoint_printk) {
8666                 tracepoint_print_iter =
8667                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8668                 if (WARN_ON(!tracepoint_print_iter))
8669                         tracepoint_printk = 0;
8670                 else
8671                         static_key_enable(&tracepoint_printk_key.key);
8672         }
8673         tracer_alloc_buffers();
8674 }
8675
8676 void __init trace_init(void)
8677 {
8678         trace_event_init();
8679 }
8680
8681 __init static int clear_boot_tracer(void)
8682 {
8683         /*
8684          * The default bootup tracer name points into an init section
8685          * that is freed after boot. This function is called as a late
8686          * initcall: if the boot tracer was never registered, clear the
8687          * pointer so that a later registration does not access the
8688          * buffer that is about to be freed.
8689          */
8690         if (!default_bootup_tracer)
8691                 return 0;
8692
8693         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8694                default_bootup_tracer);
8695         default_bootup_tracer = NULL;
8696
8697         return 0;
8698 }
8699
8700 fs_initcall(tracer_init_tracefs);
8701 late_initcall_sync(clear_boot_tracer);
8702
8703 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8704 __init static int tracing_set_default_clock(void)
8705 {
8706         /* sched_clock_stable() is determined in late_initcall */
8707         if (!trace_boot_clock && !sched_clock_stable()) {
8708                 printk(KERN_WARNING
8709                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8710                        "If you want to keep using the local clock, then add:\n"
8711                        "  \"trace_clock=local\"\n"
8712                        "on the kernel command line\n");
8713                 tracing_set_clock(&global_trace, "global");
8714         }
8715
8716         return 0;
8717 }
8718 late_initcall_sync(tracing_set_default_clock);
8719 #endif