kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring-buffer to count the
67  * entries inserted during the selftest, although some concurrent
68  * insertions into the ring-buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurs.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 and is set to zero when the initialization
118  * of the tracer succeeds. That is the only place that sets
119  * it back to zero.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set it to 1 to dump the buffers of all CPUs.
138  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * than "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
178
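/*
 * Illustrative layout of one saved block, as implied by the comment above
 * (the index names are assumptions used only for this sketch):
 *
 *   item[0].head         = { .mod = owning module or NULL, .length = N }
 *   item[1] .. item[N]   = the N struct trace_eval_map entries
 *   item[N+1].tail       = { .next = next saved block or NULL, .end = NULL }
 */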
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
328
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are adding export to the list, but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer inserted into the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
467
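/*
 * Illustrative sketch of a trace_export user (not part of this file; the
 * callback name and body are assumptions). register_ftrace_export() only
 * requires a non-NULL ->write() callback and the TRACE_EXPORT_* bits the
 * caller is interested in.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward the raw trace entry (e.g. to a device or firmware log). */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
};

/* Typically paired in module init/exit:
 *	register_ftrace_export(&example_export);
 *	unregister_ftrace_export(&example_export);
 */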
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538
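/*
 * Illustrative pairing for a module-side user (an assumption; the instance
 * name is made up): trace_array_get_by_name() looks up (or creates) the
 * named instance with a reference held, and trace_array_put() drops that
 * reference so the instance can later be destroyed.
 */
static void example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example_instance");
	if (!tr)
		return;

	/* ... use the instance (enable events, write to its buffer, ...) ... */

	trace_array_put(tr);
}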
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
716
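/*
 * Illustrative sketch (assumed, not taken verbatim from a user of this API)
 * of how the three helpers above plug into a seq_file. Real users wrap
 * start/next so they can pick up the pid_list under their own locking first.
 */
static void *example_pid_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* assumption: stashed at open() */

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(m->private, v, pos);
}

static void example_pid_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_start,
	.next	= example_pid_next,
	.stop	= example_pid_stop,
	.show	= trace_pid_show,	/* v holds pid + 1; this prints the real pid */
};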
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * The write is an all-or-nothing operation: always create a
738          * new list when the user adds pids, so that if the operation
739          * fails, the current list is left unmodified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
806
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" so it can be checked in fast paths such
831  * as the irqsoff tracer, but it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on(), which is a
833  * little slower but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If a dump on oops happens, it is much appreciated not to have to
853  * wait for a huge amount of output. In any case, this is configurable
854  * at both boot time and run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * Serialize access to the ring buffer.
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (returned by ring_buffer_peek() etc.) is not
873  * protected by the ring buffer itself.
874  *
875  * The content of events may become garbage if we allow another process to
876  * consume these events concurrently:
877  *   A) the page holding the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and will be rewritten
879  *      by the event producer.
880  *   B) the page holding the consumed events may become a page for
881  *      splice_read, and will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different per-cpu
884  * ring buffers concurrently.
885  *
886  * They do not distinguish read-only from read-consume access;
887  * concurrent read-only accesses are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
949
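/*
 * Illustrative consumer-side pattern (a sketch of how later code in this
 * file uses the helpers above): hold the access lock around any consuming
 * read so the pages backing the events cannot be recycled concurrently.
 */
static inline void example_consume(struct trace_iterator *iter)
{
	trace_access_lock(iter->cpu_file);
	/* ... peek at / consume events for iter->cpu_file here ... */
	trace_access_unlock(iter->cpu_file);
}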
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff), that just want to
1005          * know if the ring buffer has been disabled, but it can handle
1006          * races where it gets disabled but we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running && tr == &global_trace))
1058                 return 0;
1059
1060         if (unlikely(tracing_disabled))
1061                 return 0;
1062
1063         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = tr->array_buffer.buffer;
1067         ring_buffer_nest_start(buffer);
1068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069                                             trace_ctx);
1070         if (!event) {
1071                 size = 0;
1072                 goto out;
1073         }
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip = ip;
1077
1078         memcpy(&entry->buf, str, size);
1079
1080         /* Add a newline if necessary */
1081         if (entry->buf[size - 1] != '\n') {
1082                 entry->buf[size] = '\n';
1083                 entry->buf[size + 1] = '\0';
1084         } else
1085                 entry->buf[size] = '\0';
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:    The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103         return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:    The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114         struct ring_buffer_event *event;
1115         struct trace_buffer *buffer;
1116         struct bputs_entry *entry;
1117         unsigned int trace_ctx;
1118         int size = sizeof(struct bputs_entry);
1119         int ret = 0;
1120
1121         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122                 return 0;
1123
1124         if (unlikely(tracing_selftest_running || tracing_disabled))
1125                 return 0;
1126
1127         trace_ctx = tracing_gen_ctx();
1128         buffer = global_trace.array_buffer.buffer;
1129
1130         ring_buffer_nest_start(buffer);
1131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132                                             trace_ctx);
1133         if (!event)
1134                 goto out;
1135
1136         entry = ring_buffer_event_data(event);
1137         entry->ip                       = ip;
1138         entry->str                      = str;
1139
1140         __buffer_unlock_commit(buffer, event);
1141         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142
1143         ret = 1;
1144  out:
1145         ring_buffer_nest_end(buffer);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149
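/*
 * Callers normally use the trace_puts() macro rather than calling
 * __trace_puts()/__trace_bputs() directly; the macro selects the cheaper
 * __trace_bputs() variant when the string is a compile-time constant.
 * For example:
 *
 *	trace_puts("reached the interesting spot\n");
 */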
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152                                            void *cond_data)
1153 {
1154         struct tracer *tracer = tr->current_trace;
1155         unsigned long flags;
1156
1157         if (in_nmi()) {
1158                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160                 return;
1161         }
1162
1163         if (!tr->allocated_snapshot) {
1164                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166                 tracer_tracing_off(tr);
1167                 return;
1168         }
1169
1170         /* Note, the snapshot can not be used while the tracer itself is using it */
1171         if (tracer->use_max_tr) {
1172                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174                 return;
1175         }
1176
1177         local_irq_save(flags);
1178         update_max_tr(tr, current, smp_processor_id(), cond_data);
1179         local_irq_restore(flags);
1180 }
1181
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184         tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot either with
1195  * tracing_snapshot_alloc(), or manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, this will stop tracing,
1199  * basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203         struct trace_array *tr = &global_trace;
1204
1205         tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:         The tracing instance to snapshot
1212  * @cond_data:  The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224         tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:         The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244         void *cond_data = NULL;
1245
1246         local_irq_disable();
1247         arch_spin_lock(&tr->max_lock);
1248
1249         if (tr->cond_snapshot)
1250                 cond_data = tr->cond_snapshot->cond_data;
1251
1252         arch_spin_unlock(&tr->max_lock);
1253         local_irq_enable();
1254
1255         return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260                                         struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265         int ret;
1266
1267         if (!tr->allocated_snapshot) {
1268
1269                 /* allocate spare buffer */
1270                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272                 if (ret < 0)
1273                         return ret;
1274
1275                 tr->allocated_snapshot = true;
1276         }
1277
1278         return 0;
1279 }
1280
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283         /*
1284          * We don't free the ring buffer; instead, we resize it, because
1285          * the max_tr ring buffer has some state (e.g. ring->clock) that
1286          * we want to preserve.
1287          */
1288         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289         set_buffer_entries(&tr->max_buffer, 1);
1290         tracing_reset_online_cpus(&tr->max_buffer);
1291         tr->allocated_snapshot = false;
1292 }
1293
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306         struct trace_array *tr = &global_trace;
1307         int ret;
1308
1309         ret = tracing_alloc_snapshot_instance(tr);
1310         WARN_ON(ret < 0);
1311
1312         return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329         int ret;
1330
1331         ret = tracing_alloc_snapshot();
1332         if (ret < 0)
1333                 return;
1334
1335         tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1338
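/*
 * Illustrative two-step usage (an assumption about a typical caller, not
 * code from this file): allocate the spare buffer early from a sleepable
 * context, then trigger the swap later, possibly from atomic context.
 */
static int example_arm_snapshot(void)
{
	int ret;

	ret = tracing_alloc_snapshot();	/* may sleep */
	if (ret < 0)
		return ret;

	/* ... later, when the interesting condition hits ... */
	tracing_snapshot();		/* swap live and snapshot buffers */
	return 0;
}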
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:         The tracing instance
1342  * @cond_data:  User data to associate with the snapshot
1343  * @update:     Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353                                  cond_update_fn_t update)
1354 {
1355         struct cond_snapshot *cond_snapshot;
1356         int ret = 0;
1357
1358         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359         if (!cond_snapshot)
1360                 return -ENOMEM;
1361
1362         cond_snapshot->cond_data = cond_data;
1363         cond_snapshot->update = update;
1364
1365         mutex_lock(&trace_types_lock);
1366
1367         ret = tracing_alloc_snapshot_instance(tr);
1368         if (ret)
1369                 goto fail_unlock;
1370
1371         if (tr->current_trace->use_max_tr) {
1372                 ret = -EBUSY;
1373                 goto fail_unlock;
1374         }
1375
1376         /*
1377          * The cond_snapshot can only change to NULL without the
1378          * trace_types_lock. We don't care if we race with it going
1379          * to NULL, but we want to make sure that it's not set to
1380          * something other than NULL when we get here, which we can
1381          * do safely with only holding the trace_types_lock and not
1382          * having to take the max_lock.
1383          */
1384         if (tr->cond_snapshot) {
1385                 ret = -EBUSY;
1386                 goto fail_unlock;
1387         }
1388
1389         local_irq_disable();
1390         arch_spin_lock(&tr->max_lock);
1391         tr->cond_snapshot = cond_snapshot;
1392         arch_spin_unlock(&tr->max_lock);
1393         local_irq_enable();
1394
1395         mutex_unlock(&trace_types_lock);
1396
1397         return ret;
1398
1399  fail_unlock:
1400         mutex_unlock(&trace_types_lock);
1401         kfree(cond_snapshot);
1402         return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405
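/*
 * Illustrative conditional-snapshot flow (the callback, struct and field
 * names are assumptions; only the API calls and the cond_update_fn_t
 * signature come from this file and its headers).
 */
struct example_cond {
	unsigned long	threshold;
	unsigned long	seen;
};

static bool example_update(struct trace_array *tr, void *cond_data)
{
	struct example_cond *cond = cond_data;

	/* Only allow the buffer swap once the threshold is crossed. */
	return cond->seen > cond->threshold;
}

static struct example_cond example_cond_data = { .threshold = 100 };

/* Arm once from a sleepable context, then test at interesting points:
 *	tracing_snapshot_cond_enable(tr, &example_cond_data, example_update);
 *	...
 *	tracing_snapshot_cond(tr, &example_cond_data);
 */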
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:         The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418         int ret = 0;
1419
1420         local_irq_disable();
1421         arch_spin_lock(&tr->max_lock);
1422
1423         if (!tr->cond_snapshot)
1424                 ret = -EINVAL;
1425         else {
1426                 kfree(tr->cond_snapshot);
1427                 tr->cond_snapshot = NULL;
1428         }
1429
1430         arch_spin_unlock(&tr->max_lock);
1431         local_irq_enable();
1432
1433         return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450         return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455         /* Give warning */
1456         tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461         return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466         return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471         return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)       do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 ring_buffer_record_off(tr->array_buffer.buffer);
1481         /*
1482          * This flag is looked at when buffers haven't been allocated
1483          * yet, or by some tracers (like irqsoff), that just want to
1484          * know if the ring buffer has been disabled, but it can handle
1485          * races where it gets disabled but we still do a record.
1486          * As the check is in the fast path of the tracers, it is more
1487          * important to be fast than accurate.
1488          */
1489         tr->buffer_disabled = 1;
1490         /* Make the flag seen by readers */
1491         smp_wmb();
1492 }
1493
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504         tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507
1508 void disable_trace_on_warning(void)
1509 {
1510         if (__disable_trace_on_warning) {
1511                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512                         "Disabling tracing due to warning\n");
1513                 tracing_off();
1514         }
1515 }
1516
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows the real state of the ring buffer, whether it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525         if (tr->array_buffer.buffer)
1526                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527         return !tr->buffer_disabled;
1528 }
1529
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535         return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538
1539 static int __init set_buf_size(char *str)
1540 {
1541         unsigned long buf_size;
1542
1543         if (!str)
1544                 return 0;
1545         buf_size = memparse(str, &str);
1546         /*
1547          * nr_entries can not be zero and the startup
1548          * tests require some buffer space. Therefore
1549          * ensure we have at least 4096 bytes of buffer.
1550          */
1551         trace_buf_size = max(4096UL, buf_size);
1552         return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558         unsigned long threshold;
1559         int ret;
1560
1561         if (!str)
1562                 return 0;
1563         ret = kstrtoul(str, 0, &threshold);
1564         if (ret < 0)
1565                 return 0;
1566         tracing_thresh = threshold * 1000;
1567         return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573         return nsecs / 1000;
1574 }
1575
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587         TRACE_FLAGS
1588         NULL
1589 };
1590
1591 static struct {
1592         u64 (*func)(void);
1593         const char *name;
1594         int in_ns;              /* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596         { trace_clock_local,            "local",        1 },
1597         { trace_clock_global,           "global",       1 },
1598         { trace_clock_counter,          "counter",      0 },
1599         { trace_clock_jiffies,          "uptime",       0 },
1600         { trace_clock,                  "perf",         1 },
1601         { ktime_get_mono_fast_ns,       "mono",         1 },
1602         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1603         { ktime_get_boot_fast_ns,       "boot",         1 },
1604         { ktime_get_tai_fast_ns,        "tai",          1 },
1605         ARCH_TRACE_CLOCKS
1606 };
1607
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610         if (trace_clocks[tr->clock_id].in_ns)
1611                 return true;
1612
1613         return false;
1614 }
1615
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621         memset(parser, 0, sizeof(*parser));
1622
1623         parser->buffer = kmalloc(size, GFP_KERNEL);
1624         if (!parser->buffer)
1625                 return 1;
1626
1627         parser->size = size;
1628         return 0;
1629 }
1630
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636         kfree(parser->buffer);
1637         parser->buffer = NULL;
1638 }
1639
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652         size_t cnt, loff_t *ppos)
1653 {
1654         char ch;
1655         size_t read = 0;
1656         ssize_t ret;
1657
1658         if (!*ppos)
1659                 trace_parser_clear(parser);
1660
1661         ret = get_user(ch, ubuf++);
1662         if (ret)
1663                 goto out;
1664
1665         read++;
1666         cnt--;
1667
1668         /*
1669          * The parser is not finished with the last write,
1670          * continue reading the user input without skipping spaces.
1671          */
1672         if (!parser->cont) {
1673                 /* skip white space */
1674                 while (cnt && isspace(ch)) {
1675                         ret = get_user(ch, ubuf++);
1676                         if (ret)
1677                                 goto out;
1678                         read++;
1679                         cnt--;
1680                 }
1681
1682                 parser->idx = 0;
1683
1684                 /* only spaces were written */
1685                 if (isspace(ch) || !ch) {
1686                         *ppos += read;
1687                         ret = read;
1688                         goto out;
1689                 }
1690         }
1691
1692         /* read the non-space input */
1693         while (cnt && !isspace(ch) && ch) {
1694                 if (parser->idx < parser->size - 1)
1695                         parser->buffer[parser->idx++] = ch;
1696                 else {
1697                         ret = -EINVAL;
1698                         goto out;
1699                 }
1700                 ret = get_user(ch, ubuf++);
1701                 if (ret)
1702                         goto out;
1703                 read++;
1704                 cnt--;
1705         }
1706
1707         /* We either got finished input or we have to wait for another call. */
1708         if (isspace(ch) || !ch) {
1709                 parser->buffer[parser->idx] = 0;
1710                 parser->cont = false;
1711         } else if (parser->idx < parser->size - 1) {
1712                 parser->cont = true;
1713                 parser->buffer[parser->idx++] = ch;
1714                 /* Make sure the parsed string always terminates with '\0'. */
1715                 parser->buffer[parser->idx] = 0;
1716         } else {
1717                 ret = -EINVAL;
1718                 goto out;
1719         }
1720
1721         *ppos += read;
1722         ret = read;
1723
1724 out:
1725         return ret;
1726 }
1727
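/*
 * A typical write() handler consumes the space-separated tokens with
 * this parser roughly as sketched below. MAX_TOKEN_SIZE and
 * handle_token() are made-up placeholders; real callers (see e.g.
 * ftrace_regex_write()) also check trace_parser_cont() and do full
 * error handling.
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, MAX_TOKEN_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		handle_token(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */
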
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731         int len;
1732
1733         if (trace_seq_used(s) <= s->seq.readpos)
1734                 return -EBUSY;
1735
1736         len = trace_seq_used(s) - s->seq.readpos;
1737         if (cnt > len)
1738                 cnt = len;
1739         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740
1741         s->seq.readpos += cnt;
1742         return cnt;
1743 }
1744
1745 unsigned long __read_mostly     tracing_thresh;
1746
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749
1750 #ifdef LATENCY_FS_NOTIFY
1751
1752 static struct workqueue_struct *fsnotify_wq;
1753
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756         struct trace_array *tr = container_of(work, struct trace_array,
1757                                               fsnotify_work);
1758         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763         struct trace_array *tr = container_of(iwork, struct trace_array,
1764                                               fsnotify_irqwork);
1765         queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769                                      struct dentry *d_tracer)
1770 {
1771         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773         tr->d_max_latency = trace_create_file("tracing_max_latency",
1774                                               TRACE_MODE_WRITE,
1775                                               d_tracer, &tr->max_latency,
1776                                               &tracing_max_lat_fops);
1777 }
1778
1779 __init static int latency_fsnotify_init(void)
1780 {
1781         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1783         if (!fsnotify_wq) {
1784                 pr_err("Unable to allocate tr_max_lat_wq\n");
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 late_initcall_sync(latency_fsnotify_init);
1791
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794         if (!fsnotify_wq)
1795                 return;
1796         /*
1797          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798          * possible that we are called from __schedule() or do_idle(), which
1799          * could cause a deadlock.
1800          */
1801         irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803
1804 #else /* !LATENCY_FS_NOTIFY */
1805
1806 #define trace_create_maxlat_file(tr, d_tracer)                          \
1807         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1808                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1809
1810 #endif
1811
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820         struct array_buffer *trace_buf = &tr->array_buffer;
1821         struct array_buffer *max_buf = &tr->max_buffer;
1822         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824
1825         max_buf->cpu = cpu;
1826         max_buf->time_start = data->preempt_timestamp;
1827
1828         max_data->saved_latency = tr->max_latency;
1829         max_data->critical_start = data->critical_start;
1830         max_data->critical_end = data->critical_end;
1831
1832         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833         max_data->pid = tsk->pid;
1834         /*
1835          * If tsk == current, then use current_uid(), as that does not use
1836          * RCU. The irq tracer can be called out of RCU scope.
1837          */
1838         if (tsk == current)
1839                 max_data->uid = current_uid();
1840         else
1841                 max_data->uid = task_uid(tsk);
1842
1843         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844         max_data->policy = tsk->policy;
1845         max_data->rt_priority = tsk->rt_priority;
1846
1847         /* record this task's comm */
1848         tracing_record_cmdline(tsk);
1849         latency_fsnotify(tr);
1850 }
1851
1852 /**
1853  * update_max_tr - snapshot all trace buffers from @tr into its max buffer
1854  * @tr: trace array to snapshot
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864               void *cond_data)
1865 {
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870
1871         if (!tr->allocated_snapshot) {
1872                 /* Only the nop tracer should hit this when disabling */
1873                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874                 return;
1875         }
1876
1877         arch_spin_lock(&tr->max_lock);
1878
1879         /* Inherit the recordable setting from array_buffer */
1880         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881                 ring_buffer_record_on(tr->max_buffer.buffer);
1882         else
1883                 ring_buffer_record_off(tr->max_buffer.buffer);
1884
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887                 arch_spin_unlock(&tr->max_lock);
1888                 return;
1889         }
1890 #endif
1891         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892
1893         __update_max_tr(tr, tsk, cpu);
1894
1895         arch_spin_unlock(&tr->max_lock);
1896 }
1897
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: trace array to snapshot
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909         int ret;
1910
1911         if (tr->stop_count)
1912                 return;
1913
1914         WARN_ON_ONCE(!irqs_disabled());
1915         if (!tr->allocated_snapshot) {
1916                 /* Only the nop tracer should hit this when disabling */
1917                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918                 return;
1919         }
1920
1921         arch_spin_lock(&tr->max_lock);
1922
1923         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924
1925         if (ret == -EBUSY) {
1926                 /*
1927                  * We failed to swap the buffer due to a commit taking
1928                  * place on this CPU. We fail to record, but we write a
1929                  * note into the max trace buffer (no one writes directly
1930                  * to it) to flag that the swap failed.
1931                  */
1932                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1933                         "Failed to swap buffers due to commit in progress\n");
1934         }
1935
1936         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1937
1938         __update_max_tr(tr, tsk, cpu);
1939         arch_spin_unlock(&tr->max_lock);
1940 }
1941
1942 #endif /* CONFIG_TRACER_MAX_TRACE */
1943
1944 static int wait_on_pipe(struct trace_iterator *iter, int full)
1945 {
1946         /* Iterators are static, they should be filled or empty */
1947         if (trace_buffer_iter(iter, iter->cpu_file))
1948                 return 0;
1949
1950         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1951                                 full);
1952 }
1953
1954 #ifdef CONFIG_FTRACE_STARTUP_TEST
1955 static bool selftests_can_run;
1956
1957 struct trace_selftests {
1958         struct list_head                list;
1959         struct tracer                   *type;
1960 };
1961
1962 static LIST_HEAD(postponed_selftests);
1963
1964 static int save_selftest(struct tracer *type)
1965 {
1966         struct trace_selftests *selftest;
1967
1968         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1969         if (!selftest)
1970                 return -ENOMEM;
1971
1972         selftest->type = type;
1973         list_add(&selftest->list, &postponed_selftests);
1974         return 0;
1975 }
1976
1977 static int run_tracer_selftest(struct tracer *type)
1978 {
1979         struct trace_array *tr = &global_trace;
1980         struct tracer *saved_tracer = tr->current_trace;
1981         int ret;
1982
1983         if (!type->selftest || tracing_selftest_disabled)
1984                 return 0;
1985
1986         /*
1987          * If a tracer registers early in boot up (before scheduling is
1988          * initialized and such), then do not run its selftests yet.
1989          * Instead, run it a little later in the boot process.
1990          */
1991         if (!selftests_can_run)
1992                 return save_selftest(type);
1993
1994         if (!tracing_is_on()) {
1995                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1996                         type->name);
1997                 return 0;
1998         }
1999
2000         /*
2001          * Run a selftest on this tracer.
2002          * Here we reset the trace buffer, and set the current
2003          * tracer to be this tracer. The tracer can then run some
2004          * internal tracing to verify that everything is in order.
2005          * If we fail, we do not register this tracer.
2006          */
2007         tracing_reset_online_cpus(&tr->array_buffer);
2008
2009         tr->current_trace = type;
2010
2011 #ifdef CONFIG_TRACER_MAX_TRACE
2012         if (type->use_max_tr) {
2013                 /* If we expanded the buffers, make sure the max is expanded too */
2014                 if (ring_buffer_expanded)
2015                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2016                                            RING_BUFFER_ALL_CPUS);
2017                 tr->allocated_snapshot = true;
2018         }
2019 #endif
2020
2021         /* the test is responsible for initializing and enabling */
2022         pr_info("Testing tracer %s: ", type->name);
2023         ret = type->selftest(type, tr);
2024         /* the test is responsible for resetting too */
2025         tr->current_trace = saved_tracer;
2026         if (ret) {
2027                 printk(KERN_CONT "FAILED!\n");
2028                 /* Add the warning after printing 'FAILED' */
2029                 WARN_ON(1);
2030                 return -1;
2031         }
2032         /* Only reset on passing, to avoid touching corrupted buffers */
2033         tracing_reset_online_cpus(&tr->array_buffer);
2034
2035 #ifdef CONFIG_TRACER_MAX_TRACE
2036         if (type->use_max_tr) {
2037                 tr->allocated_snapshot = false;
2038
2039                 /* Shrink the max buffer again */
2040                 if (ring_buffer_expanded)
2041                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2042                                            RING_BUFFER_ALL_CPUS);
2043         }
2044 #endif
2045
2046         printk(KERN_CONT "PASSED\n");
2047         return 0;
2048 }
2049
2050 static int do_run_tracer_selftest(struct tracer *type)
2051 {
2052         int ret;
2053
2054         /*
2055          * Tests can take a long time, especially if they are run one after the
2056          * other, as does happen during bootup when all the tracers are
2057          * registered. This could cause the soft lockup watchdog to trigger.
2058          */
2059         cond_resched();
2060
2061         tracing_selftest_running = true;
2062         ret = run_tracer_selftest(type);
2063         tracing_selftest_running = false;
2064
2065         return ret;
2066 }
2067
2068 static __init int init_trace_selftests(void)
2069 {
2070         struct trace_selftests *p, *n;
2071         struct tracer *t, **last;
2072         int ret;
2073
2074         selftests_can_run = true;
2075
2076         mutex_lock(&trace_types_lock);
2077
2078         if (list_empty(&postponed_selftests))
2079                 goto out;
2080
2081         pr_info("Running postponed tracer tests:\n");
2082
2083         tracing_selftest_running = true;
2084         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2085                 /* This loop can take minutes when sanitizers are enabled, so
2086          * let's make sure we allow RCU processing.
2087                  */
2088                 cond_resched();
2089                 ret = run_tracer_selftest(p->type);
2090                 /* If the test fails, then warn and remove from available_tracers */
2091                 if (ret < 0) {
2092                         WARN(1, "tracer: %s failed selftest, disabling\n",
2093                              p->type->name);
2094                         last = &trace_types;
2095                         for (t = trace_types; t; t = t->next) {
2096                                 if (t == p->type) {
2097                                         *last = t->next;
2098                                         break;
2099                                 }
2100                                 last = &t->next;
2101                         }
2102                 }
2103                 list_del(&p->list);
2104                 kfree(p);
2105         }
2106         tracing_selftest_running = false;
2107
2108  out:
2109         mutex_unlock(&trace_types_lock);
2110
2111         return 0;
2112 }
2113 core_initcall(init_trace_selftests);
2114 #else
2115 static inline int run_tracer_selftest(struct tracer *type)
2116 {
2117         return 0;
2118 }
2119 static inline int do_run_tracer_selftest(struct tracer *type)
2120 {
2121         return 0;
2122 }
2123 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2124
2125 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2126
2127 static void __init apply_trace_boot_options(void);
2128
2129 /**
2130  * register_tracer - register a tracer with the ftrace system.
2131  * @type: the plugin for the tracer
2132  *
2133  * Register a new plugin tracer.
2134  */
2135 int __init register_tracer(struct tracer *type)
2136 {
2137         struct tracer *t;
2138         int ret = 0;
2139
2140         if (!type->name) {
2141                 pr_info("Tracer must have a name\n");
2142                 return -1;
2143         }
2144
2145         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2146                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2147                 return -1;
2148         }
2149
2150         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2151                 pr_warn("Can not register tracer %s due to lockdown\n",
2152                            type->name);
2153                 return -EPERM;
2154         }
2155
2156         mutex_lock(&trace_types_lock);
2157
2158         for (t = trace_types; t; t = t->next) {
2159                 if (strcmp(type->name, t->name) == 0) {
2160                         /* already found */
2161                         pr_info("Tracer %s already registered\n",
2162                                 type->name);
2163                         ret = -1;
2164                         goto out;
2165                 }
2166         }
2167
2168         if (!type->set_flag)
2169                 type->set_flag = &dummy_set_flag;
2170         if (!type->flags) {
2171                 /* allocate a dummy tracer_flags */
2172                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2173                 if (!type->flags) {
2174                         ret = -ENOMEM;
2175                         goto out;
2176                 }
2177                 type->flags->val = 0;
2178                 type->flags->opts = dummy_tracer_opt;
2179         } else
2180                 if (!type->flags->opts)
2181                         type->flags->opts = dummy_tracer_opt;
2182
2183         /* store the tracer for __set_tracer_option */
2184         type->flags->trace = type;
2185
2186         ret = do_run_tracer_selftest(type);
2187         if (ret < 0)
2188                 goto out;
2189
2190         type->next = trace_types;
2191         trace_types = type;
2192         add_tracer_options(&global_trace, type);
2193
2194  out:
2195         mutex_unlock(&trace_types_lock);
2196
2197         if (ret || !default_bootup_tracer)
2198                 goto out_unlock;
2199
2200         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2201                 goto out_unlock;
2202
2203         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2204         /* Do we want this tracer to start on bootup? */
2205         tracing_set_tracer(&global_trace, type->name);
2206         default_bootup_tracer = NULL;
2207
2208         apply_trace_boot_options();
2209
2210         /* disable other selftests, since this tracer will break them. */
2211         disable_tracing_selftest("running a tracer");
2212
2213  out_unlock:
2214         return ret;
2215 }
2216
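/*
 * A minimal registration sketch (my_tracer, my_tracer_init() and
 * my_tracer_reset() are made-up names; struct tracer has more optional
 * callbacks than shown). register_tracer() is __init, so it must be
 * called from boot code, typically an initcall:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */
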
2217 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2218 {
2219         struct trace_buffer *buffer = buf->buffer;
2220
2221         if (!buffer)
2222                 return;
2223
2224         ring_buffer_record_disable(buffer);
2225
2226         /* Make sure all commits have finished */
2227         synchronize_rcu();
2228         ring_buffer_reset_cpu(buffer, cpu);
2229
2230         ring_buffer_record_enable(buffer);
2231 }
2232
2233 void tracing_reset_online_cpus(struct array_buffer *buf)
2234 {
2235         struct trace_buffer *buffer = buf->buffer;
2236
2237         if (!buffer)
2238                 return;
2239
2240         ring_buffer_record_disable(buffer);
2241
2242         /* Make sure all commits have finished */
2243         synchronize_rcu();
2244
2245         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2246
2247         ring_buffer_reset_online_cpus(buffer);
2248
2249         ring_buffer_record_enable(buffer);
2250 }
2251
2252 /* Must have trace_types_lock held */
2253 void tracing_reset_all_online_cpus_unlocked(void)
2254 {
2255         struct trace_array *tr;
2256
2257         lockdep_assert_held(&trace_types_lock);
2258
2259         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2260                 if (!tr->clear_trace)
2261                         continue;
2262                 tr->clear_trace = false;
2263                 tracing_reset_online_cpus(&tr->array_buffer);
2264 #ifdef CONFIG_TRACER_MAX_TRACE
2265                 tracing_reset_online_cpus(&tr->max_buffer);
2266 #endif
2267         }
2268 }
2269
2270 void tracing_reset_all_online_cpus(void)
2271 {
2272         mutex_lock(&trace_types_lock);
2273         tracing_reset_all_online_cpus_unlocked();
2274         mutex_unlock(&trace_types_lock);
2275 }
2276
2277 /*
2278  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2279  * is the tgid last observed corresponding to pid=i.
2280  */
2281 static int *tgid_map;
2282
2283 /* The maximum valid index into tgid_map. */
2284 static size_t tgid_map_max;
2285
2286 #define SAVED_CMDLINES_DEFAULT 128
2287 #define NO_CMDLINE_MAP UINT_MAX
2288 /*
2289  * Preemption must be disabled before acquiring trace_cmdline_lock.
2290  * The various trace_arrays' max_lock must be acquired in a context
2291  * where interrupt is disabled.
2292  */
2293 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
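/*
 * The comm of recently seen tasks is cached so that output code can
 * resolve a pid to a name without touching the task itself:
 * map_pid_to_cmdline maps a (masked) pid to a slot in saved_cmdlines,
 * map_cmdline_to_pid records which pid currently owns that slot so a
 * stale entry can be detected, and saved_cmdlines holds TASK_COMM_LEN
 * bytes per slot.
 */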
2294 struct saved_cmdlines_buffer {
2295         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2296         unsigned *map_cmdline_to_pid;
2297         unsigned cmdline_num;
2298         int cmdline_idx;
2299         char *saved_cmdlines;
2300 };
2301 static struct saved_cmdlines_buffer *savedcmd;
2302
2303 static inline char *get_saved_cmdlines(int idx)
2304 {
2305         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2306 }
2307
2308 static inline void set_cmdline(int idx, const char *cmdline)
2309 {
2310         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2311 }
2312
2313 static int allocate_cmdlines_buffer(unsigned int val,
2314                                     struct saved_cmdlines_buffer *s)
2315 {
2316         s->map_cmdline_to_pid = kmalloc_array(val,
2317                                               sizeof(*s->map_cmdline_to_pid),
2318                                               GFP_KERNEL);
2319         if (!s->map_cmdline_to_pid)
2320                 return -ENOMEM;
2321
2322         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2323         if (!s->saved_cmdlines) {
2324                 kfree(s->map_cmdline_to_pid);
2325                 return -ENOMEM;
2326         }
2327
2328         s->cmdline_idx = 0;
2329         s->cmdline_num = val;
2330         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2331                sizeof(s->map_pid_to_cmdline));
2332         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2333                val * sizeof(*s->map_cmdline_to_pid));
2334
2335         return 0;
2336 }
2337
2338 static int trace_create_savedcmd(void)
2339 {
2340         int ret;
2341
2342         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2343         if (!savedcmd)
2344                 return -ENOMEM;
2345
2346         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2347         if (ret < 0) {
2348                 kfree(savedcmd);
2349                 savedcmd = NULL;
2350                 return -ENOMEM;
2351         }
2352
2353         return 0;
2354 }
2355
2356 int is_tracing_stopped(void)
2357 {
2358         return global_trace.stop_count;
2359 }
2360
2361 /**
2362  * tracing_start - quick start of the tracer
2363  *
2364  * If tracing is enabled but was stopped by tracing_stop,
2365  * this will start the tracer back up.
2366  */
2367 void tracing_start(void)
2368 {
2369         struct trace_buffer *buffer;
2370         unsigned long flags;
2371
2372         if (tracing_disabled)
2373                 return;
2374
2375         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2376         if (--global_trace.stop_count) {
2377                 if (global_trace.stop_count < 0) {
2378                         /* Someone screwed up their debugging */
2379                         WARN_ON_ONCE(1);
2380                         global_trace.stop_count = 0;
2381                 }
2382                 goto out;
2383         }
2384
2385         /* Prevent the buffers from switching */
2386         arch_spin_lock(&global_trace.max_lock);
2387
2388         buffer = global_trace.array_buffer.buffer;
2389         if (buffer)
2390                 ring_buffer_record_enable(buffer);
2391
2392 #ifdef CONFIG_TRACER_MAX_TRACE
2393         buffer = global_trace.max_buffer.buffer;
2394         if (buffer)
2395                 ring_buffer_record_enable(buffer);
2396 #endif
2397
2398         arch_spin_unlock(&global_trace.max_lock);
2399
2400  out:
2401         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2402 }
2403
2404 static void tracing_start_tr(struct trace_array *tr)
2405 {
2406         struct trace_buffer *buffer;
2407         unsigned long flags;
2408
2409         if (tracing_disabled)
2410                 return;
2411
2412         /* If global, we need to also start the max tracer */
2413         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2414                 return tracing_start();
2415
2416         raw_spin_lock_irqsave(&tr->start_lock, flags);
2417
2418         if (--tr->stop_count) {
2419                 if (tr->stop_count < 0) {
2420                         /* Someone screwed up their debugging */
2421                         WARN_ON_ONCE(1);
2422                         tr->stop_count = 0;
2423                 }
2424                 goto out;
2425         }
2426
2427         buffer = tr->array_buffer.buffer;
2428         if (buffer)
2429                 ring_buffer_record_enable(buffer);
2430
2431  out:
2432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 }
2434
2435 /**
2436  * tracing_stop - quick stop of the tracer
2437  *
2438  * Lightweight way to stop tracing. Use in conjunction with
2439  * tracing_start.
2440  */
2441 void tracing_stop(void)
2442 {
2443         struct trace_buffer *buffer;
2444         unsigned long flags;
2445
2446         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2447         if (global_trace.stop_count++)
2448                 goto out;
2449
2450         /* Prevent the buffers from switching */
2451         arch_spin_lock(&global_trace.max_lock);
2452
2453         buffer = global_trace.array_buffer.buffer;
2454         if (buffer)
2455                 ring_buffer_record_disable(buffer);
2456
2457 #ifdef CONFIG_TRACER_MAX_TRACE
2458         buffer = global_trace.max_buffer.buffer;
2459         if (buffer)
2460                 ring_buffer_record_disable(buffer);
2461 #endif
2462
2463         arch_spin_unlock(&global_trace.max_lock);
2464
2465  out:
2466         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2467 }
2468
2469 static void tracing_stop_tr(struct trace_array *tr)
2470 {
2471         struct trace_buffer *buffer;
2472         unsigned long flags;
2473
2474         /* If global, we need to also stop the max tracer */
2475         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2476                 return tracing_stop();
2477
2478         raw_spin_lock_irqsave(&tr->start_lock, flags);
2479         if (tr->stop_count++)
2480                 goto out;
2481
2482         buffer = tr->array_buffer.buffer;
2483         if (buffer)
2484                 ring_buffer_record_disable(buffer);
2485
2486  out:
2487         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2488 }
2489
2490 static int trace_save_cmdline(struct task_struct *tsk)
2491 {
2492         unsigned tpid, idx;
2493
2494         /* treat recording of idle task as a success */
2495         if (!tsk->pid)
2496                 return 1;
2497
2498         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2499
2500         /*
2501          * It's not the end of the world if we don't get
2502          * the lock, but we also don't want to spin
2503          * nor do we want to disable interrupts,
2504          * so if we miss here, then better luck next time.
2505          *
2506          * This is called from within the scheduler and wake up paths, so
2507          * interrupts had better be disabled and the run queue lock held.
2508          */
2509         lockdep_assert_preemption_disabled();
2510         if (!arch_spin_trylock(&trace_cmdline_lock))
2511                 return 0;
2512
2513         idx = savedcmd->map_pid_to_cmdline[tpid];
2514         if (idx == NO_CMDLINE_MAP) {
2515                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2516
2517                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2518                 savedcmd->cmdline_idx = idx;
2519         }
2520
2521         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2522         set_cmdline(idx, tsk->comm);
2523
2524         arch_spin_unlock(&trace_cmdline_lock);
2525
2526         return 1;
2527 }
2528
2529 static void __trace_find_cmdline(int pid, char comm[])
2530 {
2531         unsigned map;
2532         int tpid;
2533
2534         if (!pid) {
2535                 strcpy(comm, "<idle>");
2536                 return;
2537         }
2538
2539         if (WARN_ON_ONCE(pid < 0)) {
2540                 strcpy(comm, "<XXX>");
2541                 return;
2542         }
2543
2544         tpid = pid & (PID_MAX_DEFAULT - 1);
2545         map = savedcmd->map_pid_to_cmdline[tpid];
2546         if (map != NO_CMDLINE_MAP) {
2547                 tpid = savedcmd->map_cmdline_to_pid[map];
2548                 if (tpid == pid) {
2549                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2550                         return;
2551                 }
2552         }
2553         strcpy(comm, "<...>");
2554 }
2555
2556 void trace_find_cmdline(int pid, char comm[])
2557 {
2558         preempt_disable();
2559         arch_spin_lock(&trace_cmdline_lock);
2560
2561         __trace_find_cmdline(pid, comm);
2562
2563         arch_spin_unlock(&trace_cmdline_lock);
2564         preempt_enable();
2565 }
2566
2567 static int *trace_find_tgid_ptr(int pid)
2568 {
2569         /*
2570          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2571          * if we observe a non-NULL tgid_map then we also observe the correct
2572          * tgid_map_max.
2573          */
2574         int *map = smp_load_acquire(&tgid_map);
2575
2576         if (unlikely(!map || pid > tgid_map_max))
2577                 return NULL;
2578
2579         return &map[pid];
2580 }
2581
2582 int trace_find_tgid(int pid)
2583 {
2584         int *ptr = trace_find_tgid_ptr(pid);
2585
2586         return ptr ? *ptr : 0;
2587 }
2588
2589 static int trace_save_tgid(struct task_struct *tsk)
2590 {
2591         int *ptr;
2592
2593         /* treat recording of idle task as a success */
2594         if (!tsk->pid)
2595                 return 1;
2596
2597         ptr = trace_find_tgid_ptr(tsk->pid);
2598         if (!ptr)
2599                 return 0;
2600
2601         *ptr = tsk->tgid;
2602         return 1;
2603 }
2604
2605 static bool tracing_record_taskinfo_skip(int flags)
2606 {
2607         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2608                 return true;
2609         if (!__this_cpu_read(trace_taskinfo_save))
2610                 return true;
2611         return false;
2612 }
2613
2614 /**
2615  * tracing_record_taskinfo - record the task info of a task
2616  *
2617  * @task:  task to record
2618  * @flags: TRACE_RECORD_CMDLINE for recording comm
2619  *         TRACE_RECORD_TGID for recording tgid
2620  */
2621 void tracing_record_taskinfo(struct task_struct *task, int flags)
2622 {
2623         bool done;
2624
2625         if (tracing_record_taskinfo_skip(flags))
2626                 return;
2627
2628         /*
2629          * Record as much task information as possible. If some fail, continue
2630          * to try to record the others.
2631          */
2632         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2633         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2634
2635         /* If recording any of the information failed, retry soon. */
2636         if (!done)
2637                 return;
2638
2639         __this_cpu_write(trace_taskinfo_save, false);
2640 }
2641
2642 /**
2643  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2644  *
2645  * @prev: previous task during sched_switch
2646  * @next: next task during sched_switch
2647  * @flags: TRACE_RECORD_CMDLINE for recording comm
2648  *         TRACE_RECORD_TGID for recording tgid
2649  */
2650 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2651                                           struct task_struct *next, int flags)
2652 {
2653         bool done;
2654
2655         if (tracing_record_taskinfo_skip(flags))
2656                 return;
2657
2658         /*
2659          * Record as much task information as possible. If some fail, continue
2660          * to try to record the others.
2661          */
2662         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2663         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2664         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2665         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2666
2667         /* If recording any of the information failed, retry soon. */
2668         if (!done)
2669                 return;
2670
2671         __this_cpu_write(trace_taskinfo_save, false);
2672 }
2673
2674 /* Helpers to record specific task information */
2675 void tracing_record_cmdline(struct task_struct *task)
2676 {
2677         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2678 }
2679
2680 void tracing_record_tgid(struct task_struct *task)
2681 {
2682         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2683 }
2684
2685 /*
2686  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2687  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2688  * simplifies those functions and keeps them in sync.
2689  */
2690 enum print_line_t trace_handle_return(struct trace_seq *s)
2691 {
2692         return trace_seq_has_overflowed(s) ?
2693                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2694 }
2695 EXPORT_SYMBOL_GPL(trace_handle_return);
2696
2697 static unsigned short migration_disable_value(void)
2698 {
2699 #if defined(CONFIG_SMP)
2700         return current->migration_disabled;
2701 #else
2702         return 0;
2703 #endif
2704 }
2705
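/*
 * The returned trace_ctx packs the current context into one word:
 * bits 0-3 hold the preemption count (clamped to 15), bits 4-7 hold
 * the migration-disable depth (clamped to 15), and the upper 16 bits
 * hold the TRACE_FLAG_* bits derived from @irqs_status and
 * preempt_count().
 */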
2706 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2707 {
2708         unsigned int trace_flags = irqs_status;
2709         unsigned int pc;
2710
2711         pc = preempt_count();
2712
2713         if (pc & NMI_MASK)
2714                 trace_flags |= TRACE_FLAG_NMI;
2715         if (pc & HARDIRQ_MASK)
2716                 trace_flags |= TRACE_FLAG_HARDIRQ;
2717         if (in_serving_softirq())
2718                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2719         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2720                 trace_flags |= TRACE_FLAG_BH_OFF;
2721
2722         if (tif_need_resched())
2723                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2724         if (test_preempt_need_resched())
2725                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2726         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2727                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2728 }
2729
2730 struct ring_buffer_event *
2731 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2732                           int type,
2733                           unsigned long len,
2734                           unsigned int trace_ctx)
2735 {
2736         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2737 }
2738
2739 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2740 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2741 static int trace_buffered_event_ref;
2742
2743 /**
2744  * trace_buffered_event_enable - enable buffering events
2745  *
2746  * When events are being filtered, it is quicker to use a temporary
2747  * buffer to write the event data into if there's a likely chance
2748  * that it will not be committed. Discarding an event from the ring
2749  * buffer is not as fast as committing one, and is much slower than
2750  * copying the data and committing that copy.
2751  *
2752  * When an event is to be filtered, per-CPU buffers are allocated to
2753  * write the event data into. If the event is then filtered and
2754  * discarded, the data is simply dropped; otherwise, the entire data
2755  * is committed to the ring buffer in one shot.
2756  */
2757 void trace_buffered_event_enable(void)
2758 {
2759         struct ring_buffer_event *event;
2760         struct page *page;
2761         int cpu;
2762
2763         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2764
2765         if (trace_buffered_event_ref++)
2766                 return;
2767
2768         for_each_tracing_cpu(cpu) {
2769                 page = alloc_pages_node(cpu_to_node(cpu),
2770                                         GFP_KERNEL | __GFP_NORETRY, 0);
2771                 if (!page)
2772                         goto failed;
2773
2774                 event = page_address(page);
2775                 memset(event, 0, sizeof(*event));
2776
2777                 per_cpu(trace_buffered_event, cpu) = event;
2778
2779                 preempt_disable();
2780                 if (cpu == smp_processor_id() &&
2781                     __this_cpu_read(trace_buffered_event) !=
2782                     per_cpu(trace_buffered_event, cpu))
2783                         WARN_ON_ONCE(1);
2784                 preempt_enable();
2785         }
2786
2787         return;
2788  failed:
2789         trace_buffered_event_disable();
2790 }
2791
2792 static void enable_trace_buffered_event(void *data)
2793 {
2794         /* Probably not needed, but do it anyway */
2795         smp_rmb();
2796         this_cpu_dec(trace_buffered_event_cnt);
2797 }
2798
2799 static void disable_trace_buffered_event(void *data)
2800 {
2801         this_cpu_inc(trace_buffered_event_cnt);
2802 }
2803
2804 /**
2805  * trace_buffered_event_disable - disable buffering events
2806  *
2807  * When a filter is removed, it is faster to not use the buffered
2808  * events, and to commit directly into the ring buffer. Free up
2809  * the temp buffers when there are no more users. This requires
2810  * special synchronization with current events.
2811  */
2812 void trace_buffered_event_disable(void)
2813 {
2814         int cpu;
2815
2816         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2817
2818         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2819                 return;
2820
2821         if (--trace_buffered_event_ref)
2822                 return;
2823
2824         preempt_disable();
2825         /* For each CPU, set the buffer as used. */
2826         smp_call_function_many(tracing_buffer_mask,
2827                                disable_trace_buffered_event, NULL, 1);
2828         preempt_enable();
2829
2830         /* Wait for all current users to finish */
2831         synchronize_rcu();
2832
2833         for_each_tracing_cpu(cpu) {
2834                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2835                 per_cpu(trace_buffered_event, cpu) = NULL;
2836         }
2837         /*
2838          * Make sure trace_buffered_event is NULL before clearing
2839          * trace_buffered_event_cnt.
2840          */
2841         smp_wmb();
2842
2843         preempt_disable();
2844         /* Do the work on each cpu */
2845         smp_call_function_many(tracing_buffer_mask,
2846                                enable_trace_buffered_event, NULL, 1);
2847         preempt_enable();
2848 }
2849
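/*
 * Fallback buffer used by trace_event_buffer_lock_reserve() when
 * tracing is off but an event still has triggers to evaluate; the
 * event is written here only so the trigger can look at it, and is
 * never recorded anywhere.
 */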
2850 static struct trace_buffer *temp_buffer;
2851
2852 struct ring_buffer_event *
2853 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2854                           struct trace_event_file *trace_file,
2855                           int type, unsigned long len,
2856                           unsigned int trace_ctx)
2857 {
2858         struct ring_buffer_event *entry;
2859         struct trace_array *tr = trace_file->tr;
2860         int val;
2861
2862         *current_rb = tr->array_buffer.buffer;
2863
2864         if (!tr->no_filter_buffering_ref &&
2865             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2866                 preempt_disable_notrace();
2867                 /*
2868                  * Filtering is on, so try to use the per cpu buffer first.
2869                  * This buffer will simulate a ring_buffer_event,
2870                  * where the type_len is zero and the array[0] will
2871                  * hold the full length.
2872                  * (see include/linux/ring_buffer.h for details on
2873                  *  how the ring_buffer_event is structured).
2874                  *
2875                  * Using a temp buffer during filtering and copying it
2876                  * on a matched filter is quicker than writing directly
2877                  * into the ring buffer and then discarding it when
2878                  * it doesn't match. That is because the discard
2879                  * requires several atomic operations to get right.
2880                  * Copying on match and doing nothing on a failed match
2881                  * Copying on a match and doing nothing on a failed match
2882                  * is still quicker than skipping the copy but then having
2883                  * to discard out of the ring buffer on a failed match.
2884                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2885                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2886
2887                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2888
2889                         /*
2890                          * Preemption is disabled, but interrupts and NMIs
2891                          * can still come in now. If that happens after
2892                          * the above increment, then it will have to go
2893                          * back to the old method of allocating the event
2894                          * on the ring buffer, and if the filter fails, it
2895                          * will have to call ring_buffer_discard_commit()
2896                          * to remove it.
2897                          *
2898                          * Need to also check the unlikely case that the
2899                          * length is bigger than the temp buffer size.
2900                          * If that happens, then the reserve is pretty much
2901                          * guaranteed to fail, as the ring buffer currently
2902                          * only allows events less than a page. But that may
2903                          * change in the future, so let the ring buffer reserve
2904                          * handle the failure in that case.
2905                          */
2906                         if (val == 1 && likely(len <= max_len)) {
2907                                 trace_event_setup(entry, type, trace_ctx);
2908                                 entry->array[0] = len;
2909                                 /* Return with preemption disabled */
2910                                 return entry;
2911                         }
2912                         this_cpu_dec(trace_buffered_event_cnt);
2913                 }
2914                 /* __trace_buffer_lock_reserve() disables preemption */
2915                 preempt_enable_notrace();
2916         }
2917
2918         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2919                                             trace_ctx);
2920         /*
2921          * If tracing is off, but we have triggers enabled
2922          * If tracing is off, but we have triggers enabled,
2923          * we still need to look at the event data. Use the temp_buffer
2924          * to store the trace event for the trigger to use. It's recursion
2925          * safe and will not be recorded anywhere.
2926         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2927                 *current_rb = temp_buffer;
2928                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2929                                                     trace_ctx);
2930         }
2931         return entry;
2932 }
2933 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2934
2935 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2936 static DEFINE_MUTEX(tracepoint_printk_mutex);
2937
2938 static void output_printk(struct trace_event_buffer *fbuffer)
2939 {
2940         struct trace_event_call *event_call;
2941         struct trace_event_file *file;
2942         struct trace_event *event;
2943         unsigned long flags;
2944         struct trace_iterator *iter = tracepoint_print_iter;
2945
2946         /* We should never get here if iter is NULL */
2947         if (WARN_ON_ONCE(!iter))
2948                 return;
2949
2950         event_call = fbuffer->trace_file->event_call;
2951         if (!event_call || !event_call->event.funcs ||
2952             !event_call->event.funcs->trace)
2953                 return;
2954
2955         file = fbuffer->trace_file;
2956         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2957             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2958              !filter_match_preds(file->filter, fbuffer->entry)))
2959                 return;
2960
2961         event = &fbuffer->trace_file->event_call->event;
2962
2963         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2964         trace_seq_init(&iter->seq);
2965         iter->ent = fbuffer->entry;
2966         event_call->event.funcs->trace(iter, 0, event);
2967         trace_seq_putc(&iter->seq, 0);
2968         printk("%s", iter->seq.buffer);
2969
2970         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2971 }
2972
2973 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2974                              void *buffer, size_t *lenp,
2975                              loff_t *ppos)
2976 {
2977         int save_tracepoint_printk;
2978         int ret;
2979
2980         mutex_lock(&tracepoint_printk_mutex);
2981         save_tracepoint_printk = tracepoint_printk;
2982
2983         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2984
2985         /*
2986          * This will force exiting early, as tracepoint_printk
2987          * is always zero when tracepoint_printk_iter is not allocated
2988          * is always zero when tracepoint_print_iter is not allocated.
2989         if (!tracepoint_print_iter)
2990                 tracepoint_printk = 0;
2991
2992         if (save_tracepoint_printk == tracepoint_printk)
2993                 goto out;
2994
2995         if (tracepoint_printk)
2996                 static_key_enable(&tracepoint_printk_key.key);
2997         else
2998                 static_key_disable(&tracepoint_printk_key.key);
2999
3000  out:
3001         mutex_unlock(&tracepoint_printk_mutex);
3002
3003         return ret;
3004 }
3005
3006 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3007 {
3008         enum event_trigger_type tt = ETT_NONE;
3009         struct trace_event_file *file = fbuffer->trace_file;
3010
3011         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3012                         fbuffer->entry, &tt))
3013                 goto discard;
3014
3015         if (static_key_false(&tracepoint_printk_key.key))
3016                 output_printk(fbuffer);
3017
3018         if (static_branch_unlikely(&trace_event_exports_enabled))
3019                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3020
3021         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3022                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3023
3024 discard:
3025         if (tt)
3026                 event_triggers_post_call(file, tt);
3027
3028 }
3029 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3030
3031 /*
3032  * Skip 3:
3033  *
3034  *   trace_buffer_unlock_commit_regs()
3035  *   trace_event_buffer_commit()
3036  *   trace_event_raw_event_xxx()
3037  */
3038 # define STACK_SKIP 3
3039
3040 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3041                                      struct trace_buffer *buffer,
3042                                      struct ring_buffer_event *event,
3043                                      unsigned int trace_ctx,
3044                                      struct pt_regs *regs)
3045 {
3046         __buffer_unlock_commit(buffer, event);
3047
3048         /*
3049          * If regs is not set, then skip the intermediate functions (STACK_SKIP).
3050          * Note, we can still get here via blktrace, wakeup tracer
3051          * and mmiotrace, but that's ok if they lose a function or
3052          * two. They are not that meaningful.
3053          */
3054         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3055         ftrace_trace_userstack(tr, buffer, trace_ctx);
3056 }
3057
3058 /*
3059  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3060  */
3061 void
3062 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3063                                    struct ring_buffer_event *event)
3064 {
3065         __buffer_unlock_commit(buffer, event);
3066 }
3067
3068 void
3069 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3070                parent_ip, unsigned int trace_ctx)
3071 {
3072         struct trace_event_call *call = &event_function;
3073         struct trace_buffer *buffer = tr->array_buffer.buffer;
3074         struct ring_buffer_event *event;
3075         struct ftrace_entry *entry;
3076
3077         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3078                                             trace_ctx);
3079         if (!event)
3080                 return;
3081         entry   = ring_buffer_event_data(event);
3082         entry->ip                       = ip;
3083         entry->parent_ip                = parent_ip;
3084
3085         if (!call_filter_check_discard(call, entry, buffer, event)) {
3086                 if (static_branch_unlikely(&trace_function_exports_enabled))
3087                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3088                 __buffer_unlock_commit(buffer, event);
3089         }
3090 }
3091
3092 #ifdef CONFIG_STACKTRACE
3093
3094 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3095 #define FTRACE_KSTACK_NESTING   4
3096
3097 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3098
3099 struct ftrace_stack {
3100         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3101 };
3102
3103
3104 struct ftrace_stacks {
3105         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3106 };
3107
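/*
 * Each CPU gets one ftrace_stack per nesting level. ftrace_stack_reserve
 * below tracks how deeply the current CPU is nested, so a stack trace
 * taken from an interrupt or NMI does not scribble over one that is
 * still being filled at a lower level.
 */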
3108 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3109 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3110
3111 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3112                                  unsigned int trace_ctx,
3113                                  int skip, struct pt_regs *regs)
3114 {
3115         struct trace_event_call *call = &event_kernel_stack;
3116         struct ring_buffer_event *event;
3117         unsigned int size, nr_entries;
3118         struct ftrace_stack *fstack;
3119         struct stack_entry *entry;
3120         int stackidx;
3121
3122         /*
3123          * Add one, for this function and the call to stack_trace_save().
3124          * If regs is set, then these functions will not be in the way.
3125          */
3126 #ifndef CONFIG_UNWINDER_ORC
3127         if (!regs)
3128                 skip++;
3129 #endif
3130
3131         preempt_disable_notrace();
3132
3133         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3134
3135         /* This should never happen. If it does, yell once and skip */
3136         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3137                 goto out;
3138
3139         /*
3140          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3141          * interrupt will either see the value pre increment or post
3142          * increment. If the interrupt happens pre increment it will have
3143          * restored the counter when it returns.  We just need a barrier to
3144          * keep gcc from moving things around.
3145          */
3146         barrier();
3147
3148         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3149         size = ARRAY_SIZE(fstack->calls);
3150
3151         if (regs) {
3152                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3153                                                    size, skip);
3154         } else {
3155                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3156         }
3157
3158         size = nr_entries * sizeof(unsigned long);
3159         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3160                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3161                                     trace_ctx);
3162         if (!event)
3163                 goto out;
3164         entry = ring_buffer_event_data(event);
3165
3166         memcpy(&entry->caller, fstack->calls, size);
3167         entry->size = nr_entries;
3168
3169         if (!call_filter_check_discard(call, entry, buffer, event))
3170                 __buffer_unlock_commit(buffer, event);
3171
3172  out:
3173         /* Again, don't let gcc optimize things here */
3174         barrier();
3175         __this_cpu_dec(ftrace_stack_reserve);
3176         preempt_enable_notrace();
3177
3178 }
3179
3180 static inline void ftrace_trace_stack(struct trace_array *tr,
3181                                       struct trace_buffer *buffer,
3182                                       unsigned int trace_ctx,
3183                                       int skip, struct pt_regs *regs)
3184 {
3185         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3186                 return;
3187
3188         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3189 }
3190
3191 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3192                    int skip)
3193 {
3194         struct trace_buffer *buffer = tr->array_buffer.buffer;
3195
3196         if (rcu_is_watching()) {
3197                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3198                 return;
3199         }
3200
3201         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3202                 return;
3203
3204         /*
3205          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3206          * but if the above rcu_is_watching() failed, then the NMI
3207          * triggered someplace critical, and ct_irq_enter() should
3208          * not be called from NMI.
3209          */
3210         if (unlikely(in_nmi()))
3211                 return;
3212
3213         ct_irq_enter_irqson();
3214         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3215         ct_irq_exit_irqson();
3216 }
3217
3218 /**
3219  * trace_dump_stack - record a stack back trace in the trace buffer
3220  * @skip: Number of functions to skip (helper handlers)
3221  */
3222 void trace_dump_stack(int skip)
3223 {
3224         if (tracing_disabled || tracing_selftest_running)
3225                 return;
3226
3227 #ifndef CONFIG_UNWINDER_ORC
3228         /* Skip 1 to skip this function. */
3229         skip++;
3230 #endif
3231         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3232                              tracing_gen_ctx(), skip, NULL);
3233 }
3234 EXPORT_SYMBOL_GPL(trace_dump_stack);
3235
3236 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3237 static DEFINE_PER_CPU(int, user_stack_count);
3238
3239 static void
3240 ftrace_trace_userstack(struct trace_array *tr,
3241                        struct trace_buffer *buffer, unsigned int trace_ctx)
3242 {
3243         struct trace_event_call *call = &event_user_stack;
3244         struct ring_buffer_event *event;
3245         struct userstack_entry *entry;
3246
3247         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3248                 return;
3249
3250         /*
3251          * NMIs cannot handle page faults, even with fixups.
3252          * Saving the user stack can (and often does) fault.
3253          */
3254         if (unlikely(in_nmi()))
3255                 return;
3256
3257         /*
3258          * Prevent recursion, since the user stack tracing may
3259          * trigger other kernel events.
3260          */
3261         preempt_disable();
3262         if (__this_cpu_read(user_stack_count))
3263                 goto out;
3264
3265         __this_cpu_inc(user_stack_count);
3266
3267         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3268                                             sizeof(*entry), trace_ctx);
3269         if (!event)
3270                 goto out_drop_count;
3271         entry   = ring_buffer_event_data(event);
3272
3273         entry->tgid             = current->tgid;
3274         memset(&entry->caller, 0, sizeof(entry->caller));
3275
3276         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3277         if (!call_filter_check_discard(call, entry, buffer, event))
3278                 __buffer_unlock_commit(buffer, event);
3279
3280  out_drop_count:
3281         __this_cpu_dec(user_stack_count);
3282  out:
3283         preempt_enable();
3284 }
3285 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3286 static void ftrace_trace_userstack(struct trace_array *tr,
3287                                    struct trace_buffer *buffer,
3288                                    unsigned int trace_ctx)
3289 {
3290 }
3291 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3292
3293 #endif /* CONFIG_STACKTRACE */
3294
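/*
 * The func_repeats entry stores the 64-bit timestamp delta split across two
 * 32-bit fields: the low 32 bits in bottom_delta_ts and the high 32 bits in
 * top_delta_ts.
 */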
3295 static inline void
3296 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3297                           unsigned long long delta)
3298 {
3299         entry->bottom_delta_ts = delta & U32_MAX;
3300         entry->top_delta_ts = (delta >> 32);
3301 }
3302
3303 void trace_last_func_repeats(struct trace_array *tr,
3304                              struct trace_func_repeats *last_info,
3305                              unsigned int trace_ctx)
3306 {
3307         struct trace_buffer *buffer = tr->array_buffer.buffer;
3308         struct func_repeats_entry *entry;
3309         struct ring_buffer_event *event;
3310         u64 delta;
3311
3312         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3313                                             sizeof(*entry), trace_ctx);
3314         if (!event)
3315                 return;
3316
3317         delta = ring_buffer_event_time_stamp(buffer, event) -
3318                 last_info->ts_last_call;
3319
3320         entry = ring_buffer_event_data(event);
3321         entry->ip = last_info->ip;
3322         entry->parent_ip = last_info->parent_ip;
3323         entry->count = last_info->count;
3324         func_repeats_set_delta_ts(entry, delta);
3325
3326         __buffer_unlock_commit(buffer, event);
3327 }
3328
3329 /* created for use with alloc_percpu */
3330 struct trace_buffer_struct {
3331         int nesting;
3332         char buffer[4][TRACE_BUF_SIZE];
3333 };
3334
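/*
 * One trace_buffer_struct per CPU. The four buffers correspond to the
 * nesting levels tracked by get_trace_buf(), so that nested contexts
 * (e.g. an irq or NMI interrupting a trace_printk()) each get their own
 * buffer.
 */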
3335 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3336
3337 /*
3338  * This allows for lockless recording. If we're nested too deeply (more
3339  * than four levels), this returns NULL.
3340  */
3341 static char *get_trace_buf(void)
3342 {
3343         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3344
3345         if (!trace_percpu_buffer || buffer->nesting >= 4)
3346                 return NULL;
3347
3348         buffer->nesting++;
3349
3350         /* Interrupts must see nesting incremented before we use the buffer */
3351         barrier();
3352         return &buffer->buffer[buffer->nesting - 1][0];
3353 }
3354
3355 static void put_trace_buf(void)
3356 {
3357         /* Don't let the decrement of nesting leak before this */
3358         barrier();
3359         this_cpu_dec(trace_percpu_buffer->nesting);
3360 }
3361
3362 static int alloc_percpu_trace_buffer(void)
3363 {
3364         struct trace_buffer_struct __percpu *buffers;
3365
3366         if (trace_percpu_buffer)
3367                 return 0;
3368
3369         buffers = alloc_percpu(struct trace_buffer_struct);
3370         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3371                 return -ENOMEM;
3372
3373         trace_percpu_buffer = buffers;
3374         return 0;
3375 }
3376
3377 static int buffers_allocated;
3378
3379 void trace_printk_init_buffers(void)
3380 {
3381         if (buffers_allocated)
3382                 return;
3383
3384         if (alloc_percpu_trace_buffer())
3385                 return;
3386
3387         /* trace_printk() is for debug use only. Don't use it in production. */
3388
3389         pr_warn("\n");
3390         pr_warn("**********************************************************\n");
3391         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3392         pr_warn("**                                                      **\n");
3393         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3394         pr_warn("**                                                      **\n");
3395         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3396         pr_warn("** unsafe for production use.                           **\n");
3397         pr_warn("**                                                      **\n");
3398         pr_warn("** If you see this message and you are not debugging    **\n");
3399         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3400         pr_warn("**                                                      **\n");
3401         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3402         pr_warn("**********************************************************\n");
3403
3404         /* Expand the buffers to set size */
3405         tracing_update_buffers();
3406
3407         buffers_allocated = 1;
3408
3409         /*
3410          * trace_printk_init_buffers() can be called by modules.
3411          * If that happens, then we need to start cmdline recording
3412          * directly here. If the global_trace.buffer is already
3413          * allocated here, then this was called by module code.
3414          */
3415         if (global_trace.array_buffer.buffer)
3416                 tracing_start_cmdline_record();
3417 }
3418 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3419
3420 void trace_printk_start_comm(void)
3421 {
3422         /* Start tracing comms if trace printk is set */
3423         if (!buffers_allocated)
3424                 return;
3425         tracing_start_cmdline_record();
3426 }
3427
3428 static void trace_printk_start_stop_comm(int enabled)
3429 {
3430         if (!buffers_allocated)
3431                 return;
3432
3433         if (enabled)
3434                 tracing_start_cmdline_record();
3435         else
3436                 tracing_stop_cmdline_record();
3437 }
3438
3439 /**
3440  * trace_vbprintk - write binary msg to tracing buffer
3441  * @ip:    The address of the caller
3442  * @fmt:   The string format to write to the buffer
3443  * @args:  Arguments for @fmt
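 *
 * Only the format pointer and the binary argument words produced by
 * vbin_printf() are stored in the ring buffer; the string itself is
 * formatted later, when the buffer is read.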
3444  */
3445 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3446 {
3447         struct trace_event_call *call = &event_bprint;
3448         struct ring_buffer_event *event;
3449         struct trace_buffer *buffer;
3450         struct trace_array *tr = &global_trace;
3451         struct bprint_entry *entry;
3452         unsigned int trace_ctx;
3453         char *tbuffer;
3454         int len = 0, size;
3455
3456         if (unlikely(tracing_selftest_running || tracing_disabled))
3457                 return 0;
3458
3459         /* Don't pollute graph traces with trace_vprintk internals */
3460         pause_graph_tracing();
3461
3462         trace_ctx = tracing_gen_ctx();
3463         preempt_disable_notrace();
3464
3465         tbuffer = get_trace_buf();
3466         if (!tbuffer) {
3467                 len = 0;
3468                 goto out_nobuffer;
3469         }
3470
3471         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3472
3473         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3474                 goto out_put;
3475
3476         size = sizeof(*entry) + sizeof(u32) * len;
3477         buffer = tr->array_buffer.buffer;
3478         ring_buffer_nest_start(buffer);
3479         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3480                                             trace_ctx);
3481         if (!event)
3482                 goto out;
3483         entry = ring_buffer_event_data(event);
3484         entry->ip                       = ip;
3485         entry->fmt                      = fmt;
3486
3487         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3488         if (!call_filter_check_discard(call, entry, buffer, event)) {
3489                 __buffer_unlock_commit(buffer, event);
3490                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3491         }
3492
3493 out:
3494         ring_buffer_nest_end(buffer);
3495 out_put:
3496         put_trace_buf();
3497
3498 out_nobuffer:
3499         preempt_enable_notrace();
3500         unpause_graph_tracing();
3501
3502         return len;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_vbprintk);
3505
3506 __printf(3, 0)
3507 static int
3508 __trace_array_vprintk(struct trace_buffer *buffer,
3509                       unsigned long ip, const char *fmt, va_list args)
3510 {
3511         struct trace_event_call *call = &event_print;
3512         struct ring_buffer_event *event;
3513         int len = 0, size;
3514         struct print_entry *entry;
3515         unsigned int trace_ctx;
3516         char *tbuffer;
3517
3518         if (tracing_disabled)
3519                 return 0;
3520
3521         /* Don't pollute graph traces with trace_vprintk internals */
3522         pause_graph_tracing();
3523
3524         trace_ctx = tracing_gen_ctx();
3525         preempt_disable_notrace();
3526
3527
3528         tbuffer = get_trace_buf();
3529         if (!tbuffer) {
3530                 len = 0;
3531                 goto out_nobuffer;
3532         }
3533
3534         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3535
3536         size = sizeof(*entry) + len + 1;
3537         ring_buffer_nest_start(buffer);
3538         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3539                                             trace_ctx);
3540         if (!event)
3541                 goto out;
3542         entry = ring_buffer_event_data(event);
3543         entry->ip = ip;
3544
3545         memcpy(&entry->buf, tbuffer, len + 1);
3546         if (!call_filter_check_discard(call, entry, buffer, event)) {
3547                 __buffer_unlock_commit(buffer, event);
3548                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3549         }
3550
3551 out:
3552         ring_buffer_nest_end(buffer);
3553         put_trace_buf();
3554
3555 out_nobuffer:
3556         preempt_enable_notrace();
3557         unpause_graph_tracing();
3558
3559         return len;
3560 }
3561
3562 __printf(3, 0)
3563 int trace_array_vprintk(struct trace_array *tr,
3564                         unsigned long ip, const char *fmt, va_list args)
3565 {
3566         if (tracing_selftest_running && tr == &global_trace)
3567                 return 0;
3568
3569         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3570 }
3571
3572 /**
3573  * trace_array_printk - Print a message to a specific instance
3574  * @tr: The instance trace_array descriptor
3575  * @ip: The instruction pointer that this is called from.
3576  * @fmt: The format to print (printf format)
3577  *
3578  * If a subsystem sets up its own instance, they have the right to
3579  * printk strings into their tracing instance buffer using this
3580  * function. Note, this function will not write into the top level
3581  * buffer (use trace_printk() for that), as writing into the top level
3582  * buffer should only have events that can be individually disabled.
3583  * trace_printk() is only used for debugging a kernel, and should not
3584  * be ever incorporated in normal use.
3585  *
3586  * trace_array_printk() can be used, as it will not add noise to the
3587  * top level tracing buffer.
3588  *
3589  * Note, trace_array_init_printk() must be called on @tr before this
3590  * can be used.
3591  */
3592 __printf(3, 0)
3593 int trace_array_printk(struct trace_array *tr,
3594                        unsigned long ip, const char *fmt, ...)
3595 {
3596         int ret;
3597         va_list ap;
3598
3599         if (!tr)
3600                 return -ENOENT;
3601
3602         /* This is only allowed for created instances */
3603         if (tr == &global_trace)
3604                 return 0;
3605
3606         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3607                 return 0;
3608
3609         va_start(ap, fmt);
3610         ret = trace_array_vprintk(tr, ip, fmt, ap);
3611         va_end(ap);
3612         return ret;
3613 }
3614 EXPORT_SYMBOL_GPL(trace_array_printk);
3615
3616 /**
3617  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3618  * @tr: The trace array to initialize the buffers for
3619  *
3620  * As trace_array_printk() only writes into instances, they are OK to
3621  * have in the kernel (unlike trace_printk()). This needs to be called
3622  * before trace_array_printk() can be used on a trace_array.
3623  */
3624 int trace_array_init_printk(struct trace_array *tr)
3625 {
3626         if (!tr)
3627                 return -ENOENT;
3628
3629         /* This is only allowed for created instances */
3630         if (tr == &global_trace)
3631                 return -EINVAL;
3632
3633         return alloc_percpu_trace_buffer();
3634 }
3635 EXPORT_SYMBOL_GPL(trace_array_init_printk);
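
/*
 * Illustrative use from a subsystem that owns its own instance (the instance
 * name below is made up and error handling is omitted):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	trace_array_init_printk(tr);
 *	trace_array_printk(tr, _THIS_IP_, "value: %d\n", 42);
 */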
3636
3637 __printf(3, 4)
3638 int trace_array_printk_buf(struct trace_buffer *buffer,
3639                            unsigned long ip, const char *fmt, ...)
3640 {
3641         int ret;
3642         va_list ap;
3643
3644         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3645                 return 0;
3646
3647         va_start(ap, fmt);
3648         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3649         va_end(ap);
3650         return ret;
3651 }
3652
3653 __printf(2, 0)
3654 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3655 {
3656         return trace_array_vprintk(&global_trace, ip, fmt, args);
3657 }
3658 EXPORT_SYMBOL_GPL(trace_vprintk);
3659
3660 static void trace_iterator_increment(struct trace_iterator *iter)
3661 {
3662         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3663
3664         iter->idx++;
3665         if (buf_iter)
3666                 ring_buffer_iter_advance(buf_iter);
3667 }
3668
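/*
 * Peek at the next entry on @cpu without consuming it: use the buffer
 * iterator when one exists, otherwise peek directly at the ring buffer.
 * Also records the entry size in iter->ent_size and reports lost events.
 */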
3669 static struct trace_entry *
3670 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3671                 unsigned long *lost_events)
3672 {
3673         struct ring_buffer_event *event;
3674         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3675
3676         if (buf_iter) {
3677                 event = ring_buffer_iter_peek(buf_iter, ts);
3678                 if (lost_events)
3679                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3680                                 (unsigned long)-1 : 0;
3681         } else {
3682                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3683                                          lost_events);
3684         }
3685
3686         if (event) {
3687                 iter->ent_size = ring_buffer_event_length(event);
3688                 return ring_buffer_event_data(event);
3689         }
3690         iter->ent_size = 0;
3691         return NULL;
3692 }
3693
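/*
 * Find the entry that should be printed next: peek at each tracing CPU
 * (or only at iter->cpu_file for per-CPU trace files) and return the
 * pending entry with the smallest timestamp.
 */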
3694 static struct trace_entry *
3695 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3696                   unsigned long *missing_events, u64 *ent_ts)
3697 {
3698         struct trace_buffer *buffer = iter->array_buffer->buffer;
3699         struct trace_entry *ent, *next = NULL;
3700         unsigned long lost_events = 0, next_lost = 0;
3701         int cpu_file = iter->cpu_file;
3702         u64 next_ts = 0, ts;
3703         int next_cpu = -1;
3704         int next_size = 0;
3705         int cpu;
3706
3707         /*
3708          * If we are in a per_cpu trace file, don't bother iterating over
3709          * all CPUs; just peek at that CPU directly.
3710          */
3711         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3712                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3713                         return NULL;
3714                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3715                 if (ent_cpu)
3716                         *ent_cpu = cpu_file;
3717
3718                 return ent;
3719         }
3720
3721         for_each_tracing_cpu(cpu) {
3722
3723                 if (ring_buffer_empty_cpu(buffer, cpu))
3724                         continue;
3725
3726                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3727
3728                 /*
3729                  * Pick the entry with the smallest timestamp:
3730                  */
3731                 if (ent && (!next || ts < next_ts)) {
3732                         next = ent;
3733                         next_cpu = cpu;
3734                         next_ts = ts;
3735                         next_lost = lost_events;
3736                         next_size = iter->ent_size;
3737                 }
3738         }
3739
3740         iter->ent_size = next_size;
3741
3742         if (ent_cpu)
3743                 *ent_cpu = next_cpu;
3744
3745         if (ent_ts)
3746                 *ent_ts = next_ts;
3747
3748         if (missing_events)
3749                 *missing_events = next_lost;
3750
3751         return next;
3752 }
3753
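/*
 * Fallback format buffer for contexts where iter->fmt cannot be
 * (re)allocated, such as ftrace_dump(); see trace_iter_expand_format().
 */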
3754 #define STATIC_FMT_BUF_SIZE     128
3755 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3756
3757 char *trace_iter_expand_format(struct trace_iterator *iter)
3758 {
3759         char *tmp;
3760
3761         /*
3762          * iter->tr is NULL when used with tp_printk, in which case this
3763          * can be called from a context where krealloc() is not safe.
3764          */
3765         if (!iter->tr || iter->fmt == static_fmt_buf)
3766                 return NULL;
3767
3768         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3769                        GFP_KERNEL);
3770         if (tmp) {
3771                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3772                 iter->fmt = tmp;
3773         }
3774
3775         return tmp;
3776 }
3777
3778 /* Returns true if the string is safe to dereference from an event */
3779 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3780                            bool star, int len)
3781 {
3782         unsigned long addr = (unsigned long)str;
3783         struct trace_event *trace_event;
3784         struct trace_event_call *event;
3785
3786         /* Ignore strings with no length */
3787         if (star && !len)
3788                 return true;
3789
3790         /* OK if part of the event data */
3791         if ((addr >= (unsigned long)iter->ent) &&
3792             (addr < (unsigned long)iter->ent + iter->ent_size))
3793                 return true;
3794
3795         /* OK if part of the temp seq buffer */
3796         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3797             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3798                 return true;
3799
3800         /* Core rodata can not be freed */
3801         if (is_kernel_rodata(addr))
3802                 return true;
3803
3804         if (trace_is_tracepoint_string(str))
3805                 return true;
3806
3807         /*
3808          * Now this could be a module event, referencing core module
3809          * data, which is OK.
3810          */
3811         if (!iter->ent)
3812                 return false;
3813
3814         trace_event = ftrace_find_event(iter->ent->type);
3815         if (!trace_event)
3816                 return false;
3817
3818         event = container_of(trace_event, struct trace_event_call, event);
3819         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3820                 return false;
3821
3822         /* Would rather have rodata, but this will suffice */
3823         if (within_module_core(addr, event->module))
3824                 return true;
3825
3826         return false;
3827 }
3828
3829 static const char *show_buffer(struct trace_seq *s)
3830 {
3831         struct seq_buf *seq = &s->seq;
3832
3833         seq_buf_terminate(seq);
3834
3835         return seq->buffer;
3836 }
3837
3838 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3839
3840 static int test_can_verify_check(const char *fmt, ...)
3841 {
3842         char buf[16];
3843         va_list ap;
3844         int ret;
3845
3846         /*
3847          * The verifier depends on vsnprintf() modifying the va_list passed
3848          * to it, i.e. on the va_list being passed by reference. Some
3849          * architectures (like x86_32) pass it by value, which means that
3850          * vsnprintf() does not modify the caller's va_list, and the
3851          * verifier would then need to understand every value that
3852          * vsnprintf can consume. If the va_list is passed by value, the
3853          * verifier is disabled.
3854          */
3855         va_start(ap, fmt);
3856         vsnprintf(buf, 16, "%d", ap);
3857         ret = va_arg(ap, int);
3858         va_end(ap);
3859
3860         return ret;
3861 }
3862
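/*
 * test_can_verify_check("%d %d", 0, 1) returns 1 when vsnprintf() advanced
 * the caller's va_list (passed by reference) and 0 when it did not (passed
 * by value); in the latter case the verifier is switched off below.
 */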
3863 static void test_can_verify(void)
3864 {
3865         if (!test_can_verify_check("%d %d", 0, 1)) {
3866                 pr_info("trace event string verifier disabled\n");
3867                 static_branch_inc(&trace_no_verify);
3868         }
3869 }
3870
3871 /**
3872  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3873  * @iter: The iterator that holds the seq buffer and the event being printed
3874  * @fmt: The format used to print the event
3875  * @ap: The va_list holding the data to print from @fmt.
3876  *
3877  * This writes the data into the @iter->seq buffer using the data from
3878  * @fmt and @ap. If the format has a %s, then the source of the string
3879  * is examined to make sure it is safe to print, otherwise it will
3880  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3881  * pointer.
3882  */
3883 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3884                          va_list ap)
3885 {
3886         const char *p = fmt;
3887         const char *str;
3888         int i, j;
3889
3890         if (WARN_ON_ONCE(!fmt))
3891                 return;
3892
3893         if (static_branch_unlikely(&trace_no_verify))
3894                 goto print;
3895
3896         /* Don't bother checking when doing a ftrace_dump() */
3897         if (iter->fmt == static_fmt_buf)
3898                 goto print;
3899
3900         while (*p) {
3901                 bool star = false;
3902                 int len = 0;
3903
3904                 j = 0;
3905
3906                 /* We only care about %s and variants */
3907                 for (i = 0; p[i]; i++) {
3908                         if (i + 1 >= iter->fmt_size) {
3909                                 /*
3910                                  * If we can't expand the copy buffer,
3911                                  * just print it.
3912                                  */
3913                                 if (!trace_iter_expand_format(iter))
3914                                         goto print;
3915                         }
3916
3917                         if (p[i] == '\\' && p[i+1]) {
3918                                 i++;
3919                                 continue;
3920                         }
3921                         if (p[i] == '%') {
3922                                 /* Need to test cases like %08.*s */
3923                                 for (j = 1; p[i+j]; j++) {
3924                                         if (isdigit(p[i+j]) ||
3925                                             p[i+j] == '.')
3926                                                 continue;
3927                                         if (p[i+j] == '*') {
3928                                                 star = true;
3929                                                 continue;
3930                                         }
3931                                         break;
3932                                 }
3933                                 if (p[i+j] == 's')
3934                                         break;
3935                                 star = false;
3936                         }
3937                         j = 0;
3938                 }
3939                 /* If no %s found then just print normally */
3940                 if (!p[i])
3941                         break;
3942
3943                 /* Copy up to the %s, and print that */
3944                 strncpy(iter->fmt, p, i);
3945                 iter->fmt[i] = '\0';
3946                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3947
3948                 /*
3949                  * If iter->seq is full, the above call no longer guarantees
3950                  * that ap is in sync with fmt processing, and further calls
3951                  * to va_arg() can return wrong positional arguments.
3952                  *
3953                  * Ensure that ap is no longer used in this case.
3954                  */
3955                 if (iter->seq.full) {
3956                         p = "";
3957                         break;
3958                 }
3959
3960                 if (star)
3961                         len = va_arg(ap, int);
3962
3963                 /* The ap now points to the string data of the %s */
3964                 str = va_arg(ap, const char *);
3965
3966                 /*
3967                  * If you hit this warning, it is likely that the
3968                  * trace event in question used %s on a string that
3969                  * was saved at the time of the event, but may not be
3970                  * around when the trace is read. Use __string(),
3971                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3972                  * instead. See samples/trace_events/trace-events-sample.h
3973                  * for reference.
3974                  */
3975                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3976                               "fmt: '%s' current_buffer: '%s'",
3977                               fmt, show_buffer(&iter->seq))) {
3978                         int ret;
3979
3980                         /* Try to safely read the string */
3981                         if (star) {
3982                                 if (len + 1 > iter->fmt_size)
3983                                         len = iter->fmt_size - 1;
3984                                 if (len < 0)
3985                                         len = 0;
3986                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3987                                 iter->fmt[len] = 0;
3988                                 star = false;
3989                         } else {
3990                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3991                                                                   iter->fmt_size);
3992                         }
3993                         if (ret < 0)
3994                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3995                         else
3996                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3997                                                  str, iter->fmt);
3998                         str = "[UNSAFE-MEMORY]";
3999                         strcpy(iter->fmt, "%s");
4000                 } else {
4001                         strncpy(iter->fmt, p + i, j + 1);
4002                         iter->fmt[j+1] = '\0';
4003                 }
4004                 if (star)
4005                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4006                 else
4007                         trace_seq_printf(&iter->seq, iter->fmt, str);
4008
4009                 p += i + j + 1;
4010         }
4011  print:
4012         if (*p)
4013                 trace_seq_vprintf(&iter->seq, p, ap);
4014 }
4015
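/*
 * Unless TRACE_ITER_HASH_PTR is set in the instance's trace_flags, rewrite
 * each bare "%p" in @fmt as "%px" (leaving "%%" untouched) so that the
 * event prints unhashed pointer values. The rewritten format is built in
 * iter->fmt.
 */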
4016 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4017 {
4018         const char *p, *new_fmt;
4019         char *q;
4020
4021         if (WARN_ON_ONCE(!fmt))
4022                 return fmt;
4023
4024         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4025                 return fmt;
4026
4027         p = fmt;
4028         new_fmt = q = iter->fmt;
4029         while (*p) {
4030                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4031                         if (!trace_iter_expand_format(iter))
4032                                 return fmt;
4033
4034                         q += iter->fmt - new_fmt;
4035                         new_fmt = iter->fmt;
4036                 }
4037
4038                 *q++ = *p++;
4039
4040                 /* Replace %p with %px */
4041                 if (p[-1] == '%') {
4042                         if (p[0] == '%') {
4043                                 *q++ = *p++;
4044                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4045                                 *q++ = *p++;
4046                                 *q++ = 'x';
4047                         }
4048                 }
4049         }
4050         *q = '\0';
4051
4052         return new_fmt;
4053 }
4054
4055 #define STATIC_TEMP_BUF_SIZE    128
4056 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4057
4058 /* Find the next real entry, without updating the iterator itself */
4059 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4060                                           int *ent_cpu, u64 *ent_ts)
4061 {
4062         /* __find_next_entry will reset ent_size */
4063         int ent_size = iter->ent_size;
4064         struct trace_entry *entry;
4065
4066         /*
4067          * If called from ftrace_dump(), then the iter->temp buffer
4068          * will be the static_temp_buf and not created from kmalloc.
4069          * If the entry size is greater than the buffer, we cannot
4070          * save it. Just return NULL in that case. This is only
4071          * used to add markers when two consecutive events' time
4072          * stamps have a large delta. See trace_print_lat_context().
4073          */
4074         if (iter->temp == static_temp_buf &&
4075             STATIC_TEMP_BUF_SIZE < ent_size)
4076                 return NULL;
4077
4078         /*
4079          * __find_next_entry() may call peek_next_entry(), which may call
4080          * ring_buffer_peek(), which can make the contents of iter->ent
4081          * undefined. Copy iter->ent now.
4082          */
4083         if (iter->ent && iter->ent != iter->temp) {
4084                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4085                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4086                         void *temp;
4087                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4088                         if (!temp)
4089                                 return NULL;
4090                         kfree(iter->temp);
4091                         iter->temp = temp;
4092                         iter->temp_size = iter->ent_size;
4093                 }
4094                 memcpy(iter->temp, iter->ent, iter->ent_size);
4095                 iter->ent = iter->temp;
4096         }
4097         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4098         /* Put back the original ent_size */
4099         iter->ent_size = ent_size;
4100
4101         return entry;
4102 }
4103
4104 /* Find the next real entry, and increment the iterator to the next entry */
4105 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4106 {
4107         iter->ent = __find_next_entry(iter, &iter->cpu,
4108                                       &iter->lost_events, &iter->ts);
4109
4110         if (iter->ent)
4111                 trace_iterator_increment(iter);
4112
4113         return iter->ent ? iter : NULL;
4114 }
4115
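/*
 * Unlike the peek-based iteration above, this removes the next entry on
 * iter->cpu from the ring buffer as it is read.
 */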
4116 static void trace_consume(struct trace_iterator *iter)
4117 {
4118         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4119                             &iter->lost_events);
4120 }
4121
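/*
 * seq_file ->next(): advance to the entry at position *pos. The ring
 * buffer can only be walked forward, so entries are stepped over one at a
 * time with trace_find_next_entry_inc().
 */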
4122 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4123 {
4124         struct trace_iterator *iter = m->private;
4125         int i = (int)*pos;
4126         void *ent;
4127
4128         WARN_ON_ONCE(iter->leftover);
4129
4130         (*pos)++;
4131
4132         /* can't go backwards */
4133         if (iter->idx > i)
4134                 return NULL;
4135
4136         if (iter->idx < 0)
4137                 ent = trace_find_next_entry_inc(iter);
4138         else
4139                 ent = iter;
4140
4141         while (ent && iter->idx < i)
4142                 ent = trace_find_next_entry_inc(iter);
4143
4144         iter->pos = *pos;
4145
4146         return ent;
4147 }
4148
4149 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4150 {
4151         struct ring_buffer_iter *buf_iter;
4152         unsigned long entries = 0;
4153         u64 ts;
4154
4155         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4156
4157         buf_iter = trace_buffer_iter(iter, cpu);
4158         if (!buf_iter)
4159                 return;
4160
4161         ring_buffer_iter_reset(buf_iter);
4162
4163         /*
4164          * With the max latency tracers, it is possible that a reset
4165          * never took place on a CPU. This is evident when the timestamp
4166          * is before the start of the buffer.
4167          */
4168         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4169                 if (ts >= iter->array_buffer->time_start)
4170                         break;
4171                 entries++;
4172                 ring_buffer_iter_advance(buf_iter);
4173         }
4174
4175         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4176 }
4177
4178 /*
4179  * The current tracer is copied to avoid taking a global lock
4180  * all around.
4181  */
4182 static void *s_start(struct seq_file *m, loff_t *pos)
4183 {
4184         struct trace_iterator *iter = m->private;
4185         struct trace_array *tr = iter->tr;
4186         int cpu_file = iter->cpu_file;
4187         void *p = NULL;
4188         loff_t l = 0;
4189         int cpu;
4190
4191         /*
4192          * Copy the tracer to avoid using a global lock all around.
4193          * iter->trace is a copy of current_trace; the name pointer may
4194          * be compared instead of using strcmp(), as iter->trace->name
4195          * will point to the same string as current_trace->name.
4196          */
4197         mutex_lock(&trace_types_lock);
4198         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4199                 *iter->trace = *tr->current_trace;
4200         mutex_unlock(&trace_types_lock);
4201
4202 #ifdef CONFIG_TRACER_MAX_TRACE
4203         if (iter->snapshot && iter->trace->use_max_tr)
4204                 return ERR_PTR(-EBUSY);
4205 #endif
4206
4207         if (*pos != iter->pos) {
4208                 iter->ent = NULL;
4209                 iter->cpu = 0;
4210                 iter->idx = -1;
4211
4212                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4213                         for_each_tracing_cpu(cpu)
4214                                 tracing_iter_reset(iter, cpu);
4215                 } else
4216                         tracing_iter_reset(iter, cpu_file);
4217
4218                 iter->leftover = 0;
4219                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4220                         ;
4221
4222         } else {
4223                 /*
4224                  * If we overflowed the seq_file before, then we want
4225                  * to just reuse the trace_seq buffer again.
4226                  */
4227                 if (iter->leftover)
4228                         p = iter;
4229                 else {
4230                         l = *pos - 1;
4231                         p = s_next(m, p, &l);
4232                 }
4233         }
4234
4235         trace_event_read_lock();
4236         trace_access_lock(cpu_file);
4237         return p;
4238 }
4239
4240 static void s_stop(struct seq_file *m, void *p)
4241 {
4242         struct trace_iterator *iter = m->private;
4243
4244 #ifdef CONFIG_TRACER_MAX_TRACE
4245         if (iter->snapshot && iter->trace->use_max_tr)
4246                 return;
4247 #endif
4248
4249         trace_access_unlock(iter->cpu_file);
4250         trace_event_read_unlock();
4251 }
4252
4253 static void
4254 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4255                       unsigned long *entries, int cpu)
4256 {
4257         unsigned long count;
4258
4259         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4260         /*
4261          * If this buffer has skipped entries, then we hold all
4262          * entries for the trace and we need to ignore the
4263          * ones before the time stamp.
4264          */
4265         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4266                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4267                 /* total is the same as the entries */
4268                 *total = count;
4269         } else
4270                 *total = count +
4271                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4272         *entries = count;
4273 }
4274
4275 static void
4276 get_total_entries(struct array_buffer *buf,
4277                   unsigned long *total, unsigned long *entries)
4278 {
4279         unsigned long t, e;
4280         int cpu;
4281
4282         *total = 0;
4283         *entries = 0;
4284
4285         for_each_tracing_cpu(cpu) {
4286                 get_total_entries_cpu(buf, &t, &e, cpu);
4287                 *total += t;
4288                 *entries += e;
4289         }
4290 }
4291
4292 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4293 {
4294         unsigned long total, entries;
4295
4296         if (!tr)
4297                 tr = &global_trace;
4298
4299         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4300
4301         return entries;
4302 }
4303
4304 unsigned long trace_total_entries(struct trace_array *tr)
4305 {
4306         unsigned long total, entries;
4307
4308         if (!tr)
4309                 tr = &global_trace;
4310
4311         get_total_entries(&tr->array_buffer, &total, &entries);
4312
4313         return entries;
4314 }
4315
4316 static void print_lat_help_header(struct seq_file *m)
4317 {
4318         seq_puts(m, "#                    _------=> CPU#            \n"
4319                     "#                   / _-----=> irqs-off/BH-disabled\n"
4320                     "#                  | / _----=> need-resched    \n"
4321                     "#                  || / _---=> hardirq/softirq \n"
4322                     "#                  ||| / _--=> preempt-depth   \n"
4323                     "#                  |||| / _-=> migrate-disable \n"
4324                     "#                  ||||| /     delay           \n"
4325                     "#  cmd     pid     |||||| time  |   caller     \n"
4326                     "#     \\   /        ||||||  \\    |    /       \n");
4327 }
4328
4329 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4330 {
4331         unsigned long total;
4332         unsigned long entries;
4333
4334         get_total_entries(buf, &total, &entries);
4335         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4336                    entries, total, num_online_cpus());
4337         seq_puts(m, "#\n");
4338 }
4339
4340 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4341                                    unsigned int flags)
4342 {
4343         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4344
4345         print_event_info(buf, m);
4346
4347         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4348         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4349 }
4350
4351 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4352                                        unsigned int flags)
4353 {
4354         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4355         static const char space[] = "            ";
4356         int prec = tgid ? 12 : 2;
4357
4358         print_event_info(buf, m);
4359
4360         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4361         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4362         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4363         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4364         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4365         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4366         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4367         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4368 }
4369
4370 void
4371 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4372 {
4373         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4374         struct array_buffer *buf = iter->array_buffer;
4375         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4376         struct tracer *type = iter->trace;
4377         unsigned long entries;
4378         unsigned long total;
4379         const char *name = type->name;
4380
4381         get_total_entries(buf, &total, &entries);
4382
4383         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4384                    name, UTS_RELEASE);
4385         seq_puts(m, "# -----------------------------------"
4386                  "---------------------------------\n");
4387         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4388                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4389                    nsecs_to_usecs(data->saved_latency),
4390                    entries,
4391                    total,
4392                    buf->cpu,
4393                    preempt_model_none()      ? "server" :
4394                    preempt_model_voluntary() ? "desktop" :
4395                    preempt_model_full()      ? "preempt" :
4396                    preempt_model_rt()        ? "preempt_rt" :
4397                    "unknown",
4398                    /* These are reserved for later use */
4399                    0, 0, 0, 0);
4400 #ifdef CONFIG_SMP
4401         seq_printf(m, " #P:%d)\n", num_online_cpus());
4402 #else
4403         seq_puts(m, ")\n");
4404 #endif
4405         seq_puts(m, "#    -----------------\n");
4406         seq_printf(m, "#    | task: %.16s-%d "
4407                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4408                    data->comm, data->pid,
4409                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4410                    data->policy, data->rt_priority);
4411         seq_puts(m, "#    -----------------\n");
4412
4413         if (data->critical_start) {
4414                 seq_puts(m, "#  => started at: ");
4415                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4416                 trace_print_seq(m, &iter->seq);
4417                 seq_puts(m, "\n#  => ended at:   ");
4418                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4419                 trace_print_seq(m, &iter->seq);
4420                 seq_puts(m, "\n#\n");
4421         }
4422
4423         seq_puts(m, "#\n");
4424 }
4425
4426 static void test_cpu_buff_start(struct trace_iterator *iter)
4427 {
4428         struct trace_seq *s = &iter->seq;
4429         struct trace_array *tr = iter->tr;
4430
4431         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4432                 return;
4433
4434         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4435                 return;
4436
4437         if (cpumask_available(iter->started) &&
4438             cpumask_test_cpu(iter->cpu, iter->started))
4439                 return;
4440
4441         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4442                 return;
4443
4444         if (cpumask_available(iter->started))
4445                 cpumask_set_cpu(iter->cpu, iter->started);
4446
4447         /* Don't print the started CPU buffer message for the first entry of the trace */
4448         if (iter->idx > 1)
4449                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4450                                 iter->cpu);
4451 }
4452
4453 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4454 {
4455         struct trace_array *tr = iter->tr;
4456         struct trace_seq *s = &iter->seq;
4457         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4458         struct trace_entry *entry;
4459         struct trace_event *event;
4460
4461         entry = iter->ent;
4462
4463         test_cpu_buff_start(iter);
4464
4465         event = ftrace_find_event(entry->type);
4466
4467         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4468                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4469                         trace_print_lat_context(iter);
4470                 else
4471                         trace_print_context(iter);
4472         }
4473
4474         if (trace_seq_has_overflowed(s))
4475                 return TRACE_TYPE_PARTIAL_LINE;
4476
4477         if (event) {
4478                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4479                         return print_event_fields(iter, event);
4480                 return event->funcs->trace(iter, sym_flags, event);
4481         }
4482
4483         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4484
4485         return trace_handle_return(s);
4486 }
4487
4488 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4489 {
4490         struct trace_array *tr = iter->tr;
4491         struct trace_seq *s = &iter->seq;
4492         struct trace_entry *entry;
4493         struct trace_event *event;
4494
4495         entry = iter->ent;
4496
4497         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4498                 trace_seq_printf(s, "%d %d %llu ",
4499                                  entry->pid, iter->cpu, iter->ts);
4500
4501         if (trace_seq_has_overflowed(s))
4502                 return TRACE_TYPE_PARTIAL_LINE;
4503
4504         event = ftrace_find_event(entry->type);
4505         if (event)
4506                 return event->funcs->raw(iter, 0, event);
4507
4508         trace_seq_printf(s, "%d ?\n", entry->type);
4509
4510         return trace_handle_return(s);
4511 }
4512
4513 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4514 {
4515         struct trace_array *tr = iter->tr;
4516         struct trace_seq *s = &iter->seq;
4517         unsigned char newline = '\n';
4518         struct trace_entry *entry;
4519         struct trace_event *event;
4520
4521         entry = iter->ent;
4522
4523         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4524                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4525                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4526                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4527                 if (trace_seq_has_overflowed(s))
4528                         return TRACE_TYPE_PARTIAL_LINE;
4529         }
4530
4531         event = ftrace_find_event(entry->type);
4532         if (event) {
4533                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4534                 if (ret != TRACE_TYPE_HANDLED)
4535                         return ret;
4536         }
4537
4538         SEQ_PUT_FIELD(s, newline);
4539
4540         return trace_handle_return(s);
4541 }
4542
4543 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4544 {
4545         struct trace_array *tr = iter->tr;
4546         struct trace_seq *s = &iter->seq;
4547         struct trace_entry *entry;
4548         struct trace_event *event;
4549
4550         entry = iter->ent;
4551
4552         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4553                 SEQ_PUT_FIELD(s, entry->pid);
4554                 SEQ_PUT_FIELD(s, iter->cpu);
4555                 SEQ_PUT_FIELD(s, iter->ts);
4556                 if (trace_seq_has_overflowed(s))
4557                         return TRACE_TYPE_PARTIAL_LINE;
4558         }
4559
4560         event = ftrace_find_event(entry->type);
4561         return event ? event->funcs->binary(iter, 0, event) :
4562                 TRACE_TYPE_HANDLED;
4563 }
4564
4565 int trace_empty(struct trace_iterator *iter)
4566 {
4567         struct ring_buffer_iter *buf_iter;
4568         int cpu;
4569
4570         /* If we are looking at one CPU buffer, only check that one */
4571         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4572                 cpu = iter->cpu_file;
4573                 buf_iter = trace_buffer_iter(iter, cpu);
4574                 if (buf_iter) {
4575                         if (!ring_buffer_iter_empty(buf_iter))
4576                                 return 0;
4577                 } else {
4578                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4579                                 return 0;
4580                 }
4581                 return 1;
4582         }
4583
4584         for_each_tracing_cpu(cpu) {
4585                 buf_iter = trace_buffer_iter(iter, cpu);
4586                 if (buf_iter) {
4587                         if (!ring_buffer_iter_empty(buf_iter))
4588                                 return 0;
4589                 } else {
4590                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4591                                 return 0;
4592                 }
4593         }
4594
4595         return 1;
4596 }
4597
4598 /*  Called with trace_event_read_lock() held. */
4599 enum print_line_t print_trace_line(struct trace_iterator *iter)
4600 {
4601         struct trace_array *tr = iter->tr;
4602         unsigned long trace_flags = tr->trace_flags;
4603         enum print_line_t ret;
4604
4605         if (iter->lost_events) {
4606                 if (iter->lost_events == (unsigned long)-1)
4607                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4608                                          iter->cpu);
4609                 else
4610                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4611                                          iter->cpu, iter->lost_events);
4612                 if (trace_seq_has_overflowed(&iter->seq))
4613                         return TRACE_TYPE_PARTIAL_LINE;
4614         }
4615
4616         if (iter->trace && iter->trace->print_line) {
4617                 ret = iter->trace->print_line(iter);
4618                 if (ret != TRACE_TYPE_UNHANDLED)
4619                         return ret;
4620         }
4621
4622         if (iter->ent->type == TRACE_BPUTS &&
4623                         trace_flags & TRACE_ITER_PRINTK &&
4624                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4625                 return trace_print_bputs_msg_only(iter);
4626
4627         if (iter->ent->type == TRACE_BPRINT &&
4628                         trace_flags & TRACE_ITER_PRINTK &&
4629                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4630                 return trace_print_bprintk_msg_only(iter);
4631
4632         if (iter->ent->type == TRACE_PRINT &&
4633                         trace_flags & TRACE_ITER_PRINTK &&
4634                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4635                 return trace_print_printk_msg_only(iter);
4636
4637         if (trace_flags & TRACE_ITER_BIN)
4638                 return print_bin_fmt(iter);
4639
4640         if (trace_flags & TRACE_ITER_HEX)
4641                 return print_hex_fmt(iter);
4642
4643         if (trace_flags & TRACE_ITER_RAW)
4644                 return print_raw_fmt(iter);
4645
4646         return print_trace_fmt(iter);
4647 }
4648
4649 void trace_latency_header(struct seq_file *m)
4650 {
4651         struct trace_iterator *iter = m->private;
4652         struct trace_array *tr = iter->tr;
4653
4654         /* print nothing if the buffers are empty */
4655         if (trace_empty(iter))
4656                 return;
4657
4658         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4659                 print_trace_header(m, iter);
4660
4661         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4662                 print_lat_help_header(m);
4663 }
4664
4665 void trace_default_header(struct seq_file *m)
4666 {
4667         struct trace_iterator *iter = m->private;
4668         struct trace_array *tr = iter->tr;
4669         unsigned long trace_flags = tr->trace_flags;
4670
4671         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4672                 return;
4673
4674         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4675                 /* print nothing if the buffers are empty */
4676                 if (trace_empty(iter))
4677                         return;
4678                 print_trace_header(m, iter);
4679                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4680                         print_lat_help_header(m);
4681         } else {
4682                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4683                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4684                                 print_func_help_header_irq(iter->array_buffer,
4685                                                            m, trace_flags);
4686                         else
4687                                 print_func_help_header(iter->array_buffer, m,
4688                                                        trace_flags);
4689                 }
4690         }
4691 }
4692
4693 static void test_ftrace_alive(struct seq_file *m)
4694 {
4695         if (!ftrace_is_dead())
4696                 return;
4697         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4698                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4699 }
4700
4701 #ifdef CONFIG_TRACER_MAX_TRACE
4702 static void show_snapshot_main_help(struct seq_file *m)
4703 {
4704         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4705                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4706                     "#                      Takes a snapshot of the main buffer.\n"
4707                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4708                     "#                      (Doesn't have to be '2'; works with any number that\n"
4709                     "#                       is not a '0' or '1')\n");
4710 }
4711
4712 static void show_snapshot_percpu_help(struct seq_file *m)
4713 {
4714         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4715 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4716         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4717                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4718 #else
4719         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4720                     "#                     Must use main snapshot file to allocate.\n");
4721 #endif
4722         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4723                     "#                      (Doesn't have to be '2'; works with any number that\n"
4724                     "#                       is not a '0' or '1')\n");
4725 }
4726
4727 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4728 {
4729         if (iter->tr->allocated_snapshot)
4730                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4731         else
4732                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4733
4734         seq_puts(m, "# Snapshot commands:\n");
4735         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4736                 show_snapshot_main_help(m);
4737         else
4738                 show_snapshot_percpu_help(m);
4739 }
4740 #else
4741 /* Should never be called */
4742 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4743 #endif
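/*
 * Usage sketch for the snapshot help printed above (the shell commands and
 * the /sys/kernel/tracing mount point are illustrative assumptions):
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot   allocate and take a snapshot
 *   # cat /sys/kernel/tracing/snapshot        read the snapshot buffer
 *   # echo 2 > /sys/kernel/tracing/snapshot   clear it, keep the allocation
 *   # echo 0 > /sys/kernel/tracing/snapshot   free the snapshot buffer
 */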
4744
4745 static int s_show(struct seq_file *m, void *v)
4746 {
4747         struct trace_iterator *iter = v;
4748         int ret;
4749
4750         if (iter->ent == NULL) {
4751                 if (iter->tr) {
4752                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4753                         seq_puts(m, "#\n");
4754                         test_ftrace_alive(m);
4755                 }
4756                 if (iter->snapshot && trace_empty(iter))
4757                         print_snapshot_help(m, iter);
4758                 else if (iter->trace && iter->trace->print_header)
4759                         iter->trace->print_header(m);
4760                 else
4761                         trace_default_header(m);
4762
4763         } else if (iter->leftover) {
4764                 /*
4765                  * If we filled the seq_file buffer earlier, we
4766                  * want to just show it now.
4767                  */
4768                 ret = trace_print_seq(m, &iter->seq);
4769
4770                 /* ret should this time be zero, but you never know */
4771                 iter->leftover = ret;
4772
4773         } else {
4774                 print_trace_line(iter);
4775                 ret = trace_print_seq(m, &iter->seq);
4776                 /*
4777                  * If we overflow the seq_file buffer, then it will
4778                  * ask us for this data again at start up.
4779                  * Use that instead.
4780                  *  ret is 0 if seq_file write succeeded.
4781                  *        -1 otherwise.
4782                  */
4783                 iter->leftover = ret;
4784         }
4785
4786         return 0;
4787 }
4788
4789 /*
4790  * Should be used after trace_array_get(); trace_types_lock
4791  * ensures that i_cdev was already initialized.
4792  */
4793 static inline int tracing_get_cpu(struct inode *inode)
4794 {
4795         if (inode->i_cdev) /* See trace_create_cpu_file() */
4796                 return (long)inode->i_cdev - 1;
4797         return RING_BUFFER_ALL_CPUS;
4798 }
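/*
 * Sketch of the i_cdev encoding relied on above (reading aid only):
 * per-CPU trace files store (cpu + 1) in i_cdev so that a NULL i_cdev
 * still means "all CPUs".  Decoding:
 *
 *   i_cdev == NULL          ->  RING_BUFFER_ALL_CPUS
 *   i_cdev == (void *)(N+1) ->  CPU N
 */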
4799
4800 static const struct seq_operations tracer_seq_ops = {
4801         .start          = s_start,
4802         .next           = s_next,
4803         .stop           = s_stop,
4804         .show           = s_show,
4805 };
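/*
 * Reading aid: the generic seq_file core drives the callbacks above as
 *
 *   s_start(pos)  ->  s_show(v)  ->  s_next(v)  ->  ...  ->  s_stop(v)
 *
 * with s_stop() called whenever the seq_file buffer fills up or the
 * iteration ends, so locks taken in s_start() are dropped there.
 */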
4806
4807 static struct trace_iterator *
4808 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4809 {
4810         struct trace_array *tr = inode->i_private;
4811         struct trace_iterator *iter;
4812         int cpu;
4813
4814         if (tracing_disabled)
4815                 return ERR_PTR(-ENODEV);
4816
4817         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4818         if (!iter)
4819                 return ERR_PTR(-ENOMEM);
4820
4821         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4822                                     GFP_KERNEL);
4823         if (!iter->buffer_iter)
4824                 goto release;
4825
4826         /*
4827          * trace_find_next_entry() may need to save off iter->ent.
4828          * It will place it into the iter->temp buffer. As most
4829          * events are less than 128, allocate a buffer of that size.
4830          * events are less than 128 bytes, allocate a buffer of that size.
4831          * allocate a new buffer to adjust for the bigger iter->ent.
4832          * It's not critical if it fails to get allocated here.
4833          */
4834         iter->temp = kmalloc(128, GFP_KERNEL);
4835         if (iter->temp)
4836                 iter->temp_size = 128;
4837
4838         /*
4839          * trace_event_printf() may need to modify given format
4840          * string to replace %p with %px so that it shows real address
4841          * instead of a hash value. However, that is only needed for event
4842          * tracing; other tracers may not need it. Defer the allocation
4843          * until it is needed.
4844          */
4845         iter->fmt = NULL;
4846         iter->fmt_size = 0;
4847
4848         /*
4849          * We make a copy of the current tracer to avoid concurrent
4850          * changes on it while we are reading.
4851          */
4852         mutex_lock(&trace_types_lock);
4853         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4854         if (!iter->trace)
4855                 goto fail;
4856
4857         *iter->trace = *tr->current_trace;
4858
4859         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4860                 goto fail;
4861
4862         iter->tr = tr;
4863
4864 #ifdef CONFIG_TRACER_MAX_TRACE
4865         /* Currently only the top directory has a snapshot */
4866         if (tr->current_trace->print_max || snapshot)
4867                 iter->array_buffer = &tr->max_buffer;
4868         else
4869 #endif
4870                 iter->array_buffer = &tr->array_buffer;
4871         iter->snapshot = snapshot;
4872         iter->pos = -1;
4873         iter->cpu_file = tracing_get_cpu(inode);
4874         mutex_init(&iter->mutex);
4875
4876         /* Notify the tracer early; before we stop tracing. */
4877         if (iter->trace->open)
4878                 iter->trace->open(iter);
4879
4880         /* Annotate start of buffers if we had overruns */
4881         if (ring_buffer_overruns(iter->array_buffer->buffer))
4882                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4883
4884         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4885         if (trace_clocks[tr->clock_id].in_ns)
4886                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4887
4888         /*
4889          * If pause-on-trace is enabled, then stop the trace while
4890          * dumping, unless this is the "snapshot" file
4891          */
4892         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4893                 tracing_stop_tr(tr);
4894
4895         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4896                 for_each_tracing_cpu(cpu) {
4897                         iter->buffer_iter[cpu] =
4898                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4899                                                          cpu, GFP_KERNEL);
4900                 }
4901                 ring_buffer_read_prepare_sync();
4902                 for_each_tracing_cpu(cpu) {
4903                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4904                         tracing_iter_reset(iter, cpu);
4905                 }
4906         } else {
4907                 cpu = iter->cpu_file;
4908                 iter->buffer_iter[cpu] =
4909                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4910                                                  cpu, GFP_KERNEL);
4911                 ring_buffer_read_prepare_sync();
4912                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4913                 tracing_iter_reset(iter, cpu);
4914         }
4915
4916         mutex_unlock(&trace_types_lock);
4917
4918         return iter;
4919
4920  fail:
4921         mutex_unlock(&trace_types_lock);
4922         kfree(iter->trace);
4923         kfree(iter->temp);
4924         kfree(iter->buffer_iter);
4925 release:
4926         seq_release_private(inode, file);
4927         return ERR_PTR(-ENOMEM);
4928 }
4929
4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(NULL);
4935         if (ret)
4936                 return ret;
4937
4938         filp->private_data = inode->i_private;
4939         return 0;
4940 }
4941
4942 bool tracing_is_disabled(void)
4943 {
4944         return tracing_disabled;
4945 }
4946
4947 /*
4948  * Open and update trace_array ref count.
4949  * Must have the current trace_array passed to it.
4950  */
4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953         struct trace_array *tr = inode->i_private;
4954         int ret;
4955
4956         ret = tracing_check_open_get_tr(tr);
4957         if (ret)
4958                 return ret;
4959
4960         filp->private_data = inode->i_private;
4961
4962         return 0;
4963 }
4964
4965 static int tracing_mark_open(struct inode *inode, struct file *filp)
4966 {
4967         stream_open(inode, filp);
4968         return tracing_open_generic_tr(inode, filp);
4969 }
4970
4971 static int tracing_release(struct inode *inode, struct file *file)
4972 {
4973         struct trace_array *tr = inode->i_private;
4974         struct seq_file *m = file->private_data;
4975         struct trace_iterator *iter;
4976         int cpu;
4977
4978         if (!(file->f_mode & FMODE_READ)) {
4979                 trace_array_put(tr);
4980                 return 0;
4981         }
4982
4983         /* Writes do not use seq_file */
4984         iter = m->private;
4985         mutex_lock(&trace_types_lock);
4986
4987         for_each_tracing_cpu(cpu) {
4988                 if (iter->buffer_iter[cpu])
4989                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4990         }
4991
4992         if (iter->trace && iter->trace->close)
4993                 iter->trace->close(iter);
4994
4995         if (!iter->snapshot && tr->stop_count)
4996                 /* reenable tracing if it was previously enabled */
4997                 tracing_start_tr(tr);
4998
4999         __trace_array_put(tr);
5000
5001         mutex_unlock(&trace_types_lock);
5002
5003         mutex_destroy(&iter->mutex);
5004         free_cpumask_var(iter->started);
5005         kfree(iter->fmt);
5006         kfree(iter->temp);
5007         kfree(iter->trace);
5008         kfree(iter->buffer_iter);
5009         seq_release_private(inode, file);
5010
5011         return 0;
5012 }
5013
5014 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5015 {
5016         struct trace_array *tr = inode->i_private;
5017
5018         trace_array_put(tr);
5019         return 0;
5020 }
5021
5022 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5023 {
5024         struct trace_array *tr = inode->i_private;
5025
5026         trace_array_put(tr);
5027
5028         return single_release(inode, file);
5029 }
5030
5031 static int tracing_open(struct inode *inode, struct file *file)
5032 {
5033         struct trace_array *tr = inode->i_private;
5034         struct trace_iterator *iter;
5035         int ret;
5036
5037         ret = tracing_check_open_get_tr(tr);
5038         if (ret)
5039                 return ret;
5040
5041         /* If this file was opened for write, then erase its contents */
5042         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5043                 int cpu = tracing_get_cpu(inode);
5044                 struct array_buffer *trace_buf = &tr->array_buffer;
5045
5046 #ifdef CONFIG_TRACER_MAX_TRACE
5047                 if (tr->current_trace->print_max)
5048                         trace_buf = &tr->max_buffer;
5049 #endif
5050
5051                 if (cpu == RING_BUFFER_ALL_CPUS)
5052                         tracing_reset_online_cpus(trace_buf);
5053                 else
5054                         tracing_reset_cpu(trace_buf, cpu);
5055         }
5056
5057         if (file->f_mode & FMODE_READ) {
5058                 iter = __tracing_open(inode, file, false);
5059                 if (IS_ERR(iter))
5060                         ret = PTR_ERR(iter);
5061                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5062                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5063         }
5064
5065         if (ret < 0)
5066                 trace_array_put(tr);
5067
5068         return ret;
5069 }
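/*
 * Usage sketch (illustrative shell commands): truncating the "trace"
 * file, e.g.
 *
 *   # echo > /sys/kernel/tracing/trace
 *
 * takes the FMODE_WRITE/O_TRUNC path above and resets the ring buffer,
 * while reading it ("cat trace") goes through __tracing_open().
 */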
5070
5071 /*
5072  * Some tracers are not suitable for instance buffers.
5073  * A tracer is always available for the global array (toplevel)
5074  * or if it explicitly states that it is.
5075  */
5076 static bool
5077 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5078 {
5079         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5080 }
5081
5082 /* Find the next tracer that this trace array may use */
5083 static struct tracer *
5084 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5085 {
5086         while (t && !trace_ok_for_array(t, tr))
5087                 t = t->next;
5088
5089         return t;
5090 }
5091
5092 static void *
5093 t_next(struct seq_file *m, void *v, loff_t *pos)
5094 {
5095         struct trace_array *tr = m->private;
5096         struct tracer *t = v;
5097
5098         (*pos)++;
5099
5100         if (t)
5101                 t = get_tracer_for_array(tr, t->next);
5102
5103         return t;
5104 }
5105
5106 static void *t_start(struct seq_file *m, loff_t *pos)
5107 {
5108         struct trace_array *tr = m->private;
5109         struct tracer *t;
5110         loff_t l = 0;
5111
5112         mutex_lock(&trace_types_lock);
5113
5114         t = get_tracer_for_array(tr, trace_types);
5115         for (; t && l < *pos; t = t_next(m, t, &l))
5116                 ;
5117
5118         return t;
5119 }
5120
5121 static void t_stop(struct seq_file *m, void *p)
5122 {
5123         mutex_unlock(&trace_types_lock);
5124 }
5125
5126 static int t_show(struct seq_file *m, void *v)
5127 {
5128         struct tracer *t = v;
5129
5130         if (!t)
5131                 return 0;
5132
5133         seq_puts(m, t->name);
5134         if (t->next)
5135                 seq_putc(m, ' ');
5136         else
5137                 seq_putc(m, '\n');
5138
5139         return 0;
5140 }
5141
5142 static const struct seq_operations show_traces_seq_ops = {
5143         .start          = t_start,
5144         .next           = t_next,
5145         .stop           = t_stop,
5146         .show           = t_show,
5147 };
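/*
 * Example of the output t_show() builds for available_tracers (the
 * tracer list shown is illustrative and depends on the kernel config):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 *
 * i.e. one space-separated line terminated by a newline.
 */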
5148
5149 static int show_traces_open(struct inode *inode, struct file *file)
5150 {
5151         struct trace_array *tr = inode->i_private;
5152         struct seq_file *m;
5153         int ret;
5154
5155         ret = tracing_check_open_get_tr(tr);
5156         if (ret)
5157                 return ret;
5158
5159         ret = seq_open(file, &show_traces_seq_ops);
5160         if (ret) {
5161                 trace_array_put(tr);
5162                 return ret;
5163         }
5164
5165         m = file->private_data;
5166         m->private = tr;
5167
5168         return 0;
5169 }
5170
5171 static int show_traces_release(struct inode *inode, struct file *file)
5172 {
5173         struct trace_array *tr = inode->i_private;
5174
5175         trace_array_put(tr);
5176         return seq_release(inode, file);
5177 }
5178
5179 static ssize_t
5180 tracing_write_stub(struct file *filp, const char __user *ubuf,
5181                    size_t count, loff_t *ppos)
5182 {
5183         return count;
5184 }
5185
5186 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5187 {
5188         int ret;
5189
5190         if (file->f_mode & FMODE_READ)
5191                 ret = seq_lseek(file, offset, whence);
5192         else
5193                 file->f_pos = ret = 0;
5194
5195         return ret;
5196 }
5197
5198 static const struct file_operations tracing_fops = {
5199         .open           = tracing_open,
5200         .read           = seq_read,
5201         .read_iter      = seq_read_iter,
5202         .splice_read    = copy_splice_read,
5203         .write          = tracing_write_stub,
5204         .llseek         = tracing_lseek,
5205         .release        = tracing_release,
5206 };
5207
5208 static const struct file_operations show_traces_fops = {
5209         .open           = show_traces_open,
5210         .read           = seq_read,
5211         .llseek         = seq_lseek,
5212         .release        = show_traces_release,
5213 };
5214
5215 static ssize_t
5216 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5217                      size_t count, loff_t *ppos)
5218 {
5219         struct trace_array *tr = file_inode(filp)->i_private;
5220         char *mask_str;
5221         int len;
5222
5223         len = snprintf(NULL, 0, "%*pb\n",
5224                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5225         mask_str = kmalloc(len, GFP_KERNEL);
5226         if (!mask_str)
5227                 return -ENOMEM;
5228
5229         len = snprintf(mask_str, len, "%*pb\n",
5230                        cpumask_pr_args(tr->tracing_cpumask));
5231         if (len >= count) {
5232                 count = -EINVAL;
5233                 goto out_err;
5234         }
5235         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5236
5237 out_err:
5238         kfree(mask_str);
5239
5240         return count;
5241 }
5242
5243 int tracing_set_cpumask(struct trace_array *tr,
5244                         cpumask_var_t tracing_cpumask_new)
5245 {
5246         int cpu;
5247
5248         if (!tr)
5249                 return -EINVAL;
5250
5251         local_irq_disable();
5252         arch_spin_lock(&tr->max_lock);
5253         for_each_tracing_cpu(cpu) {
5254                 /*
5255                  * Increase/decrease the disabled counter if we are
5256                  * about to flip a bit in the cpumask:
5257                  */
5258                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5259                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5260                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5261                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5262                 }
5263                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5264                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5265                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5266                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5267                 }
5268         }
5269         arch_spin_unlock(&tr->max_lock);
5270         local_irq_enable();
5271
5272         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5273
5274         return 0;
5275 }
5276
5277 static ssize_t
5278 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5279                       size_t count, loff_t *ppos)
5280 {
5281         struct trace_array *tr = file_inode(filp)->i_private;
5282         cpumask_var_t tracing_cpumask_new;
5283         int err;
5284
5285         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5286                 return -ENOMEM;
5287
5288         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5289         if (err)
5290                 goto err_free;
5291
5292         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5293         if (err)
5294                 goto err_free;
5295
5296         free_cpumask_var(tracing_cpumask_new);
5297
5298         return count;
5299
5300 err_free:
5301         free_cpumask_var(tracing_cpumask_new);
5302
5303         return err;
5304 }
5305
5306 static const struct file_operations tracing_cpumask_fops = {
5307         .open           = tracing_open_generic_tr,
5308         .read           = tracing_cpumask_read,
5309         .write          = tracing_cpumask_write,
5310         .release        = tracing_release_generic_tr,
5311         .llseek         = generic_file_llseek,
5312 };
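/*
 * Illustrative use of the cpumask file handled above (values are
 * examples only); the mask is read and written as a hex CPU bitmask:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask    trace CPUs 0 and 1
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *   3
 */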
5313
5314 static int tracing_trace_options_show(struct seq_file *m, void *v)
5315 {
5316         struct tracer_opt *trace_opts;
5317         struct trace_array *tr = m->private;
5318         u32 tracer_flags;
5319         int i;
5320
5321         mutex_lock(&trace_types_lock);
5322         tracer_flags = tr->current_trace->flags->val;
5323         trace_opts = tr->current_trace->flags->opts;
5324
5325         for (i = 0; trace_options[i]; i++) {
5326                 if (tr->trace_flags & (1 << i))
5327                         seq_printf(m, "%s\n", trace_options[i]);
5328                 else
5329                         seq_printf(m, "no%s\n", trace_options[i]);
5330         }
5331
5332         for (i = 0; trace_opts[i].name; i++) {
5333                 if (tracer_flags & trace_opts[i].bit)
5334                         seq_printf(m, "%s\n", trace_opts[i].name);
5335                 else
5336                         seq_printf(m, "no%s\n", trace_opts[i].name);
5337         }
5338         mutex_unlock(&trace_types_lock);
5339
5340         return 0;
5341 }
5342
5343 static int __set_tracer_option(struct trace_array *tr,
5344                                struct tracer_flags *tracer_flags,
5345                                struct tracer_opt *opts, int neg)
5346 {
5347         struct tracer *trace = tracer_flags->trace;
5348         int ret;
5349
5350         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5351         if (ret)
5352                 return ret;
5353
5354         if (neg)
5355                 tracer_flags->val &= ~opts->bit;
5356         else
5357                 tracer_flags->val |= opts->bit;
5358         return 0;
5359 }
5360
5361 /* Try to assign a tracer specific option */
5362 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5363 {
5364         struct tracer *trace = tr->current_trace;
5365         struct tracer_flags *tracer_flags = trace->flags;
5366         struct tracer_opt *opts = NULL;
5367         int i;
5368
5369         for (i = 0; tracer_flags->opts[i].name; i++) {
5370                 opts = &tracer_flags->opts[i];
5371
5372                 if (strcmp(cmp, opts->name) == 0)
5373                         return __set_tracer_option(tr, trace->flags, opts, neg);
5374         }
5375
5376         return -EINVAL;
5377 }
5378
5379 /* Some tracers require overwrite to stay enabled */
5380 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5381 {
5382         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5383                 return -1;
5384
5385         return 0;
5386 }
5387
5388 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5389 {
5390         int *map;
5391
5392         if ((mask == TRACE_ITER_RECORD_TGID) ||
5393             (mask == TRACE_ITER_RECORD_CMD))
5394                 lockdep_assert_held(&event_mutex);
5395
5396         /* Do nothing if the flag already has the requested state */
5397         if (!!(tr->trace_flags & mask) == !!enabled)
5398                 return 0;
5399
5400         /* Give the tracer a chance to approve the change */
5401         if (tr->current_trace->flag_changed)
5402                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5403                         return -EINVAL;
5404
5405         if (enabled)
5406                 tr->trace_flags |= mask;
5407         else
5408                 tr->trace_flags &= ~mask;
5409
5410         if (mask == TRACE_ITER_RECORD_CMD)
5411                 trace_event_enable_cmd_record(enabled);
5412
5413         if (mask == TRACE_ITER_RECORD_TGID) {
5414                 if (!tgid_map) {
5415                         tgid_map_max = pid_max;
5416                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5417                                        GFP_KERNEL);
5418
5419                         /*
5420                          * Pairs with smp_load_acquire() in
5421                          * trace_find_tgid_ptr() to ensure that if it observes
5422                          * the tgid_map we just allocated then it also observes
5423                          * the corresponding tgid_map_max value.
5424                          */
5425                         smp_store_release(&tgid_map, map);
5426                 }
5427                 if (!tgid_map) {
5428                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5429                         return -ENOMEM;
5430                 }
5431
5432                 trace_event_enable_tgid_record(enabled);
5433         }
5434
5435         if (mask == TRACE_ITER_EVENT_FORK)
5436                 trace_event_follow_fork(tr, enabled);
5437
5438         if (mask == TRACE_ITER_FUNC_FORK)
5439                 ftrace_pid_follow_fork(tr, enabled);
5440
5441         if (mask == TRACE_ITER_OVERWRITE) {
5442                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5443 #ifdef CONFIG_TRACER_MAX_TRACE
5444                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5445 #endif
5446         }
5447
5448         if (mask == TRACE_ITER_PRINTK) {
5449                 trace_printk_start_stop_comm(enabled);
5450                 trace_printk_control(enabled);
5451         }
5452
5453         return 0;
5454 }
5455
5456 int trace_set_options(struct trace_array *tr, char *option)
5457 {
5458         char *cmp;
5459         int neg = 0;
5460         int ret;
5461         size_t orig_len = strlen(option);
5462         int len;
5463
5464         cmp = strstrip(option);
5465
5466         len = str_has_prefix(cmp, "no");
5467         if (len)
5468                 neg = 1;
5469
5470         cmp += len;
5471
5472         mutex_lock(&event_mutex);
5473         mutex_lock(&trace_types_lock);
5474
5475         ret = match_string(trace_options, -1, cmp);
5476         /* If it is not a global option, test the tracer-specific options */
5477         if (ret < 0)
5478                 ret = set_tracer_option(tr, cmp, neg);
5479         else
5480                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5481
5482         mutex_unlock(&trace_types_lock);
5483         mutex_unlock(&event_mutex);
5484
5485         /*
5486          * If the first trailing whitespace is replaced with '\0' by strstrip,
5487          * turn it back into a space.
5488          */
5489         if (orig_len > strlen(option))
5490                 option[strlen(option)] = ' ';
5491
5492         return ret;
5493 }
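/*
 * Example of the option syntax parsed above (the option names are
 * examples; the authoritative lists are trace_options[] and each
 * tracer's own flags):
 *
 *   # echo sym-offset   > /sys/kernel/tracing/trace_options    set it
 *   # echo nosym-offset > /sys/kernel/tracing/trace_options    clear it
 */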
5494
5495 static void __init apply_trace_boot_options(void)
5496 {
5497         char *buf = trace_boot_options_buf;
5498         char *option;
5499
5500         while (true) {
5501                 option = strsep(&buf, ",");
5502
5503                 if (!option)
5504                         break;
5505
5506                 if (*option)
5507                         trace_set_options(&global_trace, option);
5508
5509                 /* Put back the comma to allow this to be called again */
5510                 if (buf)
5511                         *(buf - 1) = ',';
5512         }
5513 }
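/*
 * The buffer walked above is filled from the "trace_options=" kernel
 * command line parameter, e.g. (illustrative values):
 *
 *   trace_options=sym-offset,nooverwrite
 *
 * Each comma-separated token is handed to trace_set_options().
 */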
5514
5515 static ssize_t
5516 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5517                         size_t cnt, loff_t *ppos)
5518 {
5519         struct seq_file *m = filp->private_data;
5520         struct trace_array *tr = m->private;
5521         char buf[64];
5522         int ret;
5523
5524         if (cnt >= sizeof(buf))
5525                 return -EINVAL;
5526
5527         if (copy_from_user(buf, ubuf, cnt))
5528                 return -EFAULT;
5529
5530         buf[cnt] = 0;
5531
5532         ret = trace_set_options(tr, buf);
5533         if (ret < 0)
5534                 return ret;
5535
5536         *ppos += cnt;
5537
5538         return cnt;
5539 }
5540
5541 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5542 {
5543         struct trace_array *tr = inode->i_private;
5544         int ret;
5545
5546         ret = tracing_check_open_get_tr(tr);
5547         if (ret)
5548                 return ret;
5549
5550         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5551         if (ret < 0)
5552                 trace_array_put(tr);
5553
5554         return ret;
5555 }
5556
5557 static const struct file_operations tracing_iter_fops = {
5558         .open           = tracing_trace_options_open,
5559         .read           = seq_read,
5560         .llseek         = seq_lseek,
5561         .release        = tracing_single_release_tr,
5562         .write          = tracing_trace_options_write,
5563 };
5564
5565 static const char readme_msg[] =
5566         "tracing mini-HOWTO:\n\n"
5567         "# echo 0 > tracing_on : quick way to disable tracing\n"
5568         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5569         " Important files:\n"
5570         "  trace\t\t\t- The static contents of the buffer\n"
5571         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5572         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5573         "  current_tracer\t- function and latency tracers\n"
5574         "  available_tracers\t- list of configured tracers for current_tracer\n"
5575         "  error_log\t- error log for failed commands (that support it)\n"
5576         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5577         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5578         "  trace_clock\t\t- change the clock used to order events\n"
5579         "       local:   Per cpu clock but may not be synced across CPUs\n"
5580         "      global:   Synced across CPUs but slows tracing down.\n"
5581         "     counter:   Not a clock, but just an increment\n"
5582         "      uptime:   Jiffy counter from time of boot\n"
5583         "        perf:   Same clock that perf events use\n"
5584 #ifdef CONFIG_X86_64
5585         "     x86-tsc:   TSC cycle counter\n"
5586 #endif
5587         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5588         "       delta:   Delta difference against a buffer-wide timestamp\n"
5589         "    absolute:   Absolute (standalone) timestamp\n"
5590         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5591         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5592         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5593         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5594         "\t\t\t  Remove sub-buffer with rmdir\n"
5595         "  trace_options\t\t- Set format or modify how tracing happens\n"
5596         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5597         "\t\t\t  option name\n"
5598         "  saved_cmdlines_size\t- echo the number of comm-pid entries to save in here\n"
5599 #ifdef CONFIG_DYNAMIC_FTRACE
5600         "\n  available_filter_functions - list of functions that can be filtered on\n"
5601         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5602         "\t\t\t  functions\n"
5603         "\t     accepts: func_full_name or glob-matching-pattern\n"
5604         "\t     modules: Can select a group via module\n"
5605         "\t      Format: :mod:<module-name>\n"
5606         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5607         "\t    triggers: a command to perform when function is hit\n"
5608         "\t      Format: <function>:<trigger>[:count]\n"
5609         "\t     trigger: traceon, traceoff\n"
5610         "\t\t      enable_event:<system>:<event>\n"
5611         "\t\t      disable_event:<system>:<event>\n"
5612 #ifdef CONFIG_STACKTRACE
5613         "\t\t      stacktrace\n"
5614 #endif
5615 #ifdef CONFIG_TRACER_SNAPSHOT
5616         "\t\t      snapshot\n"
5617 #endif
5618         "\t\t      dump\n"
5619         "\t\t      cpudump\n"
5620         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5621         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5622         "\t     The first one will disable tracing every time do_fault is hit\n"
5623         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5624         "\t       The first time do_trap is hit and it disables tracing, the\n"
5625         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5626         "\t       the counter will not decrement. It only decrements when the\n"
5627         "\t       trigger did work\n"
5628         "\t     To remove trigger without count:\n"
5629         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5630         "\t     To remove trigger with a count:\n"
5631         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5632         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5633         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5634         "\t    modules: Can select a group via module command :mod:\n"
5635         "\t    Does not accept triggers\n"
5636 #endif /* CONFIG_DYNAMIC_FTRACE */
5637 #ifdef CONFIG_FUNCTION_TRACER
5638         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5639         "\t\t    (function)\n"
5640         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5641         "\t\t    (function)\n"
5642 #endif
5643 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5644         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5645         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5646         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5647 #endif
5648 #ifdef CONFIG_TRACER_SNAPSHOT
5649         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5650         "\t\t\t  snapshot buffer. Read the contents for more\n"
5651         "\t\t\t  information\n"
5652 #endif
5653 #ifdef CONFIG_STACK_TRACER
5654         "  stack_trace\t\t- Shows the max stack trace when active\n"
5655         "  stack_max_size\t- Shows current max stack size that was traced\n"
5656         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5657         "\t\t\t  new trace)\n"
5658 #ifdef CONFIG_DYNAMIC_FTRACE
5659         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5660         "\t\t\t  traces\n"
5661 #endif
5662 #endif /* CONFIG_STACK_TRACER */
5663 #ifdef CONFIG_DYNAMIC_EVENTS
5664         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5665         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5666 #endif
5667 #ifdef CONFIG_KPROBE_EVENTS
5668         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5669         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5670 #endif
5671 #ifdef CONFIG_UPROBE_EVENTS
5672         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5673         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5674 #endif
5675 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5676     defined(CONFIG_FPROBE_EVENTS)
5677         "\t  accepts: event-definitions (one definition per line)\n"
5678 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5679         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5680         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5681 #endif
5682 #ifdef CONFIG_FPROBE_EVENTS
5683         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5684         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5685 #endif
5686 #ifdef CONFIG_HIST_TRIGGERS
5687         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5688 #endif
5689         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5690         "\t           -:[<group>/][<event>]\n"
5691 #ifdef CONFIG_KPROBE_EVENTS
5692         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5693   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5694 #endif
5695 #ifdef CONFIG_UPROBE_EVENTS
5696   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5697 #endif
5698         "\t     args: <name>=fetcharg[:type]\n"
5699         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5700 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5701 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5702         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n"
5703 #else
5704         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5705 #endif
5706 #else
5707         "\t           $stack<index>, $stack, $retval, $comm,\n"
5708 #endif
5709         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5710         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5711         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5712         "\t           symstr, <type>\\[<array-size>\\]\n"
5713 #ifdef CONFIG_HIST_TRIGGERS
5714         "\t    field: <stype> <name>;\n"
5715         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5716         "\t           [unsigned] char/int/long\n"
5717 #endif
5718         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5719         "\t            of the <attached-group>/<attached-event>.\n"
5720 #endif
5721         "  events/\t\t- Directory containing all trace event subsystems:\n"
5722         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5723         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5724         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5725         "\t\t\t  events\n"
5726         "      filter\t\t- If set, only events passing filter are traced\n"
5727         "  events/<system>/<event>/\t- Directory containing control files for\n"
5728         "\t\t\t  <event>:\n"
5729         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5730         "      filter\t\t- If set, only events passing filter are traced\n"
5731         "      trigger\t\t- If set, a command to perform when event is hit\n"
5732         "\t    Format: <trigger>[:count][if <filter>]\n"
5733         "\t   trigger: traceon, traceoff\n"
5734         "\t            enable_event:<system>:<event>\n"
5735         "\t            disable_event:<system>:<event>\n"
5736 #ifdef CONFIG_HIST_TRIGGERS
5737         "\t            enable_hist:<system>:<event>\n"
5738         "\t            disable_hist:<system>:<event>\n"
5739 #endif
5740 #ifdef CONFIG_STACKTRACE
5741         "\t\t    stacktrace\n"
5742 #endif
5743 #ifdef CONFIG_TRACER_SNAPSHOT
5744         "\t\t    snapshot\n"
5745 #endif
5746 #ifdef CONFIG_HIST_TRIGGERS
5747         "\t\t    hist (see below)\n"
5748 #endif
5749         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5750         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5751         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5752         "\t                  events/block/block_unplug/trigger\n"
5753         "\t   The first disables tracing every time block_unplug is hit.\n"
5754         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5755         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5756         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5757         "\t   Like function triggers, the counter is only decremented if it\n"
5758         "\t    enabled or disabled tracing.\n"
5759         "\t   To remove a trigger without a count:\n"
5760         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5761         "\t   To remove a trigger with a count:\n"
5762         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5763         "\t   Filters can be ignored when removing a trigger.\n"
5764 #ifdef CONFIG_HIST_TRIGGERS
5765         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5766         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5767         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5768         "\t            [:values=<field1[,field2,...]>]\n"
5769         "\t            [:sort=<field1[,field2,...]>]\n"
5770         "\t            [:size=#entries]\n"
5771         "\t            [:pause][:continue][:clear]\n"
5772         "\t            [:name=histname1]\n"
5773         "\t            [:nohitcount]\n"
5774         "\t            [:<handler>.<action>]\n"
5775         "\t            [if <filter>]\n\n"
5776         "\t    Note, special fields can be used as well:\n"
5777         "\t            common_timestamp - to record current timestamp\n"
5778         "\t            common_cpu - to record the CPU the event happened on\n"
5779         "\n"
5780         "\t    A hist trigger variable can be:\n"
5781         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5782         "\t        - a reference to another variable e.g. y=$x,\n"
5783         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5784         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5785         "\n"
5786         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5787         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5788         "\t    variable reference, field or numeric literal.\n"
5789         "\n"
5790         "\t    When a matching event is hit, an entry is added to a hash\n"
5791         "\t    table using the key(s) and value(s) named, and the value of a\n"
5792         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5793         "\t    correspond to fields in the event's format description.  Keys\n"
5794         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5795         "\t    Compound keys consisting of up to two fields can be specified\n"
5796         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5797         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5798         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5799         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5800         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5801         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5802         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5803         "\t    its histogram data will be shared with other triggers of the\n"
5804         "\t    same name, and trigger hits will update this common data.\n\n"
5805         "\t    Reading the 'hist' file for the event will dump the hash\n"
5806         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5807         "\t    triggers attached to an event, there will be a table for each\n"
5808         "\t    trigger in the output.  The table displayed for a named\n"
5809         "\t    trigger will be the same as any other instance having the\n"
5810         "\t    same name.  The default format used to display a given field\n"
5811         "\t    can be modified by appending any of the following modifiers\n"
5812         "\t    to the field name, as applicable:\n\n"
5813         "\t            .hex        display a number as a hex value\n"
5814         "\t            .sym        display an address as a symbol\n"
5815         "\t            .sym-offset display an address as a symbol and offset\n"
5816         "\t            .execname   display a common_pid as a program name\n"
5817         "\t            .syscall    display a syscall id as a syscall name\n"
5818         "\t            .log2       display log2 value rather than raw number\n"
5819         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5820         "\t            .usecs      display a common_timestamp in microseconds\n"
5821         "\t            .percent    display a number as a percentage value\n"
5822         "\t            .graph      display a bar-graph of a value\n\n"
5823         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5824         "\t    trigger or to start a hist trigger but not log any events\n"
5825         "\t    until told to do so.  'continue' can be used to start or\n"
5826         "\t    restart a paused hist trigger.\n\n"
5827         "\t    The 'clear' parameter will clear the contents of a running\n"
5828         "\t    hist trigger and leave its current paused/active state\n"
5829         "\t    unchanged.\n\n"
5830         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5831         "\t    raw hitcount in the histogram.\n\n"
5832         "\t    The enable_hist and disable_hist triggers can be used to\n"
5833         "\t    have one event conditionally start and stop another event's\n"
5834         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5835         "\t    the enable_event and disable_event triggers.\n\n"
5836         "\t    Hist trigger handlers and actions are executed whenever\n"
5837         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5838         "\t        <handler>.<action>\n\n"
5839         "\t    The available handlers are:\n\n"
5840         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5841         "\t        onmax(var)               - invoke if var exceeds current max\n"
5842         "\t        onchange(var)            - invoke action if var changes\n\n"
5843         "\t    The available actions are:\n\n"
5844         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5845         "\t        save(field,...)                      - save current event fields\n"
5846 #ifdef CONFIG_TRACER_SNAPSHOT
5847         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5848 #endif
5849 #ifdef CONFIG_SYNTH_EVENTS
5850         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5851         "\t  Write into this file to define/undefine new synthetic events.\n"
5852         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5853 #endif
5854 #endif
5855 ;
5856
5857 static ssize_t
5858 tracing_readme_read(struct file *filp, char __user *ubuf,
5859                        size_t cnt, loff_t *ppos)
5860 {
5861         return simple_read_from_buffer(ubuf, cnt, ppos,
5862                                         readme_msg, strlen(readme_msg));
5863 }
5864
5865 static const struct file_operations tracing_readme_fops = {
5866         .open           = tracing_open_generic,
5867         .read           = tracing_readme_read,
5868         .llseek         = generic_file_llseek,
5869 };
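/*
 * Minimal walk-through of the mini-HOWTO exposed above (commands are
 * illustrative; tracer availability depends on the configuration):
 *
 *   # cd /sys/kernel/tracing
 *   # echo function > current_tracer
 *   # echo 1 > tracing_on
 *   # head trace
 *   # echo 0 > tracing_on
 */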
5870
5871 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5872 {
5873         int pid = ++(*pos);
5874
5875         return trace_find_tgid_ptr(pid);
5876 }
5877
5878 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5879 {
5880         int pid = *pos;
5881
5882         return trace_find_tgid_ptr(pid);
5883 }
5884
5885 static void saved_tgids_stop(struct seq_file *m, void *v)
5886 {
5887 }
5888
5889 static int saved_tgids_show(struct seq_file *m, void *v)
5890 {
5891         int *entry = (int *)v;
5892         int pid = entry - tgid_map;
5893         int tgid = *entry;
5894
5895         if (tgid == 0)
5896                 return SEQ_SKIP;
5897
5898         seq_printf(m, "%d %d\n", pid, tgid);
5899         return 0;
5900 }
5901
5902 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5903         .start          = saved_tgids_start,
5904         .stop           = saved_tgids_stop,
5905         .next           = saved_tgids_next,
5906         .show           = saved_tgids_show,
5907 };
5908
5909 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5910 {
5911         int ret;
5912
5913         ret = tracing_check_open_get_tr(NULL);
5914         if (ret)
5915                 return ret;
5916
5917         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5918 }
5919
5920
5921 static const struct file_operations tracing_saved_tgids_fops = {
5922         .open           = tracing_saved_tgids_open,
5923         .read           = seq_read,
5924         .llseek         = seq_lseek,
5925         .release        = seq_release,
5926 };
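/*
 * Example output of the saved_tgids file implemented above (the PID and
 * TGID values are made up): one "<pid> <tgid>" pair per line, with
 * zero-tgid entries skipped via SEQ_SKIP:
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1211 1211
 *   1212 1211
 */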
5927
5928 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5929 {
5930         unsigned int *ptr = v;
5931
5932         if (*pos || m->count)
5933                 ptr++;
5934
5935         (*pos)++;
5936
5937         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5938              ptr++) {
5939                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5940                         continue;
5941
5942                 return ptr;
5943         }
5944
5945         return NULL;
5946 }
5947
5948 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5949 {
5950         void *v;
5951         loff_t l = 0;
5952
5953         preempt_disable();
5954         arch_spin_lock(&trace_cmdline_lock);
5955
5956         v = &savedcmd->map_cmdline_to_pid[0];
5957         while (l <= *pos) {
5958                 v = saved_cmdlines_next(m, v, &l);
5959                 if (!v)
5960                         return NULL;
5961         }
5962
5963         return v;
5964 }
5965
5966 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5967 {
5968         arch_spin_unlock(&trace_cmdline_lock);
5969         preempt_enable();
5970 }
5971
5972 static int saved_cmdlines_show(struct seq_file *m, void *v)
5973 {
5974         char buf[TASK_COMM_LEN];
5975         unsigned int *pid = v;
5976
5977         __trace_find_cmdline(*pid, buf);
5978         seq_printf(m, "%d %s\n", *pid, buf);
5979         return 0;
5980 }
5981
5982 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5983         .start          = saved_cmdlines_start,
5984         .next           = saved_cmdlines_next,
5985         .stop           = saved_cmdlines_stop,
5986         .show           = saved_cmdlines_show,
5987 };
5988
5989 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5990 {
5991         int ret;
5992
5993         ret = tracing_check_open_get_tr(NULL);
5994         if (ret)
5995                 return ret;
5996
5997         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5998 }
5999
6000 static const struct file_operations tracing_saved_cmdlines_fops = {
6001         .open           = tracing_saved_cmdlines_open,
6002         .read           = seq_read,
6003         .llseek         = seq_lseek,
6004         .release        = seq_release,
6005 };
6006
6007 static ssize_t
6008 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6009                                  size_t cnt, loff_t *ppos)
6010 {
6011         char buf[64];
6012         int r;
6013
6014         preempt_disable();
6015         arch_spin_lock(&trace_cmdline_lock);
6016         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6017         arch_spin_unlock(&trace_cmdline_lock);
6018         preempt_enable();
6019
6020         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6021 }
6022
6023 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6024 {
6025         kfree(s->saved_cmdlines);
6026         kfree(s->map_cmdline_to_pid);
6027         kfree(s);
6028 }
6029
6030 static int tracing_resize_saved_cmdlines(unsigned int val)
6031 {
6032         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6033
6034         s = kmalloc(sizeof(*s), GFP_KERNEL);
6035         if (!s)
6036                 return -ENOMEM;
6037
6038         if (allocate_cmdlines_buffer(val, s) < 0) {
6039                 kfree(s);
6040                 return -ENOMEM;
6041         }
6042
6043         preempt_disable();
6044         arch_spin_lock(&trace_cmdline_lock);
6045         savedcmd_temp = savedcmd;
6046         savedcmd = s;
6047         arch_spin_unlock(&trace_cmdline_lock);
6048         preempt_enable();
6049         free_saved_cmdlines_buffer(savedcmd_temp);
6050
6051         return 0;
6052 }
6053
6054 static ssize_t
6055 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6056                                   size_t cnt, loff_t *ppos)
6057 {
6058         unsigned long val;
6059         int ret;
6060
6061         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6062         if (ret)
6063                 return ret;
6064
6065         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6066         if (!val || val > PID_MAX_DEFAULT)
6067                 return -EINVAL;
6068
6069         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6070         if (ret < 0)
6071                 return ret;
6072
6073         *ppos += cnt;
6074
6075         return cnt;
6076 }
6077
6078 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6079         .open           = tracing_open_generic,
6080         .read           = tracing_saved_cmdlines_size_read,
6081         .write          = tracing_saved_cmdlines_size_write,
6082 };
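/*
 * Illustrative resize of the saved cmdlines buffer handled above (the
 * values shown are examples; any size from 1 to PID_MAX_DEFAULT works):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */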
6083
6084 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6085 static union trace_eval_map_item *
6086 update_eval_map(union trace_eval_map_item *ptr)
6087 {
6088         if (!ptr->map.eval_string) {
6089                 if (ptr->tail.next) {
6090                         ptr = ptr->tail.next;
6091                         /* Set ptr to the next real item (skip head) */
6092                         ptr++;
6093                 } else
6094                         return NULL;
6095         }
6096         return ptr;
6097 }
6098
6099 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6100 {
6101         union trace_eval_map_item *ptr = v;
6102
6103         /*
6104          * Paranoid! If ptr points to end, we don't want to increment past it.
6105          * This really should never happen.
6106          */
6107         (*pos)++;
6108         ptr = update_eval_map(ptr);
6109         if (WARN_ON_ONCE(!ptr))
6110                 return NULL;
6111
6112         ptr++;
6113         ptr = update_eval_map(ptr);
6114
6115         return ptr;
6116 }
6117
6118 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6119 {
6120         union trace_eval_map_item *v;
6121         loff_t l = 0;
6122
6123         mutex_lock(&trace_eval_mutex);
6124
6125         v = trace_eval_maps;
6126         if (v)
6127                 v++;
6128
6129         while (v && l < *pos) {
6130                 v = eval_map_next(m, v, &l);
6131         }
6132
6133         return v;
6134 }
6135
6136 static void eval_map_stop(struct seq_file *m, void *v)
6137 {
6138         mutex_unlock(&trace_eval_mutex);
6139 }
6140
6141 static int eval_map_show(struct seq_file *m, void *v)
6142 {
6143         union trace_eval_map_item *ptr = v;
6144
6145         seq_printf(m, "%s %ld (%s)\n",
6146                    ptr->map.eval_string, ptr->map.eval_value,
6147                    ptr->map.system);
6148
6149         return 0;
6150 }
6151
6152 static const struct seq_operations tracing_eval_map_seq_ops = {
6153         .start          = eval_map_start,
6154         .next           = eval_map_next,
6155         .stop           = eval_map_stop,
6156         .show           = eval_map_show,
6157 };
6158
6159 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6160 {
6161         int ret;
6162
6163         ret = tracing_check_open_get_tr(NULL);
6164         if (ret)
6165                 return ret;
6166
6167         return seq_open(filp, &tracing_eval_map_seq_ops);
6168 }
6169
6170 static const struct file_operations tracing_eval_map_fops = {
6171         .open           = tracing_eval_map_open,
6172         .read           = seq_read,
6173         .llseek         = seq_lseek,
6174         .release        = seq_release,
6175 };
6176
6177 static inline union trace_eval_map_item *
6178 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6179 {
6180         /* Return tail of array given the head */
6181         return ptr + ptr->head.length + 1;
6182 }
6183
6184 static void
6185 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6186                            int len)
6187 {
6188         struct trace_eval_map **stop;
6189         struct trace_eval_map **map;
6190         union trace_eval_map_item *map_array;
6191         union trace_eval_map_item *ptr;
6192
6193         stop = start + len;
6194
6195         /*
6196          * The trace_eval_maps array contains the maps plus a head and tail item,
6197          * where the head holds the module and the length of the array, and the
6198          * tail holds a pointer to the next array in the list.
6199          */
6200         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6201         if (!map_array) {
6202                 pr_warn("Unable to allocate trace eval mapping\n");
6203                 return;
6204         }
6205
6206         mutex_lock(&trace_eval_mutex);
6207
6208         if (!trace_eval_maps)
6209                 trace_eval_maps = map_array;
6210         else {
6211                 ptr = trace_eval_maps;
6212                 for (;;) {
6213                         ptr = trace_eval_jmp_to_tail(ptr);
6214                         if (!ptr->tail.next)
6215                                 break;
6216                         ptr = ptr->tail.next;
6217
6218                 }
6219                 ptr->tail.next = map_array;
6220         }
6221         map_array->head.mod = mod;
6222         map_array->head.length = len;
6223         map_array++;
6224
6225         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6226                 map_array->map = **map;
6227                 map_array++;
6228         }
6229         memset(map_array, 0, sizeof(*map_array));
6230
6231         mutex_unlock(&trace_eval_mutex);
6232 }
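
/*
 * Resulting layout of the map_array built above (len + 2 entries):
 *
 *      [ head: mod, length ][ map 0 ] ... [ map len - 1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() depends on this layout, skipping from the
 * head entry to the tail entry with ptr + length + 1.
 */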
6233
6234 static void trace_create_eval_file(struct dentry *d_tracer)
6235 {
6236         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6237                           NULL, &tracing_eval_map_fops);
6238 }
6239
6240 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6241 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6242 static inline void trace_insert_eval_map_file(struct module *mod,
6243                               struct trace_eval_map **start, int len) { }
6244 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6245
6246 static void trace_insert_eval_map(struct module *mod,
6247                                   struct trace_eval_map **start, int len)
6248 {
6249         struct trace_eval_map **map;
6250
6251         if (len <= 0)
6252                 return;
6253
6254         map = start;
6255
6256         trace_event_eval_update(map, len);
6257
6258         trace_insert_eval_map_file(mod, start, len);
6259 }
6260
6261 static ssize_t
6262 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6263                        size_t cnt, loff_t *ppos)
6264 {
6265         struct trace_array *tr = filp->private_data;
6266         char buf[MAX_TRACER_SIZE+2];
6267         int r;
6268
6269         mutex_lock(&trace_types_lock);
6270         r = sprintf(buf, "%s\n", tr->current_trace->name);
6271         mutex_unlock(&trace_types_lock);
6272
6273         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6274 }
6275
6276 int tracer_init(struct tracer *t, struct trace_array *tr)
6277 {
6278         tracing_reset_online_cpus(&tr->array_buffer);
6279         return t->init(tr);
6280 }
6281
6282 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6283 {
6284         int cpu;
6285
6286         for_each_tracing_cpu(cpu)
6287                 per_cpu_ptr(buf->data, cpu)->entries = val;
6288 }
6289
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
6292 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6293                                         struct array_buffer *size_buf, int cpu_id)
6294 {
6295         int cpu, ret = 0;
6296
6297         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6298                 for_each_tracing_cpu(cpu) {
6299                         ret = ring_buffer_resize(trace_buf->buffer,
6300                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6301                         if (ret < 0)
6302                                 break;
6303                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6304                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6305                 }
6306         } else {
6307                 ret = ring_buffer_resize(trace_buf->buffer,
6308                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6309                 if (ret == 0)
6310                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6311                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6312         }
6313
6314         return ret;
6315 }
6316 #endif /* CONFIG_TRACER_MAX_TRACE */
6317
6318 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6319                                         unsigned long size, int cpu)
6320 {
6321         int ret;
6322
6323         /*
6324          * If kernel or user changes the size of the ring buffer
6325          * we use the size that was given, and we can forget about
6326          * expanding it later.
6327          */
6328         ring_buffer_expanded = true;
6329
6330         /* May be called before buffers are initialized */
6331         if (!tr->array_buffer.buffer)
6332                 return 0;
6333
6334         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6335         if (ret < 0)
6336                 return ret;
6337
6338 #ifdef CONFIG_TRACER_MAX_TRACE
6339         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6340             !tr->current_trace->use_max_tr)
6341                 goto out;
6342
6343         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6344         if (ret < 0) {
6345                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6346                                                      &tr->array_buffer, cpu);
6347                 if (r < 0) {
6348                         /*
6349                          * AARGH! We are left with different
6350                          * size max buffer!!!!
6351                          * The max buffer is our "snapshot" buffer.
6352                          * When a tracer needs a snapshot (one of the
6353                          * latency tracers), it swaps the max buffer
6354                          * with the saved snap shot. We succeeded to
6355                          * with the saved snapshot. We succeeded in
6356                          * updating the size of the main buffer, but failed to
6357                          * to reset the main buffer to the original size, we
6358                          * failed there too. This is very unlikely to
6359                          * happen, but if it does, warn and kill all
6360                          * tracing.
6361                          */
6362                         WARN_ON(1);
6363                         tracing_disabled = 1;
6364                 }
6365                 return ret;
6366         }
6367
6368         if (cpu == RING_BUFFER_ALL_CPUS)
6369                 set_buffer_entries(&tr->max_buffer, size);
6370         else
6371                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6372
6373  out:
6374 #endif /* CONFIG_TRACER_MAX_TRACE */
6375
6376         if (cpu == RING_BUFFER_ALL_CPUS)
6377                 set_buffer_entries(&tr->array_buffer, size);
6378         else
6379                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6380
6381         return ret;
6382 }
6383
6384 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6385                                   unsigned long size, int cpu_id)
6386 {
6387         int ret;
6388
6389         mutex_lock(&trace_types_lock);
6390
6391         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6392                 /* make sure this cpu is enabled in the mask */
6393                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6394                         ret = -EINVAL;
6395                         goto out;
6396                 }
6397         }
6398
6399         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6400         if (ret < 0)
6401                 ret = -ENOMEM;
6402
6403 out:
6404         mutex_unlock(&trace_types_lock);
6405
6406         return ret;
6407 }
6408
6409
6410 /**
6411  * tracing_update_buffers - used by tracing facility to expand ring buffers
6412  *
6413  * To save memory on systems where tracing is configured in but never
6414  * used, the ring buffers are initially set to a minimum size. Once a
6415  * user starts to use the tracing facility, the buffers need to grow
6416  * to their default size.
6417  *
6418  * This function is to be called when a tracer is about to be used.
6419  */
6420 int tracing_update_buffers(void)
6421 {
6422         int ret = 0;
6423
6424         mutex_lock(&trace_types_lock);
6425         if (!ring_buffer_expanded)
6426                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6427                                                 RING_BUFFER_ALL_CPUS);
6428         mutex_unlock(&trace_types_lock);
6429
6430         return ret;
6431 }
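
/*
 * A minimal sketch of the expected caller shape (the event and tracer
 * enable paths, for example, call this before turning tracing on):
 *
 *      ret = tracing_update_buffers();
 *      if (ret < 0)
 *              return ret;
 *
 * at which point the buffers are at full size and the tracer or event
 * can be enabled.
 */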
6432
6433 struct trace_option_dentry;
6434
6435 static void
6436 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6437
6438 /*
6439  * Used to clear out the tracer before deletion of an instance.
6440  * Must be called with trace_types_lock held.
6441  */
6442 static void tracing_set_nop(struct trace_array *tr)
6443 {
6444         if (tr->current_trace == &nop_trace)
6445                 return;
6446
6447         tr->current_trace->enabled--;
6448
6449         if (tr->current_trace->reset)
6450                 tr->current_trace->reset(tr);
6451
6452         tr->current_trace = &nop_trace;
6453 }
6454
6455 static bool tracer_options_updated;
6456
6457 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6458 {
6459         /* Only enable if the directory has been created already. */
6460         if (!tr->dir)
6461                 return;
6462
6463         /* Only create trace option files after update_tracer_options finishes */
6464         if (!tracer_options_updated)
6465                 return;
6466
6467         create_trace_option_files(tr, t);
6468 }
6469
6470 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6471 {
6472         struct tracer *t;
6473 #ifdef CONFIG_TRACER_MAX_TRACE
6474         bool had_max_tr;
6475 #endif
6476         int ret = 0;
6477
6478         mutex_lock(&trace_types_lock);
6479
6480         if (!ring_buffer_expanded) {
6481                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6482                                                 RING_BUFFER_ALL_CPUS);
6483                 if (ret < 0)
6484                         goto out;
6485                 ret = 0;
6486         }
6487
6488         for (t = trace_types; t; t = t->next) {
6489                 if (strcmp(t->name, buf) == 0)
6490                         break;
6491         }
6492         if (!t) {
6493                 ret = -EINVAL;
6494                 goto out;
6495         }
6496         if (t == tr->current_trace)
6497                 goto out;
6498
6499 #ifdef CONFIG_TRACER_SNAPSHOT
6500         if (t->use_max_tr) {
6501                 local_irq_disable();
6502                 arch_spin_lock(&tr->max_lock);
6503                 if (tr->cond_snapshot)
6504                         ret = -EBUSY;
6505                 arch_spin_unlock(&tr->max_lock);
6506                 local_irq_enable();
6507                 if (ret)
6508                         goto out;
6509         }
6510 #endif
6511         /* Some tracers won't work on the kernel command line */
6512         if (system_state < SYSTEM_RUNNING && t->noboot) {
6513                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6514                         t->name);
6515                 goto out;
6516         }
6517
6518         /* Some tracers are only allowed for the top level buffer */
6519         if (!trace_ok_for_array(t, tr)) {
6520                 ret = -EINVAL;
6521                 goto out;
6522         }
6523
6524         /* If trace pipe files are being read, we can't change the tracer */
6525         if (tr->trace_ref) {
6526                 ret = -EBUSY;
6527                 goto out;
6528         }
6529
6530         trace_branch_disable();
6531
6532         tr->current_trace->enabled--;
6533
6534         if (tr->current_trace->reset)
6535                 tr->current_trace->reset(tr);
6536
6537 #ifdef CONFIG_TRACER_MAX_TRACE
6538         had_max_tr = tr->current_trace->use_max_tr;
6539
6540         /* Current trace needs to be nop_trace before synchronize_rcu */
6541         tr->current_trace = &nop_trace;
6542
6543         if (had_max_tr && !t->use_max_tr) {
6544                 /*
6545                  * We need to make sure that the update_max_tr sees that
6546                  * current_trace changed to nop_trace to keep it from
6547                  * swapping the buffers after we resize it.
6548                  * The update_max_tr() is called with interrupts disabled,
6549                  * so a synchronize_rcu() is sufficient.
6550                  */
6551                 synchronize_rcu();
6552                 free_snapshot(tr);
6553         }
6554
6555         if (t->use_max_tr && !tr->allocated_snapshot) {
6556                 ret = tracing_alloc_snapshot_instance(tr);
6557                 if (ret < 0)
6558                         goto out;
6559         }
6560 #else
6561         tr->current_trace = &nop_trace;
6562 #endif
6563
6564         if (t->init) {
6565                 ret = tracer_init(t, tr);
6566                 if (ret)
6567                         goto out;
6568         }
6569
6570         tr->current_trace = t;
6571         tr->current_trace->enabled++;
6572         trace_branch_enable(tr);
6573  out:
6574         mutex_unlock(&trace_types_lock);
6575
6576         return ret;
6577 }
6578
6579 static ssize_t
6580 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6581                         size_t cnt, loff_t *ppos)
6582 {
6583         struct trace_array *tr = filp->private_data;
6584         char buf[MAX_TRACER_SIZE+1];
6585         char *name;
6586         size_t ret;
6587         int err;
6588
6589         ret = cnt;
6590
6591         if (cnt > MAX_TRACER_SIZE)
6592                 cnt = MAX_TRACER_SIZE;
6593
6594         if (copy_from_user(buf, ubuf, cnt))
6595                 return -EFAULT;
6596
6597         buf[cnt] = 0;
6598
6599         name = strim(buf);
6600
6601         err = tracing_set_tracer(tr, name);
6602         if (err)
6603                 return err;
6604
6605         *ppos += ret;
6606
6607         return ret;
6608 }
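
/*
 * Usage sketch for the current_tracer file wired to these handlers via
 * set_tracer_fops below (assuming the usual tracefs mount point, and that
 * the requested tracer is built in):
 *
 *      cat /sys/kernel/tracing/current_tracer
 *      echo function > /sys/kernel/tracing/current_tracer
 *      echo nop > /sys/kernel/tracing/current_tracer
 *
 * The written name is trimmed with strim() and looked up in the
 * registered trace_types list by tracing_set_tracer().
 */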
6609
6610 static ssize_t
6611 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6612                    size_t cnt, loff_t *ppos)
6613 {
6614         char buf[64];
6615         int r;
6616
6617         r = snprintf(buf, sizeof(buf), "%ld\n",
6618                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6619         if (r > sizeof(buf))
6620                 r = sizeof(buf);
6621         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6622 }
6623
6624 static ssize_t
6625 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6626                     size_t cnt, loff_t *ppos)
6627 {
6628         unsigned long val;
6629         int ret;
6630
6631         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6632         if (ret)
6633                 return ret;
6634
6635         *ptr = val * 1000;
6636
6637         return cnt;
6638 }
6639
6640 static ssize_t
6641 tracing_thresh_read(struct file *filp, char __user *ubuf,
6642                     size_t cnt, loff_t *ppos)
6643 {
6644         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6645 }
6646
6647 static ssize_t
6648 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6649                      size_t cnt, loff_t *ppos)
6650 {
6651         struct trace_array *tr = filp->private_data;
6652         int ret;
6653
6654         mutex_lock(&trace_types_lock);
6655         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6656         if (ret < 0)
6657                 goto out;
6658
6659         if (tr->current_trace->update_thresh) {
6660                 ret = tr->current_trace->update_thresh(tr);
6661                 if (ret < 0)
6662                         goto out;
6663         }
6664
6665         ret = cnt;
6666 out:
6667         mutex_unlock(&trace_types_lock);
6668
6669         return ret;
6670 }
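
/*
 * tracing_thresh is exposed in microseconds but stored in nanoseconds
 * (tracing_nsecs_write() above multiplies by 1000). A usage sketch,
 * assuming the usual tracefs mount point:
 *
 *      echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * sets a 100 usec threshold for the latency tracers that honor it.
 */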
6671
6672 #ifdef CONFIG_TRACER_MAX_TRACE
6673
6674 static ssize_t
6675 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6676                      size_t cnt, loff_t *ppos)
6677 {
6678         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6679 }
6680
6681 static ssize_t
6682 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6683                       size_t cnt, loff_t *ppos)
6684 {
6685         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6686 }
6687
6688 #endif
6689
6690 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6691 {
6692         struct trace_array *tr = inode->i_private;
6693         struct trace_iterator *iter;
6694         int ret;
6695
6696         ret = tracing_check_open_get_tr(tr);
6697         if (ret)
6698                 return ret;
6699
6700         mutex_lock(&trace_types_lock);
6701
6702         /* create a buffer to store the information to pass to userspace */
6703         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6704         if (!iter) {
6705                 ret = -ENOMEM;
6706                 __trace_array_put(tr);
6707                 goto out;
6708         }
6709
6710         trace_seq_init(&iter->seq);
6711         iter->trace = tr->current_trace;
6712
6713         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6714                 ret = -ENOMEM;
6715                 goto fail;
6716         }
6717
6718         /* trace pipe does not show start of buffer */
6719         cpumask_setall(iter->started);
6720
6721         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6722                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6723
6724         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6725         if (trace_clocks[tr->clock_id].in_ns)
6726                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6727
6728         iter->tr = tr;
6729         iter->array_buffer = &tr->array_buffer;
6730         iter->cpu_file = tracing_get_cpu(inode);
6731         mutex_init(&iter->mutex);
6732         filp->private_data = iter;
6733
6734         if (iter->trace->pipe_open)
6735                 iter->trace->pipe_open(iter);
6736
6737         nonseekable_open(inode, filp);
6738
6739         tr->trace_ref++;
6740 out:
6741         mutex_unlock(&trace_types_lock);
6742         return ret;
6743
6744 fail:
6745         kfree(iter);
6746         __trace_array_put(tr);
6747         mutex_unlock(&trace_types_lock);
6748         return ret;
6749 }
6750
6751 static int tracing_release_pipe(struct inode *inode, struct file *file)
6752 {
6753         struct trace_iterator *iter = file->private_data;
6754         struct trace_array *tr = inode->i_private;
6755
6756         mutex_lock(&trace_types_lock);
6757
6758         tr->trace_ref--;
6759
6760         if (iter->trace->pipe_close)
6761                 iter->trace->pipe_close(iter);
6762
6763         mutex_unlock(&trace_types_lock);
6764
6765         free_cpumask_var(iter->started);
6766         kfree(iter->fmt);
6767         mutex_destroy(&iter->mutex);
6768         kfree(iter);
6769
6770         trace_array_put(tr);
6771
6772         return 0;
6773 }
6774
6775 static __poll_t
6776 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6777 {
6778         struct trace_array *tr = iter->tr;
6779
6780         /* Iterators are static, they should be filled or empty */
6781         if (trace_buffer_iter(iter, iter->cpu_file))
6782                 return EPOLLIN | EPOLLRDNORM;
6783
6784         if (tr->trace_flags & TRACE_ITER_BLOCK)
6785                 /*
6786                  * Always select as readable when in blocking mode
6787                  */
6788                 return EPOLLIN | EPOLLRDNORM;
6789         else
6790                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6791                                              filp, poll_table, iter->tr->buffer_percent);
6792 }
6793
6794 static __poll_t
6795 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6796 {
6797         struct trace_iterator *iter = filp->private_data;
6798
6799         return trace_poll(iter, filp, poll_table);
6800 }
6801
6802 /* Must be called with iter->mutex held. */
6803 static int tracing_wait_pipe(struct file *filp)
6804 {
6805         struct trace_iterator *iter = filp->private_data;
6806         int ret;
6807
6808         while (trace_empty(iter)) {
6809
6810                 if ((filp->f_flags & O_NONBLOCK)) {
6811                         return -EAGAIN;
6812                 }
6813
6814                 /*
6815                  * We only stop blocking once we have read something and tracing
6816                  * has been disabled. If tracing is disabled but we have never
6817                  * read anything, we keep blocking. This allows a user to cat
6818                  * this file and then enable tracing. But after we have read
6819                  * something, we give an EOF when tracing is again disabled.
6820                  *
6821                  * iter->pos will be 0 if we haven't read anything.
6822                  */
6823                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6824                         break;
6825
6826                 mutex_unlock(&iter->mutex);
6827
6828                 ret = wait_on_pipe(iter, 0);
6829
6830                 mutex_lock(&iter->mutex);
6831
6832                 if (ret)
6833                         return ret;
6834         }
6835
6836         return 1;
6837 }
6838
6839 /*
6840  * Consumer reader.
6841  */
6842 static ssize_t
6843 tracing_read_pipe(struct file *filp, char __user *ubuf,
6844                   size_t cnt, loff_t *ppos)
6845 {
6846         struct trace_iterator *iter = filp->private_data;
6847         ssize_t sret;
6848
6849         /*
6850          * Avoid more than one consumer on a single file descriptor
6851          * This is just a matter of trace coherency; the ring buffer itself
6852          * is protected.
6853          */
6854         mutex_lock(&iter->mutex);
6855
6856         /* return any leftover data */
6857         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6858         if (sret != -EBUSY)
6859                 goto out;
6860
6861         trace_seq_init(&iter->seq);
6862
6863         if (iter->trace->read) {
6864                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6865                 if (sret)
6866                         goto out;
6867         }
6868
6869 waitagain:
6870         sret = tracing_wait_pipe(filp);
6871         if (sret <= 0)
6872                 goto out;
6873
6874         /* stop when tracing is finished */
6875         if (trace_empty(iter)) {
6876                 sret = 0;
6877                 goto out;
6878         }
6879
6880         if (cnt >= PAGE_SIZE)
6881                 cnt = PAGE_SIZE - 1;
6882
6883         /* reset all but tr, trace, and overruns */
6884         trace_iterator_reset(iter);
6885         cpumask_clear(iter->started);
6886         trace_seq_init(&iter->seq);
6887
6888         trace_event_read_lock();
6889         trace_access_lock(iter->cpu_file);
6890         while (trace_find_next_entry_inc(iter) != NULL) {
6891                 enum print_line_t ret;
6892                 int save_len = iter->seq.seq.len;
6893
6894                 ret = print_trace_line(iter);
6895                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6896                         /*
6897                          * If one print_trace_line() fills the entire trace_seq in one shot,
6898                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6899                          * In this case we need to consume it; otherwise the loop will peek at
6900                          * this event next time, resulting in an infinite loop.
6901                          */
6902                         if (save_len == 0) {
6903                                 iter->seq.full = 0;
6904                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6905                                 trace_consume(iter);
6906                                 break;
6907                         }
6908
6909                         /* In other cases, don't print partial lines */
6910                         iter->seq.seq.len = save_len;
6911                         break;
6912                 }
6913                 if (ret != TRACE_TYPE_NO_CONSUME)
6914                         trace_consume(iter);
6915
6916                 if (trace_seq_used(&iter->seq) >= cnt)
6917                         break;
6918
6919                 /*
6920                  * The full flag being set means we filled the trace_seq buffer and
6921                  * should have left via the partial-output condition above. If this
6922                  * triggers, one of the trace_seq_* functions is not being used properly.
6923                  */
6924                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6925                           iter->ent->type);
6926         }
6927         trace_access_unlock(iter->cpu_file);
6928         trace_event_read_unlock();
6929
6930         /* Now copy what we have to the user */
6931         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6932         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6933                 trace_seq_init(&iter->seq);
6934
6935         /*
6936          * If there was nothing to send to user, in spite of consuming trace
6937          * entries, go back to wait for more entries.
6938          */
6939         if (sret == -EBUSY)
6940                 goto waitagain;
6941
6942 out:
6943         mutex_unlock(&iter->mutex);
6944
6945         return sret;
6946 }
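
/*
 * This consumer reader backs the trace_pipe file (see tracing_pipe_fops
 * below). Unlike the trace file, reads consume what they return:
 *
 *      cat /sys/kernel/tracing/trace_pipe
 *
 * blocks until entries are available (unless the file was opened with
 * O_NONBLOCK) and removes the returned entries from the ring buffer.
 */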
6947
6948 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6949                                      unsigned int idx)
6950 {
6951         __free_page(spd->pages[idx]);
6952 }
6953
6954 static size_t
6955 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6956 {
6957         size_t count;
6958         int save_len;
6959         int ret;
6960
6961         /* Seq buffer is page-sized, exactly what we need. */
6962         for (;;) {
6963                 save_len = iter->seq.seq.len;
6964                 ret = print_trace_line(iter);
6965
6966                 if (trace_seq_has_overflowed(&iter->seq)) {
6967                         iter->seq.seq.len = save_len;
6968                         break;
6969                 }
6970
6971                 /*
6972                  * This should not be hit, because it should only
6973                  * be set if the iter->seq overflowed. But check it
6974                  * anyway to be safe.
6975                  */
6976                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6977                         iter->seq.seq.len = save_len;
6978                         break;
6979                 }
6980
6981                 count = trace_seq_used(&iter->seq) - save_len;
6982                 if (rem < count) {
6983                         rem = 0;
6984                         iter->seq.seq.len = save_len;
6985                         break;
6986                 }
6987
6988                 if (ret != TRACE_TYPE_NO_CONSUME)
6989                         trace_consume(iter);
6990                 rem -= count;
6991                 if (!trace_find_next_entry_inc(iter))   {
6992                         rem = 0;
6993                         iter->ent = NULL;
6994                         break;
6995                 }
6996         }
6997
6998         return rem;
6999 }
7000
7001 static ssize_t tracing_splice_read_pipe(struct file *filp,
7002                                         loff_t *ppos,
7003                                         struct pipe_inode_info *pipe,
7004                                         size_t len,
7005                                         unsigned int flags)
7006 {
7007         struct page *pages_def[PIPE_DEF_BUFFERS];
7008         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7009         struct trace_iterator *iter = filp->private_data;
7010         struct splice_pipe_desc spd = {
7011                 .pages          = pages_def,
7012                 .partial        = partial_def,
7013                 .nr_pages       = 0, /* This gets updated below. */
7014                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7015                 .ops            = &default_pipe_buf_ops,
7016                 .spd_release    = tracing_spd_release_pipe,
7017         };
7018         ssize_t ret;
7019         size_t rem;
7020         unsigned int i;
7021
7022         if (splice_grow_spd(pipe, &spd))
7023                 return -ENOMEM;
7024
7025         mutex_lock(&iter->mutex);
7026
7027         if (iter->trace->splice_read) {
7028                 ret = iter->trace->splice_read(iter, filp,
7029                                                ppos, pipe, len, flags);
7030                 if (ret)
7031                         goto out_err;
7032         }
7033
7034         ret = tracing_wait_pipe(filp);
7035         if (ret <= 0)
7036                 goto out_err;
7037
7038         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7039                 ret = -EFAULT;
7040                 goto out_err;
7041         }
7042
7043         trace_event_read_lock();
7044         trace_access_lock(iter->cpu_file);
7045
7046         /* Fill as many pages as possible. */
7047         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7048                 spd.pages[i] = alloc_page(GFP_KERNEL);
7049                 if (!spd.pages[i])
7050                         break;
7051
7052                 rem = tracing_fill_pipe_page(rem, iter);
7053
7054                 /* Copy the data into the page, so we can start over. */
7055                 ret = trace_seq_to_buffer(&iter->seq,
7056                                           page_address(spd.pages[i]),
7057                                           trace_seq_used(&iter->seq));
7058                 if (ret < 0) {
7059                         __free_page(spd.pages[i]);
7060                         break;
7061                 }
7062                 spd.partial[i].offset = 0;
7063                 spd.partial[i].len = trace_seq_used(&iter->seq);
7064
7065                 trace_seq_init(&iter->seq);
7066         }
7067
7068         trace_access_unlock(iter->cpu_file);
7069         trace_event_read_unlock();
7070         mutex_unlock(&iter->mutex);
7071
7072         spd.nr_pages = i;
7073
7074         if (i)
7075                 ret = splice_to_pipe(pipe, &spd);
7076         else
7077                 ret = 0;
7078 out:
7079         splice_shrink_spd(&spd);
7080         return ret;
7081
7082 out_err:
7083         mutex_unlock(&iter->mutex);
7084         goto out;
7085 }
7086
7087 static ssize_t
7088 tracing_entries_read(struct file *filp, char __user *ubuf,
7089                      size_t cnt, loff_t *ppos)
7090 {
7091         struct inode *inode = file_inode(filp);
7092         struct trace_array *tr = inode->i_private;
7093         int cpu = tracing_get_cpu(inode);
7094         char buf[64];
7095         int r = 0;
7096         ssize_t ret;
7097
7098         mutex_lock(&trace_types_lock);
7099
7100         if (cpu == RING_BUFFER_ALL_CPUS) {
7101                 int cpu, buf_size_same;
7102                 unsigned long size;
7103
7104                 size = 0;
7105                 buf_size_same = 1;
7106                 /* check if all cpu sizes are the same */
7107                 for_each_tracing_cpu(cpu) {
7108                         /* fill in the size from first enabled cpu */
7109                         if (size == 0)
7110                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7111                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7112                                 buf_size_same = 0;
7113                                 break;
7114                         }
7115                 }
7116
7117                 if (buf_size_same) {
7118                         if (!ring_buffer_expanded)
7119                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7120                                             size >> 10,
7121                                             trace_buf_size >> 10);
7122                         else
7123                                 r = sprintf(buf, "%lu\n", size >> 10);
7124                 } else
7125                         r = sprintf(buf, "X\n");
7126         } else
7127                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7128
7129         mutex_unlock(&trace_types_lock);
7130
7131         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7132         return ret;
7133 }
7134
7135 static ssize_t
7136 tracing_entries_write(struct file *filp, const char __user *ubuf,
7137                       size_t cnt, loff_t *ppos)
7138 {
7139         struct inode *inode = file_inode(filp);
7140         struct trace_array *tr = inode->i_private;
7141         unsigned long val;
7142         int ret;
7143
7144         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7145         if (ret)
7146                 return ret;
7147
7148         /* must have at least 1 entry */
7149         if (!val)
7150                 return -EINVAL;
7151
7152         /* value is in KB */
7153         val <<= 10;
7154         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7155         if (ret < 0)
7156                 return ret;
7157
7158         *ppos += cnt;
7159
7160         return cnt;
7161 }
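
/*
 * Usage sketch for the buffer_size_kb files served by these handlers.
 * The written value is in KiB (val <<= 10 above) and applies per CPU;
 * paths assume the usual tracefs mount point:
 *
 *      echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *      echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */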
7162
7163 static ssize_t
7164 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7165                                 size_t cnt, loff_t *ppos)
7166 {
7167         struct trace_array *tr = filp->private_data;
7168         char buf[64];
7169         int r, cpu;
7170         unsigned long size = 0, expanded_size = 0;
7171
7172         mutex_lock(&trace_types_lock);
7173         for_each_tracing_cpu(cpu) {
7174                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7175                 if (!ring_buffer_expanded)
7176                         expanded_size += trace_buf_size >> 10;
7177         }
7178         if (ring_buffer_expanded)
7179                 r = sprintf(buf, "%lu\n", size);
7180         else
7181                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7182         mutex_unlock(&trace_types_lock);
7183
7184         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7185 }
7186
7187 static ssize_t
7188 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7189                           size_t cnt, loff_t *ppos)
7190 {
7191         /*
7192          * There is no need to read what the user has written; this function
7193          * exists only so that "echo" into this file does not report an error.
7194          */
7195
7196         *ppos += cnt;
7197
7198         return cnt;
7199 }
7200
7201 static int
7202 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7203 {
7204         struct trace_array *tr = inode->i_private;
7205
7206         /* disable tracing ? */
7207         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7208                 tracer_tracing_off(tr);
7209         /* resize the ring buffer to 0 */
7210         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7211
7212         trace_array_put(tr);
7213
7214         return 0;
7215 }
7216
7217 static ssize_t
7218 tracing_mark_write(struct file *filp, const char __user *ubuf,
7219                                         size_t cnt, loff_t *fpos)
7220 {
7221         struct trace_array *tr = filp->private_data;
7222         struct ring_buffer_event *event;
7223         enum event_trigger_type tt = ETT_NONE;
7224         struct trace_buffer *buffer;
7225         struct print_entry *entry;
7226         ssize_t written;
7227         int size;
7228         int len;
7229
7230 /* Used in tracing_mark_raw_write() as well */
7231 #define FAULTED_STR "<faulted>"
7232 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7233
7234         if (tracing_disabled)
7235                 return -EINVAL;
7236
7237         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7238                 return -EINVAL;
7239
7240         if (cnt > TRACE_BUF_SIZE)
7241                 cnt = TRACE_BUF_SIZE;
7242
7243         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7244
7245         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7246
7247         /* If less than "<faulted>", then make sure we can still add that */
7248         if (cnt < FAULTED_SIZE)
7249                 size += FAULTED_SIZE - cnt;
7250
7251         buffer = tr->array_buffer.buffer;
7252         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7253                                             tracing_gen_ctx());
7254         if (unlikely(!event))
7255                 /* Ring buffer disabled, return as if not open for write */
7256                 return -EBADF;
7257
7258         entry = ring_buffer_event_data(event);
7259         entry->ip = _THIS_IP_;
7260
7261         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7262         if (len) {
7263                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7264                 cnt = FAULTED_SIZE;
7265                 written = -EFAULT;
7266         } else
7267                 written = cnt;
7268
7269         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7270                 /* do not add \n before testing triggers, but add \0 */
7271                 entry->buf[cnt] = '\0';
7272                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7273         }
7274
7275         if (entry->buf[cnt - 1] != '\n') {
7276                 entry->buf[cnt] = '\n';
7277                 entry->buf[cnt + 1] = '\0';
7278         } else
7279                 entry->buf[cnt] = '\0';
7280
7281         if (static_branch_unlikely(&trace_marker_exports_enabled))
7282                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7283         __buffer_unlock_commit(buffer, event);
7284
7285         if (tt)
7286                 event_triggers_post_call(tr->trace_marker_file, tt);
7287
7288         return written;
7289 }
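
/*
 * A minimal userspace sketch for the trace_marker file served by this
 * handler (illustrative only; includes and error handling elided, path
 * assumes the usual tracefs mount point):
 *
 *      int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *      write(fd, "hello from userspace\n", 21);
 *
 * Each write becomes a single TRACE_PRINT entry, truncated to
 * TRACE_BUF_SIZE, with "<faulted>" recorded if the user buffer cannot be
 * copied without faulting.
 */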
7290
7291 /* Limit it for now to 3K (including tag) */
7292 #define RAW_DATA_MAX_SIZE (1024*3)
7293
7294 static ssize_t
7295 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7296                                         size_t cnt, loff_t *fpos)
7297 {
7298         struct trace_array *tr = filp->private_data;
7299         struct ring_buffer_event *event;
7300         struct trace_buffer *buffer;
7301         struct raw_data_entry *entry;
7302         ssize_t written;
7303         int size;
7304         int len;
7305
7306 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7307
7308         if (tracing_disabled)
7309                 return -EINVAL;
7310
7311         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7312                 return -EINVAL;
7313
7314         /* The marker must at least have a tag id */
7315         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7316                 return -EINVAL;
7317
7318         if (cnt > TRACE_BUF_SIZE)
7319                 cnt = TRACE_BUF_SIZE;
7320
7321         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7322
7323         size = sizeof(*entry) + cnt;
7324         if (cnt < FAULT_SIZE_ID)
7325                 size += FAULT_SIZE_ID - cnt;
7326
7327         buffer = tr->array_buffer.buffer;
7328         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7329                                             tracing_gen_ctx());
7330         if (!event)
7331                 /* Ring buffer disabled, return as if not open for write */
7332                 return -EBADF;
7333
7334         entry = ring_buffer_event_data(event);
7335
7336         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7337         if (len) {
7338                 entry->id = -1;
7339                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7340                 written = -EFAULT;
7341         } else
7342                 written = cnt;
7343
7344         __buffer_unlock_commit(buffer, event);
7345
7346         return written;
7347 }
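
/*
 * The raw marker expects a binary payload that begins with an unsigned
 * int tag id, matching the copy into entry->id above. A sketch (the
 * struct layout and tag value are illustrative, path assumes the usual
 * tracefs mount point):
 *
 *      struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *      int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *      write(fd, &rec, sizeof(rec));
 */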
7348
7349 static int tracing_clock_show(struct seq_file *m, void *v)
7350 {
7351         struct trace_array *tr = m->private;
7352         int i;
7353
7354         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7355                 seq_printf(m,
7356                         "%s%s%s%s", i ? " " : "",
7357                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7358                         i == tr->clock_id ? "]" : "");
7359         seq_putc(m, '\n');
7360
7361         return 0;
7362 }
7363
7364 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7365 {
7366         int i;
7367
7368         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7369                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7370                         break;
7371         }
7372         if (i == ARRAY_SIZE(trace_clocks))
7373                 return -EINVAL;
7374
7375         mutex_lock(&trace_types_lock);
7376
7377         tr->clock_id = i;
7378
7379         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7380
7381         /*
7382          * New clock may not be consistent with the previous clock.
7383          * Reset the buffer so that it doesn't have incomparable timestamps.
7384          */
7385         tracing_reset_online_cpus(&tr->array_buffer);
7386
7387 #ifdef CONFIG_TRACER_MAX_TRACE
7388         if (tr->max_buffer.buffer)
7389                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7390         tracing_reset_online_cpus(&tr->max_buffer);
7391 #endif
7392
7393         mutex_unlock(&trace_types_lock);
7394
7395         return 0;
7396 }
7397
7398 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7399                                    size_t cnt, loff_t *fpos)
7400 {
7401         struct seq_file *m = filp->private_data;
7402         struct trace_array *tr = m->private;
7403         char buf[64];
7404         const char *clockstr;
7405         int ret;
7406
7407         if (cnt >= sizeof(buf))
7408                 return -EINVAL;
7409
7410         if (copy_from_user(buf, ubuf, cnt))
7411                 return -EFAULT;
7412
7413         buf[cnt] = 0;
7414
7415         clockstr = strstrip(buf);
7416
7417         ret = tracing_set_clock(tr, clockstr);
7418         if (ret)
7419                 return ret;
7420
7421         *fpos += cnt;
7422
7423         return cnt;
7424 }
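
/*
 * Usage sketch for the trace_clock file handled above (assuming the
 * usual tracefs mount point). Reading lists the available clocks with
 * the current one in brackets, per tracing_clock_show():
 *
 *      cat /sys/kernel/tracing/trace_clock
 *      echo mono > /sys/kernel/tracing/trace_clock
 *
 * Note that tracing_set_clock() resets the buffers on a switch so that
 * timestamps remain comparable.
 */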
7425
7426 static int tracing_clock_open(struct inode *inode, struct file *file)
7427 {
7428         struct trace_array *tr = inode->i_private;
7429         int ret;
7430
7431         ret = tracing_check_open_get_tr(tr);
7432         if (ret)
7433                 return ret;
7434
7435         ret = single_open(file, tracing_clock_show, inode->i_private);
7436         if (ret < 0)
7437                 trace_array_put(tr);
7438
7439         return ret;
7440 }
7441
7442 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7443 {
7444         struct trace_array *tr = m->private;
7445
7446         mutex_lock(&trace_types_lock);
7447
7448         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7449                 seq_puts(m, "delta [absolute]\n");
7450         else
7451                 seq_puts(m, "[delta] absolute\n");
7452
7453         mutex_unlock(&trace_types_lock);
7454
7455         return 0;
7456 }
7457
7458 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7459 {
7460         struct trace_array *tr = inode->i_private;
7461         int ret;
7462
7463         ret = tracing_check_open_get_tr(tr);
7464         if (ret)
7465                 return ret;
7466
7467         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7468         if (ret < 0)
7469                 trace_array_put(tr);
7470
7471         return ret;
7472 }
7473
7474 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7475 {
7476         if (rbe == this_cpu_read(trace_buffered_event))
7477                 return ring_buffer_time_stamp(buffer);
7478
7479         return ring_buffer_event_time_stamp(buffer, rbe);
7480 }
7481
7482 /*
7483  * Set or disable using the per CPU trace_buffer_event when possible.
7484  */
7485 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7486 {
7487         int ret = 0;
7488
7489         mutex_lock(&trace_types_lock);
7490
7491         if (set && tr->no_filter_buffering_ref++)
7492                 goto out;
7493
7494         if (!set) {
7495                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7496                         ret = -EINVAL;
7497                         goto out;
7498                 }
7499
7500                 --tr->no_filter_buffering_ref;
7501         }
7502  out:
7503         mutex_unlock(&trace_types_lock);
7504
7505         return ret;
7506 }
7507
7508 struct ftrace_buffer_info {
7509         struct trace_iterator   iter;
7510         void                    *spare;
7511         unsigned int            spare_cpu;
7512         unsigned int            read;
7513 };
7514
7515 #ifdef CONFIG_TRACER_SNAPSHOT
7516 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7517 {
7518         struct trace_array *tr = inode->i_private;
7519         struct trace_iterator *iter;
7520         struct seq_file *m;
7521         int ret;
7522
7523         ret = tracing_check_open_get_tr(tr);
7524         if (ret)
7525                 return ret;
7526
7527         if (file->f_mode & FMODE_READ) {
7528                 iter = __tracing_open(inode, file, true);
7529                 if (IS_ERR(iter))
7530                         ret = PTR_ERR(iter);
7531         } else {
7532                 /* Writes still need the seq_file to hold the private data */
7533                 ret = -ENOMEM;
7534                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7535                 if (!m)
7536                         goto out;
7537                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7538                 if (!iter) {
7539                         kfree(m);
7540                         goto out;
7541                 }
7542                 ret = 0;
7543
7544                 iter->tr = tr;
7545                 iter->array_buffer = &tr->max_buffer;
7546                 iter->cpu_file = tracing_get_cpu(inode);
7547                 m->private = iter;
7548                 file->private_data = m;
7549         }
7550 out:
7551         if (ret < 0)
7552                 trace_array_put(tr);
7553
7554         return ret;
7555 }
7556
7557 static ssize_t
7558 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7559                        loff_t *ppos)
7560 {
7561         struct seq_file *m = filp->private_data;
7562         struct trace_iterator *iter = m->private;
7563         struct trace_array *tr = iter->tr;
7564         unsigned long val;
7565         int ret;
7566
7567         ret = tracing_update_buffers();
7568         if (ret < 0)
7569                 return ret;
7570
7571         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7572         if (ret)
7573                 return ret;
7574
7575         mutex_lock(&trace_types_lock);
7576
7577         if (tr->current_trace->use_max_tr) {
7578                 ret = -EBUSY;
7579                 goto out;
7580         }
7581
7582         local_irq_disable();
7583         arch_spin_lock(&tr->max_lock);
7584         if (tr->cond_snapshot)
7585                 ret = -EBUSY;
7586         arch_spin_unlock(&tr->max_lock);
7587         local_irq_enable();
7588         if (ret)
7589                 goto out;
7590
7591         switch (val) {
7592         case 0:
7593                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7594                         ret = -EINVAL;
7595                         break;
7596                 }
7597                 if (tr->allocated_snapshot)
7598                         free_snapshot(tr);
7599                 break;
7600         case 1:
7601 /* Only allow per-cpu swap if the ring buffer supports it */
7602 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7603                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7604                         ret = -EINVAL;
7605                         break;
7606                 }
7607 #endif
7608                 if (tr->allocated_snapshot)
7609                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7610                                         &tr->array_buffer, iter->cpu_file);
7611                 else
7612                         ret = tracing_alloc_snapshot_instance(tr);
7613                 if (ret < 0)
7614                         break;
7615                 local_irq_disable();
7616                 /* Now, we're going to swap */
7617                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7618                         update_max_tr(tr, current, smp_processor_id(), NULL);
7619                 else
7620                         update_max_tr_single(tr, current, iter->cpu_file);
7621                 local_irq_enable();
7622                 break;
7623         default:
7624                 if (tr->allocated_snapshot) {
7625                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7626                                 tracing_reset_online_cpus(&tr->max_buffer);
7627                         else
7628                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7629                 }
7630                 break;
7631         }
7632
7633         if (ret >= 0) {
7634                 *ppos += cnt;
7635                 ret = cnt;
7636         }
7637 out:
7638         mutex_unlock(&trace_types_lock);
7639         return ret;
7640 }
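
/*
 * The values written to the snapshot file map to the switch above
 * (paths assume the usual tracefs mount point):
 *
 *      echo 0 > /sys/kernel/tracing/snapshot   free the snapshot buffer
 *      echo 1 > /sys/kernel/tracing/snapshot   allocate if needed and take
 *                                              a snapshot (swap buffers)
 *      echo 2 > /sys/kernel/tracing/snapshot   any other value clears the
 *                                              snapshot contents if allocated
 */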
7641
7642 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7643 {
7644         struct seq_file *m = file->private_data;
7645         int ret;
7646
7647         ret = tracing_release(inode, file);
7648
7649         if (file->f_mode & FMODE_READ)
7650                 return ret;
7651
7652         /* If write only, the seq_file is just a stub */
7653         if (m)
7654                 kfree(m->private);
7655         kfree(m);
7656
7657         return 0;
7658 }
7659
7660 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7661 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7662                                     size_t count, loff_t *ppos);
7663 static int tracing_buffers_release(struct inode *inode, struct file *file);
7664 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7665                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7666
7667 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7668 {
7669         struct ftrace_buffer_info *info;
7670         int ret;
7671
7672         /* The following checks for tracefs lockdown */
7673         ret = tracing_buffers_open(inode, filp);
7674         if (ret < 0)
7675                 return ret;
7676
7677         info = filp->private_data;
7678
7679         if (info->iter.trace->use_max_tr) {
7680                 tracing_buffers_release(inode, filp);
7681                 return -EBUSY;
7682         }
7683
7684         info->iter.snapshot = true;
7685         info->iter.array_buffer = &info->iter.tr->max_buffer;
7686
7687         return ret;
7688 }
7689
7690 #endif /* CONFIG_TRACER_SNAPSHOT */
7691
7692
7693 static const struct file_operations tracing_thresh_fops = {
7694         .open           = tracing_open_generic,
7695         .read           = tracing_thresh_read,
7696         .write          = tracing_thresh_write,
7697         .llseek         = generic_file_llseek,
7698 };
7699
7700 #ifdef CONFIG_TRACER_MAX_TRACE
7701 static const struct file_operations tracing_max_lat_fops = {
7702         .open           = tracing_open_generic,
7703         .read           = tracing_max_lat_read,
7704         .write          = tracing_max_lat_write,
7705         .llseek         = generic_file_llseek,
7706 };
7707 #endif
7708
7709 static const struct file_operations set_tracer_fops = {
7710         .open           = tracing_open_generic,
7711         .read           = tracing_set_trace_read,
7712         .write          = tracing_set_trace_write,
7713         .llseek         = generic_file_llseek,
7714 };
7715
7716 static const struct file_operations tracing_pipe_fops = {
7717         .open           = tracing_open_pipe,
7718         .poll           = tracing_poll_pipe,
7719         .read           = tracing_read_pipe,
7720         .splice_read    = tracing_splice_read_pipe,
7721         .release        = tracing_release_pipe,
7722         .llseek         = no_llseek,
7723 };
7724
7725 static const struct file_operations tracing_entries_fops = {
7726         .open           = tracing_open_generic_tr,
7727         .read           = tracing_entries_read,
7728         .write          = tracing_entries_write,
7729         .llseek         = generic_file_llseek,
7730         .release        = tracing_release_generic_tr,
7731 };
7732
7733 static const struct file_operations tracing_total_entries_fops = {
7734         .open           = tracing_open_generic_tr,
7735         .read           = tracing_total_entries_read,
7736         .llseek         = generic_file_llseek,
7737         .release        = tracing_release_generic_tr,
7738 };
7739
7740 static const struct file_operations tracing_free_buffer_fops = {
7741         .open           = tracing_open_generic_tr,
7742         .write          = tracing_free_buffer_write,
7743         .release        = tracing_free_buffer_release,
7744 };
7745
7746 static const struct file_operations tracing_mark_fops = {
7747         .open           = tracing_mark_open,
7748         .write          = tracing_mark_write,
7749         .release        = tracing_release_generic_tr,
7750 };
7751
7752 static const struct file_operations tracing_mark_raw_fops = {
7753         .open           = tracing_mark_open,
7754         .write          = tracing_mark_raw_write,
7755         .release        = tracing_release_generic_tr,
7756 };
7757
7758 static const struct file_operations trace_clock_fops = {
7759         .open           = tracing_clock_open,
7760         .read           = seq_read,
7761         .llseek         = seq_lseek,
7762         .release        = tracing_single_release_tr,
7763         .write          = tracing_clock_write,
7764 };
7765
7766 static const struct file_operations trace_time_stamp_mode_fops = {
7767         .open           = tracing_time_stamp_mode_open,
7768         .read           = seq_read,
7769         .llseek         = seq_lseek,
7770         .release        = tracing_single_release_tr,
7771 };
7772
7773 #ifdef CONFIG_TRACER_SNAPSHOT
7774 static const struct file_operations snapshot_fops = {
7775         .open           = tracing_snapshot_open,
7776         .read           = seq_read,
7777         .write          = tracing_snapshot_write,
7778         .llseek         = tracing_lseek,
7779         .release        = tracing_snapshot_release,
7780 };
7781
7782 static const struct file_operations snapshot_raw_fops = {
7783         .open           = snapshot_raw_open,
7784         .read           = tracing_buffers_read,
7785         .release        = tracing_buffers_release,
7786         .splice_read    = tracing_buffers_splice_read,
7787         .llseek         = no_llseek,
7788 };
7789
7790 #endif /* CONFIG_TRACER_SNAPSHOT */
7791
7792 /*
7793  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7794  * @filp: The active open file structure
7795  * @ubuf: The userspace provided buffer containing the value to write
7796  * @cnt: The maximum number of bytes to read from @ubuf
7797  * @ppos: The current "file" position
7798  *
7799  * This function implements the write interface for a struct trace_min_max_param.
7800  * The filp->private_data must point to a trace_min_max_param structure that
7801  * defines where to write the value, the min and the max acceptable values,
7802  * and a lock to protect the write.
7803  */
7804 static ssize_t
7805 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7806 {
7807         struct trace_min_max_param *param = filp->private_data;
7808         u64 val;
7809         int err;
7810
7811         if (!param)
7812                 return -EFAULT;
7813
7814         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7815         if (err)
7816                 return err;
7817
7818         if (param->lock)
7819                 mutex_lock(param->lock);
7820
7821         if (param->min && val < *param->min)
7822                 err = -EINVAL;
7823
7824         if (param->max && val > *param->max)
7825                 err = -EINVAL;
7826
7827         if (!err)
7828                 *param->val = val;
7829
7830         if (param->lock)
7831                 mutex_unlock(param->lock);
7832
7833         if (err)
7834                 return err;
7835
7836         return cnt;
7837 }
7838
7839 /*
7840  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7841  * @filp: The active open file structure
7842  * @ubuf: The userspace provided buffer to read value into
7843  * @cnt: The maximum number of bytes to read
7844  * @ppos: The current "file" position
7845  *
7846  * This function implements the read interface for a struct trace_min_max_param.
7847  * The filp->private_data must point to a trace_min_max_param struct with valid
7848  * data.
7849  */
7850 static ssize_t
7851 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7852 {
7853         struct trace_min_max_param *param = filp->private_data;
7854         char buf[U64_STR_SIZE];
7855         int len;
7856         u64 val;
7857
7858         if (!param)
7859                 return -EFAULT;
7860
7861         val = *param->val;
7862
7863         if (cnt > sizeof(buf))
7864                 cnt = sizeof(buf);
7865
7866         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7867
7868         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7869 }
7870
7871 const struct file_operations trace_min_max_fops = {
7872         .open           = tracing_open_generic,
7873         .read           = trace_min_max_read,
7874         .write          = trace_min_max_write,
7875 };
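
/*
 * Illustrative sketch (not built): how a bounded u64 knob could be wired
 * up through trace_min_max_fops.  The example_* names and the
 * "example_knob" file are hypothetical and exist only for illustration.
 */
#if 0
static u64 example_val = 50;
static u64 example_min = 1;
static u64 example_max = 100;
static DEFINE_MUTEX(example_lock);

static struct trace_min_max_param example_param = {
        .lock   = &example_lock,
        .val    = &example_val,
        .min    = &example_min,
        .max    = &example_max,
};

static void example_create_knob(struct dentry *parent)
{
        /* Writes outside [1, 100] are rejected with -EINVAL. */
        trace_create_file("example_knob", TRACE_MODE_WRITE, parent,
                          &example_param, &trace_min_max_fops);
}
#endif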
7876
7877 #define TRACING_LOG_ERRS_MAX    8
7878 #define TRACING_LOG_LOC_MAX     128
7879
7880 #define CMD_PREFIX "  Command: "
7881
7882 struct err_info {
7883         const char      **errs; /* ptr to loc-specific array of err strings */
7884         u8              type;   /* index into errs -> specific err string */
7885         u16             pos;    /* caret position */
7886         u64             ts;
7887 };
7888
7889 struct tracing_log_err {
7890         struct list_head        list;
7891         struct err_info         info;
7892         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7893         char                    *cmd;                     /* what caused err */
7894 };
7895
7896 static DEFINE_MUTEX(tracing_err_log_lock);
7897
7898 static struct tracing_log_err *alloc_tracing_log_err(int len)
7899 {
7900         struct tracing_log_err *err;
7901
7902         err = kzalloc(sizeof(*err), GFP_KERNEL);
7903         if (!err)
7904                 return ERR_PTR(-ENOMEM);
7905
7906         err->cmd = kzalloc(len, GFP_KERNEL);
7907         if (!err->cmd) {
7908                 kfree(err);
7909                 return ERR_PTR(-ENOMEM);
7910         }
7911
7912         return err;
7913 }
7914
7915 static void free_tracing_log_err(struct tracing_log_err *err)
7916 {
7917         kfree(err->cmd);
7918         kfree(err);
7919 }
7920
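/*
 * Get a tracing_log_err to fill in: allocate a new one until the log holds
 * TRACING_LOG_ERRS_MAX entries, then recycle the oldest entry, replacing
 * only its command buffer (sized for the current caller).  Called with
 * tracing_err_log_lock held.
 */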
7921 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7922                                                    int len)
7923 {
7924         struct tracing_log_err *err;
7925         char *cmd;
7926
7927         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7928                 err = alloc_tracing_log_err(len);
7929                 if (PTR_ERR(err) != -ENOMEM)
7930                         tr->n_err_log_entries++;
7931
7932                 return err;
7933         }
7934         cmd = kzalloc(len, GFP_KERNEL);
7935         if (!cmd)
7936                 return ERR_PTR(-ENOMEM);
7937         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7938         kfree(err->cmd);
7939         err->cmd = cmd;
7940         list_del(&err->list);
7941
7942         return err;
7943 }
7944
7945 /**
7946  * err_pos - find the position of a string within a command for error careting
7947  * @cmd: The tracing command that caused the error
7948  * @str: The string to position the caret at within @cmd
7949  *
7950  * Finds the position of the first occurrence of @str within @cmd.  The
7951  * return value can be passed to tracing_log_err() for caret placement
7952  * within @cmd.
7953  *
7954  * Returns the index within @cmd of the first occurrence of @str or 0
7955  * if @str was not found.
7956  */
7957 unsigned int err_pos(char *cmd, const char *str)
7958 {
7959         char *found;
7960
7961         if (WARN_ON(!strlen(cmd)))
7962                 return 0;
7963
7964         found = strstr(cmd, str);
7965         if (found)
7966                 return found - cmd;
7967
7968         return 0;
7969 }
7970
7971 /**
7972  * tracing_log_err - write an error to the tracing error log
7973  * @tr: The associated trace array for the error (NULL for top level array)
7974  * @loc: A string describing where the error occurred
7975  * @cmd: The tracing command that caused the error
7976  * @errs: The array of loc-specific static error strings
7977  * @type: The index into errs[], which produces the specific static err string
7978  * @pos: The position the caret should be placed in the cmd
7979  *
7980  * Writes an error into tracing/error_log of the form:
7981  *
7982  * <loc>: error: <text>
7983  *   Command: <cmd>
7984  *              ^
7985  *
7986  * tracing/error_log is a small log file containing the last
7987  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7988  * unless there has been a tracing error, and the error log can be
7989  * cleared and have its memory freed by writing the empty string in
7990  * truncation mode to it i.e. echo > tracing/error_log.
7991  *
7992  * NOTE: the @errs array and the @type param are used to
7993  * produce a static error string - this string is not copied and saved
7994  * when the error is logged - only a pointer to it is saved.  See
7995  * existing callers for examples of how static strings are typically
7996  * defined for use with tracing_log_err().
7997  */
7998 void tracing_log_err(struct trace_array *tr,
7999                      const char *loc, const char *cmd,
8000                      const char **errs, u8 type, u16 pos)
8001 {
8002         struct tracing_log_err *err;
8003         int len = 0;
8004
8005         if (!tr)
8006                 tr = &global_trace;
8007
8008         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8009
8010         mutex_lock(&tracing_err_log_lock);
8011         err = get_tracing_log_err(tr, len);
8012         if (PTR_ERR(err) == -ENOMEM) {
8013                 mutex_unlock(&tracing_err_log_lock);
8014                 return;
8015         }
8016
8017         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8018         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8019
8020         err->info.errs = errs;
8021         err->info.type = type;
8022         err->info.pos = pos;
8023         err->info.ts = local_clock();
8024
8025         list_add_tail(&err->list, &tr->err_log);
8026         mutex_unlock(&tracing_err_log_lock);
8027 }
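
/*
 * Illustrative sketch (not built) of the usual calling pattern: define a
 * static array of error strings, pick one by index, and use err_pos() to
 * place the caret under the offending part of the command.  The example_*
 * names are hypothetical.
 */
#if 0
static const char *example_errs[] = {
        "Unknown keyword",
        "Duplicate field name",
};

static void example_report_error(struct trace_array *tr, char *cmd,
                                 const char *bad_token)
{
        /* Logs "example: error: Unknown keyword" plus the careted command */
        tracing_log_err(tr, "example", cmd, example_errs,
                        0 /* "Unknown keyword" */, err_pos(cmd, bad_token));
}
#endif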
8028
8029 static void clear_tracing_err_log(struct trace_array *tr)
8030 {
8031         struct tracing_log_err *err, *next;
8032
8033         mutex_lock(&tracing_err_log_lock);
8034         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8035                 list_del(&err->list);
8036                 free_tracing_log_err(err);
8037         }
8038
8039         tr->n_err_log_entries = 0;
8040         mutex_unlock(&tracing_err_log_lock);
8041 }
8042
8043 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8044 {
8045         struct trace_array *tr = m->private;
8046
8047         mutex_lock(&tracing_err_log_lock);
8048
8049         return seq_list_start(&tr->err_log, *pos);
8050 }
8051
8052 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8053 {
8054         struct trace_array *tr = m->private;
8055
8056         return seq_list_next(v, &tr->err_log, pos);
8057 }
8058
8059 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8060 {
8061         mutex_unlock(&tracing_err_log_lock);
8062 }
8063
8064 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8065 {
8066         u16 i;
8067
8068         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8069                 seq_putc(m, ' ');
8070         for (i = 0; i < pos; i++)
8071                 seq_putc(m, ' ');
8072         seq_puts(m, "^\n");
8073 }
8074
8075 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8076 {
8077         struct tracing_log_err *err = v;
8078
8079         if (err) {
8080                 const char *err_text = err->info.errs[err->info.type];
8081                 u64 sec = err->info.ts;
8082                 u32 nsec;
8083
8084                 nsec = do_div(sec, NSEC_PER_SEC);
8085                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8086                            err->loc, err_text);
8087                 seq_printf(m, "%s", err->cmd);
8088                 tracing_err_log_show_pos(m, err->info.pos);
8089         }
8090
8091         return 0;
8092 }
8093
8094 static const struct seq_operations tracing_err_log_seq_ops = {
8095         .start  = tracing_err_log_seq_start,
8096         .next   = tracing_err_log_seq_next,
8097         .stop   = tracing_err_log_seq_stop,
8098         .show   = tracing_err_log_seq_show
8099 };
8100
8101 static int tracing_err_log_open(struct inode *inode, struct file *file)
8102 {
8103         struct trace_array *tr = inode->i_private;
8104         int ret = 0;
8105
8106         ret = tracing_check_open_get_tr(tr);
8107         if (ret)
8108                 return ret;
8109
8110         /* If this file was opened for write, then erase contents */
8111         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8112                 clear_tracing_err_log(tr);
8113
8114         if (file->f_mode & FMODE_READ) {
8115                 ret = seq_open(file, &tracing_err_log_seq_ops);
8116                 if (!ret) {
8117                         struct seq_file *m = file->private_data;
8118                         m->private = tr;
8119                 } else {
8120                         trace_array_put(tr);
8121                 }
8122         }
8123         return ret;
8124 }
8125
8126 static ssize_t tracing_err_log_write(struct file *file,
8127                                      const char __user *buffer,
8128                                      size_t count, loff_t *ppos)
8129 {
8130         return count;
8131 }
8132
8133 static int tracing_err_log_release(struct inode *inode, struct file *file)
8134 {
8135         struct trace_array *tr = inode->i_private;
8136
8137         trace_array_put(tr);
8138
8139         if (file->f_mode & FMODE_READ)
8140                 seq_release(inode, file);
8141
8142         return 0;
8143 }
8144
8145 static const struct file_operations tracing_err_log_fops = {
8146         .open           = tracing_err_log_open,
8147         .write          = tracing_err_log_write,
8148         .read           = seq_read,
8149         .llseek         = tracing_lseek,
8150         .release        = tracing_err_log_release,
8151 };
8152
8153 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8154 {
8155         struct trace_array *tr = inode->i_private;
8156         struct ftrace_buffer_info *info;
8157         int ret;
8158
8159         ret = tracing_check_open_get_tr(tr);
8160         if (ret)
8161                 return ret;
8162
8163         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8164         if (!info) {
8165                 trace_array_put(tr);
8166                 return -ENOMEM;
8167         }
8168
8169         mutex_lock(&trace_types_lock);
8170
8171         info->iter.tr           = tr;
8172         info->iter.cpu_file     = tracing_get_cpu(inode);
8173         info->iter.trace        = tr->current_trace;
8174         info->iter.array_buffer = &tr->array_buffer;
8175         info->spare             = NULL;
8176         /* Force reading ring buffer for first read */
8177         info->read              = (unsigned int)-1;
8178
8179         filp->private_data = info;
8180
8181         tr->trace_ref++;
8182
8183         mutex_unlock(&trace_types_lock);
8184
8185         ret = nonseekable_open(inode, filp);
8186         if (ret < 0)
8187                 trace_array_put(tr);
8188
8189         return ret;
8190 }
8191
8192 static __poll_t
8193 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8194 {
8195         struct ftrace_buffer_info *info = filp->private_data;
8196         struct trace_iterator *iter = &info->iter;
8197
8198         return trace_poll(iter, filp, poll_table);
8199 }
8200
8201 static ssize_t
8202 tracing_buffers_read(struct file *filp, char __user *ubuf,
8203                      size_t count, loff_t *ppos)
8204 {
8205         struct ftrace_buffer_info *info = filp->private_data;
8206         struct trace_iterator *iter = &info->iter;
8207         ssize_t ret = 0;
8208         ssize_t size;
8209
8210         if (!count)
8211                 return 0;
8212
8213 #ifdef CONFIG_TRACER_MAX_TRACE
8214         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8215                 return -EBUSY;
8216 #endif
8217
8218         if (!info->spare) {
8219                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8220                                                           iter->cpu_file);
8221                 if (IS_ERR(info->spare)) {
8222                         ret = PTR_ERR(info->spare);
8223                         info->spare = NULL;
8224                 } else {
8225                         info->spare_cpu = iter->cpu_file;
8226                 }
8227         }
8228         if (!info->spare)
8229                 return ret;
8230
8231         /* Do we have previous read data to read? */
8232         if (info->read < PAGE_SIZE)
8233                 goto read;
8234
8235  again:
8236         trace_access_lock(iter->cpu_file);
8237         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8238                                     &info->spare,
8239                                     count,
8240                                     iter->cpu_file, 0);
8241         trace_access_unlock(iter->cpu_file);
8242
8243         if (ret < 0) {
8244                 if (trace_empty(iter)) {
8245                         if ((filp->f_flags & O_NONBLOCK))
8246                                 return -EAGAIN;
8247
8248                         ret = wait_on_pipe(iter, 0);
8249                         if (ret)
8250                                 return ret;
8251
8252                         goto again;
8253                 }
8254                 return 0;
8255         }
8256
8257         info->read = 0;
8258  read:
8259         size = PAGE_SIZE - info->read;
8260         if (size > count)
8261                 size = count;
8262
8263         ret = copy_to_user(ubuf, info->spare + info->read, size);
8264         if (ret == size)
8265                 return -EFAULT;
8266
8267         size -= ret;
8268
8269         *ppos += size;
8270         info->read += size;
8271
8272         return size;
8273 }
8274
8275 static int tracing_buffers_release(struct inode *inode, struct file *file)
8276 {
8277         struct ftrace_buffer_info *info = file->private_data;
8278         struct trace_iterator *iter = &info->iter;
8279
8280         mutex_lock(&trace_types_lock);
8281
8282         iter->tr->trace_ref--;
8283
8284         __trace_array_put(iter->tr);
8285
8286         iter->wait_index++;
8287         /* Make sure the waiters see the new wait_index */
8288         smp_wmb();
8289
8290         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8291
8292         if (info->spare)
8293                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8294                                            info->spare_cpu, info->spare);
8295         kvfree(info);
8296
8297         mutex_unlock(&trace_types_lock);
8298
8299         return 0;
8300 }
8301
8302 struct buffer_ref {
8303         struct trace_buffer     *buffer;
8304         void                    *page;
8305         int                     cpu;
8306         refcount_t              refcount;
8307 };
8308
8309 static void buffer_ref_release(struct buffer_ref *ref)
8310 {
8311         if (!refcount_dec_and_test(&ref->refcount))
8312                 return;
8313         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8314         kfree(ref);
8315 }
8316
8317 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8318                                     struct pipe_buffer *buf)
8319 {
8320         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8321
8322         buffer_ref_release(ref);
8323         buf->private = 0;
8324 }
8325
8326 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8327                                 struct pipe_buffer *buf)
8328 {
8329         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8330
8331         if (refcount_read(&ref->refcount) > INT_MAX/2)
8332                 return false;
8333
8334         refcount_inc(&ref->refcount);
8335         return true;
8336 }
8337
8338 /* Pipe buffer operations for a buffer. */
8339 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8340         .release                = buffer_pipe_buf_release,
8341         .get                    = buffer_pipe_buf_get,
8342 };
8343
8344 /*
8345  * Callback from splice_to_pipe(), if we need to release some pages
8346  * at the end of the spd in case we errored out while filling the pipe.
8347  */
8348 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8349 {
8350         struct buffer_ref *ref =
8351                 (struct buffer_ref *)spd->partial[i].private;
8352
8353         buffer_ref_release(ref);
8354         spd->partial[i].private = 0;
8355 }
8356
8357 static ssize_t
8358 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8359                             struct pipe_inode_info *pipe, size_t len,
8360                             unsigned int flags)
8361 {
8362         struct ftrace_buffer_info *info = file->private_data;
8363         struct trace_iterator *iter = &info->iter;
8364         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8365         struct page *pages_def[PIPE_DEF_BUFFERS];
8366         struct splice_pipe_desc spd = {
8367                 .pages          = pages_def,
8368                 .partial        = partial_def,
8369                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8370                 .ops            = &buffer_pipe_buf_ops,
8371                 .spd_release    = buffer_spd_release,
8372         };
8373         struct buffer_ref *ref;
8374         int entries, i;
8375         ssize_t ret = 0;
8376
8377 #ifdef CONFIG_TRACER_MAX_TRACE
8378         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8379                 return -EBUSY;
8380 #endif
8381
8382         if (*ppos & (PAGE_SIZE - 1))
8383                 return -EINVAL;
8384
8385         if (len & (PAGE_SIZE - 1)) {
8386                 if (len < PAGE_SIZE)
8387                         return -EINVAL;
8388                 len &= PAGE_MASK;
8389         }
8390
8391         if (splice_grow_spd(pipe, &spd))
8392                 return -ENOMEM;
8393
8394  again:
8395         trace_access_lock(iter->cpu_file);
8396         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8397
8398         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8399                 struct page *page;
8400                 int r;
8401
8402                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8403                 if (!ref) {
8404                         ret = -ENOMEM;
8405                         break;
8406                 }
8407
8408                 refcount_set(&ref->refcount, 1);
8409                 ref->buffer = iter->array_buffer->buffer;
8410                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8411                 if (IS_ERR(ref->page)) {
8412                         ret = PTR_ERR(ref->page);
8413                         ref->page = NULL;
8414                         kfree(ref);
8415                         break;
8416                 }
8417                 ref->cpu = iter->cpu_file;
8418
8419                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8420                                           len, iter->cpu_file, 1);
8421                 if (r < 0) {
8422                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8423                                                    ref->page);
8424                         kfree(ref);
8425                         break;
8426                 }
8427
8428                 page = virt_to_page(ref->page);
8429
8430                 spd.pages[i] = page;
8431                 spd.partial[i].len = PAGE_SIZE;
8432                 spd.partial[i].offset = 0;
8433                 spd.partial[i].private = (unsigned long)ref;
8434                 spd.nr_pages++;
8435                 *ppos += PAGE_SIZE;
8436
8437                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8438         }
8439
8440         trace_access_unlock(iter->cpu_file);
8441         spd.nr_pages = i;
8442
8443         /* did we read anything? */
8444         if (!spd.nr_pages) {
8445                 long wait_index;
8446
8447                 if (ret)
8448                         goto out;
8449
8450                 ret = -EAGAIN;
8451                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8452                         goto out;
8453
8454                 wait_index = READ_ONCE(iter->wait_index);
8455
8456                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8457                 if (ret)
8458                         goto out;
8459
8460                 /* No need to wait after waking up when tracing is off */
8461                 if (!tracer_tracing_is_on(iter->tr))
8462                         goto out;
8463
8464                 /* Make sure we see the new wait_index */
8465                 smp_rmb();
8466                 if (wait_index != iter->wait_index)
8467                         goto out;
8468
8469                 goto again;
8470         }
8471
8472         ret = splice_to_pipe(pipe, &spd);
8473 out:
8474         splice_shrink_spd(&spd);
8475
8476         return ret;
8477 }
8478
8479 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8480 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8481 {
8482         struct ftrace_buffer_info *info = file->private_data;
8483         struct trace_iterator *iter = &info->iter;
8484
8485         if (cmd)
8486                 return -ENOIOCTLCMD;
8487
8488         mutex_lock(&trace_types_lock);
8489
8490         iter->wait_index++;
8491         /* Make sure the waiters see the new wait_index */
8492         smp_wmb();
8493
8494         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8495
8496         mutex_unlock(&trace_types_lock);
8497         return 0;
8498 }
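
/*
 * Illustrative user-space sketch (not built here): a reader blocked on a
 * per-CPU trace_pipe_raw file can be woken from another thread with an
 * ioctl of cmd 0.  The path assumes tracefs is mounted at
 * /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

static void wake_trace_pipe_raw_waiters(void)
{
        int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
                      O_RDONLY);

        if (fd < 0)
                return;

        ioctl(fd, 0);           /* cmd 0: wake up all waiters on this buffer */
        close(fd);
}
#endif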
8499
8500 static const struct file_operations tracing_buffers_fops = {
8501         .open           = tracing_buffers_open,
8502         .read           = tracing_buffers_read,
8503         .poll           = tracing_buffers_poll,
8504         .release        = tracing_buffers_release,
8505         .splice_read    = tracing_buffers_splice_read,
8506         .unlocked_ioctl = tracing_buffers_ioctl,
8507         .llseek         = no_llseek,
8508 };
8509
8510 static ssize_t
8511 tracing_stats_read(struct file *filp, char __user *ubuf,
8512                    size_t count, loff_t *ppos)
8513 {
8514         struct inode *inode = file_inode(filp);
8515         struct trace_array *tr = inode->i_private;
8516         struct array_buffer *trace_buf = &tr->array_buffer;
8517         int cpu = tracing_get_cpu(inode);
8518         struct trace_seq *s;
8519         unsigned long cnt;
8520         unsigned long long t;
8521         unsigned long usec_rem;
8522
8523         s = kmalloc(sizeof(*s), GFP_KERNEL);
8524         if (!s)
8525                 return -ENOMEM;
8526
8527         trace_seq_init(s);
8528
8529         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8530         trace_seq_printf(s, "entries: %ld\n", cnt);
8531
8532         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8533         trace_seq_printf(s, "overrun: %ld\n", cnt);
8534
8535         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8536         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8537
8538         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8539         trace_seq_printf(s, "bytes: %ld\n", cnt);
8540
8541         if (trace_clocks[tr->clock_id].in_ns) {
8542                 /* local or global for trace_clock */
8543                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8544                 usec_rem = do_div(t, USEC_PER_SEC);
8545                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8546                                                                 t, usec_rem);
8547
8548                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8549                 usec_rem = do_div(t, USEC_PER_SEC);
8550                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8551         } else {
8552                 /* counter or tsc mode for trace_clock */
8553                 trace_seq_printf(s, "oldest event ts: %llu\n",
8554                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8555
8556                 trace_seq_printf(s, "now ts: %llu\n",
8557                                 ring_buffer_time_stamp(trace_buf->buffer));
8558         }
8559
8560         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8561         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8562
8563         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8564         trace_seq_printf(s, "read events: %ld\n", cnt);
8565
8566         count = simple_read_from_buffer(ubuf, count, ppos,
8567                                         s->buffer, trace_seq_used(s));
8568
8569         kfree(s);
8570
8571         return count;
8572 }
8573
8574 static const struct file_operations tracing_stats_fops = {
8575         .open           = tracing_open_generic_tr,
8576         .read           = tracing_stats_read,
8577         .llseek         = generic_file_llseek,
8578         .release        = tracing_release_generic_tr,
8579 };
8580
8581 #ifdef CONFIG_DYNAMIC_FTRACE
8582
8583 static ssize_t
8584 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8585                   size_t cnt, loff_t *ppos)
8586 {
8587         ssize_t ret;
8588         char *buf;
8589         int r;
8590
8591         /* 256 should be plenty to hold the amount needed */
8592         buf = kmalloc(256, GFP_KERNEL);
8593         if (!buf)
8594                 return -ENOMEM;
8595
8596         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8597                       ftrace_update_tot_cnt,
8598                       ftrace_number_of_pages,
8599                       ftrace_number_of_groups);
8600
8601         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8602         kfree(buf);
8603         return ret;
8604 }
8605
8606 static const struct file_operations tracing_dyn_info_fops = {
8607         .open           = tracing_open_generic,
8608         .read           = tracing_read_dyn_info,
8609         .llseek         = generic_file_llseek,
8610 };
8611 #endif /* CONFIG_DYNAMIC_FTRACE */
8612
8613 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8614 static void
8615 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8616                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8617                 void *data)
8618 {
8619         tracing_snapshot_instance(tr);
8620 }
8621
8622 static void
8623 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8624                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8625                       void *data)
8626 {
8627         struct ftrace_func_mapper *mapper = data;
8628         long *count = NULL;
8629
8630         if (mapper)
8631                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8632
8633         if (count) {
8634
8635                 if (*count <= 0)
8636                         return;
8637
8638                 (*count)--;
8639         }
8640
8641         tracing_snapshot_instance(tr);
8642 }
8643
8644 static int
8645 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8646                       struct ftrace_probe_ops *ops, void *data)
8647 {
8648         struct ftrace_func_mapper *mapper = data;
8649         long *count = NULL;
8650
8651         seq_printf(m, "%ps:", (void *)ip);
8652
8653         seq_puts(m, "snapshot");
8654
8655         if (mapper)
8656                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8657
8658         if (count)
8659                 seq_printf(m, ":count=%ld\n", *count);
8660         else
8661                 seq_puts(m, ":unlimited\n");
8662
8663         return 0;
8664 }
8665
8666 static int
8667 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8668                      unsigned long ip, void *init_data, void **data)
8669 {
8670         struct ftrace_func_mapper *mapper = *data;
8671
8672         if (!mapper) {
8673                 mapper = allocate_ftrace_func_mapper();
8674                 if (!mapper)
8675                         return -ENOMEM;
8676                 *data = mapper;
8677         }
8678
8679         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8680 }
8681
8682 static void
8683 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8684                      unsigned long ip, void *data)
8685 {
8686         struct ftrace_func_mapper *mapper = data;
8687
8688         if (!ip) {
8689                 if (!mapper)
8690                         return;
8691                 free_ftrace_func_mapper(mapper, NULL);
8692                 return;
8693         }
8694
8695         ftrace_func_mapper_remove_ip(mapper, ip);
8696 }
8697
8698 static struct ftrace_probe_ops snapshot_probe_ops = {
8699         .func                   = ftrace_snapshot,
8700         .print                  = ftrace_snapshot_print,
8701 };
8702
8703 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8704         .func                   = ftrace_count_snapshot,
8705         .print                  = ftrace_snapshot_print,
8706         .init                   = ftrace_snapshot_init,
8707         .free                   = ftrace_snapshot_free,
8708 };
8709
8710 static int
8711 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8712                                char *glob, char *cmd, char *param, int enable)
8713 {
8714         struct ftrace_probe_ops *ops;
8715         void *count = (void *)-1;
8716         char *number;
8717         int ret;
8718
8719         if (!tr)
8720                 return -ENODEV;
8721
8722         /* hash funcs only work with set_ftrace_filter */
8723         if (!enable)
8724                 return -EINVAL;
8725
8726         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8727
8728         if (glob[0] == '!')
8729                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8730
8731         if (!param)
8732                 goto out_reg;
8733
8734         number = strsep(&param, ":");
8735
8736         if (!strlen(number))
8737                 goto out_reg;
8738
8739         /*
8740          * We use the callback data field (which is a pointer)
8741          * as our counter.
8742          */
8743         ret = kstrtoul(number, 0, (unsigned long *)&count);
8744         if (ret)
8745                 return ret;
8746
8747  out_reg:
8748         ret = tracing_alloc_snapshot_instance(tr);
8749         if (ret < 0)
8750                 goto out;
8751
8752         ret = register_ftrace_function_probe(glob, tr, ops, count);
8753
8754  out:
8755         return ret < 0 ? ret : 0;
8756 }
8757
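/*
 * The "snapshot" function command, registered below by
 * register_snapshot_cmd().  Typical usage (from the tracefs tracing
 * directory):
 *
 *   echo 'function_name:snapshot' > set_ftrace_filter
 *   echo 'function_name:snapshot:3' > set_ftrace_filter
 *
 * The first form takes a snapshot every time function_name is hit; the
 * optional count limits how many snapshots are taken.  Prefixing the
 * line with '!' removes the probe.
 */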
8758 static struct ftrace_func_command ftrace_snapshot_cmd = {
8759         .name                   = "snapshot",
8760         .func                   = ftrace_trace_snapshot_callback,
8761 };
8762
8763 static __init int register_snapshot_cmd(void)
8764 {
8765         return register_ftrace_command(&ftrace_snapshot_cmd);
8766 }
8767 #else
8768 static inline __init int register_snapshot_cmd(void) { return 0; }
8769 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8770
8771 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8772 {
8773         if (WARN_ON(!tr->dir))
8774                 return ERR_PTR(-ENODEV);
8775
8776         /* Top directory uses NULL as the parent */
8777         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8778                 return NULL;
8779
8780         /* All sub buffers have a descriptor */
8781         return tr->dir;
8782 }
8783
8784 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8785 {
8786         struct dentry *d_tracer;
8787
8788         if (tr->percpu_dir)
8789                 return tr->percpu_dir;
8790
8791         d_tracer = tracing_get_dentry(tr);
8792         if (IS_ERR(d_tracer))
8793                 return NULL;
8794
8795         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8796
8797         MEM_FAIL(!tr->percpu_dir,
8798                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8799
8800         return tr->percpu_dir;
8801 }
8802
8803 static struct dentry *
8804 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8805                       void *data, long cpu, const struct file_operations *fops)
8806 {
8807         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8808
8809         if (ret) /* See tracing_get_cpu() */
8810                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8811         return ret;
8812 }
8813
8814 static void
8815 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8816 {
8817         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8818         struct dentry *d_cpu;
8819         char cpu_dir[30]; /* 30 characters should be more than enough */
8820
8821         if (!d_percpu)
8822                 return;
8823
8824         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8825         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8826         if (!d_cpu) {
8827                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8828                 return;
8829         }
8830
8831         /* per cpu trace_pipe */
8832         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8833                                 tr, cpu, &tracing_pipe_fops);
8834
8835         /* per cpu trace */
8836         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8837                                 tr, cpu, &tracing_fops);
8838
8839         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8840                                 tr, cpu, &tracing_buffers_fops);
8841
8842         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8843                                 tr, cpu, &tracing_stats_fops);
8844
8845         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8846                                 tr, cpu, &tracing_entries_fops);
8847
8848 #ifdef CONFIG_TRACER_SNAPSHOT
8849         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8850                                 tr, cpu, &snapshot_fops);
8851
8852         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8853                                 tr, cpu, &snapshot_raw_fops);
8854 #endif
8855 }
8856
8857 #ifdef CONFIG_FTRACE_SELFTEST
8858 /* Let selftest have access to static functions in this file */
8859 #include "trace_selftest.c"
8860 #endif
8861
8862 static ssize_t
8863 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8864                         loff_t *ppos)
8865 {
8866         struct trace_option_dentry *topt = filp->private_data;
8867         char *buf;
8868
8869         if (topt->flags->val & topt->opt->bit)
8870                 buf = "1\n";
8871         else
8872                 buf = "0\n";
8873
8874         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8875 }
8876
8877 static ssize_t
8878 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8879                          loff_t *ppos)
8880 {
8881         struct trace_option_dentry *topt = filp->private_data;
8882         unsigned long val;
8883         int ret;
8884
8885         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8886         if (ret)
8887                 return ret;
8888
8889         if (val != 0 && val != 1)
8890                 return -EINVAL;
8891
8892         if (!!(topt->flags->val & topt->opt->bit) != val) {
8893                 mutex_lock(&trace_types_lock);
8894                 ret = __set_tracer_option(topt->tr, topt->flags,
8895                                           topt->opt, !val);
8896                 mutex_unlock(&trace_types_lock);
8897                 if (ret)
8898                         return ret;
8899         }
8900
8901         *ppos += cnt;
8902
8903         return cnt;
8904 }
8905
8906
8907 static const struct file_operations trace_options_fops = {
8908         .open = tracing_open_generic,
8909         .read = trace_options_read,
8910         .write = trace_options_write,
8911         .llseek = generic_file_llseek,
8912 };
8913
8914 /*
8915  * In order to pass in both the trace_array descriptor and the index of
8916  * the flag that the trace option file represents, the trace_array
8917  * has a character array of trace_flags_index[], which holds the index
8918  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8919  * The address of this character array is passed to the flag option file
8920  * read/write callbacks.
8921  *
8922  * In order to extract both the index and the trace_array descriptor,
8923  * get_tr_index() uses the following algorithm.
8924  *
8925  *   idx = *ptr;
8926  *
8927  * As the pointer itself contains the address of the index (remember
8928  * index[1] == 1).
8929  *
8930  * Then to get the trace_array descriptor, by subtracting that index
8931  * from the ptr, we get to the start of the index itself.
8932  *
8933  *   ptr - idx == &index[0]
8934  *
8935  * Then a simple container_of() from that pointer gets us to the
8936  * trace_array descriptor.
8937  */
8938 static void get_tr_index(void *data, struct trace_array **ptr,
8939                          unsigned int *pindex)
8940 {
8941         *pindex = *(unsigned char *)data;
8942
8943         *ptr = container_of(data - *pindex, struct trace_array,
8944                             trace_flags_index);
8945 }
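
/*
 * Worked example (hypothetical values): if data == &tr->trace_flags_index[3],
 * then *data == 3, so data - 3 == &tr->trace_flags_index[0] and the
 * container_of() above recovers tr.
 */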
8946
8947 static ssize_t
8948 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8949                         loff_t *ppos)
8950 {
8951         void *tr_index = filp->private_data;
8952         struct trace_array *tr;
8953         unsigned int index;
8954         char *buf;
8955
8956         get_tr_index(tr_index, &tr, &index);
8957
8958         if (tr->trace_flags & (1 << index))
8959                 buf = "1\n";
8960         else
8961                 buf = "0\n";
8962
8963         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8964 }
8965
8966 static ssize_t
8967 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8968                          loff_t *ppos)
8969 {
8970         void *tr_index = filp->private_data;
8971         struct trace_array *tr;
8972         unsigned int index;
8973         unsigned long val;
8974         int ret;
8975
8976         get_tr_index(tr_index, &tr, &index);
8977
8978         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8979         if (ret)
8980                 return ret;
8981
8982         if (val != 0 && val != 1)
8983                 return -EINVAL;
8984
8985         mutex_lock(&event_mutex);
8986         mutex_lock(&trace_types_lock);
8987         ret = set_tracer_flag(tr, 1 << index, val);
8988         mutex_unlock(&trace_types_lock);
8989         mutex_unlock(&event_mutex);
8990
8991         if (ret < 0)
8992                 return ret;
8993
8994         *ppos += cnt;
8995
8996         return cnt;
8997 }
8998
8999 static const struct file_operations trace_options_core_fops = {
9000         .open = tracing_open_generic,
9001         .read = trace_options_core_read,
9002         .write = trace_options_core_write,
9003         .llseek = generic_file_llseek,
9004 };
9005
9006 struct dentry *trace_create_file(const char *name,
9007                                  umode_t mode,
9008                                  struct dentry *parent,
9009                                  void *data,
9010                                  const struct file_operations *fops)
9011 {
9012         struct dentry *ret;
9013
9014         ret = tracefs_create_file(name, mode, parent, data, fops);
9015         if (!ret)
9016                 pr_warn("Could not create tracefs '%s' entry\n", name);
9017
9018         return ret;
9019 }
9020
9021
9022 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9023 {
9024         struct dentry *d_tracer;
9025
9026         if (tr->options)
9027                 return tr->options;
9028
9029         d_tracer = tracing_get_dentry(tr);
9030         if (IS_ERR(d_tracer))
9031                 return NULL;
9032
9033         tr->options = tracefs_create_dir("options", d_tracer);
9034         if (!tr->options) {
9035                 pr_warn("Could not create tracefs directory 'options'\n");
9036                 return NULL;
9037         }
9038
9039         return tr->options;
9040 }
9041
9042 static void
9043 create_trace_option_file(struct trace_array *tr,
9044                          struct trace_option_dentry *topt,
9045                          struct tracer_flags *flags,
9046                          struct tracer_opt *opt)
9047 {
9048         struct dentry *t_options;
9049
9050         t_options = trace_options_init_dentry(tr);
9051         if (!t_options)
9052                 return;
9053
9054         topt->flags = flags;
9055         topt->opt = opt;
9056         topt->tr = tr;
9057
9058         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9059                                         t_options, topt, &trace_options_fops);
9060
9061 }
9062
9063 static void
9064 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9065 {
9066         struct trace_option_dentry *topts;
9067         struct trace_options *tr_topts;
9068         struct tracer_flags *flags;
9069         struct tracer_opt *opts;
9070         int cnt;
9071         int i;
9072
9073         if (!tracer)
9074                 return;
9075
9076         flags = tracer->flags;
9077
9078         if (!flags || !flags->opts)
9079                 return;
9080
9081         /*
9082          * If this is an instance, only create flags for tracers
9083          * the instance may have.
9084          */
9085         if (!trace_ok_for_array(tracer, tr))
9086                 return;
9087
9088         for (i = 0; i < tr->nr_topts; i++) {
9089                 /* Make sure there's no duplicate flags. */
9090                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9091                         return;
9092         }
9093
9094         opts = flags->opts;
9095
9096         for (cnt = 0; opts[cnt].name; cnt++)
9097                 ;
9098
9099         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9100         if (!topts)
9101                 return;
9102
9103         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9104                             GFP_KERNEL);
9105         if (!tr_topts) {
9106                 kfree(topts);
9107                 return;
9108         }
9109
9110         tr->topts = tr_topts;
9111         tr->topts[tr->nr_topts].tracer = tracer;
9112         tr->topts[tr->nr_topts].topts = topts;
9113         tr->nr_topts++;
9114
9115         for (cnt = 0; opts[cnt].name; cnt++) {
9116                 create_trace_option_file(tr, &topts[cnt], flags,
9117                                          &opts[cnt]);
9118                 MEM_FAIL(topts[cnt].entry == NULL,
9119                           "Failed to create trace option: %s",
9120                           opts[cnt].name);
9121         }
9122 }
9123
9124 static struct dentry *
9125 create_trace_option_core_file(struct trace_array *tr,
9126                               const char *option, long index)
9127 {
9128         struct dentry *t_options;
9129
9130         t_options = trace_options_init_dentry(tr);
9131         if (!t_options)
9132                 return NULL;
9133
9134         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9135                                  (void *)&tr->trace_flags_index[index],
9136                                  &trace_options_core_fops);
9137 }
9138
9139 static void create_trace_options_dir(struct trace_array *tr)
9140 {
9141         struct dentry *t_options;
9142         bool top_level = tr == &global_trace;
9143         int i;
9144
9145         t_options = trace_options_init_dentry(tr);
9146         if (!t_options)
9147                 return;
9148
9149         for (i = 0; trace_options[i]; i++) {
9150                 if (top_level ||
9151                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9152                         create_trace_option_core_file(tr, trace_options[i], i);
9153         }
9154 }
9155
9156 static ssize_t
9157 rb_simple_read(struct file *filp, char __user *ubuf,
9158                size_t cnt, loff_t *ppos)
9159 {
9160         struct trace_array *tr = filp->private_data;
9161         char buf[64];
9162         int r;
9163
9164         r = tracer_tracing_is_on(tr);
9165         r = sprintf(buf, "%d\n", r);
9166
9167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9168 }
9169
9170 static ssize_t
9171 rb_simple_write(struct file *filp, const char __user *ubuf,
9172                 size_t cnt, loff_t *ppos)
9173 {
9174         struct trace_array *tr = filp->private_data;
9175         struct trace_buffer *buffer = tr->array_buffer.buffer;
9176         unsigned long val;
9177         int ret;
9178
9179         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9180         if (ret)
9181                 return ret;
9182
9183         if (buffer) {
9184                 mutex_lock(&trace_types_lock);
9185                 if (!!val == tracer_tracing_is_on(tr)) {
9186                         val = 0; /* do nothing */
9187                 } else if (val) {
9188                         tracer_tracing_on(tr);
9189                         if (tr->current_trace->start)
9190                                 tr->current_trace->start(tr);
9191                 } else {
9192                         tracer_tracing_off(tr);
9193                         if (tr->current_trace->stop)
9194                                 tr->current_trace->stop(tr);
9195                         /* Wake up any waiters */
9196                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9197                 }
9198                 mutex_unlock(&trace_types_lock);
9199         }
9200
9201         (*ppos)++;
9202
9203         return cnt;
9204 }
9205
9206 static const struct file_operations rb_simple_fops = {
9207         .open           = tracing_open_generic_tr,
9208         .read           = rb_simple_read,
9209         .write          = rb_simple_write,
9210         .release        = tracing_release_generic_tr,
9211         .llseek         = default_llseek,
9212 };
9213
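/*
 * tr->buffer_percent, read and written below, is the watermark passed to
 * wait_on_pipe() in tracing_buffers_splice_read() above: the percentage
 * the ring buffer must fill before a blocked reader is woken.  0 means
 * wake on any data, 100 means wake only when the buffer is full.
 */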
9214 static ssize_t
9215 buffer_percent_read(struct file *filp, char __user *ubuf,
9216                     size_t cnt, loff_t *ppos)
9217 {
9218         struct trace_array *tr = filp->private_data;
9219         char buf[64];
9220         int r;
9221
9222         r = tr->buffer_percent;
9223         r = sprintf(buf, "%d\n", r);
9224
9225         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9226 }
9227
9228 static ssize_t
9229 buffer_percent_write(struct file *filp, const char __user *ubuf,
9230                      size_t cnt, loff_t *ppos)
9231 {
9232         struct trace_array *tr = filp->private_data;
9233         unsigned long val;
9234         int ret;
9235
9236         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9237         if (ret)
9238                 return ret;
9239
9240         if (val > 100)
9241                 return -EINVAL;
9242
9243         tr->buffer_percent = val;
9244
9245         (*ppos)++;
9246
9247         return cnt;
9248 }
9249
9250 static const struct file_operations buffer_percent_fops = {
9251         .open           = tracing_open_generic_tr,
9252         .read           = buffer_percent_read,
9253         .write          = buffer_percent_write,
9254         .release        = tracing_release_generic_tr,
9255         .llseek         = default_llseek,
9256 };
9257
9258 static struct dentry *trace_instance_dir;
9259
9260 static void
9261 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9262
9263 static int
9264 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9265 {
9266         enum ring_buffer_flags rb_flags;
9267
9268         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9269
9270         buf->tr = tr;
9271
9272         buf->buffer = ring_buffer_alloc(size, rb_flags);
9273         if (!buf->buffer)
9274                 return -ENOMEM;
9275
9276         buf->data = alloc_percpu(struct trace_array_cpu);
9277         if (!buf->data) {
9278                 ring_buffer_free(buf->buffer);
9279                 buf->buffer = NULL;
9280                 return -ENOMEM;
9281         }
9282
9283         /* Allocate the first page for all buffers */
9284         set_buffer_entries(&tr->array_buffer,
9285                            ring_buffer_size(tr->array_buffer.buffer, 0));
9286
9287         return 0;
9288 }
9289
9290 static void free_trace_buffer(struct array_buffer *buf)
9291 {
9292         if (buf->buffer) {
9293                 ring_buffer_free(buf->buffer);
9294                 buf->buffer = NULL;
9295                 free_percpu(buf->data);
9296                 buf->data = NULL;
9297         }
9298 }
9299
9300 static int allocate_trace_buffers(struct trace_array *tr, int size)
9301 {
9302         int ret;
9303
9304         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9305         if (ret)
9306                 return ret;
9307
9308 #ifdef CONFIG_TRACER_MAX_TRACE
9309         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9310                                     allocate_snapshot ? size : 1);
9311         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9312                 free_trace_buffer(&tr->array_buffer);
9313                 return -ENOMEM;
9314         }
9315         tr->allocated_snapshot = allocate_snapshot;
9316
9317         allocate_snapshot = false;
9318 #endif
9319
9320         return 0;
9321 }
9322
9323 static void free_trace_buffers(struct trace_array *tr)
9324 {
9325         if (!tr)
9326                 return;
9327
9328         free_trace_buffer(&tr->array_buffer);
9329
9330 #ifdef CONFIG_TRACER_MAX_TRACE
9331         free_trace_buffer(&tr->max_buffer);
9332 #endif
9333 }
9334
9335 static void init_trace_flags_index(struct trace_array *tr)
9336 {
9337         int i;
9338
9339         /* Used by the trace options files */
9340         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9341                 tr->trace_flags_index[i] = i;
9342 }
9343
9344 static void __update_tracer_options(struct trace_array *tr)
9345 {
9346         struct tracer *t;
9347
9348         for (t = trace_types; t; t = t->next)
9349                 add_tracer_options(tr, t);
9350 }
9351
9352 static void update_tracer_options(struct trace_array *tr)
9353 {
9354         mutex_lock(&trace_types_lock);
9355         tracer_options_updated = true;
9356         __update_tracer_options(tr);
9357         mutex_unlock(&trace_types_lock);
9358 }
9359
9360 /* Must have trace_types_lock held */
9361 struct trace_array *trace_array_find(const char *instance)
9362 {
9363         struct trace_array *tr, *found = NULL;
9364
9365         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9366                 if (tr->name && strcmp(tr->name, instance) == 0) {
9367                         found = tr;
9368                         break;
9369                 }
9370         }
9371
9372         return found;
9373 }
9374
9375 struct trace_array *trace_array_find_get(const char *instance)
9376 {
9377         struct trace_array *tr;
9378
9379         mutex_lock(&trace_types_lock);
9380         tr = trace_array_find(instance);
9381         if (tr)
9382                 tr->ref++;
9383         mutex_unlock(&trace_types_lock);
9384
9385         return tr;
9386 }
9387
9388 static int trace_array_create_dir(struct trace_array *tr)
9389 {
9390         int ret;
9391
9392         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9393         if (!tr->dir)
9394                 return -EINVAL;
9395
9396         ret = event_trace_add_tracer(tr->dir, tr);
9397         if (ret) {
9398                 tracefs_remove(tr->dir);
9399                 return ret;
9400         }
9401
9402         init_tracer_tracefs(tr, tr->dir);
9403         __update_tracer_options(tr);
9404
9405         return ret;
9406 }
9407
9408 static struct trace_array *trace_array_create(const char *name)
9409 {
9410         struct trace_array *tr;
9411         int ret;
9412
9413         ret = -ENOMEM;
9414         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9415         if (!tr)
9416                 return ERR_PTR(ret);
9417
9418         tr->name = kstrdup(name, GFP_KERNEL);
9419         if (!tr->name)
9420                 goto out_free_tr;
9421
9422         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9423                 goto out_free_tr;
9424
9425         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9426
9427         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9428
9429         raw_spin_lock_init(&tr->start_lock);
9430
9431         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9432
9433         tr->current_trace = &nop_trace;
9434
9435         INIT_LIST_HEAD(&tr->systems);
9436         INIT_LIST_HEAD(&tr->events);
9437         INIT_LIST_HEAD(&tr->hist_vars);
9438         INIT_LIST_HEAD(&tr->err_log);
9439
9440         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9441                 goto out_free_tr;
9442
9443         if (ftrace_allocate_ftrace_ops(tr) < 0)
9444                 goto out_free_tr;
9445
9446         ftrace_init_trace_array(tr);
9447
9448         init_trace_flags_index(tr);
9449
9450         if (trace_instance_dir) {
9451                 ret = trace_array_create_dir(tr);
9452                 if (ret)
9453                         goto out_free_tr;
9454         } else
9455                 __trace_early_add_events(tr);
9456
9457         list_add(&tr->list, &ftrace_trace_arrays);
9458
9459         tr->ref++;
9460
9461         return tr;
9462
9463  out_free_tr:
9464         ftrace_free_ftrace_ops(tr);
9465         free_trace_buffers(tr);
9466         free_cpumask_var(tr->tracing_cpumask);
9467         kfree(tr->name);
9468         kfree(tr);
9469
9470         return ERR_PTR(ret);
9471 }
9472
9473 static int instance_mkdir(const char *name)
9474 {
9475         struct trace_array *tr;
9476         int ret;
9477
9478         mutex_lock(&event_mutex);
9479         mutex_lock(&trace_types_lock);
9480
9481         ret = -EEXIST;
9482         if (trace_array_find(name))
9483                 goto out_unlock;
9484
9485         tr = trace_array_create(name);
9486
9487         ret = PTR_ERR_OR_ZERO(tr);
9488
9489 out_unlock:
9490         mutex_unlock(&trace_types_lock);
9491         mutex_unlock(&event_mutex);
9492         return ret;
9493 }
9494
9495 /**
9496  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9497  * @name: The name of the trace array to be looked up/created.
9498  *
9499  * Returns a pointer to the trace array with the given name, or
9500  * NULL if it cannot be created.
9501  *
9502  * NOTE: This function increments the reference counter associated with the
9503  * trace array returned. This makes sure it cannot be freed while in use.
9504  * Use trace_array_put() once the trace array is no longer needed.
9505  * If the trace_array is to be freed, trace_array_destroy() needs to
9506  * be called after the trace_array_put(), or simply let user space delete
9507  * it from the tracefs instances directory. But until the
9508  * trace_array_put() is called, user space cannot delete it.
9509  *
9510  */
9511 struct trace_array *trace_array_get_by_name(const char *name)
9512 {
9513         struct trace_array *tr;
9514
9515         mutex_lock(&event_mutex);
9516         mutex_lock(&trace_types_lock);
9517
9518         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9519                 if (tr->name && strcmp(tr->name, name) == 0)
9520                         goto out_unlock;
9521         }
9522
9523         tr = trace_array_create(name);
9524
9525         if (IS_ERR(tr))
9526                 tr = NULL;
9527 out_unlock:
9528         if (tr)
9529                 tr->ref++;
9530
9531         mutex_unlock(&trace_types_lock);
9532         mutex_unlock(&event_mutex);
9533         return tr;
9534 }
9535 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
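/*
 * Illustrative sketch (not part of the kernel source): a GPL module could use
 * the instance API above roughly as follows. The instance name "example" and
 * the sample event are arbitrary, and error handling is trimmed for brevity.
 *
 *	#include <linux/module.h>
 *	#include <linux/trace.h>
 *
 *	static struct trace_array *tr;
 *
 *	static int __init example_init(void)
 *	{
 *		// Looks up the instance, creating it if needed, and takes a ref
 *		tr = trace_array_get_by_name("example");
 *		if (!tr)
 *			return -ENOMEM;
 *
 *		// Required before trace_array_printk() may be used on an instance
 *		trace_array_init_printk(tr);
 *
 *		// Enable one event in this instance only
 *		trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *		trace_array_printk(tr, _THIS_IP_, "example instance ready\n");
 *		return 0;
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		trace_array_set_clr_event(tr, "sched", "sched_switch", false);
 *		trace_array_put(tr);		// drop our reference ...
 *		trace_array_destroy(tr);	// ... then remove the instance
 *	}
 *
 *	module_init(example_init);
 *	module_exit(example_exit);
 *	MODULE_LICENSE("GPL");
 */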
9536
9537 static int __remove_instance(struct trace_array *tr)
9538 {
9539         int i;
9540
9541         /* Reference counter for a newly created trace array = 1. */
9542         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9543                 return -EBUSY;
9544
9545         list_del(&tr->list);
9546
9547         /* Disable all the flags that were enabled coming in */
9548         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9549                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9550                         set_tracer_flag(tr, 1 << i, 0);
9551         }
9552
9553         tracing_set_nop(tr);
9554         clear_ftrace_function_probes(tr);
9555         event_trace_del_tracer(tr);
9556         ftrace_clear_pids(tr);
9557         ftrace_destroy_function_files(tr);
9558         tracefs_remove(tr->dir);
9559         free_percpu(tr->last_func_repeats);
9560         free_trace_buffers(tr);
9561         clear_tracing_err_log(tr);
9562
9563         for (i = 0; i < tr->nr_topts; i++) {
9564                 kfree(tr->topts[i].topts);
9565         }
9566         kfree(tr->topts);
9567
9568         free_cpumask_var(tr->tracing_cpumask);
9569         kfree(tr->name);
9570         kfree(tr);
9571
9572         return 0;
9573 }
9574
9575 int trace_array_destroy(struct trace_array *this_tr)
9576 {
9577         struct trace_array *tr;
9578         int ret;
9579
9580         if (!this_tr)
9581                 return -EINVAL;
9582
9583         mutex_lock(&event_mutex);
9584         mutex_lock(&trace_types_lock);
9585
9586         ret = -ENODEV;
9587
9588         /* Make sure the trace array exists before destroying it. */
9589         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9590                 if (tr == this_tr) {
9591                         ret = __remove_instance(tr);
9592                         break;
9593                 }
9594         }
9595
9596         mutex_unlock(&trace_types_lock);
9597         mutex_unlock(&event_mutex);
9598
9599         return ret;
9600 }
9601 EXPORT_SYMBOL_GPL(trace_array_destroy);
9602
9603 static int instance_rmdir(const char *name)
9604 {
9605         struct trace_array *tr;
9606         int ret;
9607
9608         mutex_lock(&event_mutex);
9609         mutex_lock(&trace_types_lock);
9610
9611         ret = -ENODEV;
9612         tr = trace_array_find(name);
9613         if (tr)
9614                 ret = __remove_instance(tr);
9615
9616         mutex_unlock(&trace_types_lock);
9617         mutex_unlock(&event_mutex);
9618
9619         return ret;
9620 }
9621
9622 static __init void create_trace_instances(struct dentry *d_tracer)
9623 {
9624         struct trace_array *tr;
9625
9626         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9627                                                          instance_mkdir,
9628                                                          instance_rmdir);
9629         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9630                 return;
9631
9632         mutex_lock(&event_mutex);
9633         mutex_lock(&trace_types_lock);
9634
9635         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9636                 if (!tr->name)
9637                         continue;
9638                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9639                              "Failed to create instance directory\n"))
9640                         break;
9641         }
9642
9643         mutex_unlock(&trace_types_lock);
9644         mutex_unlock(&event_mutex);
9645 }
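/*
 * Illustrative sketch (user space, hypothetical): instance_mkdir() and
 * instance_rmdir() above are invoked by tracefs when a directory is created
 * or removed under "instances". Assuming tracefs is mounted at its usual
 * /sys/kernel/tracing location:
 *
 *	#include <stdio.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		// Triggers instance_mkdir("foo") in the kernel
 *		if (mkdir("/sys/kernel/tracing/instances/foo", 0755))
 *			perror("mkdir");
 *
 *		// ... use the files under instances/foo/ ...
 *
 *		// Triggers instance_rmdir("foo"); fails with EBUSY while in use
 *		if (rmdir("/sys/kernel/tracing/instances/foo"))
 *			perror("rmdir");
 *		return 0;
 *	}
 */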
9646
9647 static void
9648 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9649 {
9650         struct trace_event_file *file;
9651         int cpu;
9652
9653         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9654                         tr, &show_traces_fops);
9655
9656         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9657                         tr, &set_tracer_fops);
9658
9659         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9660                           tr, &tracing_cpumask_fops);
9661
9662         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9663                           tr, &tracing_iter_fops);
9664
9665         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9666                           tr, &tracing_fops);
9667
9668         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9669                           tr, &tracing_pipe_fops);
9670
9671         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9672                           tr, &tracing_entries_fops);
9673
9674         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9675                           tr, &tracing_total_entries_fops);
9676
9677         trace_create_file("free_buffer", 0200, d_tracer,
9678                           tr, &tracing_free_buffer_fops);
9679
9680         trace_create_file("trace_marker", 0220, d_tracer,
9681                           tr, &tracing_mark_fops);
9682
9683         file = __find_event_file(tr, "ftrace", "print");
9684         if (file && file->dir)
9685                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9686                                   file, &event_trigger_fops);
9687         tr->trace_marker_file = file;
9688
9689         trace_create_file("trace_marker_raw", 0220, d_tracer,
9690                           tr, &tracing_mark_raw_fops);
9691
9692         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9693                           &trace_clock_fops);
9694
9695         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9696                           tr, &rb_simple_fops);
9697
9698         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9699                           &trace_time_stamp_mode_fops);
9700
9701         tr->buffer_percent = 50;
9702
9703         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9704                         tr, &buffer_percent_fops);
9705
9706         create_trace_options_dir(tr);
9707
9708 #ifdef CONFIG_TRACER_MAX_TRACE
9709         trace_create_maxlat_file(tr, d_tracer);
9710 #endif
9711
9712         if (ftrace_create_function_files(tr, d_tracer))
9713                 MEM_FAIL(1, "Could not allocate function filter files");
9714
9715 #ifdef CONFIG_TRACER_SNAPSHOT
9716         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9717                           tr, &snapshot_fops);
9718 #endif
9719
9720         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9721                           tr, &tracing_err_log_fops);
9722
9723         for_each_tracing_cpu(cpu)
9724                 tracing_init_tracefs_percpu(tr, cpu);
9725
9726         ftrace_init_tracefs(tr, d_tracer);
9727 }
9728
9729 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9730 {
9731         struct vfsmount *mnt;
9732         struct file_system_type *type;
9733
9734         /*
9735          * To maintain backward compatibility for tools that mount
9736          * debugfs to get to the tracing facility, tracefs is automatically
9737          * mounted to the debugfs/tracing directory.
9738          */
9739         type = get_fs_type("tracefs");
9740         if (!type)
9741                 return NULL;
9742         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9743         put_filesystem(type);
9744         if (IS_ERR(mnt))
9745                 return NULL;
9746         mntget(mnt);
9747
9748         return mnt;
9749 }
9750
9751 /**
9752  * tracing_init_dentry - initialize top level trace array
9753  *
9754  * This is called when creating files or directories in the tracing
9755  * directory. It is called via fs_initcall() by any of the boot up code
9756  * and expects to return the dentry of the top level tracing directory.
9757  */
9758 int tracing_init_dentry(void)
9759 {
9760         struct trace_array *tr = &global_trace;
9761
9762         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9763                 pr_warn("Tracing disabled due to lockdown\n");
9764                 return -EPERM;
9765         }
9766
9767         /* The top level trace array uses NULL as parent */
9768         if (tr->dir)
9769                 return 0;
9770
9771         if (WARN_ON(!tracefs_initialized()))
9772                 return -ENODEV;
9773
9774         /*
9775          * As there may still be users that expect the tracing
9776          * files to exist in debugfs/tracing, we must automount
9777          * the tracefs file system there, so older tools still
9778          * work with the newer kernel.
9779          */
9780         tr->dir = debugfs_create_automount("tracing", NULL,
9781                                            trace_automount, NULL);
9782
9783         return 0;
9784 }
9785
9786 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9787 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9788
9789 static struct workqueue_struct *eval_map_wq __initdata;
9790 static struct work_struct eval_map_work __initdata;
9791 static struct work_struct tracerfs_init_work __initdata;
9792
9793 static void __init eval_map_work_func(struct work_struct *work)
9794 {
9795         int len;
9796
9797         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9798         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9799 }
9800
9801 static int __init trace_eval_init(void)
9802 {
9803         INIT_WORK(&eval_map_work, eval_map_work_func);
9804
9805         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9806         if (!eval_map_wq) {
9807                 pr_err("Unable to allocate eval_map_wq\n");
9808                 /* Do work here */
9809                 eval_map_work_func(&eval_map_work);
9810                 return -ENOMEM;
9811         }
9812
9813         queue_work(eval_map_wq, &eval_map_work);
9814         return 0;
9815 }
9816
9817 subsys_initcall(trace_eval_init);
9818
9819 static int __init trace_eval_sync(void)
9820 {
9821         /* Make sure the eval map updates are finished */
9822         if (eval_map_wq)
9823                 destroy_workqueue(eval_map_wq);
9824         return 0;
9825 }
9826
9827 late_initcall_sync(trace_eval_sync);
9828
9829
9830 #ifdef CONFIG_MODULES
9831 static void trace_module_add_evals(struct module *mod)
9832 {
9833         if (!mod->num_trace_evals)
9834                 return;
9835
9836         /*
9837          * Modules with bad taint do not have events created, so do
9838          * not bother with their enums either.
9839          */
9840         if (trace_module_has_bad_taint(mod))
9841                 return;
9842
9843         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9844 }
9845
9846 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9847 static void trace_module_remove_evals(struct module *mod)
9848 {
9849         union trace_eval_map_item *map;
9850         union trace_eval_map_item **last = &trace_eval_maps;
9851
9852         if (!mod->num_trace_evals)
9853                 return;
9854
9855         mutex_lock(&trace_eval_mutex);
9856
9857         map = trace_eval_maps;
9858
9859         while (map) {
9860                 if (map->head.mod == mod)
9861                         break;
9862                 map = trace_eval_jmp_to_tail(map);
9863                 last = &map->tail.next;
9864                 map = map->tail.next;
9865         }
9866         if (!map)
9867                 goto out;
9868
9869         *last = trace_eval_jmp_to_tail(map)->tail.next;
9870         kfree(map);
9871  out:
9872         mutex_unlock(&trace_eval_mutex);
9873 }
9874 #else
9875 static inline void trace_module_remove_evals(struct module *mod) { }
9876 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9877
9878 static int trace_module_notify(struct notifier_block *self,
9879                                unsigned long val, void *data)
9880 {
9881         struct module *mod = data;
9882
9883         switch (val) {
9884         case MODULE_STATE_COMING:
9885                 trace_module_add_evals(mod);
9886                 break;
9887         case MODULE_STATE_GOING:
9888                 trace_module_remove_evals(mod);
9889                 break;
9890         }
9891
9892         return NOTIFY_OK;
9893 }
9894
9895 static struct notifier_block trace_module_nb = {
9896         .notifier_call = trace_module_notify,
9897         .priority = 0,
9898 };
9899 #endif /* CONFIG_MODULES */
9900
9901 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9902 {
9903
9904         event_trace_init();
9905
9906         init_tracer_tracefs(&global_trace, NULL);
9907         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9908
9909         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9910                         &global_trace, &tracing_thresh_fops);
9911
9912         trace_create_file("README", TRACE_MODE_READ, NULL,
9913                         NULL, &tracing_readme_fops);
9914
9915         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9916                         NULL, &tracing_saved_cmdlines_fops);
9917
9918         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9919                           NULL, &tracing_saved_cmdlines_size_fops);
9920
9921         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9922                         NULL, &tracing_saved_tgids_fops);
9923
9924         trace_create_eval_file(NULL);
9925
9926 #ifdef CONFIG_MODULES
9927         register_module_notifier(&trace_module_nb);
9928 #endif
9929
9930 #ifdef CONFIG_DYNAMIC_FTRACE
9931         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9932                         NULL, &tracing_dyn_info_fops);
9933 #endif
9934
9935         create_trace_instances(NULL);
9936
9937         update_tracer_options(&global_trace);
9938 }
9939
9940 static __init int tracer_init_tracefs(void)
9941 {
9942         int ret;
9943
9944         trace_access_lock_init();
9945
9946         ret = tracing_init_dentry();
9947         if (ret)
9948                 return 0;
9949
9950         if (eval_map_wq) {
9951                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9952                 queue_work(eval_map_wq, &tracerfs_init_work);
9953         } else {
9954                 tracer_init_tracefs_work_func(NULL);
9955         }
9956
9957         rv_init_interface();
9958
9959         return 0;
9960 }
9961
9962 fs_initcall(tracer_init_tracefs);
9963
9964 static int trace_die_panic_handler(struct notifier_block *self,
9965                                 unsigned long ev, void *unused);
9966
9967 static struct notifier_block trace_panic_notifier = {
9968         .notifier_call = trace_die_panic_handler,
9969         .priority = INT_MAX - 1,
9970 };
9971
9972 static struct notifier_block trace_die_notifier = {
9973         .notifier_call = trace_die_panic_handler,
9974         .priority = INT_MAX - 1,
9975 };
9976
9977 /*
9978  * The idea is to execute the following die/panic callback early, in order
9979  * to avoid showing irrelevant information in the trace (like other panic
9980  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9981  * warnings get disabled (to prevent potential log flooding).
9982  */
9983 static int trace_die_panic_handler(struct notifier_block *self,
9984                                 unsigned long ev, void *unused)
9985 {
9986         if (!ftrace_dump_on_oops)
9987                 return NOTIFY_DONE;
9988
9989         /* The die notifier requires DIE_OOPS to trigger */
9990         if (self == &trace_die_notifier && ev != DIE_OOPS)
9991                 return NOTIFY_DONE;
9992
9993         ftrace_dump(ftrace_dump_on_oops);
9994
9995         return NOTIFY_DONE;
9996 }
9997
9998 /*
9999  * printk is set to a max of 1024; we really don't need it that big.
10000  * Nothing should be printing 1000 characters anyway.
10001  */
10002 #define TRACE_MAX_PRINT         1000
10003
10004 /*
10005  * Define here KERN_TRACE so that we have one place to modify
10006  * it if we decide to change what log level the ftrace dump
10007  * should be at.
10008  */
10009 #define KERN_TRACE              KERN_EMERG
10010
10011 void
10012 trace_printk_seq(struct trace_seq *s)
10013 {
10014         /* Probably should print a warning here. */
10015         if (s->seq.len >= TRACE_MAX_PRINT)
10016                 s->seq.len = TRACE_MAX_PRINT;
10017
10018         /*
10019          * More paranoid code. Although the buffer size is set to
10020          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10021          * an extra layer of protection.
10022          */
10023         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10024                 s->seq.len = s->seq.size - 1;
10025
10026         /* should be NUL terminated, but we are paranoid. */
10027         s->buffer[s->seq.len] = 0;
10028
10029         printk(KERN_TRACE "%s", s->buffer);
10030
10031         trace_seq_init(s);
10032 }
10033
10034 void trace_init_global_iter(struct trace_iterator *iter)
10035 {
10036         iter->tr = &global_trace;
10037         iter->trace = iter->tr->current_trace;
10038         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10039         iter->array_buffer = &global_trace.array_buffer;
10040
10041         if (iter->trace && iter->trace->open)
10042                 iter->trace->open(iter);
10043
10044         /* Annotate start of buffers if we had overruns */
10045         if (ring_buffer_overruns(iter->array_buffer->buffer))
10046                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10047
10048         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10049         if (trace_clocks[iter->tr->clock_id].in_ns)
10050                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10051
10052         /* Cannot use kmalloc for iter.temp and iter.fmt */
10053         iter->temp = static_temp_buf;
10054         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10055         iter->fmt = static_fmt_buf;
10056         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10057 }
10058
10059 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10060 {
10061         /* use static because iter can be a bit big for the stack */
10062         static struct trace_iterator iter;
10063         static atomic_t dump_running;
10064         struct trace_array *tr = &global_trace;
10065         unsigned int old_userobj;
10066         unsigned long flags;
10067         int cnt = 0, cpu;
10068
10069         /* Only allow one dump user at a time. */
10070         if (atomic_inc_return(&dump_running) != 1) {
10071                 atomic_dec(&dump_running);
10072                 return;
10073         }
10074
10075         /*
10076          * Always turn off tracing when we dump.
10077          * We don't need to show trace output of what happens
10078          * between multiple crashes.
10079          *
10080          * If the user does a sysrq-z, then they can re-enable
10081          * tracing with echo 1 > tracing_on.
10082          */
10083         tracing_off();
10084
10085         local_irq_save(flags);
10086
10087         /* Simulate the iterator */
10088         trace_init_global_iter(&iter);
10089
10090         for_each_tracing_cpu(cpu) {
10091                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10092         }
10093
10094         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10095
10096         /* don't look at user memory in panic mode */
10097         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10098
10099         switch (oops_dump_mode) {
10100         case DUMP_ALL:
10101                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10102                 break;
10103         case DUMP_ORIG:
10104                 iter.cpu_file = raw_smp_processor_id();
10105                 break;
10106         case DUMP_NONE:
10107                 goto out_enable;
10108         default:
10109                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10110                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10111         }
10112
10113         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10114
10115         /* Did function tracer already get disabled? */
10116         if (ftrace_is_dead()) {
10117                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10118                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10119         }
10120
10121         /*
10122          * We need to stop all tracing on all CPUs to read
10123          * the next buffer. This is a bit expensive, but is
10124          * not done often. We read all that we can,
10125          * and then release the locks again.
10126          */
10127
10128         while (!trace_empty(&iter)) {
10129
10130                 if (!cnt)
10131                         printk(KERN_TRACE "---------------------------------\n");
10132
10133                 cnt++;
10134
10135                 trace_iterator_reset(&iter);
10136                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10137
10138                 if (trace_find_next_entry_inc(&iter) != NULL) {
10139                         int ret;
10140
10141                         ret = print_trace_line(&iter);
10142                         if (ret != TRACE_TYPE_NO_CONSUME)
10143                                 trace_consume(&iter);
10144                 }
10145                 touch_nmi_watchdog();
10146
10147                 trace_printk_seq(&iter.seq);
10148         }
10149
10150         if (!cnt)
10151                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10152         else
10153                 printk(KERN_TRACE "---------------------------------\n");
10154
10155  out_enable:
10156         tr->trace_flags |= old_userobj;
10157
10158         for_each_tracing_cpu(cpu) {
10159                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10160         }
10161         atomic_dec(&dump_running);
10162         local_irq_restore(flags);
10163 }
10164 EXPORT_SYMBOL_GPL(ftrace_dump);
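/*
 * Illustrative sketch (hypothetical module code): since ftrace_dump() is
 * exported, a debugging aid can dump the ring buffer to the console when it
 * hits an unexpected state. Note that ftrace_dump() itself turns tracing off,
 * as described above; it can be re-enabled with "echo 1 > tracing_on".
 *
 *	#include <linux/kernel.h>
 *	#include <linux/ftrace.h>
 *
 *	static void example_report_failure(void)
 *	{
 *		pr_err("example: unexpected state, dumping ftrace buffer\n");
 *		// DUMP_ORIG dumps only the calling CPU's buffer, DUMP_ALL all CPUs
 *		ftrace_dump(DUMP_ORIG);
 *	}
 */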
10165
10166 #define WRITE_BUFSIZE  4096
10167
10168 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10169                                 size_t count, loff_t *ppos,
10170                                 int (*createfn)(const char *))
10171 {
10172         char *kbuf, *buf, *tmp;
10173         int ret = 0;
10174         size_t done = 0;
10175         size_t size;
10176
10177         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10178         if (!kbuf)
10179                 return -ENOMEM;
10180
10181         while (done < count) {
10182                 size = count - done;
10183
10184                 if (size >= WRITE_BUFSIZE)
10185                         size = WRITE_BUFSIZE - 1;
10186
10187                 if (copy_from_user(kbuf, buffer + done, size)) {
10188                         ret = -EFAULT;
10189                         goto out;
10190                 }
10191                 kbuf[size] = '\0';
10192                 buf = kbuf;
10193                 do {
10194                         tmp = strchr(buf, '\n');
10195                         if (tmp) {
10196                                 *tmp = '\0';
10197                                 size = tmp - buf + 1;
10198                         } else {
10199                                 size = strlen(buf);
10200                                 if (done + size < count) {
10201                                         if (buf != kbuf)
10202                                                 break;
10203                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10204                                         pr_warn("Line length is too long: Should be less than %d\n",
10205                                                 WRITE_BUFSIZE - 2);
10206                                         ret = -EINVAL;
10207                                         goto out;
10208                                 }
10209                         }
10210                         done += size;
10211
10212                         /* Remove comments */
10213                         tmp = strchr(buf, '#');
10214
10215                         if (tmp)
10216                                 *tmp = '\0';
10217
10218                         ret = createfn(buf);
10219                         if (ret)
10220                                 goto out;
10221                         buf += size;
10222
10223                 } while (done < count);
10224         }
10225         ret = done;
10226
10227 out:
10228         kfree(kbuf);
10229
10230         return ret;
10231 }
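/*
 * Illustrative sketch (hypothetical names): trace_parse_run_command() is the
 * backend for "one command per line" control files. The caller supplies a
 * createfn that receives each command ('\0' terminated, with any '#' comment
 * stripped) and returns non-zero to abort the rest of the write:
 *
 *	static int example_create_cmd(const char *raw_command)
 *	{
 *		if (!strlen(raw_command))
 *			return 0;	// blank lines are passed through; ignore
 *		pr_info("example cmd: %s\n", raw_command);
 *		return 0;
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buffer,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_create_cmd);
 *	}
 *
 *	static const struct file_operations example_cmd_fops = {
 *		.open	= tracing_open_generic,
 *		.write	= example_write,
 *	};
 */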
10232
10233 #ifdef CONFIG_TRACER_MAX_TRACE
10234 __init static bool tr_needs_alloc_snapshot(const char *name)
10235 {
10236         char *test;
10237         int len = strlen(name);
10238         bool ret;
10239
10240         if (!boot_snapshot_index)
10241                 return false;
10242
10243         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10244             boot_snapshot_info[len] == '\t')
10245                 return true;
10246
10247         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10248         if (!test)
10249                 return false;
10250
10251         sprintf(test, "\t%s\t", name);
10252         ret = strstr(boot_snapshot_info, test) != NULL;
10253         kfree(test);
10254         return ret;
10255 }
10256
10257 __init static void do_allocate_snapshot(const char *name)
10258 {
10259         if (!tr_needs_alloc_snapshot(name))
10260                 return;
10261
10262         /*
10263          * When allocate_snapshot is set, the next call to
10264          * allocate_trace_buffers() (called by trace_array_get_by_name())
10265          * will allocate the snapshot buffer. That will also clear
10266          * this flag.
10267          */
10268         allocate_snapshot = true;
10269 }
10270 #else
10271 static inline void do_allocate_snapshot(const char *name) { }
10272 #endif
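/*
 * For illustration: booting with "ftrace_boot_snapshot=foo" stores "foo\t" in
 * boot_snapshot_info, so tr_needs_alloc_snapshot("foo") above matches it and
 * do_allocate_snapshot() arranges for the "foo" instance to get a snapshot
 * buffer when its trace buffers are allocated.
 */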
10273
10274 __init static void enable_instances(void)
10275 {
10276         struct trace_array *tr;
10277         char *curr_str;
10278         char *str;
10279         char *tok;
10280
10281         /* A tab is always appended */
10282         boot_instance_info[boot_instance_index - 1] = '\0';
10283         str = boot_instance_info;
10284
10285         while ((curr_str = strsep(&str, "\t"))) {
10286
10287                 tok = strsep(&curr_str, ",");
10288
10289                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10290                         do_allocate_snapshot(tok);
10291
10292                 tr = trace_array_get_by_name(tok);
10293                 if (!tr) {
10294                         pr_warn("Failed to create instance buffer %s\n", tok);
10295                         continue;
10296                 }
10297                 /* Allow user space to delete it */
10298                 trace_array_put(tr);
10299
10300                 while ((tok = strsep(&curr_str, ","))) {
10301                         early_enable_events(tr, tok, true);
10302                 }
10303         }
10304 }
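/*
 * For illustration: a command line containing
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * leaves "foo,sched:sched_switch,irq:irq_handler_entry\t" in boot_instance_info.
 * The loop above then creates (or looks up) an instance named "foo" and enables
 * the two listed events in that instance via early_enable_events().
 */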
10305
10306 __init static int tracer_alloc_buffers(void)
10307 {
10308         int ring_buf_size;
10309         int ret = -ENOMEM;
10310
10311
10312         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10313                 pr_warn("Tracing disabled due to lockdown\n");
10314                 return -EPERM;
10315         }
10316
10317         /*
10318          * Make sure we don't accidentally add more trace options
10319          * than we have bits for.
10320          */
10321         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10322
10323         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10324                 goto out;
10325
10326         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10327                 goto out_free_buffer_mask;
10328
10329         /* Only allocate trace_printk buffers if a trace_printk exists */
10330         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10331                 /* Must be called before global_trace.buffer is allocated */
10332                 trace_printk_init_buffers();
10333
10334         /* To save memory, keep the ring buffer size to its minimum */
10335         if (ring_buffer_expanded)
10336                 ring_buf_size = trace_buf_size;
10337         else
10338                 ring_buf_size = 1;
10339
10340         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10341         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10342
10343         raw_spin_lock_init(&global_trace.start_lock);
10344
10345         /*
10346          * The prepare callback allocates some memory for the ring buffer. We
10347          * don't free the buffer if the CPU goes down. If we were to free
10348          * the buffer, then the user would lose any trace that was in the
10349          * buffer. The memory will be removed once the "instance" is removed.
10350          */
10351         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10352                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10353                                       NULL);
10354         if (ret < 0)
10355                 goto out_free_cpumask;
10356         /* Used for event triggers */
10357         ret = -ENOMEM;
10358         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10359         if (!temp_buffer)
10360                 goto out_rm_hp_state;
10361
10362         if (trace_create_savedcmd() < 0)
10363                 goto out_free_temp_buffer;
10364
10365         /* TODO: make the number of buffers hot pluggable with CPUs */
10366         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10367                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10368                 goto out_free_savedcmd;
10369         }
10370
10371         if (global_trace.buffer_disabled)
10372                 tracing_off();
10373
10374         if (trace_boot_clock) {
10375                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10376                 if (ret < 0)
10377                         pr_warn("Trace clock %s not defined, going back to default\n",
10378                                 trace_boot_clock);
10379         }
10380
10381         /*
10382          * register_tracer() might reference current_trace, so it
10383          * needs to be set before we register anything. This is
10384          * just a bootstrap of current_trace anyway.
10385          */
10386         global_trace.current_trace = &nop_trace;
10387
10388         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10389
10390         ftrace_init_global_array_ops(&global_trace);
10391
10392         init_trace_flags_index(&global_trace);
10393
10394         register_tracer(&nop_trace);
10395
10396         /* Function tracing may start here (via kernel command line) */
10397         init_function_trace();
10398
10399         /* All seems OK, enable tracing */
10400         tracing_disabled = 0;
10401
10402         atomic_notifier_chain_register(&panic_notifier_list,
10403                                        &trace_panic_notifier);
10404
10405         register_die_notifier(&trace_die_notifier);
10406
10407         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10408
10409         INIT_LIST_HEAD(&global_trace.systems);
10410         INIT_LIST_HEAD(&global_trace.events);
10411         INIT_LIST_HEAD(&global_trace.hist_vars);
10412         INIT_LIST_HEAD(&global_trace.err_log);
10413         list_add(&global_trace.list, &ftrace_trace_arrays);
10414
10415         apply_trace_boot_options();
10416
10417         register_snapshot_cmd();
10418
10419         test_can_verify();
10420
10421         return 0;
10422
10423 out_free_savedcmd:
10424         free_saved_cmdlines_buffer(savedcmd);
10425 out_free_temp_buffer:
10426         ring_buffer_free(temp_buffer);
10427 out_rm_hp_state:
10428         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10429 out_free_cpumask:
10430         free_cpumask_var(global_trace.tracing_cpumask);
10431 out_free_buffer_mask:
10432         free_cpumask_var(tracing_buffer_mask);
10433 out:
10434         return ret;
10435 }
10436
10437 void __init ftrace_boot_snapshot(void)
10438 {
10439 #ifdef CONFIG_TRACER_MAX_TRACE
10440         struct trace_array *tr;
10441
10442         if (!snapshot_at_boot)
10443                 return;
10444
10445         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10446                 if (!tr->allocated_snapshot)
10447                         continue;
10448
10449                 tracing_snapshot_instance(tr);
10450                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10451         }
10452 #endif
10453 }
10454
10455 void __init early_trace_init(void)
10456 {
10457         if (tracepoint_printk) {
10458                 tracepoint_print_iter =
10459                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10460                 if (MEM_FAIL(!tracepoint_print_iter,
10461                              "Failed to allocate trace iterator\n"))
10462                         tracepoint_printk = 0;
10463                 else
10464                         static_key_enable(&tracepoint_printk_key.key);
10465         }
10466         tracer_alloc_buffers();
10467
10468         init_events();
10469 }
10470
10471 void __init trace_init(void)
10472 {
10473         trace_event_init();
10474
10475         if (boot_instance_index)
10476                 enable_instances();
10477 }
10478
10479 __init static void clear_boot_tracer(void)
10480 {
10481         /*
10482          * The default bootup tracer name points into an init section.
10483          * This function is called at late_initcall time. If the boot
10484          * tracer was never registered, clear the pointer to prevent a
10485          * later registration from accessing the init memory that is
10486          * about to be freed.
10487          */
10488         if (!default_bootup_tracer)
10489                 return;
10490
10491         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10492                default_bootup_tracer);
10493         default_bootup_tracer = NULL;
10494 }
10495
10496 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10497 __init static void tracing_set_default_clock(void)
10498 {
10499         /* sched_clock_stable() is determined in late_initcall */
10500         if (!trace_boot_clock && !sched_clock_stable()) {
10501                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10502                         pr_warn("Can not set tracing clock due to lockdown\n");
10503                         return;
10504                 }
10505
10506                 printk(KERN_WARNING
10507                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10508                        "If you want to keep using the local clock, then add:\n"
10509                        "  \"trace_clock=local\"\n"
10510                        "on the kernel command line\n");
10511                 tracing_set_clock(&global_trace, "global");
10512         }
10513 }
10514 #else
10515 static inline void tracing_set_default_clock(void) { }
10516 #endif
10517
10518 __init static int late_trace_init(void)
10519 {
10520         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10521                 static_key_disable(&tracepoint_printk_key.key);
10522                 tracepoint_printk = 0;
10523         }
10524
10525         tracing_set_default_clock();
10526         clear_boot_tracer();
10527         return 0;
10528 }
10529
10530 late_initcall_sync(late_trace_init);