kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will peek into the ring buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring buffer (such as trace_printk()) could
63  * occur at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" points to NULL, as it must be different
152          * from "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
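
/*
 * Illustrative layout of one saved array, as described above (a sketch,
 * not additional data):
 *
 *   [ head: mod, length=N ][ map 1 ][ map 2 ] ... [ map N ][ tail: next, end=NULL ]
 *                                                              |
 *                                                              +--> next saved array (or NULL)
 */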
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193
194 static int __init set_cmdline_ftrace(char *str)
195 {
196         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197         default_bootup_tracer = bootup_tracer_buf;
198         /* We are using ftrace early, expand it */
199         trace_set_ring_buffer_expanded(NULL);
200         return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206         if (*str++ != '=' || !*str || !strcmp("1", str)) {
207                 ftrace_dump_on_oops = DUMP_ALL;
208                 return 1;
209         }
210
211         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212                 ftrace_dump_on_oops = DUMP_ORIG;
213                 return 1;
214         }
215
216         return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
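
/*
 * Command-line forms accepted by the parser above (summary derived from
 * the code; anything else is rejected):
 *
 *   ftrace_dump_on_oops              -> DUMP_ALL
 *   ftrace_dump_on_oops=1            -> DUMP_ALL
 *   ftrace_dump_on_oops=2            -> DUMP_ORIG
 *   ftrace_dump_on_oops=orig_cpu     -> DUMP_ORIG
 */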
219
220 static int __init stop_trace_on_warning(char *str)
221 {
222         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223                 __disable_trace_on_warning = 1;
224         return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227
228 static int __init boot_alloc_snapshot(char *str)
229 {
230         char *slot = boot_snapshot_info + boot_snapshot_index;
231         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232         int ret;
233
234         if (str[0] == '=') {
235                 str++;
236                 if (strlen(str) >= left)
237                         return -1;
238
239                 ret = snprintf(slot, left, "%s\t", str);
240                 boot_snapshot_index += ret;
241         } else {
242                 allocate_snapshot = true;
243                 /* We also need the main ring buffer expanded */
244                 trace_set_ring_buffer_expanded(NULL);
245         }
246         return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249
250
251 static int __init boot_snapshot(char *str)
252 {
253         snapshot_at_boot = true;
254         boot_alloc_snapshot(str);
255         return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258
259
260 static int __init boot_instance(char *str)
261 {
262         char *slot = boot_instance_info + boot_instance_index;
263         int left = sizeof(boot_instance_info) - boot_instance_index;
264         int ret;
265
266         if (strlen(str) >= left)
267                 return -1;
268
269         ret = snprintf(slot, left, "%s\t", str);
270         boot_instance_index += ret;
271
272         return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275
276
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278
279 static int __init set_trace_boot_options(char *str)
280 {
281         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282         return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288
289 static int __init set_trace_boot_clock(char *str)
290 {
291         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292         trace_boot_clock = trace_boot_clock_buf;
293         return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296
297 static int __init set_tracepoint_printk(char *str)
298 {
299         /* Ignore the "tp_printk_stop_on_boot" param */
300         if (*str == '_')
301                 return 0;
302
303         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304                 tracepoint_printk = 1;
305         return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311         tracepoint_printk_stop_on_boot = true;
312         return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
315
316 unsigned long long ns2usecs(u64 nsec)
317 {
318         nsec += 500;
319         do_div(nsec, 1000);
320         return nsec;
321 }
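
/*
 * Worked example of the rounding above: 1499 ns -> (1499 + 500) / 1000 = 1 us,
 * while 1500 ns -> (1500 + 500) / 1000 = 2 us (round to the nearest microsecond).
 */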
322
323 static void
324 trace_process_export(struct trace_export *export,
325                struct ring_buffer_event *event, int flag)
326 {
327         struct trace_entry *entry;
328         unsigned int size = 0;
329
330         if (export->flags & flag) {
331                 entry = ring_buffer_event_data(event);
332                 size = ring_buffer_event_length(event);
333                 export->write(export, entry, size);
334         }
335 }
336
337 static DEFINE_MUTEX(ftrace_export_lock);
338
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347         if (export->flags & TRACE_EXPORT_FUNCTION)
348                 static_branch_inc(&trace_function_exports_enabled);
349
350         if (export->flags & TRACE_EXPORT_EVENT)
351                 static_branch_inc(&trace_event_exports_enabled);
352
353         if (export->flags & TRACE_EXPORT_MARKER)
354                 static_branch_inc(&trace_marker_exports_enabled);
355 }
356
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359         if (export->flags & TRACE_EXPORT_FUNCTION)
360                 static_branch_dec(&trace_function_exports_enabled);
361
362         if (export->flags & TRACE_EXPORT_EVENT)
363                 static_branch_dec(&trace_event_exports_enabled);
364
365         if (export->flags & TRACE_EXPORT_MARKER)
366                 static_branch_dec(&trace_marker_exports_enabled);
367 }
368
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371         struct trace_export *export;
372
373         preempt_disable_notrace();
374
375         export = rcu_dereference_raw_check(ftrace_exports_list);
376         while (export) {
377                 trace_process_export(export, event, flag);
378                 export = rcu_dereference_raw_check(export->next);
379         }
380
381         preempt_enable_notrace();
382 }
383
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387         rcu_assign_pointer(export->next, *list);
388         /*
389          * We are adding the export to the list, but another
390          * CPU might be walking that list. We need to make sure
391          * the export->next pointer is valid before another CPU sees
392          * the export pointer added to the list.
393          */
394         rcu_assign_pointer(*list, export);
395 }
396
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400         struct trace_export **p;
401
402         for (p = list; *p != NULL; p = &(*p)->next)
403                 if (*p == export)
404                         break;
405
406         if (*p != export)
407                 return -1;
408
409         rcu_assign_pointer(*p, (*p)->next);
410
411         return 0;
412 }
413
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417         ftrace_exports_enable(export);
418
419         add_trace_export(list, export);
420 }
421
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425         int ret;
426
427         ret = rm_trace_export(list, export);
428         ftrace_exports_disable(export);
429
430         return ret;
431 }
432
433 int register_ftrace_export(struct trace_export *export)
434 {
435         if (WARN_ON_ONCE(!export->write))
436                 return -1;
437
438         mutex_lock(&ftrace_export_lock);
439
440         add_ftrace_export(&ftrace_exports_list, export);
441
442         mutex_unlock(&ftrace_export_lock);
443
444         return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450         int ret;
451
452         mutex_lock(&ftrace_export_lock);
453
454         ret = rm_ftrace_export(&ftrace_exports_list, export);
455
456         mutex_unlock(&ftrace_export_lock);
457
458         return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
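
/*
 * A minimal usage sketch of the export API above (illustrative only and
 * not part of this file): a hypothetical consumer that mirrors function
 * trace events somewhere else.  Only register_ftrace_export(),
 * unregister_ftrace_export() and TRACE_EXPORT_FUNCTION from
 * <linux/trace.h> are assumed.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* "entry" is the raw trace entry of "size" bytes; just report it. */
	pr_info("ftrace export: %u bytes\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}
#endif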
461
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS                                             \
464         (FUNCTION_DEFAULT_FLAGS |                                       \
465          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
466          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
467          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
468          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
469          TRACE_ITER_HASH_PTR)
470
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
473                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484         .trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489         if (!tr)
490                 tr = &global_trace;
491         tr->ring_buffer_expanded = true;
492 }
493
494 LIST_HEAD(ftrace_trace_arrays);
495
496 int trace_array_get(struct trace_array *this_tr)
497 {
498         struct trace_array *tr;
499         int ret = -ENODEV;
500
501         mutex_lock(&trace_types_lock);
502         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503                 if (tr == this_tr) {
504                         tr->ref++;
505                         ret = 0;
506                         break;
507                 }
508         }
509         mutex_unlock(&trace_types_lock);
510
511         return ret;
512 }
513
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516         WARN_ON(!this_tr->ref);
517         this_tr->ref--;
518 }
519
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr: pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531         if (!this_tr)
532                 return;
533
534         mutex_lock(&trace_types_lock);
535         __trace_array_put(this_tr);
536         mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542         int ret;
543
544         ret = security_locked_down(LOCKDOWN_TRACEFS);
545         if (ret)
546                 return ret;
547
548         if (tracing_disabled)
549                 return -ENODEV;
550
551         if (tr && trace_array_get(tr) < 0)
552                 return -ENODEV;
553
554         return 0;
555 }
556
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558                               struct trace_buffer *buffer,
559                               struct ring_buffer_event *event)
560 {
561         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562             !filter_match_preds(call->filter, rec)) {
563                 __trace_event_discard_commit(buffer, event);
564                 return 1;
565         }
566
567         return 0;
568 }
569
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580         return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595                        struct trace_pid_list *filtered_no_pids,
596                        struct task_struct *task)
597 {
598         /*
599          * If filtered_no_pids is not empty, and the task's pid is listed
600          * in filtered_no_pids, then return true.
601          * Otherwise, if filtered_pids is empty, that means we can
602          * trace all tasks. If it has content, then only trace pids
603          * within filtered_pids.
604          */
605
606         return (filtered_pids &&
607                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608                 (filtered_no_pids &&
609                  trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
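
/*
 * Illustrative summary of the expression above ("listed" means the pid
 * is set in that list):
 *
 *   filtered_pids   filtered_no_pids   result
 *   NULL            NULL               trace (return false)
 *   set, listed     NULL               trace
 *   set, not listed NULL               ignore (return true)
 *   any             set, listed        ignore
 */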
611
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625                                   struct task_struct *self,
626                                   struct task_struct *task)
627 {
628         if (!pid_list)
629                 return;
630
631         /* For forks, we only add if the forking task is listed */
632         if (self) {
633                 if (!trace_find_filtered_pid(pid_list, self->pid))
634                         return;
635         }
636
637         /* "self" is set for forks, and NULL for exits */
638         if (self)
639                 trace_pid_list_set(pid_list, task->pid);
640         else
641                 trace_pid_list_clear(pid_list, task->pid);
642 }
643
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658         long pid = (unsigned long)v;
659         unsigned int next;
660
661         (*pos)++;
662
663         /* pid already is +1 of the actual previous bit */
664         if (trace_pid_list_next(pid_list, pid, &next) < 0)
665                 return NULL;
666
667         pid = next;
668
669         /* Return pid + 1 to allow zero to be represented */
670         return (void *)(pid + 1);
671 }
672
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686         unsigned long pid;
687         unsigned int first;
688         loff_t l = 0;
689
690         if (trace_pid_list_first(pid_list, &first) < 0)
691                 return NULL;
692
693         pid = first;
694
695         /* Return pid + 1 so that zero can be the exit value */
696         for (pid++; pid && l < *pos;
697              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698                 ;
699         return (void *)pid;
700 }
701
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712         unsigned long pid = (unsigned long)v - 1;
713
714         seq_printf(m, "%lu\n", pid);
715         return 0;
716 }
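
/*
 * A hedged sketch of how the three helpers above are typically wired
 * into seq_file.  The "example_*" names are hypothetical; real users
 * (such as the event pid filters) wrap them in their own callbacks and
 * take the locks they need in ->start/->stop.
 */
#if 0
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
	/* Real users release here whatever ->start acquired. */
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};
#endif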
717
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE            127
720
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722                     struct trace_pid_list **new_pid_list,
723                     const char __user *ubuf, size_t cnt)
724 {
725         struct trace_pid_list *pid_list;
726         struct trace_parser parser;
727         unsigned long val;
728         int nr_pids = 0;
729         ssize_t read = 0;
730         ssize_t ret;
731         loff_t pos;
732         pid_t pid;
733
734         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735                 return -ENOMEM;
736
737         /*
738          * Always create a new list. The write is an all-or-nothing
739          * operation: when the user adds new pids, they go into a
740          * fresh list, and if the operation fails, the current list
741          * is left unmodified.
742          */
743         pid_list = trace_pid_list_alloc();
744         if (!pid_list) {
745                 trace_parser_put(&parser);
746                 return -ENOMEM;
747         }
748
749         if (filtered_pids) {
750                 /* copy the current bits to the new max */
751                 ret = trace_pid_list_first(filtered_pids, &pid);
752                 while (!ret) {
753                         trace_pid_list_set(pid_list, pid);
754                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755                         nr_pids++;
756                 }
757         }
758
759         ret = 0;
760         while (cnt > 0) {
761
762                 pos = 0;
763
764                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
765                 if (ret < 0)
766                         break;
767
768                 read += ret;
769                 ubuf += ret;
770                 cnt -= ret;
771
772                 if (!trace_parser_loaded(&parser))
773                         break;
774
775                 ret = -EINVAL;
776                 if (kstrtoul(parser.buffer, 0, &val))
777                         break;
778
779                 pid = (pid_t)val;
780
781                 if (trace_pid_list_set(pid_list, pid) < 0) {
782                         ret = -1;
783                         break;
784                 }
785                 nr_pids++;
786
787                 trace_parser_clear(&parser);
788                 ret = 0;
789         }
790         trace_parser_put(&parser);
791
792         if (ret < 0) {
793                 trace_pid_list_free(pid_list);
794                 return ret;
795         }
796
797         if (!nr_pids) {
798                 /* Cleared the list of pids */
799                 trace_pid_list_free(pid_list);
800                 pid_list = NULL;
801         }
802
803         *new_pid_list = pid_list;
804
805         return read;
806 }
807
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810         u64 ts;
811
812         /* Early boot up does not have a buffer yet */
813         if (!buf->buffer)
814                 return trace_clock_local();
815
816         ts = ring_buffer_time_stamp(buf->buffer);
817         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819         return ts;
820 }
821
822 u64 ftrace_now(int cpu)
823 {
824         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled", which is used in fast paths such as
832  * the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838         /*
839          * For quick access (irqsoff uses this in fast path), just
840          * return the mirror variable of the state of the ring buffer.
841          * It's a little racy, but we don't really care.
842          */
843         smp_rmb();
844         return !global_trace.buffer_disabled;
845 }
846
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If a dump on oops happens, it is much appreciated not to have
854  * to wait for all that output. In any case, this is configurable
855  * at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer            *trace_types __read_mostly;
863
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868
869 /*
870  * Serialize access to the ring buffer.
871  *
872  * The ring buffer serializes readers, but that is only low-level
873  * protection. The validity of the events (as returned by
874  * ring_buffer_peek(), etc.) is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow other processes
877  * to consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not a reader page) in the ring buffer, and this page will be
880  *      rewritten by the events producer.
881  *   B) the page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to the system.
883  *
884  * These primitives allow multiple processes to access different per-CPU
885  * ring buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
895 static inline void trace_access_lock(int cpu)
896 {
897         if (cpu == RING_BUFFER_ALL_CPUS) {
898                 /* gain it for accessing the whole ring buffer. */
899                 down_write(&all_cpu_access_lock);
900         } else {
901                 /* gain it for accessing a cpu ring buffer. */
902
903                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904                 down_read(&all_cpu_access_lock);
905
906                 /* Secondly block other access to this @cpu ring buffer. */
907                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908         }
909 }
910
911 static inline void trace_access_unlock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 up_write(&all_cpu_access_lock);
915         } else {
916                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917                 up_read(&all_cpu_access_lock);
918         }
919 }
920
921 static inline void trace_access_lock_init(void)
922 {
923         int cpu;
924
925         for_each_possible_cpu(cpu)
926                 mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928
929 #else
930
931 static DEFINE_MUTEX(access_lock);
932
933 static inline void trace_access_lock(int cpu)
934 {
935         (void)cpu;
936         mutex_lock(&access_lock);
937 }
938
939 static inline void trace_access_unlock(int cpu)
940 {
941         (void)cpu;
942         mutex_unlock(&access_lock);
943 }
944
945 static inline void trace_access_lock_init(void)
946 {
947 }
948
949 #endif
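
/*
 * A hedged sketch of the read-side pattern these helpers serialize
 * (illustrative only; the function name is hypothetical):
 */
#if 0
static void example_drain_cpu(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;

	trace_access_lock(cpu);
	/* Consume and discard every pending event on this CPU. */
	while ((event = ring_buffer_consume(buffer, cpu, NULL, NULL)))
		;
	trace_access_unlock(cpu);
}
#endif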
950
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953                                  unsigned int trace_ctx,
954                                  int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956                                       struct trace_buffer *buffer,
957                                       unsigned int trace_ctx,
958                                       int skip, struct pt_regs *regs);
959
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962                                         unsigned int trace_ctx,
963                                         int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967                                       struct trace_buffer *buffer,
968                                       unsigned long trace_ctx,
969                                       int skip, struct pt_regs *regs)
970 {
971 }
972
973 #endif
974
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977                   int type, unsigned int trace_ctx)
978 {
979         struct trace_entry *ent = ring_buffer_event_data(event);
980
981         tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986                           int type,
987                           unsigned long len,
988                           unsigned int trace_ctx)
989 {
990         struct ring_buffer_event *event;
991
992         event = ring_buffer_lock_reserve(buffer, len);
993         if (event != NULL)
994                 trace_event_setup(event, type, trace_ctx);
995
996         return event;
997 }
998
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001         if (tr->array_buffer.buffer)
1002                 ring_buffer_record_on(tr->array_buffer.buffer);
1003         /*
1004          * This flag is looked at when buffers haven't been allocated
1005          * yet, or by some tracers (like irqsoff) that just want to
1006          * know if the ring buffer has been disabled, but can handle
1007          * the race where it gets disabled while a record still occurs.
1008          * As the check is in the fast path of the tracers, it is more
1009          * important to be fast than accurate.
1010          */
1011         tr->buffer_disabled = 0;
1012         /* Make the flag seen by readers */
1013         smp_wmb();
1014 }
1015
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024         tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032         __this_cpu_write(trace_taskinfo_save, true);
1033
1034         /* If this is the temp buffer, we need to commit fully */
1035         if (this_cpu_read(trace_buffered_event) == event) {
1036                 /* Length is in event->array[0] */
1037                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038                 /* Release the temp buffer */
1039                 this_cpu_dec(trace_buffered_event_cnt);
1040                 /* ring_buffer_unlock_commit() enables preemption */
1041                 preempt_enable_notrace();
1042         } else
1043                 ring_buffer_unlock_commit(buffer);
1044 }
1045
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047                        const char *str, int size)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct print_entry *entry;
1052         unsigned int trace_ctx;
1053         int alloc;
1054
1055         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056                 return 0;
1057
1058         if (unlikely(tracing_selftest_running && tr == &global_trace))
1059                 return 0;
1060
1061         if (unlikely(tracing_disabled))
1062                 return 0;
1063
1064         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066         trace_ctx = tracing_gen_ctx();
1067         buffer = tr->array_buffer.buffer;
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070                                             trace_ctx);
1071         if (!event) {
1072                 size = 0;
1073                 goto out;
1074         }
1075
1076         entry = ring_buffer_event_data(event);
1077         entry->ip = ip;
1078
1079         memcpy(&entry->buf, str, size);
1080
1081         /* Add a newline if necessary */
1082         if (entry->buf[size - 1] != '\n') {
1083                 entry->buf[size] = '\n';
1084                 entry->buf[size + 1] = '\0';
1085         } else
1086                 entry->buf[size] = '\0';
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091         ring_buffer_nest_end(buffer);
1092         return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:    The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104         return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
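
/*
 * Usage sketch (illustrative): callers normally use the trace_puts()
 * macro, which picks between __trace_bputs() and __trace_puts() and
 * computes the length itself.
 */
#if 0
	__trace_puts(_THIS_IP_, "example marker\n", 15);
#endif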
1107
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:    The address of the caller
1111  * @str:   The constant string to write to the buffer
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115         struct ring_buffer_event *event;
1116         struct trace_buffer *buffer;
1117         struct bputs_entry *entry;
1118         unsigned int trace_ctx;
1119         int size = sizeof(struct bputs_entry);
1120         int ret = 0;
1121
1122         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123                 return 0;
1124
1125         if (unlikely(tracing_selftest_running || tracing_disabled))
1126                 return 0;
1127
1128         trace_ctx = tracing_gen_ctx();
1129         buffer = global_trace.array_buffer.buffer;
1130
1131         ring_buffer_nest_start(buffer);
1132         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133                                             trace_ctx);
1134         if (!event)
1135                 goto out;
1136
1137         entry = ring_buffer_event_data(event);
1138         entry->ip                       = ip;
1139         entry->str                      = str;
1140
1141         __buffer_unlock_commit(buffer, event);
1142         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144         ret = 1;
1145  out:
1146         ring_buffer_nest_end(buffer);
1147         return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153                                            void *cond_data)
1154 {
1155         struct tracer *tracer = tr->current_trace;
1156         unsigned long flags;
1157
1158         if (in_nmi()) {
1159                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161                 return;
1162         }
1163
1164         if (!tr->allocated_snapshot) {
1165                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167                 tracer_tracing_off(tr);
1168                 return;
1169         }
1170
1171         /* Note, snapshot can not be used when the tracer uses it */
1172         if (tracer->use_max_tr) {
1173                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175                 return;
1176         }
1177
1178         local_irq_save(flags);
1179         update_max_tr(tr, current, smp_processor_id(), cond_data);
1180         local_irq_restore(flags);
1181 }
1182
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185         tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot either with
1196  * tracing_snapshot_alloc(), or manually with:
1197  *     echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, this will stop tracing,
1200  * basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204         struct trace_array *tr = &global_trace;
1205
1206         tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
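
/*
 * A hedged usage sketch (the function names are hypothetical): allocate
 * the snapshot buffer up front from a context that may sleep, then take
 * snapshots whenever some condition of interest triggers.
 */
#if 0
static int __init example_snapshot_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep */
}

static void example_on_condition(void)
{
	tracing_snapshot();	/* swap the live buffer with the snapshot */
}
#endif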
1209
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:         The tracing instance to snapshot
1213  * @cond_data:  The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225         tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:         The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already taken.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245         void *cond_data = NULL;
1246
1247         local_irq_disable();
1248         arch_spin_lock(&tr->max_lock);
1249
1250         if (tr->cond_snapshot)
1251                 cond_data = tr->cond_snapshot->cond_data;
1252
1253         arch_spin_unlock(&tr->max_lock);
1254         local_irq_enable();
1255
1256         return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261                                         struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266         int ret;
1267
1268         if (!tr->allocated_snapshot) {
1269
1270                 /* allocate spare buffer */
1271                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273                 if (ret < 0)
1274                         return ret;
1275
1276                 tr->allocated_snapshot = true;
1277         }
1278
1279         return 0;
1280 }
1281
1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284         /*
1285          * We don't free the ring buffer; instead, we resize it, because
1286          * the max_tr ring buffer has some state (e.g. ring->clock) that
1287          * we want to preserve.
1288          */
1289         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290         set_buffer_entries(&tr->max_buffer, 1);
1291         tracing_reset_online_cpus(&tr->max_buffer);
1292         tr->allocated_snapshot = false;
1293 }
1294
1295 /**
1296  * tracing_alloc_snapshot - allocate snapshot buffer.
1297  *
1298  * This only allocates the snapshot buffer if it isn't already
1299  * allocated - it doesn't also take a snapshot.
1300  *
1301  * This is meant to be used in cases where the snapshot buffer needs
1302  * to be set up for events that can't sleep but need to be able to
1303  * trigger a snapshot.
1304  */
1305 int tracing_alloc_snapshot(void)
1306 {
1307         struct trace_array *tr = &global_trace;
1308         int ret;
1309
1310         ret = tracing_alloc_snapshot_instance(tr);
1311         WARN_ON(ret < 0);
1312
1313         return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316
1317 /**
1318  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319  *
1320  * This is similar to tracing_snapshot(), but it will allocate the
1321  * snapshot buffer if it isn't already allocated. Use this only
1322  * where it is safe to sleep, as the allocation may sleep.
1323  *
1324  * This causes a swap between the snapshot buffer and the current live
1325  * tracing buffer. You can use this to take snapshots of the live
1326  * trace when some condition is triggered, but continue to trace.
1327  */
1328 void tracing_snapshot_alloc(void)
1329 {
1330         int ret;
1331
1332         ret = tracing_alloc_snapshot();
1333         if (ret < 0)
1334                 return;
1335
1336         tracing_snapshot();
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339
1340 /**
1341  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342  * @tr:         The tracing instance
1343  * @cond_data:  User data to associate with the snapshot
1344  * @update:     Implementation of the cond_snapshot update function
1345  *
1346  * Check whether the conditional snapshot for the given instance has
1347  * already been enabled, or if the current tracer is already using a
1348  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349  * save the cond_data and update function inside.
1350  *
1351  * Returns 0 if successful, error otherwise.
1352  */
1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354                                  cond_update_fn_t update)
1355 {
1356         struct cond_snapshot *cond_snapshot;
1357         int ret = 0;
1358
1359         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360         if (!cond_snapshot)
1361                 return -ENOMEM;
1362
1363         cond_snapshot->cond_data = cond_data;
1364         cond_snapshot->update = update;
1365
1366         mutex_lock(&trace_types_lock);
1367
1368         ret = tracing_alloc_snapshot_instance(tr);
1369         if (ret)
1370                 goto fail_unlock;
1371
1372         if (tr->current_trace->use_max_tr) {
1373                 ret = -EBUSY;
1374                 goto fail_unlock;
1375         }
1376
1377         /*
1378          * The cond_snapshot can only change to NULL without the
1379          * trace_types_lock. We don't care if we race with it going
1380          * to NULL, but we want to make sure that it's not set to
1381          * something other than NULL when we get here, which we can
1382          * do safely with only holding the trace_types_lock and not
1383          * having to take the max_lock.
1384          */
1385         if (tr->cond_snapshot) {
1386                 ret = -EBUSY;
1387                 goto fail_unlock;
1388         }
1389
1390         local_irq_disable();
1391         arch_spin_lock(&tr->max_lock);
1392         tr->cond_snapshot = cond_snapshot;
1393         arch_spin_unlock(&tr->max_lock);
1394         local_irq_enable();
1395
1396         mutex_unlock(&trace_types_lock);
1397
1398         return ret;
1399
1400  fail_unlock:
1401         mutex_unlock(&trace_types_lock);
1402         kfree(cond_snapshot);
1403         return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
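
/*
 * A hedged sketch of a cond_snapshot update callback (the "example_*"
 * names and the threshold scheme are hypothetical):
 */
#if 0
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *threshold = cond_data;
	static unsigned long hits;

	/* Let the snapshot happen only after *threshold hits. */
	return ++hits > *threshold;
}

/*
 * Enabled with something like:
 *	tracing_snapshot_cond_enable(tr, &example_threshold, example_cond_update);
 * after which tracing_snapshot_cond(tr, &example_threshold) consults
 * example_cond_update() to decide whether to swap the buffers.
 */
#endif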
1406
1407 /**
1408  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409  * @tr:         The tracing instance
1410  *
1411  * Check whether the conditional snapshot for the given instance is
1412  * enabled; if so, free the cond_snapshot associated with it,
1413  * otherwise return -EINVAL.
1414  *
1415  * Returns 0 if successful, error otherwise.
1416  */
1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419         int ret = 0;
1420
1421         local_irq_disable();
1422         arch_spin_lock(&tr->max_lock);
1423
1424         if (!tr->cond_snapshot)
1425                 ret = -EINVAL;
1426         else {
1427                 kfree(tr->cond_snapshot);
1428                 tr->cond_snapshot = NULL;
1429         }
1430
1431         arch_spin_unlock(&tr->max_lock);
1432         local_irq_enable();
1433
1434         return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437 #else
1438 void tracing_snapshot(void)
1439 {
1440         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441 }
1442 EXPORT_SYMBOL_GPL(tracing_snapshot);
1443 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444 {
1445         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1448 int tracing_alloc_snapshot(void)
1449 {
1450         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451         return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1454 void tracing_snapshot_alloc(void)
1455 {
1456         /* Give warning */
1457         tracing_snapshot();
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462         return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1465 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466 {
1467         return -ENODEV;
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1470 int tracing_snapshot_cond_disable(struct trace_array *tr)
1471 {
1472         return false;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr)       do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477
1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480         if (tr->array_buffer.buffer)
1481                 ring_buffer_record_off(tr->array_buffer.buffer);
1482         /*
1483          * This flag is looked at when buffers haven't been allocated
1484          * yet, or by some tracers (like irqsoff) that just want to
1485          * know if the ring buffer has been disabled, but can handle
1486          * the race where it gets disabled while a record still occurs.
1487          * As the check is in the fast path of the tracers, it is more
1488          * important to be fast than accurate.
1489          */
1490         tr->buffer_disabled = 1;
1491         /* Make the flag seen by readers */
1492         smp_wmb();
1493 }
1494
1495 /**
1496  * tracing_off - turn off tracing buffers
1497  *
1498  * This function stops the tracing buffers from recording data.
1499  * It does not disable any overhead the tracers themselves may
1500  * be causing. This function simply causes all recording to
1501  * the ring buffers to fail.
1502  */
1503 void tracing_off(void)
1504 {
1505         tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508
1509 void disable_trace_on_warning(void)
1510 {
1511         if (__disable_trace_on_warning) {
1512                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513                         "Disabling tracing due to warning\n");
1514                 tracing_off();
1515         }
1516 }
1517
1518 /**
1519  * tracer_tracing_is_on - show the real state of the ring buffer
1520  * @tr: the trace array whose ring buffer state is wanted
1521  *
1522  * Shows the real state of the ring buffer: whether it is enabled or not.
1523  */
1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526         if (tr->array_buffer.buffer)
1527                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528         return !tr->buffer_disabled;
1529 }
1530
1531 /**
1532  * tracing_is_on - show the state of the global trace ring buffer
1533  */
1534 int tracing_is_on(void)
1535 {
1536         return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539
1540 static int __init set_buf_size(char *str)
1541 {
1542         unsigned long buf_size;
1543
1544         if (!str)
1545                 return 0;
1546         buf_size = memparse(str, &str);
1547         /*
1548          * nr_entries can not be zero and the startup
1549          * tests require some buffer space. Therefore
1550          * ensure we have at least 4096 bytes of buffer.
1551          */
1552         trace_buf_size = max(4096UL, buf_size);
1553         return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556
1557 static int __init set_tracing_thresh(char *str)
1558 {
1559         unsigned long threshold;
1560         int ret;
1561
1562         if (!str)
1563                 return 0;
1564         ret = kstrtoul(str, 0, &threshold);
1565         if (ret < 0)
1566                 return 0;
1567         tracing_thresh = threshold * 1000;
1568         return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571
1572 unsigned long nsecs_to_usecs(unsigned long nsecs)
1573 {
1574         return nsecs / 1000;
1575 }
1576
1577 /*
1578  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581  * of strings in the order that the evals (enum) were defined.
1582  */
1583 #undef C
1584 #define C(a, b) b
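
/*
 * Illustrative expansion (a sketch): if TRACE_FLAGS contains, e.g.,
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 * then with "C(a, b) b" the trace_options[] initializer below becomes
 *	"print-parent", "sym-offset",
 * while trace.h re-defines C() differently to build the matching
 * TRACE_ITER_* bits from the same list.
 */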
1585
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588         TRACE_FLAGS
1589         NULL
1590 };
1591
1592 static struct {
1593         u64 (*func)(void);
1594         const char *name;
1595         int in_ns;              /* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597         { trace_clock_local,            "local",        1 },
1598         { trace_clock_global,           "global",       1 },
1599         { trace_clock_counter,          "counter",      0 },
1600         { trace_clock_jiffies,          "uptime",       0 },
1601         { trace_clock,                  "perf",         1 },
1602         { ktime_get_mono_fast_ns,       "mono",         1 },
1603         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1604         { ktime_get_boot_fast_ns,       "boot",         1 },
1605         { ktime_get_tai_fast_ns,        "tai",          1 },
1606         ARCH_TRACE_CLOCKS
1607 };
1608
1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611         if (trace_clocks[tr->clock_id].in_ns)
1612                 return true;
1613
1614         return false;
1615 }
1616
1617 /*
1618  * trace_parser_get_init - gets the buffer for trace parser
1619  */
1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622         memset(parser, 0, sizeof(*parser));
1623
1624         parser->buffer = kmalloc(size, GFP_KERNEL);
1625         if (!parser->buffer)
1626                 return 1;
1627
1628         parser->size = size;
1629         return 0;
1630 }
1631
1632 /*
1633  * trace_parser_put - frees the buffer for trace parser
1634  */
1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637         kfree(parser->buffer);
1638         parser->buffer = NULL;
1639 }
1640
1641 /*
1642  * trace_get_user - reads a user input string separated by space
1643  * (matched by isspace(ch))
1644  *
1645  * For each string found, the 'struct trace_parser' is updated,
1646  * and the function returns.
1647  *
1648  * Returns the number of bytes read.
1649  *
1650  * See kernel/trace/trace.h for 'struct trace_parser' details.
1651  */
1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653         size_t cnt, loff_t *ppos)
1654 {
1655         char ch;
1656         size_t read = 0;
1657         ssize_t ret;
1658
1659         if (!*ppos)
1660                 trace_parser_clear(parser);
1661
1662         ret = get_user(ch, ubuf++);
1663         if (ret)
1664                 goto out;
1665
1666         read++;
1667         cnt--;
1668
1669         /*
1670          * If the parser did not finish with the last write, continue
1671          * reading the user input without skipping spaces.
1672          */
1673         if (!parser->cont) {
1674                 /* skip white space */
1675                 while (cnt && isspace(ch)) {
1676                         ret = get_user(ch, ubuf++);
1677                         if (ret)
1678                                 goto out;
1679                         read++;
1680                         cnt--;
1681                 }
1682
1683                 parser->idx = 0;
1684
1685                 /* only spaces were written */
1686                 if (isspace(ch) || !ch) {
1687                         *ppos += read;
1688                         ret = read;
1689                         goto out;
1690                 }
1691         }
1692
1693         /* read the non-space input */
1694         while (cnt && !isspace(ch) && ch) {
1695                 if (parser->idx < parser->size - 1)
1696                         parser->buffer[parser->idx++] = ch;
1697                 else {
1698                         ret = -EINVAL;
1699                         goto out;
1700                 }
1701                 ret = get_user(ch, ubuf++);
1702                 if (ret)
1703                         goto out;
1704                 read++;
1705                 cnt--;
1706         }
1707
1708         /* We either finished the input or have to wait for another call. */
1709         if (isspace(ch) || !ch) {
1710                 parser->buffer[parser->idx] = 0;
1711                 parser->cont = false;
1712         } else if (parser->idx < parser->size - 1) {
1713                 parser->cont = true;
1714                 parser->buffer[parser->idx++] = ch;
1715                 /* Make sure the parsed string always terminates with '\0'. */
1716                 parser->buffer[parser->idx] = 0;
1717         } else {
1718                 ret = -EINVAL;
1719                 goto out;
1720         }
1721
1722         *ppos += read;
1723         ret = read;
1724
1725 out:
1726         return ret;
1727 }
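/*
 * Illustrative sketch (not part of this file): a typical tracefs
 * ->write() handler tokenizes user input with the parser like this
 * (error handling trimmed; handle_token() is hypothetical):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		handle_token(parser.buffer);	// one space-separated word
 *
 *	trace_parser_put(&parser);
 *	return read;
 */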
1728
1729 /* TODO add a seq_buf_to_buffer() */
1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732         int len;
1733
1734         if (trace_seq_used(s) <= s->readpos)
1735                 return -EBUSY;
1736
1737         len = trace_seq_used(s) - s->readpos;
1738         if (cnt > len)
1739                 cnt = len;
1740         memcpy(buf, s->buffer + s->readpos, cnt);
1741
1742         s->readpos += cnt;
1743         return cnt;
1744 }
1745
1746 unsigned long __read_mostly     tracing_thresh;
1747
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750
1751 #ifdef LATENCY_FS_NOTIFY
1752
1753 static struct workqueue_struct *fsnotify_wq;
1754
1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757         struct trace_array *tr = container_of(work, struct trace_array,
1758                                               fsnotify_work);
1759         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761
1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764         struct trace_array *tr = container_of(iwork, struct trace_array,
1765                                               fsnotify_irqwork);
1766         queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768
1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770                                      struct dentry *d_tracer)
1771 {
1772         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774         tr->d_max_latency = trace_create_file("tracing_max_latency",
1775                                               TRACE_MODE_WRITE,
1776                                               d_tracer, tr,
1777                                               &tracing_max_lat_fops);
1778 }
1779
1780 __init static int latency_fsnotify_init(void)
1781 {
1782         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1784         if (!fsnotify_wq) {
1785                 pr_err("Unable to allocate tr_max_lat_wq\n");
1786                 return -ENOMEM;
1787         }
1788         return 0;
1789 }
1790
1791 late_initcall_sync(latency_fsnotify_init);
1792
1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795         if (!fsnotify_wq)
1796                 return;
1797         /*
1798          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799          * possible that we are called from __schedule() or do_idle(), which
1800          * could cause a deadlock.
1801          */
1802         irq_work_queue(&tr->fsnotify_irqwork);
1803 }
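/*
 * Summary of the deferral above: the max-latency update can happen deep
 * inside the scheduler, so the fsnotify event is bounced into
 * progressively safer contexts before any inode locks are taken:
 *
 *	__update_max_tr()
 *	  -> latency_fsnotify()			// may run from __schedule()/do_idle()
 *	    -> irq_work_queue()			// defer to hard irq context
 *	      -> latency_fsnotify_workfn_irq()
 *	        -> queue_work(fsnotify_wq)	// defer again to process context
 *	          -> latency_fsnotify_workfn()
 *	            -> fsnotify_inode(..., FS_MODIFY)
 */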
1804
1805 #else /* !LATENCY_FS_NOTIFY */
1806
1807 #define trace_create_maxlat_file(tr, d_tracer)                          \
1808         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1809                           d_tracer, tr, &tracing_max_lat_fops)
1810
1811 #endif
1812
1813 /*
1814  * Copy the new maximum trace into the separate maximum-trace
1815  * structure. (This way the maximum trace is permanently saved
1816  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1817  */
1818 static void
1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821         struct array_buffer *trace_buf = &tr->array_buffer;
1822         struct array_buffer *max_buf = &tr->max_buffer;
1823         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825
1826         max_buf->cpu = cpu;
1827         max_buf->time_start = data->preempt_timestamp;
1828
1829         max_data->saved_latency = tr->max_latency;
1830         max_data->critical_start = data->critical_start;
1831         max_data->critical_end = data->critical_end;
1832
1833         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834         max_data->pid = tsk->pid;
1835         /*
1836          * If tsk == current, then use current_uid(), as that does not use
1837          * RCU. The irq tracer can be called out of RCU scope.
1838          */
1839         if (tsk == current)
1840                 max_data->uid = current_uid();
1841         else
1842                 max_data->uid = task_uid(tsk);
1843
1844         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845         max_data->policy = tsk->policy;
1846         max_data->rt_priority = tsk->rt_priority;
1847
1848         /* record this task's comm */
1849         tracing_record_cmdline(tsk);
1850         latency_fsnotify(tr);
1851 }
1852
1853 /**
1854  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855  * @tr: tracer
1856  * @tsk: the task with the latency
1857  * @cpu: The cpu that initiated the trace.
1858  * @cond_data: User data associated with a conditional snapshot
1859  *
1860  * Flip the buffers between the @tr and the max_tr and record information
1861  * about which task was the cause of this latency.
1862  */
1863 void
1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865               void *cond_data)
1866 {
1867         if (tr->stop_count)
1868                 return;
1869
1870         WARN_ON_ONCE(!irqs_disabled());
1871
1872         if (!tr->allocated_snapshot) {
1873                 /* Only the nop tracer should hit this when disabling */
1874                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875                 return;
1876         }
1877
1878         arch_spin_lock(&tr->max_lock);
1879
1880         /* Inherit the recordable setting from array_buffer */
1881         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882                 ring_buffer_record_on(tr->max_buffer.buffer);
1883         else
1884                 ring_buffer_record_off(tr->max_buffer.buffer);
1885
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888                 arch_spin_unlock(&tr->max_lock);
1889                 return;
1890         }
1891 #endif
1892         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893
1894         __update_max_tr(tr, tsk, cpu);
1895
1896         arch_spin_unlock(&tr->max_lock);
1897
1898         /* Any waiters on the old snapshot buffer need to wake up */
1899         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
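/*
 * Note on the swap above: only the buffer pointers are exchanged, no
 * events are copied. What was the live ring buffer becomes
 * tr->max_buffer (readable through the snapshot infrastructure when
 * CONFIG_TRACER_SNAPSHOT is enabled), and the previous max buffer
 * becomes the new, recordable live buffer.
 */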
1901
1902 /**
1903  * update_max_tr_single - only copy one trace over, and reset the rest
1904  * @tr: tracer
1905  * @tsk: task with the latency
1906  * @cpu: the cpu of the buffer to copy.
1907  *
1908  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909  */
1910 void
1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913         int ret;
1914
1915         if (tr->stop_count)
1916                 return;
1917
1918         WARN_ON_ONCE(!irqs_disabled());
1919         if (!tr->allocated_snapshot) {
1920                 /* Only the nop tracer should hit this when disabling */
1921                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922                 return;
1923         }
1924
1925         arch_spin_lock(&tr->max_lock);
1926
1927         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928
1929         if (ret == -EBUSY) {
1930                 /*
1931                  * We failed to swap the buffer because a commit was
1932                  * taking place on this CPU, or because a resize is in
1933                  * progress. We fail to record, but we reset the max
1934                  * trace buffer (no one writes directly to it) and flag
1935                  * that it failed.
1936                  */
1937                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938                         "Failed to swap buffers due to commit or resize in progress\n");
1939         }
1940
1941         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942
1943         __update_max_tr(tr, tsk, cpu);
1944         arch_spin_unlock(&tr->max_lock);
1945 }
1946
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948
1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951         int ret;
1952
1953         /* Iterators are static, they should be filled or empty */
1954         if (trace_buffer_iter(iter, iter->cpu_file))
1955                 return 0;
1956
1957         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960         /*
1961          * Make sure this is still the snapshot buffer, as if a snapshot were
1962          * to happen, this would now be the main buffer.
1963          */
1964         if (iter->snapshot)
1965                 iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967         return ret;
1968 }
1969
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972
1973 struct trace_selftests {
1974         struct list_head                list;
1975         struct tracer                   *type;
1976 };
1977
1978 static LIST_HEAD(postponed_selftests);
1979
1980 static int save_selftest(struct tracer *type)
1981 {
1982         struct trace_selftests *selftest;
1983
1984         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985         if (!selftest)
1986                 return -ENOMEM;
1987
1988         selftest->type = type;
1989         list_add(&selftest->list, &postponed_selftests);
1990         return 0;
1991 }
1992
1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995         struct trace_array *tr = &global_trace;
1996         struct tracer *saved_tracer = tr->current_trace;
1997         int ret;
1998
1999         if (!type->selftest || tracing_selftest_disabled)
2000                 return 0;
2001
2002         /*
2003          * If a tracer registers early in boot up (before scheduling is
2004          * initialized and such), then do not run its selftests yet.
2005          * Instead, run it a little later in the boot process.
2006          */
2007         if (!selftests_can_run)
2008                 return save_selftest(type);
2009
2010         if (!tracing_is_on()) {
2011                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012                         type->name);
2013                 return 0;
2014         }
2015
2016         /*
2017          * Run a selftest on this tracer.
2018          * Here we reset the trace buffer, and set the current
2019          * tracer to be this tracer. The tracer can then run some
2020          * internal tracing to verify that everything is in order.
2021          * If we fail, we do not register this tracer.
2022          */
2023         tracing_reset_online_cpus(&tr->array_buffer);
2024
2025         tr->current_trace = type;
2026
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028         if (type->use_max_tr) {
2029                 /* If we expanded the buffers, make sure the max is expanded too */
2030                 if (tr->ring_buffer_expanded)
2031                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032                                            RING_BUFFER_ALL_CPUS);
2033                 tr->allocated_snapshot = true;
2034         }
2035 #endif
2036
2037         /* the test is responsible for initializing and enabling */
2038         pr_info("Testing tracer %s: ", type->name);
2039         ret = type->selftest(type, tr);
2040         /* the test is responsible for resetting too */
2041         tr->current_trace = saved_tracer;
2042         if (ret) {
2043                 printk(KERN_CONT "FAILED!\n");
2044                 /* Add the warning after printing 'FAILED' */
2045                 WARN_ON(1);
2046                 return -1;
2047         }
2048         /* Only reset on passing, to avoid touching corrupted buffers */
2049         tracing_reset_online_cpus(&tr->array_buffer);
2050
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052         if (type->use_max_tr) {
2053                 tr->allocated_snapshot = false;
2054
2055                 /* Shrink the max buffer again */
2056                 if (tr->ring_buffer_expanded)
2057                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2058                                            RING_BUFFER_ALL_CPUS);
2059         }
2060 #endif
2061
2062         printk(KERN_CONT "PASSED\n");
2063         return 0;
2064 }
2065
2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068         int ret;
2069
2070         /*
2071          * Tests can take a long time, especially if they are run one after the
2072          * other, as does happen during bootup when all the tracers are
2073          * registered. This could cause the soft lockup watchdog to trigger.
2074          */
2075         cond_resched();
2076
2077         tracing_selftest_running = true;
2078         ret = run_tracer_selftest(type);
2079         tracing_selftest_running = false;
2080
2081         return ret;
2082 }
2083
2084 static __init int init_trace_selftests(void)
2085 {
2086         struct trace_selftests *p, *n;
2087         struct tracer *t, **last;
2088         int ret;
2089
2090         selftests_can_run = true;
2091
2092         mutex_lock(&trace_types_lock);
2093
2094         if (list_empty(&postponed_selftests))
2095                 goto out;
2096
2097         pr_info("Running postponed tracer tests:\n");
2098
2099         tracing_selftest_running = true;
2100         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101                 /* This loop can take minutes when sanitizers are enabled, so
2102                  * let's make sure we allow RCU processing.
2103                  */
2104                 cond_resched();
2105                 ret = run_tracer_selftest(p->type);
2106                 /* If the test fails, then warn and remove from available_tracers */
2107                 if (ret < 0) {
2108                         WARN(1, "tracer: %s failed selftest, disabling\n",
2109                              p->type->name);
2110                         last = &trace_types;
2111                         for (t = trace_types; t; t = t->next) {
2112                                 if (t == p->type) {
2113                                         *last = t->next;
2114                                         break;
2115                                 }
2116                                 last = &t->next;
2117                         }
2118                 }
2119                 list_del(&p->list);
2120                 kfree(p);
2121         }
2122         tracing_selftest_running = false;
2123
2124  out:
2125         mutex_unlock(&trace_types_lock);
2126
2127         return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
2131 static inline int run_tracer_selftest(struct tracer *type)
2132 {
2133         return 0;
2134 }
2135 static inline int do_run_tracer_selftest(struct tracer *type)
2136 {
2137         return 0;
2138 }
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
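/*
 * Summary of the selftest flow above (under CONFIG_FTRACE_STARTUP_TEST):
 * tracers that register before selftests_can_run is set are parked on
 * postponed_selftests by save_selftest() and run later from
 * init_trace_selftests() at core_initcall time. A tracer that fails its
 * postponed selftest is unlinked from trace_types, so it never shows up
 * as an available tracer.
 */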
2140
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142
2143 static void __init apply_trace_boot_options(void);
2144
2145 /**
2146  * register_tracer - register a tracer with the ftrace system.
2147  * @type: the plugin for the tracer
2148  *
2149  * Register a new plugin tracer.
2150  */
2151 int __init register_tracer(struct tracer *type)
2152 {
2153         struct tracer *t;
2154         int ret = 0;
2155
2156         if (!type->name) {
2157                 pr_info("Tracer must have a name\n");
2158                 return -1;
2159         }
2160
2161         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163                 return -1;
2164         }
2165
2166         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167                 pr_warn("Can not register tracer %s due to lockdown\n",
2168                            type->name);
2169                 return -EPERM;
2170         }
2171
2172         mutex_lock(&trace_types_lock);
2173
2174         for (t = trace_types; t; t = t->next) {
2175                 if (strcmp(type->name, t->name) == 0) {
2176                         /* already found */
2177                         pr_info("Tracer %s already registered\n",
2178                                 type->name);
2179                         ret = -1;
2180                         goto out;
2181                 }
2182         }
2183
2184         if (!type->set_flag)
2185                 type->set_flag = &dummy_set_flag;
2186         if (!type->flags) {
2187                 /* allocate a dummy tracer_flags */
2188                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189                 if (!type->flags) {
2190                         ret = -ENOMEM;
2191                         goto out;
2192                 }
2193                 type->flags->val = 0;
2194                 type->flags->opts = dummy_tracer_opt;
2195         } else
2196                 if (!type->flags->opts)
2197                         type->flags->opts = dummy_tracer_opt;
2198
2199         /* store the tracer for __set_tracer_option */
2200         type->flags->trace = type;
2201
2202         ret = do_run_tracer_selftest(type);
2203         if (ret < 0)
2204                 goto out;
2205
2206         type->next = trace_types;
2207         trace_types = type;
2208         add_tracer_options(&global_trace, type);
2209
2210  out:
2211         mutex_unlock(&trace_types_lock);
2212
2213         if (ret || !default_bootup_tracer)
2214                 goto out_unlock;
2215
2216         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217                 goto out_unlock;
2218
2219         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220         /* Do we want this tracer to start on bootup? */
2221         tracing_set_tracer(&global_trace, type->name);
2222         default_bootup_tracer = NULL;
2223
2224         apply_trace_boot_options();
2225
2226         /* disable other selftests, since this will break them. */
2227         disable_tracing_selftest("running a tracer");
2228
2229  out_unlock:
2230         return ret;
2231 }
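/*
 * Illustrative sketch (not part of this file): a minimal tracer plugin
 * registers itself from an __init function. The callbacks and the
 * "example" name below are hypothetical; see struct tracer in trace.h
 * for the full set of hooks.
 *
 *	static int example_init(struct trace_array *tr) { return 0; }
 *	static void example_reset(struct trace_array *tr) { }
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_init,
 *		.reset	= example_reset,
 *	};
 *
 *	static __init int example_tracer_initcall(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_initcall);
 */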
2232
2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235         struct trace_buffer *buffer = buf->buffer;
2236
2237         if (!buffer)
2238                 return;
2239
2240         ring_buffer_record_disable(buffer);
2241
2242         /* Make sure all commits have finished */
2243         synchronize_rcu();
2244         ring_buffer_reset_cpu(buffer, cpu);
2245
2246         ring_buffer_record_enable(buffer);
2247 }
2248
2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251         struct trace_buffer *buffer = buf->buffer;
2252
2253         if (!buffer)
2254                 return;
2255
2256         ring_buffer_record_disable(buffer);
2257
2258         /* Make sure all commits have finished */
2259         synchronize_rcu();
2260
2261         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262
2263         ring_buffer_reset_online_cpus(buffer);
2264
2265         ring_buffer_record_enable(buffer);
2266 }
2267
2268 /* Must have trace_types_lock held */
2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271         struct trace_array *tr;
2272
2273         lockdep_assert_held(&trace_types_lock);
2274
2275         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276                 if (!tr->clear_trace)
2277                         continue;
2278                 tr->clear_trace = false;
2279                 tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281                 tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283         }
2284 }
2285
2286 void tracing_reset_all_online_cpus(void)
2287 {
2288         mutex_lock(&trace_types_lock);
2289         tracing_reset_all_online_cpus_unlocked();
2290         mutex_unlock(&trace_types_lock);
2291 }
2292
2293 /*
2294  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295  * is the tgid last observed corresponding to pid=i.
2296  */
2297 static int *tgid_map;
2298
2299 /* The maximum valid index into tgid_map. */
2300 static size_t tgid_map_max;
2301
2302 #define SAVED_CMDLINES_DEFAULT 128
2303 #define NO_CMDLINE_MAP UINT_MAX
2304 /*
2305  * Preemption must be disabled before acquiring trace_cmdline_lock.
2306  * The various trace_arrays' max_lock must be acquired in a context
2307  * where interrupts are disabled.
2308  */
2309 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310 struct saved_cmdlines_buffer {
2311         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312         unsigned *map_cmdline_to_pid;
2313         unsigned cmdline_num;
2314         int cmdline_idx;
2315         char *saved_cmdlines;
2316 };
2317 static struct saved_cmdlines_buffer *savedcmd;
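/*
 * Illustrative example of the mapping above, as used by the helpers
 * below: pids are hashed into map_pid_to_cmdline[] by masking with
 * (PID_MAX_DEFAULT - 1); a pid that has no slot yet claims the next
 * entry of the circular 0..cmdline_num-1 range. The reverse map lets a
 * reader check that a slot still belongs to the pid being looked up:
 *
 *	idx  = savedcmd->map_pid_to_cmdline[pid & (PID_MAX_DEFAULT - 1)];
 *	comm = &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
 *	// only valid if savedcmd->map_cmdline_to_pid[idx] == pid
 */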
2318
2319 static inline char *get_saved_cmdlines(int idx)
2320 {
2321         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2322 }
2323
2324 static inline void set_cmdline(int idx, const char *cmdline)
2325 {
2326         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2327 }
2328
2329 static int allocate_cmdlines_buffer(unsigned int val,
2330                                     struct saved_cmdlines_buffer *s)
2331 {
2332         s->map_cmdline_to_pid = kmalloc_array(val,
2333                                               sizeof(*s->map_cmdline_to_pid),
2334                                               GFP_KERNEL);
2335         if (!s->map_cmdline_to_pid)
2336                 return -ENOMEM;
2337
2338         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2339         if (!s->saved_cmdlines) {
2340                 kfree(s->map_cmdline_to_pid);
2341                 return -ENOMEM;
2342         }
2343
2344         s->cmdline_idx = 0;
2345         s->cmdline_num = val;
2346         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2347                sizeof(s->map_pid_to_cmdline));
2348         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2349                val * sizeof(*s->map_cmdline_to_pid));
2350
2351         return 0;
2352 }
2353
2354 static int trace_create_savedcmd(void)
2355 {
2356         int ret;
2357
2358         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2359         if (!savedcmd)
2360                 return -ENOMEM;
2361
2362         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2363         if (ret < 0) {
2364                 kfree(savedcmd);
2365                 savedcmd = NULL;
2366                 return -ENOMEM;
2367         }
2368
2369         return 0;
2370 }
2371
2372 int is_tracing_stopped(void)
2373 {
2374         return global_trace.stop_count;
2375 }
2376
2377 static void tracing_start_tr(struct trace_array *tr)
2378 {
2379         struct trace_buffer *buffer;
2380         unsigned long flags;
2381
2382         if (tracing_disabled)
2383                 return;
2384
2385         raw_spin_lock_irqsave(&tr->start_lock, flags);
2386         if (--tr->stop_count) {
2387                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2388                         /* Someone screwed up their debugging */
2389                         tr->stop_count = 0;
2390                 }
2391                 goto out;
2392         }
2393
2394         /* Prevent the buffers from switching */
2395         arch_spin_lock(&tr->max_lock);
2396
2397         buffer = tr->array_buffer.buffer;
2398         if (buffer)
2399                 ring_buffer_record_enable(buffer);
2400
2401 #ifdef CONFIG_TRACER_MAX_TRACE
2402         buffer = tr->max_buffer.buffer;
2403         if (buffer)
2404                 ring_buffer_record_enable(buffer);
2405 #endif
2406
2407         arch_spin_unlock(&tr->max_lock);
2408
2409  out:
2410         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2411 }
2412
2413 /**
2414  * tracing_start - quick start of the tracer
2415  *
2416  * If tracing is enabled but was stopped by tracing_stop,
2417  * this will start the tracer back up.
2418  */
2419 void tracing_start(void)
2421 {
2422         return tracing_start_tr(&global_trace);
2423 }
2424
2425 static void tracing_stop_tr(struct trace_array *tr)
2426 {
2427         struct trace_buffer *buffer;
2428         unsigned long flags;
2429
2430         raw_spin_lock_irqsave(&tr->start_lock, flags);
2431         if (tr->stop_count++)
2432                 goto out;
2433
2434         /* Prevent the buffers from switching */
2435         arch_spin_lock(&tr->max_lock);
2436
2437         buffer = tr->array_buffer.buffer;
2438         if (buffer)
2439                 ring_buffer_record_disable(buffer);
2440
2441 #ifdef CONFIG_TRACER_MAX_TRACE
2442         buffer = tr->max_buffer.buffer;
2443         if (buffer)
2444                 ring_buffer_record_disable(buffer);
2445 #endif
2446
2447         arch_spin_unlock(&tr->max_lock);
2448
2449  out:
2450         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2451 }
2452
2453 /**
2454  * tracing_stop - quick stop of the tracer
2455  *
2456  * Light weight way to stop tracing. Use in conjunction with
2457  * tracing_start.
2458  */
2459 void tracing_stop(void)
2460 {
2461         return tracing_stop_tr(&global_trace);
2462 }
2463
2464 static int trace_save_cmdline(struct task_struct *tsk)
2465 {
2466         unsigned tpid, idx;
2467
2468         /* treat recording of idle task as a success */
2469         if (!tsk->pid)
2470                 return 1;
2471
2472         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2473
2474         /*
2475          * It's not the end of the world if we don't get
2476          * the lock, but we also don't want to spin
2477          * nor do we want to disable interrupts,
2478          * so if we miss here, then better luck next time.
2479          *
2480          * This is called from within the scheduler and wakeup paths, so
2481          * interrupts had better be disabled and the run queue lock held.
2482          */
2483         lockdep_assert_preemption_disabled();
2484         if (!arch_spin_trylock(&trace_cmdline_lock))
2485                 return 0;
2486
2487         idx = savedcmd->map_pid_to_cmdline[tpid];
2488         if (idx == NO_CMDLINE_MAP) {
2489                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2490
2491                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2492                 savedcmd->cmdline_idx = idx;
2493         }
2494
2495         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2496         set_cmdline(idx, tsk->comm);
2497
2498         arch_spin_unlock(&trace_cmdline_lock);
2499
2500         return 1;
2501 }
2502
2503 static void __trace_find_cmdline(int pid, char comm[])
2504 {
2505         unsigned map;
2506         int tpid;
2507
2508         if (!pid) {
2509                 strcpy(comm, "<idle>");
2510                 return;
2511         }
2512
2513         if (WARN_ON_ONCE(pid < 0)) {
2514                 strcpy(comm, "<XXX>");
2515                 return;
2516         }
2517
2518         tpid = pid & (PID_MAX_DEFAULT - 1);
2519         map = savedcmd->map_pid_to_cmdline[tpid];
2520         if (map != NO_CMDLINE_MAP) {
2521                 tpid = savedcmd->map_cmdline_to_pid[map];
2522                 if (tpid == pid) {
2523                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2524                         return;
2525                 }
2526         }
2527         strcpy(comm, "<...>");
2528 }
2529
2530 void trace_find_cmdline(int pid, char comm[])
2531 {
2532         preempt_disable();
2533         arch_spin_lock(&trace_cmdline_lock);
2534
2535         __trace_find_cmdline(pid, comm);
2536
2537         arch_spin_unlock(&trace_cmdline_lock);
2538         preempt_enable();
2539 }
2540
2541 static int *trace_find_tgid_ptr(int pid)
2542 {
2543         /*
2544          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2545          * if we observe a non-NULL tgid_map then we also observe the correct
2546          * tgid_map_max.
2547          */
2548         int *map = smp_load_acquire(&tgid_map);
2549
2550         if (unlikely(!map || pid > tgid_map_max))
2551                 return NULL;
2552
2553         return &map[pid];
2554 }
2555
2556 int trace_find_tgid(int pid)
2557 {
2558         int *ptr = trace_find_tgid_ptr(pid);
2559
2560         return ptr ? *ptr : 0;
2561 }
2562
2563 static int trace_save_tgid(struct task_struct *tsk)
2564 {
2565         int *ptr;
2566
2567         /* treat recording of idle task as a success */
2568         if (!tsk->pid)
2569                 return 1;
2570
2571         ptr = trace_find_tgid_ptr(tsk->pid);
2572         if (!ptr)
2573                 return 0;
2574
2575         *ptr = tsk->tgid;
2576         return 1;
2577 }
2578
2579 static bool tracing_record_taskinfo_skip(int flags)
2580 {
2581         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2582                 return true;
2583         if (!__this_cpu_read(trace_taskinfo_save))
2584                 return true;
2585         return false;
2586 }
2587
2588 /**
2589  * tracing_record_taskinfo - record the task info of a task
2590  *
2591  * @task:  task to record
2592  * @flags: TRACE_RECORD_CMDLINE for recording comm
2593  *         TRACE_RECORD_TGID for recording tgid
2594  */
2595 void tracing_record_taskinfo(struct task_struct *task, int flags)
2596 {
2597         bool done;
2598
2599         if (tracing_record_taskinfo_skip(flags))
2600                 return;
2601
2602         /*
2603          * Record as much task information as possible. If some fail, continue
2604          * to try to record the others.
2605          */
2606         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2607         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2608
2609         /* If recording any information failed, retry again soon. */
2610         if (!done)
2611                 return;
2612
2613         __this_cpu_write(trace_taskinfo_save, false);
2614 }
2615
2616 /**
2617  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2618  *
2619  * @prev: previous task during sched_switch
2620  * @next: next task during sched_switch
2621  * @flags: TRACE_RECORD_CMDLINE for recording comm
2622  *         TRACE_RECORD_TGID for recording tgid
2623  */
2624 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2625                                           struct task_struct *next, int flags)
2626 {
2627         bool done;
2628
2629         if (tracing_record_taskinfo_skip(flags))
2630                 return;
2631
2632         /*
2633          * Record as much task information as possible. If some fail, continue
2634          * to try to record the others.
2635          */
2636         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2637         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2638         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2639         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2640
2641         /* If recording any information failed, retry again soon. */
2642         if (!done)
2643                 return;
2644
2645         __this_cpu_write(trace_taskinfo_save, false);
2646 }
2647
2648 /* Helpers to record a specific task information */
2649 void tracing_record_cmdline(struct task_struct *task)
2650 {
2651         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2652 }
2653
2654 void tracing_record_tgid(struct task_struct *task)
2655 {
2656         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2657 }
2658
2659 /*
2660  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2661  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2662  * simplifies those functions and keeps them in sync.
2663  */
2664 enum print_line_t trace_handle_return(struct trace_seq *s)
2665 {
2666         return trace_seq_has_overflowed(s) ?
2667                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2668 }
2669 EXPORT_SYMBOL_GPL(trace_handle_return);
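/*
 * Illustrative sketch (hypothetical event output callback) of the
 * intended use of the helper above:
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags,
 *					       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */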
2670
2671 static unsigned short migration_disable_value(void)
2672 {
2673 #if defined(CONFIG_SMP)
2674         return current->migration_disabled;
2675 #else
2676         return 0;
2677 #endif
2678 }
2679
2680 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2681 {
2682         unsigned int trace_flags = irqs_status;
2683         unsigned int pc;
2684
2685         pc = preempt_count();
2686
2687         if (pc & NMI_MASK)
2688                 trace_flags |= TRACE_FLAG_NMI;
2689         if (pc & HARDIRQ_MASK)
2690                 trace_flags |= TRACE_FLAG_HARDIRQ;
2691         if (in_serving_softirq())
2692                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2693         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2694                 trace_flags |= TRACE_FLAG_BH_OFF;
2695
2696         if (tif_need_resched())
2697                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2698         if (test_preempt_need_resched())
2699                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2700         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2701                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2702 }
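/*
 * Illustrative layout of the trace_ctx value built above (derived only
 * from the expression it returns):
 *
 *	bits  0- 3: preempt_count(), clamped to 0xf
 *	bits  4- 7: migration-disable depth, clamped to 0xf
 *	bits 16-23: TRACE_FLAG_* bits (irq/softirq/NMI/resched state)
 */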
2703
2704 struct ring_buffer_event *
2705 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2706                           int type,
2707                           unsigned long len,
2708                           unsigned int trace_ctx)
2709 {
2710         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2711 }
2712
2713 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2714 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2715 static int trace_buffered_event_ref;
2716
2717 /**
2718  * trace_buffered_event_enable - enable buffering events
2719  *
2720  * When events are being filtered, it is quicker to use a temporary
2721  * buffer to write the event data into if there's a likely chance
2722  * that it will not be committed. Discarding an event from the ring
2723  * buffer is not as fast as committing it, and is much slower than
2724  * copying the data when the event does get committed.
2725  *
2726  * When an event is to be filtered, allocate per-CPU buffers to
2727  * write the event data into. If the event is filtered and discarded,
2728  * it is simply dropped; otherwise, the entire data is committed
2729  * in one shot.
2730  */
2731 void trace_buffered_event_enable(void)
2732 {
2733         struct ring_buffer_event *event;
2734         struct page *page;
2735         int cpu;
2736
2737         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738
2739         if (trace_buffered_event_ref++)
2740                 return;
2741
2742         for_each_tracing_cpu(cpu) {
2743                 page = alloc_pages_node(cpu_to_node(cpu),
2744                                         GFP_KERNEL | __GFP_NORETRY, 0);
2745                 /* This is just an optimization and can handle failures */
2746                 if (!page) {
2747                         pr_err("Failed to allocate event buffer\n");
2748                         break;
2749                 }
2750
2751                 event = page_address(page);
2752                 memset(event, 0, sizeof(*event));
2753
2754                 per_cpu(trace_buffered_event, cpu) = event;
2755
2756                 preempt_disable();
2757                 if (cpu == smp_processor_id() &&
2758                     __this_cpu_read(trace_buffered_event) !=
2759                     per_cpu(trace_buffered_event, cpu))
2760                         WARN_ON_ONCE(1);
2761                 preempt_enable();
2762         }
2763 }
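/*
 * Illustrative pairing (assumed typical usage, mirroring the refcount
 * above): a caller that starts filtering enables buffering and must
 * balance it once the filter is removed:
 *
 *	trace_buffered_event_enable();
 *	// ... events are filtered through the per-CPU buffers ...
 *	trace_buffered_event_disable();
 */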
2764
2765 static void enable_trace_buffered_event(void *data)
2766 {
2767         /* Probably not needed, but do it anyway */
2768         smp_rmb();
2769         this_cpu_dec(trace_buffered_event_cnt);
2770 }
2771
2772 static void disable_trace_buffered_event(void *data)
2773 {
2774         this_cpu_inc(trace_buffered_event_cnt);
2775 }
2776
2777 /**
2778  * trace_buffered_event_disable - disable buffering events
2779  *
2780  * When a filter is removed, it is faster to not use the buffered
2781  * events, and to commit directly into the ring buffer. Free up
2782  * the temp buffers when there are no more users. This requires
2783  * special synchronization with current events.
2784  */
2785 void trace_buffered_event_disable(void)
2786 {
2787         int cpu;
2788
2789         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2790
2791         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2792                 return;
2793
2794         if (--trace_buffered_event_ref)
2795                 return;
2796
2797         /* For each CPU, set the buffer as used. */
2798         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2799                          NULL, true);
2800
2801         /* Wait for all current users to finish */
2802         synchronize_rcu();
2803
2804         for_each_tracing_cpu(cpu) {
2805                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2806                 per_cpu(trace_buffered_event, cpu) = NULL;
2807         }
2808
2809         /*
2810          * Wait for all CPUs that potentially started checking if they can use
2811          * their event buffer only after the previous synchronize_rcu() call and
2812          * they still read a valid pointer from trace_buffered_event. It must be
2813          * ensured they don't see cleared trace_buffered_event_cnt else they
2814          * could wrongly decide to use the pointed-to buffer which is now freed.
2815          */
2816         synchronize_rcu();
2817
2818         /* For each CPU, relinquish the buffer */
2819         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2820                          true);
2821 }
2822
2823 static struct trace_buffer *temp_buffer;
2824
2825 struct ring_buffer_event *
2826 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2827                           struct trace_event_file *trace_file,
2828                           int type, unsigned long len,
2829                           unsigned int trace_ctx)
2830 {
2831         struct ring_buffer_event *entry;
2832         struct trace_array *tr = trace_file->tr;
2833         int val;
2834
2835         *current_rb = tr->array_buffer.buffer;
2836
2837         if (!tr->no_filter_buffering_ref &&
2838             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2839                 preempt_disable_notrace();
2840                 /*
2841                  * Filtering is on, so try to use the per cpu buffer first.
2842                  * This buffer will simulate a ring_buffer_event,
2843                  * where the type_len is zero and the array[0] will
2844                  * hold the full length.
2845                  * (see include/linux/ring_buffer.h for details on
2846                  *  how the ring_buffer_event is structured).
2847                  *
2848                  * Using a temp buffer during filtering and copying it
2849                  * on a matched filter is quicker than writing directly
2850                  * into the ring buffer and then discarding it when
2851                  * it doesn't match. That is because the discard
2852                  * requires several atomic operations to get right.
2853                  * Copying on match and doing nothing on a failed match
2854                  * is still quicker than no copy on match, but having
2855                  * to discard out of the ring buffer on a failed match.
2856                  */
2857                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2858                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2859
2860                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2861
2862                         /*
2863                          * Preemption is disabled, but interrupts and NMIs
2864                          * can still come in now. If that happens after
2865                          * the above increment, then it will have to go
2866                          * back to the old method of allocating the event
2867                          * on the ring buffer, and if the filter fails, it
2868                          * will have to call ring_buffer_discard_commit()
2869                          * to remove it.
2870                          *
2871                          * Need to also check the unlikely case that the
2872                          * length is bigger than the temp buffer size.
2873                          * If that happens, then the reserve is pretty much
2874                          * guaranteed to fail, as the ring buffer currently
2875                          * only allows events less than a page. But that may
2876                          * change in the future, so let the ring buffer reserve
2877                          * handle the failure in that case.
2878                          */
2879                         if (val == 1 && likely(len <= max_len)) {
2880                                 trace_event_setup(entry, type, trace_ctx);
2881                                 entry->array[0] = len;
2882                                 /* Return with preemption disabled */
2883                                 return entry;
2884                         }
2885                         this_cpu_dec(trace_buffered_event_cnt);
2886                 }
2887                 /* __trace_buffer_lock_reserve() disables preemption */
2888                 preempt_enable_notrace();
2889         }
2890
2891         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2892                                             trace_ctx);
2893         /*
2894          * If tracing is off, but we have triggers enabled
2895          * we still need to look at the event data. Use the temp_buffer
2896          * to store the trace event for the trigger to use. It's recursion
2897          * safe and will not be recorded anywhere.
2898          */
2899         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2900                 *current_rb = temp_buffer;
2901                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2902                                                     trace_ctx);
2903         }
2904         return entry;
2905 }
2906 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2907
2908 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2909 static DEFINE_MUTEX(tracepoint_printk_mutex);
2910
2911 static void output_printk(struct trace_event_buffer *fbuffer)
2912 {
2913         struct trace_event_call *event_call;
2914         struct trace_event_file *file;
2915         struct trace_event *event;
2916         unsigned long flags;
2917         struct trace_iterator *iter = tracepoint_print_iter;
2918
2919         /* We should never get here if iter is NULL */
2920         if (WARN_ON_ONCE(!iter))
2921                 return;
2922
2923         event_call = fbuffer->trace_file->event_call;
2924         if (!event_call || !event_call->event.funcs ||
2925             !event_call->event.funcs->trace)
2926                 return;
2927
2928         file = fbuffer->trace_file;
2929         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2930             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2931              !filter_match_preds(file->filter, fbuffer->entry)))
2932                 return;
2933
2934         event = &fbuffer->trace_file->event_call->event;
2935
2936         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2937         trace_seq_init(&iter->seq);
2938         iter->ent = fbuffer->entry;
2939         event_call->event.funcs->trace(iter, 0, event);
2940         trace_seq_putc(&iter->seq, 0);
2941         printk("%s", iter->seq.buffer);
2942
2943         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2944 }
2945
2946 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2947                              void *buffer, size_t *lenp,
2948                              loff_t *ppos)
2949 {
2950         int save_tracepoint_printk;
2951         int ret;
2952
2953         mutex_lock(&tracepoint_printk_mutex);
2954         save_tracepoint_printk = tracepoint_printk;
2955
2956         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2957
2958         /*
2959          * This will force exiting early, as tracepoint_printk
2960          * is always zero when tracepoint_print_iter is not allocated.
2961          */
2962         if (!tracepoint_print_iter)
2963                 tracepoint_printk = 0;
2964
2965         if (save_tracepoint_printk == tracepoint_printk)
2966                 goto out;
2967
2968         if (tracepoint_printk)
2969                 static_key_enable(&tracepoint_printk_key.key);
2970         else
2971                 static_key_disable(&tracepoint_printk_key.key);
2972
2973  out:
2974         mutex_unlock(&tracepoint_printk_mutex);
2975
2976         return ret;
2977 }
2978
2979 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2980 {
2981         enum event_trigger_type tt = ETT_NONE;
2982         struct trace_event_file *file = fbuffer->trace_file;
2983
2984         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2985                         fbuffer->entry, &tt))
2986                 goto discard;
2987
2988         if (static_key_false(&tracepoint_printk_key.key))
2989                 output_printk(fbuffer);
2990
2991         if (static_branch_unlikely(&trace_event_exports_enabled))
2992                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2993
2994         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2995                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2996
2997 discard:
2998         if (tt)
2999                 event_triggers_post_call(file, tt);
3000
3001 }
3002 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3003
3004 /*
3005  * Skip 3:
3006  *
3007  *   trace_buffer_unlock_commit_regs()
3008  *   trace_event_buffer_commit()
3009  *   trace_event_raw_event_xxx()
3010  */
3011 # define STACK_SKIP 3
3012
3013 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3014                                      struct trace_buffer *buffer,
3015                                      struct ring_buffer_event *event,
3016                                      unsigned int trace_ctx,
3017                                      struct pt_regs *regs)
3018 {
3019         __buffer_unlock_commit(buffer, event);
3020
3021         /*
3022          * If regs is not set, then skip the necessary functions.
3023          * Note, we can still get here via blktrace, wakeup tracer
3024          * and mmiotrace, but that's ok if they lose a function or
3025          * two. They are not that meaningful.
3026          */
3027         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3028         ftrace_trace_userstack(tr, buffer, trace_ctx);
3029 }
3030
3031 /*
3032  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3033  */
3034 void
3035 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3036                                    struct ring_buffer_event *event)
3037 {
3038         __buffer_unlock_commit(buffer, event);
3039 }
3040
3041 void
3042 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3043                parent_ip, unsigned int trace_ctx)
3044 {
3045         struct trace_event_call *call = &event_function;
3046         struct trace_buffer *buffer = tr->array_buffer.buffer;
3047         struct ring_buffer_event *event;
3048         struct ftrace_entry *entry;
3049
3050         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3051                                             trace_ctx);
3052         if (!event)
3053                 return;
3054         entry   = ring_buffer_event_data(event);
3055         entry->ip                       = ip;
3056         entry->parent_ip                = parent_ip;
3057
3058         if (!call_filter_check_discard(call, entry, buffer, event)) {
3059                 if (static_branch_unlikely(&trace_function_exports_enabled))
3060                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3061                 __buffer_unlock_commit(buffer, event);
3062         }
3063 }
3064
3065 #ifdef CONFIG_STACKTRACE
3066
3067 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3068 #define FTRACE_KSTACK_NESTING   4
3069
3070 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3071
3072 struct ftrace_stack {
3073         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3074 };
3075
3076
3077 struct ftrace_stacks {
3078         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3079 };
3080
3081 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3082 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3083
3084 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3085                                  unsigned int trace_ctx,
3086                                  int skip, struct pt_regs *regs)
3087 {
3088         struct trace_event_call *call = &event_kernel_stack;
3089         struct ring_buffer_event *event;
3090         unsigned int size, nr_entries;
3091         struct ftrace_stack *fstack;
3092         struct stack_entry *entry;
3093         int stackidx;
3094
3095         /*
3096          * Add one, for this function and the call to stack_trace_save().
3097          * If regs is set, then these functions will not be in the way.
3098          */
3099 #ifndef CONFIG_UNWINDER_ORC
3100         if (!regs)
3101                 skip++;
3102 #endif
3103
3104         preempt_disable_notrace();
3105
3106         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3107
3108         /* This should never happen. If it does, yell once and skip */
3109         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3110                 goto out;
3111
3112         /*
3113          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3114          * interrupt will either see the value pre-increment or
3115          * post-increment. If the interrupt happens pre-increment, it will
3116          * have restored the counter when it returns. We just need a barrier to
3117          * keep gcc from moving things around.
3118          */
3119         barrier();
3120
3121         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3122         size = ARRAY_SIZE(fstack->calls);
3123
3124         if (regs) {
3125                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3126                                                    size, skip);
3127         } else {
3128                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3129         }
3130
3131         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3132                                     struct_size(entry, caller, nr_entries),
3133                                     trace_ctx);
3134         if (!event)
3135                 goto out;
3136         entry = ring_buffer_event_data(event);
3137
3138         entry->size = nr_entries;
3139         memcpy(&entry->caller, fstack->calls,
3140                flex_array_size(entry, caller, nr_entries));
3141
3142         if (!call_filter_check_discard(call, entry, buffer, event))
3143                 __buffer_unlock_commit(buffer, event);
3144
3145  out:
3146         /* Again, don't let gcc optimize things here */
3147         barrier();
3148         __this_cpu_dec(ftrace_stack_reserve);
3149         preempt_enable_notrace();
3150
3151 }
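/*
 * Illustrative example of the nesting handled above: if a stack trace
 * is being recorded in task context when an interrupt fires and its
 * handler records another one, the first user gets stackidx 0 and the
 * nested user gets stackidx 1, so each context writes into its own
 * ftrace_stack slot (up to FTRACE_KSTACK_NESTING contexts: task,
 * softirq, irq, NMI).
 */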
3152
3153 static inline void ftrace_trace_stack(struct trace_array *tr,
3154                                       struct trace_buffer *buffer,
3155                                       unsigned int trace_ctx,
3156                                       int skip, struct pt_regs *regs)
3157 {
3158         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3159                 return;
3160
3161         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3162 }
3163
3164 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3165                    int skip)
3166 {
3167         struct trace_buffer *buffer = tr->array_buffer.buffer;
3168
3169         if (rcu_is_watching()) {
3170                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3171                 return;
3172         }
3173
3174         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3175                 return;
3176
3177         /*
3178          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3179          * but if the above rcu_is_watching() failed, then the NMI
3180          * triggered someplace critical, and ct_irq_enter() should
3181          * not be called from NMI.
3182          */
3183         if (unlikely(in_nmi()))
3184                 return;
3185
3186         ct_irq_enter_irqson();
3187         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3188         ct_irq_exit_irqson();
3189 }
3190
3191 /**
3192  * trace_dump_stack - record a stack back trace in the trace buffer
3193  * @skip: Number of functions to skip (helper handlers)
3194  */
3195 void trace_dump_stack(int skip)
3196 {
3197         if (tracing_disabled || tracing_selftest_running)
3198                 return;
3199
3200 #ifndef CONFIG_UNWINDER_ORC
3201         /* Skip 1 to skip this function. */
3202         skip++;
3203 #endif
3204         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3205                              tracing_gen_ctx(), skip, NULL);
3206 }
3207 EXPORT_SYMBOL_GPL(trace_dump_stack);
3208
3209 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3210 static DEFINE_PER_CPU(int, user_stack_count);
3211
3212 static void
3213 ftrace_trace_userstack(struct trace_array *tr,
3214                        struct trace_buffer *buffer, unsigned int trace_ctx)
3215 {
3216         struct trace_event_call *call = &event_user_stack;
3217         struct ring_buffer_event *event;
3218         struct userstack_entry *entry;
3219
3220         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3221                 return;
3222
3223         /*
3224          * NMIs cannot handle page faults, even with fixups.
3225          * Saving the user stack can (and often does) fault.
3226          */
3227         if (unlikely(in_nmi()))
3228                 return;
3229
3230         /*
3231          * prevent recursion, since the user stack tracing may
3232          * trigger other kernel events.
3233          */
3234         preempt_disable();
3235         if (__this_cpu_read(user_stack_count))
3236                 goto out;
3237
3238         __this_cpu_inc(user_stack_count);
3239
3240         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3241                                             sizeof(*entry), trace_ctx);
3242         if (!event)
3243                 goto out_drop_count;
3244         entry   = ring_buffer_event_data(event);
3245
3246         entry->tgid             = current->tgid;
3247         memset(&entry->caller, 0, sizeof(entry->caller));
3248
3249         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3250         if (!call_filter_check_discard(call, entry, buffer, event))
3251                 __buffer_unlock_commit(buffer, event);
3252
3253  out_drop_count:
3254         __this_cpu_dec(user_stack_count);
3255  out:
3256         preempt_enable();
3257 }
3258 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3259 static void ftrace_trace_userstack(struct trace_array *tr,
3260                                    struct trace_buffer *buffer,
3261                                    unsigned int trace_ctx)
3262 {
3263 }
3264 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3265
3266 #endif /* CONFIG_STACKTRACE */
3267
3268 static inline void
3269 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3270                           unsigned long long delta)
3271 {
3272         entry->bottom_delta_ts = delta & U32_MAX;
3273         entry->top_delta_ts = (delta >> 32);
3274 }
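
/*
 * The 64-bit delta is stored as two 32-bit halves in the entry. A reader
 * reconstructs it with the inverse of the split above (sketch):
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */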
3275
3276 void trace_last_func_repeats(struct trace_array *tr,
3277                              struct trace_func_repeats *last_info,
3278                              unsigned int trace_ctx)
3279 {
3280         struct trace_buffer *buffer = tr->array_buffer.buffer;
3281         struct func_repeats_entry *entry;
3282         struct ring_buffer_event *event;
3283         u64 delta;
3284
3285         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3286                                             sizeof(*entry), trace_ctx);
3287         if (!event)
3288                 return;
3289
3290         delta = ring_buffer_event_time_stamp(buffer, event) -
3291                 last_info->ts_last_call;
3292
3293         entry = ring_buffer_event_data(event);
3294         entry->ip = last_info->ip;
3295         entry->parent_ip = last_info->parent_ip;
3296         entry->count = last_info->count;
3297         func_repeats_set_delta_ts(entry, delta);
3298
3299         __buffer_unlock_commit(buffer, event);
3300 }
3301
3302 /* created for use with alloc_percpu */
3303 struct trace_buffer_struct {
3304         int nesting;
3305         char buffer[4][TRACE_BUF_SIZE];
3306 };
3307
3308 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3309
3310 /*
3311  * This allows for lockless recording.  If we're nested too deeply, then
3312  * this returns NULL.
3313  */
3314 static char *get_trace_buf(void)
3315 {
3316         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3317
3318         if (!trace_percpu_buffer || buffer->nesting >= 4)
3319                 return NULL;
3320
3321         buffer->nesting++;
3322
3323         /* Interrupts must see nesting incremented before we use the buffer */
3324         barrier();
3325         return &buffer->buffer[buffer->nesting - 1][0];
3326 }
3327
3328 static void put_trace_buf(void)
3329 {
3330         /* Don't let the decrement of nesting leak before this */
3331         barrier();
3332         this_cpu_dec(trace_percpu_buffer->nesting);
3333 }
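
/*
 * Sketch of the intended get/put pairing (this mirrors trace_vbprintk()
 * below); preemption must stay disabled so the nesting count and the
 * buffer remain on the same CPU:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */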
3334
3335 static int alloc_percpu_trace_buffer(void)
3336 {
3337         struct trace_buffer_struct __percpu *buffers;
3338
3339         if (trace_percpu_buffer)
3340                 return 0;
3341
3342         buffers = alloc_percpu(struct trace_buffer_struct);
3343         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3344                 return -ENOMEM;
3345
3346         trace_percpu_buffer = buffers;
3347         return 0;
3348 }
3349
3350 static int buffers_allocated;
3351
3352 void trace_printk_init_buffers(void)
3353 {
3354         if (buffers_allocated)
3355                 return;
3356
3357         if (alloc_percpu_trace_buffer())
3358                 return;
3359
3360         /* trace_printk() is for debug use only. Don't use it in production. */
3361
3362         pr_warn("\n");
3363         pr_warn("**********************************************************\n");
3364         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3365         pr_warn("**                                                      **\n");
3366         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3367         pr_warn("**                                                      **\n");
3368         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3369         pr_warn("** unsafe for production use.                           **\n");
3370         pr_warn("**                                                      **\n");
3371         pr_warn("** If you see this message and you are not debugging    **\n");
3372         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3373         pr_warn("**                                                      **\n");
3374         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3375         pr_warn("**********************************************************\n");
3376
3377         /* Expand the buffers to set size */
3378         tracing_update_buffers(&global_trace);
3379
3380         buffers_allocated = 1;
3381
3382         /*
3383          * trace_printk_init_buffers() can be called by modules.
3384          * If that happens, then we need to start cmdline recording
3385          * directly here. If global_trace.array_buffer.buffer is already
3386          * allocated, then this was called by module code.
3387          */
3388         if (global_trace.array_buffer.buffer)
3389                 tracing_start_cmdline_record();
3390 }
3391 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3392
3393 void trace_printk_start_comm(void)
3394 {
3395         /* Start tracing comms if trace printk is set */
3396         if (!buffers_allocated)
3397                 return;
3398         tracing_start_cmdline_record();
3399 }
3400
3401 static void trace_printk_start_stop_comm(int enabled)
3402 {
3403         if (!buffers_allocated)
3404                 return;
3405
3406         if (enabled)
3407                 tracing_start_cmdline_record();
3408         else
3409                 tracing_stop_cmdline_record();
3410 }
3411
3412 /**
3413  * trace_vbprintk - write binary msg to tracing buffer
3414  * @ip:    The address of the caller
3415  * @fmt:   The string format to write to the buffer
3416  * @args:  Arguments for @fmt
3417  */
3418 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3419 {
3420         struct trace_event_call *call = &event_bprint;
3421         struct ring_buffer_event *event;
3422         struct trace_buffer *buffer;
3423         struct trace_array *tr = &global_trace;
3424         struct bprint_entry *entry;
3425         unsigned int trace_ctx;
3426         char *tbuffer;
3427         int len = 0, size;
3428
3429         if (unlikely(tracing_selftest_running || tracing_disabled))
3430                 return 0;
3431
3432         /* Don't pollute graph traces with trace_vbprintk internals */
3433         pause_graph_tracing();
3434
3435         trace_ctx = tracing_gen_ctx();
3436         preempt_disable_notrace();
3437
3438         tbuffer = get_trace_buf();
3439         if (!tbuffer) {
3440                 len = 0;
3441                 goto out_nobuffer;
3442         }
3443
3444         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3445
3446         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3447                 goto out_put;
3448
3449         size = sizeof(*entry) + sizeof(u32) * len;
3450         buffer = tr->array_buffer.buffer;
3451         ring_buffer_nest_start(buffer);
3452         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3453                                             trace_ctx);
3454         if (!event)
3455                 goto out;
3456         entry = ring_buffer_event_data(event);
3457         entry->ip                       = ip;
3458         entry->fmt                      = fmt;
3459
3460         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3461         if (!call_filter_check_discard(call, entry, buffer, event)) {
3462                 __buffer_unlock_commit(buffer, event);
3463                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3464         }
3465
3466 out:
3467         ring_buffer_nest_end(buffer);
3468 out_put:
3469         put_trace_buf();
3470
3471 out_nobuffer:
3472         preempt_enable_notrace();
3473         unpause_graph_tracing();
3474
3475         return len;
3476 }
3477 EXPORT_SYMBOL_GPL(trace_vbprintk);
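
/*
 * Note: callers normally reach trace_vbprintk() via the trace_printk()
 * macro, which (for a compile-time constant format) ends up invoking
 * roughly the following; only the format pointer and the binary
 * arguments are stored, and they are decoded when the buffer is read:
 *
 *	__trace_bprintk(_THIS_IP_, fmt, args...);
 */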
3478
3479 __printf(3, 0)
3480 static int
3481 __trace_array_vprintk(struct trace_buffer *buffer,
3482                       unsigned long ip, const char *fmt, va_list args)
3483 {
3484         struct trace_event_call *call = &event_print;
3485         struct ring_buffer_event *event;
3486         int len = 0, size;
3487         struct print_entry *entry;
3488         unsigned int trace_ctx;
3489         char *tbuffer;
3490
3491         if (tracing_disabled)
3492                 return 0;
3493
3494         /* Don't pollute graph traces with trace_vprintk internals */
3495         pause_graph_tracing();
3496
3497         trace_ctx = tracing_gen_ctx();
3498         preempt_disable_notrace();
3499
3501         tbuffer = get_trace_buf();
3502         if (!tbuffer) {
3503                 len = 0;
3504                 goto out_nobuffer;
3505         }
3506
3507         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3508
3509         size = sizeof(*entry) + len + 1;
3510         ring_buffer_nest_start(buffer);
3511         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3512                                             trace_ctx);
3513         if (!event)
3514                 goto out;
3515         entry = ring_buffer_event_data(event);
3516         entry->ip = ip;
3517
3518         memcpy(&entry->buf, tbuffer, len + 1);
3519         if (!call_filter_check_discard(call, entry, buffer, event)) {
3520                 __buffer_unlock_commit(buffer, event);
3521                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3522         }
3523
3524 out:
3525         ring_buffer_nest_end(buffer);
3526         put_trace_buf();
3527
3528 out_nobuffer:
3529         preempt_enable_notrace();
3530         unpause_graph_tracing();
3531
3532         return len;
3533 }
3534
3535 __printf(3, 0)
3536 int trace_array_vprintk(struct trace_array *tr,
3537                         unsigned long ip, const char *fmt, va_list args)
3538 {
3539         if (tracing_selftest_running && tr == &global_trace)
3540                 return 0;
3541
3542         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3543 }
3544
3545 /**
3546  * trace_array_printk - Print a message to a specific instance
3547  * @tr: The instance trace_array descriptor
3548  * @ip: The instruction pointer that this is called from.
3549  * @fmt: The format to print (printf format)
3550  *
3551  * If a subsystem sets up its own instance, it has the right to
3552  * printk strings into its tracing instance buffer using this
3553  * function. Note, this function will not write into the top level
3554  * buffer (use trace_printk() for that), as the top level buffer
3555  * should only contain events that can be individually disabled.
3556  * trace_printk() is only for debugging a kernel and should never
3557  * be incorporated into normal use.
3558  *
3559  * trace_array_printk() can be used instead, as it will not add noise
3560  * to the top level tracing buffer.
3561  *
3562  * Note, trace_array_init_printk() must be called on @tr before this
3563  * can be used.
3564  */
3565 __printf(3, 0)
3566 int trace_array_printk(struct trace_array *tr,
3567                        unsigned long ip, const char *fmt, ...)
3568 {
3569         int ret;
3570         va_list ap;
3571
3572         if (!tr)
3573                 return -ENOENT;
3574
3575         /* This is only allowed for created instances */
3576         if (tr == &global_trace)
3577                 return 0;
3578
3579         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3580                 return 0;
3581
3582         va_start(ap, fmt);
3583         ret = trace_array_vprintk(tr, ip, fmt, ap);
3584         va_end(ap);
3585         return ret;
3586 }
3587 EXPORT_SYMBOL_GPL(trace_array_printk);
3588
3589 /**
3590  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3591  * @tr: The trace array to initialize the buffers for
3592  *
3593  * As trace_array_printk() only writes into instances, calls to it are
3594  * OK to have in the kernel (unlike trace_printk()). This needs to be
3595  * called before trace_array_printk() can be used on a trace_array.
3596  */
3597 int trace_array_init_printk(struct trace_array *tr)
3598 {
3599         if (!tr)
3600                 return -ENOENT;
3601
3602         /* This is only allowed for created instances */
3603         if (tr == &global_trace)
3604                 return -EINVAL;
3605
3606         return alloc_percpu_trace_buffer();
3607 }
3608 EXPORT_SYMBOL_GPL(trace_array_init_printk);
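
/*
 * Minimal usage sketch (illustrative only; "my_tr" and the message are
 * hypothetical, and the instance is assumed to have been created by the
 * subsystem elsewhere):
 *
 *	if (my_tr && !trace_array_init_printk(my_tr))
 *		trace_array_printk(my_tr, _THIS_IP_, "widget reset: %d\n", err);
 */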
3609
3610 __printf(3, 4)
3611 int trace_array_printk_buf(struct trace_buffer *buffer,
3612                            unsigned long ip, const char *fmt, ...)
3613 {
3614         int ret;
3615         va_list ap;
3616
3617         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3618                 return 0;
3619
3620         va_start(ap, fmt);
3621         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3622         va_end(ap);
3623         return ret;
3624 }
3625
3626 __printf(2, 0)
3627 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3628 {
3629         return trace_array_vprintk(&global_trace, ip, fmt, args);
3630 }
3631 EXPORT_SYMBOL_GPL(trace_vprintk);
3632
3633 static void trace_iterator_increment(struct trace_iterator *iter)
3634 {
3635         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3636
3637         iter->idx++;
3638         if (buf_iter)
3639                 ring_buffer_iter_advance(buf_iter);
3640 }
3641
3642 static struct trace_entry *
3643 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3644                 unsigned long *lost_events)
3645 {
3646         struct ring_buffer_event *event;
3647         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3648
3649         if (buf_iter) {
3650                 event = ring_buffer_iter_peek(buf_iter, ts);
3651                 if (lost_events)
3652                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3653                                 (unsigned long)-1 : 0;
3654         } else {
3655                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3656                                          lost_events);
3657         }
3658
3659         if (event) {
3660                 iter->ent_size = ring_buffer_event_length(event);
3661                 return ring_buffer_event_data(event);
3662         }
3663         iter->ent_size = 0;
3664         return NULL;
3665 }
3666
3667 static struct trace_entry *
3668 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3669                   unsigned long *missing_events, u64 *ent_ts)
3670 {
3671         struct trace_buffer *buffer = iter->array_buffer->buffer;
3672         struct trace_entry *ent, *next = NULL;
3673         unsigned long lost_events = 0, next_lost = 0;
3674         int cpu_file = iter->cpu_file;
3675         u64 next_ts = 0, ts;
3676         int next_cpu = -1;
3677         int next_size = 0;
3678         int cpu;
3679
3680         /*
3681          * If we are in a per_cpu trace file, don't bother iterating over
3682          * all CPUs; just peek at that CPU directly.
3683          */
3684         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3685                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3686                         return NULL;
3687                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3688                 if (ent_cpu)
3689                         *ent_cpu = cpu_file;
3690
3691                 return ent;
3692         }
3693
3694         for_each_tracing_cpu(cpu) {
3695
3696                 if (ring_buffer_empty_cpu(buffer, cpu))
3697                         continue;
3698
3699                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3700
3701                 /*
3702                  * Pick the entry with the smallest timestamp:
3703                  */
3704                 if (ent && (!next || ts < next_ts)) {
3705                         next = ent;
3706                         next_cpu = cpu;
3707                         next_ts = ts;
3708                         next_lost = lost_events;
3709                         next_size = iter->ent_size;
3710                 }
3711         }
3712
3713         iter->ent_size = next_size;
3714
3715         if (ent_cpu)
3716                 *ent_cpu = next_cpu;
3717
3718         if (ent_ts)
3719                 *ent_ts = next_ts;
3720
3721         if (missing_events)
3722                 *missing_events = next_lost;
3723
3724         return next;
3725 }
3726
3727 #define STATIC_FMT_BUF_SIZE     128
3728 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3729
3730 char *trace_iter_expand_format(struct trace_iterator *iter)
3731 {
3732         char *tmp;
3733
3734         /*
3735          * iter->tr is NULL when used with tp_printk, which means this
3736          * can get called where it is not safe to call krealloc().
3737          */
3738         if (!iter->tr || iter->fmt == static_fmt_buf)
3739                 return NULL;
3740
3741         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3742                        GFP_KERNEL);
3743         if (tmp) {
3744                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3745                 iter->fmt = tmp;
3746         }
3747
3748         return tmp;
3749 }
3750
3751 /* Returns true if the string is safe to dereference from an event */
3752 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3753                            bool star, int len)
3754 {
3755         unsigned long addr = (unsigned long)str;
3756         struct trace_event *trace_event;
3757         struct trace_event_call *event;
3758
3759         /* Ignore strings with no length */
3760         if (star && !len)
3761                 return true;
3762
3763         /* OK if part of the event data */
3764         if ((addr >= (unsigned long)iter->ent) &&
3765             (addr < (unsigned long)iter->ent + iter->ent_size))
3766                 return true;
3767
3768         /* OK if part of the temp seq buffer */
3769         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3770             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3771                 return true;
3772
3773         /* Core rodata can not be freed */
3774         if (is_kernel_rodata(addr))
3775                 return true;
3776
3777         if (trace_is_tracepoint_string(str))
3778                 return true;
3779
3780         /*
3781          * Now this could be a module event, referencing core module
3782          * data, which is OK.
3783          */
3784         if (!iter->ent)
3785                 return false;
3786
3787         trace_event = ftrace_find_event(iter->ent->type);
3788         if (!trace_event)
3789                 return false;
3790
3791         event = container_of(trace_event, struct trace_event_call, event);
3792         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3793                 return false;
3794
3795         /* Would rather have rodata, but this will suffice */
3796         if (within_module_core(addr, event->module))
3797                 return true;
3798
3799         return false;
3800 }
3801
3802 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3803
3804 static int test_can_verify_check(const char *fmt, ...)
3805 {
3806         char buf[16];
3807         va_list ap;
3808         int ret;
3809
3810         /*
3811          * The verifier depends on vsnprintf() modifying the va_list
3812          * passed to it, i.e. on the va_list being passed by reference.
3813          * Some architectures (like x86_32) pass it by value, which means
3814          * that vsnprintf() does not modify the caller's va_list, and the
3815          * verifier would then need to understand all the values that
3816          * vsnprintf() can consume. If the va_list is passed by value,
3817          * the verifier is disabled.
3818          */
3819         va_start(ap, fmt);
3820         vsnprintf(buf, 16, "%d", ap);
3821         ret = va_arg(ap, int);
3822         va_end(ap);
3823
3824         return ret;
3825 }
3826
3827 static void test_can_verify(void)
3828 {
3829         if (!test_can_verify_check("%d %d", 0, 1)) {
3830                 pr_info("trace event string verifier disabled\n");
3831                 static_branch_inc(&trace_no_verify);
3832         }
3833 }
3834
3835 /**
3836  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3837  * @iter: The iterator that holds the seq buffer and the event being printed
3838  * @fmt: The format used to print the event
3839  * @ap: The va_list holding the data to print from @fmt.
3840  *
3841  * This writes the data into the @iter->seq buffer using the data from
3842  * @fmt and @ap. If the format has a %s, then the source of the string
3843  * is examined to make sure it is safe to print, otherwise it will
3844  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3845  * pointer.
3846  */
3847 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3848                          va_list ap)
3849 {
3850         const char *p = fmt;
3851         const char *str;
3852         int i, j;
3853
3854         if (WARN_ON_ONCE(!fmt))
3855                 return;
3856
3857         if (static_branch_unlikely(&trace_no_verify))
3858                 goto print;
3859
3860         /* Don't bother checking when doing an ftrace_dump() */
3861         if (iter->fmt == static_fmt_buf)
3862                 goto print;
3863
3864         while (*p) {
3865                 bool star = false;
3866                 int len = 0;
3867
3868                 j = 0;
3869
3870                 /* We only care about %s and variants */
3871                 for (i = 0; p[i]; i++) {
3872                         if (i + 1 >= iter->fmt_size) {
3873                                 /*
3874                                  * If we can't expand the copy buffer,
3875                                  * just print it.
3876                                  */
3877                                 if (!trace_iter_expand_format(iter))
3878                                         goto print;
3879                         }
3880
3881                         if (p[i] == '\\' && p[i+1]) {
3882                                 i++;
3883                                 continue;
3884                         }
3885                         if (p[i] == '%') {
3886                                 /* Need to test cases like %08.*s */
3887                                 for (j = 1; p[i+j]; j++) {
3888                                         if (isdigit(p[i+j]) ||
3889                                             p[i+j] == '.')
3890                                                 continue;
3891                                         if (p[i+j] == '*') {
3892                                                 star = true;
3893                                                 continue;
3894                                         }
3895                                         break;
3896                                 }
3897                                 if (p[i+j] == 's')
3898                                         break;
3899                                 star = false;
3900                         }
3901                         j = 0;
3902                 }
3903                 /* If no %s found then just print normally */
3904                 if (!p[i])
3905                         break;
3906
3907                 /* Copy up to the %s, and print that */
3908                 strncpy(iter->fmt, p, i);
3909                 iter->fmt[i] = '\0';
3910                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3911
3912                 /*
3913                  * If iter->seq is full, the above call no longer guarantees
3914                  * that ap is in sync with fmt processing, and further calls
3915                  * to va_arg() can return wrong positional arguments.
3916                  *
3917                  * Ensure that ap is no longer used in this case.
3918                  */
3919                 if (iter->seq.full) {
3920                         p = "";
3921                         break;
3922                 }
3923
3924                 if (star)
3925                         len = va_arg(ap, int);
3926
3927                 /* The ap now points to the string data of the %s */
3928                 str = va_arg(ap, const char *);
3929
3930                 /*
3931                  * If you hit this warning, it is likely that the
3932                  * trace event in question used %s on a string that
3933                  * was saved at the time of the event, but may not be
3934                  * around when the trace is read. Use __string(),
3935                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3936                  * instead. See samples/trace_events/trace-events-sample.h
3937                  * for reference.
3938                  */
3939                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3940                               "fmt: '%s' current_buffer: '%s'",
3941                               fmt, seq_buf_str(&iter->seq.seq))) {
3942                         int ret;
3943
3944                         /* Try to safely read the string */
3945                         if (star) {
3946                                 if (len + 1 > iter->fmt_size)
3947                                         len = iter->fmt_size - 1;
3948                                 if (len < 0)
3949                                         len = 0;
3950                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3951                                 iter->fmt[len] = 0;
3952                                 star = false;
3953                         } else {
3954                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3955                                                                   iter->fmt_size);
3956                         }
3957                         if (ret < 0)
3958                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3959                         else
3960                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3961                                                  str, iter->fmt);
3962                         str = "[UNSAFE-MEMORY]";
3963                         strcpy(iter->fmt, "%s");
3964                 } else {
3965                         strncpy(iter->fmt, p + i, j + 1);
3966                         iter->fmt[j+1] = '\0';
3967                 }
3968                 if (star)
3969                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3970                 else
3971                         trace_seq_printf(&iter->seq, iter->fmt, str);
3972
3973                 p += i + j + 1;
3974         }
3975  print:
3976         if (*p)
3977                 trace_seq_vprintf(&iter->seq, p, ap);
3978 }
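
/*
 * For reference, a TRACE_EVENT() that copies its string into the event
 * itself (and therefore never trips the check above) looks roughly like
 * the sample in samples/trace_events/trace-events-sample.h:
 *
 *	TRACE_EVENT(foo_bar,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name, name);),
 *		TP_printk("%s", __get_str(name))
 *	);
 */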
3979
3980 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3981 {
3982         const char *p, *new_fmt;
3983         char *q;
3984
3985         if (WARN_ON_ONCE(!fmt))
3986                 return fmt;
3987
3988         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3989                 return fmt;
3990
3991         p = fmt;
3992         new_fmt = q = iter->fmt;
3993         while (*p) {
3994                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3995                         if (!trace_iter_expand_format(iter))
3996                                 return fmt;
3997
3998                         q += iter->fmt - new_fmt;
3999                         new_fmt = iter->fmt;
4000                 }
4001
4002                 *q++ = *p++;
4003
4004                 /* Replace %p with %px */
4005                 if (p[-1] == '%') {
4006                         if (p[0] == '%') {
4007                                 *q++ = *p++;
4008                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4009                                 *q++ = *p++;
4010                                 *q++ = 'x';
4011                         }
4012                 }
4013         }
4014         *q = '\0';
4015
4016         return new_fmt;
4017 }
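
/*
 * For example (following the loop above): "ptr=%p val=%d" is rewritten
 * to "ptr=%px val=%d", while extended specifiers such as "%pS" and a
 * literal "%%p" are left untouched.
 */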
4018
4019 #define STATIC_TEMP_BUF_SIZE    128
4020 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4021
4022 /* Find the next real entry, without updating the iterator itself */
4023 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4024                                           int *ent_cpu, u64 *ent_ts)
4025 {
4026         /* __find_next_entry will reset ent_size */
4027         int ent_size = iter->ent_size;
4028         struct trace_entry *entry;
4029
4030         /*
4031          * If called from ftrace_dump(), then the iter->temp buffer
4032          * will be the static_temp_buf and not created from kmalloc.
4033          * If the entry size is greater than the buffer, we cannot
4034          * save it. Just return NULL in that case. This is only
4035          * used to add markers when two consecutive events' time
4036          * stamps have a large delta. See trace_print_lat_context().
4037          */
4038         if (iter->temp == static_temp_buf &&
4039             STATIC_TEMP_BUF_SIZE < ent_size)
4040                 return NULL;
4041
4042         /*
4043          * The __find_next_entry() may call peek_next_entry(), which may
4044          * call ring_buffer_peek() that may make the contents of iter->ent
4045          * undefined. Need to copy iter->ent now.
4046          */
4047         if (iter->ent && iter->ent != iter->temp) {
4048                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4049                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4050                         void *temp;
4051                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4052                         if (!temp)
4053                                 return NULL;
4054                         kfree(iter->temp);
4055                         iter->temp = temp;
4056                         iter->temp_size = iter->ent_size;
4057                 }
4058                 memcpy(iter->temp, iter->ent, iter->ent_size);
4059                 iter->ent = iter->temp;
4060         }
4061         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4062         /* Put back the original ent_size */
4063         iter->ent_size = ent_size;
4064
4065         return entry;
4066 }
4067
4068 /* Find the next real entry, and increment the iterator to the next entry */
4069 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4070 {
4071         iter->ent = __find_next_entry(iter, &iter->cpu,
4072                                       &iter->lost_events, &iter->ts);
4073
4074         if (iter->ent)
4075                 trace_iterator_increment(iter);
4076
4077         return iter->ent ? iter : NULL;
4078 }
4079
4080 static void trace_consume(struct trace_iterator *iter)
4081 {
4082         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4083                             &iter->lost_events);
4084 }
4085
4086 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4087 {
4088         struct trace_iterator *iter = m->private;
4089         int i = (int)*pos;
4090         void *ent;
4091
4092         WARN_ON_ONCE(iter->leftover);
4093
4094         (*pos)++;
4095
4096         /* can't go backwards */
4097         if (iter->idx > i)
4098                 return NULL;
4099
4100         if (iter->idx < 0)
4101                 ent = trace_find_next_entry_inc(iter);
4102         else
4103                 ent = iter;
4104
4105         while (ent && iter->idx < i)
4106                 ent = trace_find_next_entry_inc(iter);
4107
4108         iter->pos = *pos;
4109
4110         return ent;
4111 }
4112
4113 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4114 {
4115         struct ring_buffer_iter *buf_iter;
4116         unsigned long entries = 0;
4117         u64 ts;
4118
4119         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4120
4121         buf_iter = trace_buffer_iter(iter, cpu);
4122         if (!buf_iter)
4123                 return;
4124
4125         ring_buffer_iter_reset(buf_iter);
4126
4127         /*
4128          * With the max latency tracers, we could have the case
4129          * that a reset never took place on a CPU. This is evident
4130          * from the timestamp being before the start of the buffer.
4131          */
4132         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4133                 if (ts >= iter->array_buffer->time_start)
4134                         break;
4135                 entries++;
4136                 ring_buffer_iter_advance(buf_iter);
4137         }
4138
4139         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4140 }
4141
4142 /*
4143  * The current tracer is copied to avoid taking a global lock
4144  * all around.
4145  */
4146 static void *s_start(struct seq_file *m, loff_t *pos)
4147 {
4148         struct trace_iterator *iter = m->private;
4149         struct trace_array *tr = iter->tr;
4150         int cpu_file = iter->cpu_file;
4151         void *p = NULL;
4152         loff_t l = 0;
4153         int cpu;
4154
4155         mutex_lock(&trace_types_lock);
4156         if (unlikely(tr->current_trace != iter->trace)) {
4157                 /* Close iter->trace before switching to the new current tracer */
4158                 if (iter->trace->close)
4159                         iter->trace->close(iter);
4160                 iter->trace = tr->current_trace;
4161                 /* Reopen the new current tracer */
4162                 if (iter->trace->open)
4163                         iter->trace->open(iter);
4164         }
4165         mutex_unlock(&trace_types_lock);
4166
4167 #ifdef CONFIG_TRACER_MAX_TRACE
4168         if (iter->snapshot && iter->trace->use_max_tr)
4169                 return ERR_PTR(-EBUSY);
4170 #endif
4171
4172         if (*pos != iter->pos) {
4173                 iter->ent = NULL;
4174                 iter->cpu = 0;
4175                 iter->idx = -1;
4176
4177                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4178                         for_each_tracing_cpu(cpu)
4179                                 tracing_iter_reset(iter, cpu);
4180                 } else
4181                         tracing_iter_reset(iter, cpu_file);
4182
4183                 iter->leftover = 0;
4184                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4185                         ;
4186
4187         } else {
4188                 /*
4189                  * If we overflowed the seq_file before, then we want
4190                  * to just reuse the trace_seq buffer again.
4191                  */
4192                 if (iter->leftover)
4193                         p = iter;
4194                 else {
4195                         l = *pos - 1;
4196                         p = s_next(m, p, &l);
4197                 }
4198         }
4199
4200         trace_event_read_lock();
4201         trace_access_lock(cpu_file);
4202         return p;
4203 }
4204
4205 static void s_stop(struct seq_file *m, void *p)
4206 {
4207         struct trace_iterator *iter = m->private;
4208
4209 #ifdef CONFIG_TRACER_MAX_TRACE
4210         if (iter->snapshot && iter->trace->use_max_tr)
4211                 return;
4212 #endif
4213
4214         trace_access_unlock(iter->cpu_file);
4215         trace_event_read_unlock();
4216 }
4217
4218 static void
4219 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4220                       unsigned long *entries, int cpu)
4221 {
4222         unsigned long count;
4223
4224         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4225         /*
4226          * If this buffer has skipped entries, then we hold all
4227          * entries for the trace and we need to ignore the
4228          * ones before the time stamp.
4229          */
4230         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4231                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4232                 /* total is the same as the entries */
4233                 *total = count;
4234         } else
4235                 *total = count +
4236                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4237         *entries = count;
4238 }
4239
4240 static void
4241 get_total_entries(struct array_buffer *buf,
4242                   unsigned long *total, unsigned long *entries)
4243 {
4244         unsigned long t, e;
4245         int cpu;
4246
4247         *total = 0;
4248         *entries = 0;
4249
4250         for_each_tracing_cpu(cpu) {
4251                 get_total_entries_cpu(buf, &t, &e, cpu);
4252                 *total += t;
4253                 *entries += e;
4254         }
4255 }
4256
4257 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4258 {
4259         unsigned long total, entries;
4260
4261         if (!tr)
4262                 tr = &global_trace;
4263
4264         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4265
4266         return entries;
4267 }
4268
4269 unsigned long trace_total_entries(struct trace_array *tr)
4270 {
4271         unsigned long total, entries;
4272
4273         if (!tr)
4274                 tr = &global_trace;
4275
4276         get_total_entries(&tr->array_buffer, &total, &entries);
4277
4278         return entries;
4279 }
4280
4281 static void print_lat_help_header(struct seq_file *m)
4282 {
4283         seq_puts(m, "#                    _------=> CPU#            \n"
4284                     "#                   / _-----=> irqs-off/BH-disabled\n"
4285                     "#                  | / _----=> need-resched    \n"
4286                     "#                  || / _---=> hardirq/softirq \n"
4287                     "#                  ||| / _--=> preempt-depth   \n"
4288                     "#                  |||| / _-=> migrate-disable \n"
4289                     "#                  ||||| /     delay           \n"
4290                     "#  cmd     pid     |||||| time  |   caller     \n"
4291                     "#     \\   /        ||||||  \\    |    /       \n");
4292 }
4293
4294 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4295 {
4296         unsigned long total;
4297         unsigned long entries;
4298
4299         get_total_entries(buf, &total, &entries);
4300         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4301                    entries, total, num_online_cpus());
4302         seq_puts(m, "#\n");
4303 }
4304
4305 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4306                                    unsigned int flags)
4307 {
4308         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4309
4310         print_event_info(buf, m);
4311
4312         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4313         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4314 }
4315
4316 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4317                                        unsigned int flags)
4318 {
4319         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4320         static const char space[] = "            ";
4321         int prec = tgid ? 12 : 2;
4322
4323         print_event_info(buf, m);
4324
4325         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4326         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4327         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4328         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4329         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4330         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4331         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4332         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4333 }
4334
4335 void
4336 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4337 {
4338         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4339         struct array_buffer *buf = iter->array_buffer;
4340         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4341         struct tracer *type = iter->trace;
4342         unsigned long entries;
4343         unsigned long total;
4344         const char *name = type->name;
4345
4346         get_total_entries(buf, &total, &entries);
4347
4348         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4349                    name, UTS_RELEASE);
4350         seq_puts(m, "# -----------------------------------"
4351                  "---------------------------------\n");
4352         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4353                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4354                    nsecs_to_usecs(data->saved_latency),
4355                    entries,
4356                    total,
4357                    buf->cpu,
4358                    preempt_model_none()      ? "server" :
4359                    preempt_model_voluntary() ? "desktop" :
4360                    preempt_model_full()      ? "preempt" :
4361                    preempt_model_rt()        ? "preempt_rt" :
4362                    "unknown",
4363                    /* These are reserved for later use */
4364                    0, 0, 0, 0);
4365 #ifdef CONFIG_SMP
4366         seq_printf(m, " #P:%d)\n", num_online_cpus());
4367 #else
4368         seq_puts(m, ")\n");
4369 #endif
4370         seq_puts(m, "#    -----------------\n");
4371         seq_printf(m, "#    | task: %.16s-%d "
4372                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4373                    data->comm, data->pid,
4374                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4375                    data->policy, data->rt_priority);
4376         seq_puts(m, "#    -----------------\n");
4377
4378         if (data->critical_start) {
4379                 seq_puts(m, "#  => started at: ");
4380                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4381                 trace_print_seq(m, &iter->seq);
4382                 seq_puts(m, "\n#  => ended at:   ");
4383                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4384                 trace_print_seq(m, &iter->seq);
4385                 seq_puts(m, "\n#\n");
4386         }
4387
4388         seq_puts(m, "#\n");
4389 }
4390
4391 static void test_cpu_buff_start(struct trace_iterator *iter)
4392 {
4393         struct trace_seq *s = &iter->seq;
4394         struct trace_array *tr = iter->tr;
4395
4396         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4397                 return;
4398
4399         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4400                 return;
4401
4402         if (cpumask_available(iter->started) &&
4403             cpumask_test_cpu(iter->cpu, iter->started))
4404                 return;
4405
4406         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4407                 return;
4408
4409         if (cpumask_available(iter->started))
4410                 cpumask_set_cpu(iter->cpu, iter->started);
4411
4412         /* Don't print the "buffer started" message for the first entry of the trace */
4413         if (iter->idx > 1)
4414                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4415                                 iter->cpu);
4416 }
4417
4418 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4419 {
4420         struct trace_array *tr = iter->tr;
4421         struct trace_seq *s = &iter->seq;
4422         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4423         struct trace_entry *entry;
4424         struct trace_event *event;
4425
4426         entry = iter->ent;
4427
4428         test_cpu_buff_start(iter);
4429
4430         event = ftrace_find_event(entry->type);
4431
4432         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4433                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4434                         trace_print_lat_context(iter);
4435                 else
4436                         trace_print_context(iter);
4437         }
4438
4439         if (trace_seq_has_overflowed(s))
4440                 return TRACE_TYPE_PARTIAL_LINE;
4441
4442         if (event) {
4443                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4444                         return print_event_fields(iter, event);
4445                 return event->funcs->trace(iter, sym_flags, event);
4446         }
4447
4448         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4449
4450         return trace_handle_return(s);
4451 }
4452
4453 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4454 {
4455         struct trace_array *tr = iter->tr;
4456         struct trace_seq *s = &iter->seq;
4457         struct trace_entry *entry;
4458         struct trace_event *event;
4459
4460         entry = iter->ent;
4461
4462         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4463                 trace_seq_printf(s, "%d %d %llu ",
4464                                  entry->pid, iter->cpu, iter->ts);
4465
4466         if (trace_seq_has_overflowed(s))
4467                 return TRACE_TYPE_PARTIAL_LINE;
4468
4469         event = ftrace_find_event(entry->type);
4470         if (event)
4471                 return event->funcs->raw(iter, 0, event);
4472
4473         trace_seq_printf(s, "%d ?\n", entry->type);
4474
4475         return trace_handle_return(s);
4476 }
4477
4478 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4479 {
4480         struct trace_array *tr = iter->tr;
4481         struct trace_seq *s = &iter->seq;
4482         unsigned char newline = '\n';
4483         struct trace_entry *entry;
4484         struct trace_event *event;
4485
4486         entry = iter->ent;
4487
4488         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4489                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4490                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4491                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4492                 if (trace_seq_has_overflowed(s))
4493                         return TRACE_TYPE_PARTIAL_LINE;
4494         }
4495
4496         event = ftrace_find_event(entry->type);
4497         if (event) {
4498                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4499                 if (ret != TRACE_TYPE_HANDLED)
4500                         return ret;
4501         }
4502
4503         SEQ_PUT_FIELD(s, newline);
4504
4505         return trace_handle_return(s);
4506 }
4507
4508 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4509 {
4510         struct trace_array *tr = iter->tr;
4511         struct trace_seq *s = &iter->seq;
4512         struct trace_entry *entry;
4513         struct trace_event *event;
4514
4515         entry = iter->ent;
4516
4517         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4518                 SEQ_PUT_FIELD(s, entry->pid);
4519                 SEQ_PUT_FIELD(s, iter->cpu);
4520                 SEQ_PUT_FIELD(s, iter->ts);
4521                 if (trace_seq_has_overflowed(s))
4522                         return TRACE_TYPE_PARTIAL_LINE;
4523         }
4524
4525         event = ftrace_find_event(entry->type);
4526         return event ? event->funcs->binary(iter, 0, event) :
4527                 TRACE_TYPE_HANDLED;
4528 }
4529
4530 int trace_empty(struct trace_iterator *iter)
4531 {
4532         struct ring_buffer_iter *buf_iter;
4533         int cpu;
4534
4535         /* If we are looking at one CPU buffer, only check that one */
4536         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4537                 cpu = iter->cpu_file;
4538                 buf_iter = trace_buffer_iter(iter, cpu);
4539                 if (buf_iter) {
4540                         if (!ring_buffer_iter_empty(buf_iter))
4541                                 return 0;
4542                 } else {
4543                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4544                                 return 0;
4545                 }
4546                 return 1;
4547         }
4548
4549         for_each_tracing_cpu(cpu) {
4550                 buf_iter = trace_buffer_iter(iter, cpu);
4551                 if (buf_iter) {
4552                         if (!ring_buffer_iter_empty(buf_iter))
4553                                 return 0;
4554                 } else {
4555                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4556                                 return 0;
4557                 }
4558         }
4559
4560         return 1;
4561 }
4562
4563 /*  Called with trace_event_read_lock() held. */
4564 enum print_line_t print_trace_line(struct trace_iterator *iter)
4565 {
4566         struct trace_array *tr = iter->tr;
4567         unsigned long trace_flags = tr->trace_flags;
4568         enum print_line_t ret;
4569
4570         if (iter->lost_events) {
4571                 if (iter->lost_events == (unsigned long)-1)
4572                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4573                                          iter->cpu);
4574                 else
4575                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4576                                          iter->cpu, iter->lost_events);
4577                 if (trace_seq_has_overflowed(&iter->seq))
4578                         return TRACE_TYPE_PARTIAL_LINE;
4579         }
4580
4581         if (iter->trace && iter->trace->print_line) {
4582                 ret = iter->trace->print_line(iter);
4583                 if (ret != TRACE_TYPE_UNHANDLED)
4584                         return ret;
4585         }
4586
4587         if (iter->ent->type == TRACE_BPUTS &&
4588                         trace_flags & TRACE_ITER_PRINTK &&
4589                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4590                 return trace_print_bputs_msg_only(iter);
4591
4592         if (iter->ent->type == TRACE_BPRINT &&
4593                         trace_flags & TRACE_ITER_PRINTK &&
4594                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4595                 return trace_print_bprintk_msg_only(iter);
4596
4597         if (iter->ent->type == TRACE_PRINT &&
4598                         trace_flags & TRACE_ITER_PRINTK &&
4599                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4600                 return trace_print_printk_msg_only(iter);
4601
4602         if (trace_flags & TRACE_ITER_BIN)
4603                 return print_bin_fmt(iter);
4604
4605         if (trace_flags & TRACE_ITER_HEX)
4606                 return print_hex_fmt(iter);
4607
4608         if (trace_flags & TRACE_ITER_RAW)
4609                 return print_raw_fmt(iter);
4610
4611         return print_trace_fmt(iter);
4612 }
4613
4614 void trace_latency_header(struct seq_file *m)
4615 {
4616         struct trace_iterator *iter = m->private;
4617         struct trace_array *tr = iter->tr;
4618
4619         /* print nothing if the buffers are empty */
4620         if (trace_empty(iter))
4621                 return;
4622
4623         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4624                 print_trace_header(m, iter);
4625
4626         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4627                 print_lat_help_header(m);
4628 }
4629
4630 void trace_default_header(struct seq_file *m)
4631 {
4632         struct trace_iterator *iter = m->private;
4633         struct trace_array *tr = iter->tr;
4634         unsigned long trace_flags = tr->trace_flags;
4635
4636         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4637                 return;
4638
4639         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4640                 /* print nothing if the buffers are empty */
4641                 if (trace_empty(iter))
4642                         return;
4643                 print_trace_header(m, iter);
4644                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4645                         print_lat_help_header(m);
4646         } else {
4647                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4648                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4649                                 print_func_help_header_irq(iter->array_buffer,
4650                                                            m, trace_flags);
4651                         else
4652                                 print_func_help_header(iter->array_buffer, m,
4653                                                        trace_flags);
4654                 }
4655         }
4656 }
4657
4658 static void test_ftrace_alive(struct seq_file *m)
4659 {
4660         if (!ftrace_is_dead())
4661                 return;
4662         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4663                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4664 }
4665
4666 #ifdef CONFIG_TRACER_MAX_TRACE
4667 static void show_snapshot_main_help(struct seq_file *m)
4668 {
4669         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4670                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4671                     "#                      Takes a snapshot of the main buffer.\n"
4672                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4673                     "#                      (Doesn't have to be '2'; works with any number that\n"
4674                     "#                       is not a '0' or '1')\n");
4675 }
4676
4677 static void show_snapshot_percpu_help(struct seq_file *m)
4678 {
4679         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4680 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4681         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4682                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4683 #else
4684         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4685                     "#                     Must use main snapshot file to allocate.\n");
4686 #endif
4687         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4688                     "#                      (Doesn't have to be '2'; works with any number that\n"
4689                     "#                       is not a '0' or '1')\n");
4690 }
4691
4692 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4693 {
4694         if (iter->tr->allocated_snapshot)
4695                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4696         else
4697                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4698
4699         seq_puts(m, "# Snapshot commands:\n");
4700         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4701                 show_snapshot_main_help(m);
4702         else
4703                 show_snapshot_percpu_help(m);
4704 }
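/*
 * Illustrative usage (a sketch; paths assume tracefs is mounted at
 * /sys/kernel/tracing, its usual location):
 *
 *   echo 1 > snapshot    # allocate the buffer and take a snapshot
 *   cat snapshot         # read the snapshotted events
 *   echo 2 > snapshot    # clear the snapshot buffer, keep the allocation
 *   echo 0 > snapshot    # free the snapshot buffer
 */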
4705 #else
4706 /* Should never be called */
4707 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4708 #endif
4709
4710 static int s_show(struct seq_file *m, void *v)
4711 {
4712         struct trace_iterator *iter = v;
4713         int ret;
4714
4715         if (iter->ent == NULL) {
4716                 if (iter->tr) {
4717                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4718                         seq_puts(m, "#\n");
4719                         test_ftrace_alive(m);
4720                 }
4721                 if (iter->snapshot && trace_empty(iter))
4722                         print_snapshot_help(m, iter);
4723                 else if (iter->trace && iter->trace->print_header)
4724                         iter->trace->print_header(m);
4725                 else
4726                         trace_default_header(m);
4727
4728         } else if (iter->leftover) {
4729                 /*
4730                  * If we filled the seq_file buffer earlier, we
4731                  * want to just show it now.
4732                  */
4733                 ret = trace_print_seq(m, &iter->seq);
4734
4735                 /* ret should this time be zero, but you never know */
4736                 iter->leftover = ret;
4737
4738         } else {
4739                 ret = print_trace_line(iter);
4740                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4741                         iter->seq.full = 0;
4742                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4743                 }
4744                 ret = trace_print_seq(m, &iter->seq);
4745                 /*
4746                  * If we overflow the seq_file buffer, then it will
4747                  * ask us for this data again at start up.
4748                  * Use that instead.
4749                  *  ret is 0 if seq_file write succeeded.
4750                  *        -1 otherwise.
4751                  */
4752                 iter->leftover = ret;
4753         }
4754
4755         return 0;
4756 }
4757
4758 /*
4759  * Should be used after trace_array_get(), trace_types_lock
4760  * ensures that i_cdev was already initialized.
4761  */
4762 static inline int tracing_get_cpu(struct inode *inode)
4763 {
4764         if (inode->i_cdev) /* See trace_create_cpu_file() */
4765                 return (long)inode->i_cdev - 1;
4766         return RING_BUFFER_ALL_CPUS;
4767 }
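/*
 * trace_create_cpu_file() stores "cpu + 1" in i_cdev, so a NULL i_cdev
 * (the non-per-cpu files) decodes to RING_BUFFER_ALL_CPUS and anything
 * else decodes back to the CPU number via the "- 1" above.
 */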
4768
4769 static const struct seq_operations tracer_seq_ops = {
4770         .start          = s_start,
4771         .next           = s_next,
4772         .stop           = s_stop,
4773         .show           = s_show,
4774 };
4775
4776 /*
4777  * Note, as iter itself can be allocated and freed in different
4778  * ways, this function is only used to free its content, and not
4779  * the iterator itself. The only requirement on all the allocations
4780  * is that they zero all fields (kzalloc), as freeing works with
4781  * either allocated content or NULL.
4782  */
4783 static void free_trace_iter_content(struct trace_iterator *iter)
4784 {
4785         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4786         if (iter->fmt != static_fmt_buf)
4787                 kfree(iter->fmt);
4788
4789         kfree(iter->temp);
4790         kfree(iter->buffer_iter);
4791         mutex_destroy(&iter->mutex);
4792         free_cpumask_var(iter->started);
4793 }
4794
4795 static struct trace_iterator *
4796 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4797 {
4798         struct trace_array *tr = inode->i_private;
4799         struct trace_iterator *iter;
4800         int cpu;
4801
4802         if (tracing_disabled)
4803                 return ERR_PTR(-ENODEV);
4804
4805         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4806         if (!iter)
4807                 return ERR_PTR(-ENOMEM);
4808
4809         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4810                                     GFP_KERNEL);
4811         if (!iter->buffer_iter)
4812                 goto release;
4813
4814         /*
4815          * trace_find_next_entry() may need to save off iter->ent.
4816          * It will place it into the iter->temp buffer. As most
4817          * events are less than 128 bytes, allocate a buffer of that size.
4818          * If one is greater, then trace_find_next_entry() will
4819          * allocate a new buffer to adjust for the bigger iter->ent.
4820          * It's not critical if it fails to get allocated here.
4821          */
4822         iter->temp = kmalloc(128, GFP_KERNEL);
4823         if (iter->temp)
4824                 iter->temp_size = 128;
4825
4826         /*
4827          * trace_event_printf() may need to modify given format
4828          * string to replace %p with %px so that it shows real address
4829          * instead of hash value. However, that is only for the event
4830          * tracing; other tracers may not need it. Defer the allocation
4831          * until it is needed.
4832          */
4833         iter->fmt = NULL;
4834         iter->fmt_size = 0;
4835
4836         mutex_lock(&trace_types_lock);
4837         iter->trace = tr->current_trace;
4838
4839         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4840                 goto fail;
4841
4842         iter->tr = tr;
4843
4844 #ifdef CONFIG_TRACER_MAX_TRACE
4845         /* Currently only the top directory has a snapshot */
4846         if (tr->current_trace->print_max || snapshot)
4847                 iter->array_buffer = &tr->max_buffer;
4848         else
4849 #endif
4850                 iter->array_buffer = &tr->array_buffer;
4851         iter->snapshot = snapshot;
4852         iter->pos = -1;
4853         iter->cpu_file = tracing_get_cpu(inode);
4854         mutex_init(&iter->mutex);
4855
4856         /* Notify the tracer early; before we stop tracing. */
4857         if (iter->trace->open)
4858                 iter->trace->open(iter);
4859
4860         /* Annotate start of buffers if we had overruns */
4861         if (ring_buffer_overruns(iter->array_buffer->buffer))
4862                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4863
4864         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4865         if (trace_clocks[tr->clock_id].in_ns)
4866                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4867
4868         /*
4869          * If pause-on-trace is enabled, then stop the trace while
4870          * dumping, unless this is the "snapshot" file
4871          */
4872         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4873                 tracing_stop_tr(tr);
4874
4875         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4876                 for_each_tracing_cpu(cpu) {
4877                         iter->buffer_iter[cpu] =
4878                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4879                                                          cpu, GFP_KERNEL);
4880                 }
4881                 ring_buffer_read_prepare_sync();
4882                 for_each_tracing_cpu(cpu) {
4883                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4884                         tracing_iter_reset(iter, cpu);
4885                 }
4886         } else {
4887                 cpu = iter->cpu_file;
4888                 iter->buffer_iter[cpu] =
4889                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4890                                                  cpu, GFP_KERNEL);
4891                 ring_buffer_read_prepare_sync();
4892                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4893                 tracing_iter_reset(iter, cpu);
4894         }
4895
4896         mutex_unlock(&trace_types_lock);
4897
4898         return iter;
4899
4900  fail:
4901         mutex_unlock(&trace_types_lock);
4902         free_trace_iter_content(iter);
4903 release:
4904         seq_release_private(inode, file);
4905         return ERR_PTR(-ENOMEM);
4906 }
4907
4908 int tracing_open_generic(struct inode *inode, struct file *filp)
4909 {
4910         int ret;
4911
4912         ret = tracing_check_open_get_tr(NULL);
4913         if (ret)
4914                 return ret;
4915
4916         filp->private_data = inode->i_private;
4917         return 0;
4918 }
4919
4920 bool tracing_is_disabled(void)
4921 {
4922         return tracing_disabled;
4923 }
4924
4925 /*
4926  * Open and update trace_array ref count.
4927  * Must have the current trace_array passed to it.
4928  */
4929 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4930 {
4931         struct trace_array *tr = inode->i_private;
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(tr);
4935         if (ret)
4936                 return ret;
4937
4938         filp->private_data = inode->i_private;
4939
4940         return 0;
4941 }
4942
4943 /*
4944  * The private pointer of the inode is the trace_event_file.
4945  * Update the tr ref count associated to it.
4946  */
4947 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4948 {
4949         struct trace_event_file *file = inode->i_private;
4950         int ret;
4951
4952         ret = tracing_check_open_get_tr(file->tr);
4953         if (ret)
4954                 return ret;
4955
4956         mutex_lock(&event_mutex);
4957
4958         /* Fail if the file is marked for removal */
4959         if (file->flags & EVENT_FILE_FL_FREED) {
4960                 trace_array_put(file->tr);
4961                 ret = -ENODEV;
4962         } else {
4963                 event_file_get(file);
4964         }
4965
4966         mutex_unlock(&event_mutex);
4967         if (ret)
4968                 return ret;
4969
4970         filp->private_data = inode->i_private;
4971
4972         return 0;
4973 }
4974
4975 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4976 {
4977         struct trace_event_file *file = inode->i_private;
4978
4979         trace_array_put(file->tr);
4980         event_file_put(file);
4981
4982         return 0;
4983 }
4984
4985 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4986 {
4987         tracing_release_file_tr(inode, filp);
4988         return single_release(inode, filp);
4989 }
4990
4991 static int tracing_mark_open(struct inode *inode, struct file *filp)
4992 {
4993         stream_open(inode, filp);
4994         return tracing_open_generic_tr(inode, filp);
4995 }
4996
4997 static int tracing_release(struct inode *inode, struct file *file)
4998 {
4999         struct trace_array *tr = inode->i_private;
5000         struct seq_file *m = file->private_data;
5001         struct trace_iterator *iter;
5002         int cpu;
5003
5004         if (!(file->f_mode & FMODE_READ)) {
5005                 trace_array_put(tr);
5006                 return 0;
5007         }
5008
5009         /* Writes do not use seq_file */
5010         iter = m->private;
5011         mutex_lock(&trace_types_lock);
5012
5013         for_each_tracing_cpu(cpu) {
5014                 if (iter->buffer_iter[cpu])
5015                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5016         }
5017
5018         if (iter->trace && iter->trace->close)
5019                 iter->trace->close(iter);
5020
5021         if (!iter->snapshot && tr->stop_count)
5022                 /* reenable tracing if it was previously enabled */
5023                 tracing_start_tr(tr);
5024
5025         __trace_array_put(tr);
5026
5027         mutex_unlock(&trace_types_lock);
5028
5029         free_trace_iter_content(iter);
5030         seq_release_private(inode, file);
5031
5032         return 0;
5033 }
5034
5035 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5036 {
5037         struct trace_array *tr = inode->i_private;
5038
5039         trace_array_put(tr);
5040         return 0;
5041 }
5042
5043 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5044 {
5045         struct trace_array *tr = inode->i_private;
5046
5047         trace_array_put(tr);
5048
5049         return single_release(inode, file);
5050 }
5051
5052 static int tracing_open(struct inode *inode, struct file *file)
5053 {
5054         struct trace_array *tr = inode->i_private;
5055         struct trace_iterator *iter;
5056         int ret;
5057
5058         ret = tracing_check_open_get_tr(tr);
5059         if (ret)
5060                 return ret;
5061
5062         /* If this file was open for write, then erase contents */
5063         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5064                 int cpu = tracing_get_cpu(inode);
5065                 struct array_buffer *trace_buf = &tr->array_buffer;
5066
5067 #ifdef CONFIG_TRACER_MAX_TRACE
5068                 if (tr->current_trace->print_max)
5069                         trace_buf = &tr->max_buffer;
5070 #endif
5071
5072                 if (cpu == RING_BUFFER_ALL_CPUS)
5073                         tracing_reset_online_cpus(trace_buf);
5074                 else
5075                         tracing_reset_cpu(trace_buf, cpu);
5076         }
5077
5078         if (file->f_mode & FMODE_READ) {
5079                 iter = __tracing_open(inode, file, false);
5080                 if (IS_ERR(iter))
5081                         ret = PTR_ERR(iter);
5082                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5083                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5084         }
5085
5086         if (ret < 0)
5087                 trace_array_put(tr);
5088
5089         return ret;
5090 }
5091
5092 /*
5093  * Some tracers are not suitable for instance buffers.
5094  * A tracer is always available for the global array (toplevel)
5095  * or if it explicitly states that it is.
5096  */
5097 static bool
5098 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5099 {
5100         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5101 }
5102
5103 /* Find the next tracer that this trace array may use */
5104 static struct tracer *
5105 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5106 {
5107         while (t && !trace_ok_for_array(t, tr))
5108                 t = t->next;
5109
5110         return t;
5111 }
5112
5113 static void *
5114 t_next(struct seq_file *m, void *v, loff_t *pos)
5115 {
5116         struct trace_array *tr = m->private;
5117         struct tracer *t = v;
5118
5119         (*pos)++;
5120
5121         if (t)
5122                 t = get_tracer_for_array(tr, t->next);
5123
5124         return t;
5125 }
5126
5127 static void *t_start(struct seq_file *m, loff_t *pos)
5128 {
5129         struct trace_array *tr = m->private;
5130         struct tracer *t;
5131         loff_t l = 0;
5132
5133         mutex_lock(&trace_types_lock);
5134
5135         t = get_tracer_for_array(tr, trace_types);
5136         for (; t && l < *pos; t = t_next(m, t, &l))
5137                 ;
5138
5139         return t;
5140 }
5141
5142 static void t_stop(struct seq_file *m, void *p)
5143 {
5144         mutex_unlock(&trace_types_lock);
5145 }
5146
5147 static int t_show(struct seq_file *m, void *v)
5148 {
5149         struct tracer *t = v;
5150
5151         if (!t)
5152                 return 0;
5153
5154         seq_puts(m, t->name);
5155         if (t->next)
5156                 seq_putc(m, ' ');
5157         else
5158                 seq_putc(m, '\n');
5159
5160         return 0;
5161 }
5162
5163 static const struct seq_operations show_traces_seq_ops = {
5164         .start          = t_start,
5165         .next           = t_next,
5166         .stop           = t_stop,
5167         .show           = t_show,
5168 };
5169
5170 static int show_traces_open(struct inode *inode, struct file *file)
5171 {
5172         struct trace_array *tr = inode->i_private;
5173         struct seq_file *m;
5174         int ret;
5175
5176         ret = tracing_check_open_get_tr(tr);
5177         if (ret)
5178                 return ret;
5179
5180         ret = seq_open(file, &show_traces_seq_ops);
5181         if (ret) {
5182                 trace_array_put(tr);
5183                 return ret;
5184         }
5185
5186         m = file->private_data;
5187         m->private = tr;
5188
5189         return 0;
5190 }
5191
5192 static int show_traces_release(struct inode *inode, struct file *file)
5193 {
5194         struct trace_array *tr = inode->i_private;
5195
5196         trace_array_put(tr);
5197         return seq_release(inode, file);
5198 }
5199
5200 static ssize_t
5201 tracing_write_stub(struct file *filp, const char __user *ubuf,
5202                    size_t count, loff_t *ppos)
5203 {
5204         return count;
5205 }
5206
5207 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5208 {
5209         int ret;
5210
5211         if (file->f_mode & FMODE_READ)
5212                 ret = seq_lseek(file, offset, whence);
5213         else
5214                 file->f_pos = ret = 0;
5215
5216         return ret;
5217 }
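/*
 * Only files opened for reading have seq_file state worth seeking in;
 * writers (e.g. "echo > trace", which only truncates) simply get their
 * file position reset to zero.
 */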
5218
5219 static const struct file_operations tracing_fops = {
5220         .open           = tracing_open,
5221         .read           = seq_read,
5222         .read_iter      = seq_read_iter,
5223         .splice_read    = copy_splice_read,
5224         .write          = tracing_write_stub,
5225         .llseek         = tracing_lseek,
5226         .release        = tracing_release,
5227 };
5228
5229 static const struct file_operations show_traces_fops = {
5230         .open           = show_traces_open,
5231         .read           = seq_read,
5232         .llseek         = seq_lseek,
5233         .release        = show_traces_release,
5234 };
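/*
 * These operations back the "available_tracers" file: t_show() emits each
 * usable tracer name separated by spaces, ending with a newline.
 */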
5235
5236 static ssize_t
5237 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5238                      size_t count, loff_t *ppos)
5239 {
5240         struct trace_array *tr = file_inode(filp)->i_private;
5241         char *mask_str;
5242         int len;
5243
5244         len = snprintf(NULL, 0, "%*pb\n",
5245                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5246         mask_str = kmalloc(len, GFP_KERNEL);
5247         if (!mask_str)
5248                 return -ENOMEM;
5249
5250         len = snprintf(mask_str, len, "%*pb\n",
5251                        cpumask_pr_args(tr->tracing_cpumask));
5252         if (len >= count) {
5253                 count = -EINVAL;
5254                 goto out_err;
5255         }
5256         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5257
5258 out_err:
5259         kfree(mask_str);
5260
5261         return count;
5262 }
5263
5264 int tracing_set_cpumask(struct trace_array *tr,
5265                         cpumask_var_t tracing_cpumask_new)
5266 {
5267         int cpu;
5268
5269         if (!tr)
5270                 return -EINVAL;
5271
5272         local_irq_disable();
5273         arch_spin_lock(&tr->max_lock);
5274         for_each_tracing_cpu(cpu) {
5275                 /*
5276                  * Increase/decrease the disabled counter if we are
5277                  * about to flip a bit in the cpumask:
5278                  */
5279                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5280                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5281                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5282                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5283 #ifdef CONFIG_TRACER_MAX_TRACE
5284                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5285 #endif
5286                 }
5287                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5288                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5289                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5290                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5291 #ifdef CONFIG_TRACER_MAX_TRACE
5292                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5293 #endif
5294                 }
5295         }
5296         arch_spin_unlock(&tr->max_lock);
5297         local_irq_enable();
5298
5299         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5300
5301         return 0;
5302 }
5303
5304 static ssize_t
5305 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5306                       size_t count, loff_t *ppos)
5307 {
5308         struct trace_array *tr = file_inode(filp)->i_private;
5309         cpumask_var_t tracing_cpumask_new;
5310         int err;
5311
5312         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5313                 return -ENOMEM;
5314
5315         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5316         if (err)
5317                 goto err_free;
5318
5319         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5320         if (err)
5321                 goto err_free;
5322
5323         free_cpumask_var(tracing_cpumask_new);
5324
5325         return count;
5326
5327 err_free:
5328         free_cpumask_var(tracing_cpumask_new);
5329
5330         return err;
5331 }
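/*
 * Example (a sketch, assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 3 > tracing_cpumask    # hex mask: trace only CPUs 0 and 1
 *
 * cpumask_parse_user() expects the same hex bitmask format that
 * tracing_cpumask_read() prints via "%*pb".
 */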
5332
5333 static const struct file_operations tracing_cpumask_fops = {
5334         .open           = tracing_open_generic_tr,
5335         .read           = tracing_cpumask_read,
5336         .write          = tracing_cpumask_write,
5337         .release        = tracing_release_generic_tr,
5338         .llseek         = generic_file_llseek,
5339 };
5340
5341 static int tracing_trace_options_show(struct seq_file *m, void *v)
5342 {
5343         struct tracer_opt *trace_opts;
5344         struct trace_array *tr = m->private;
5345         u32 tracer_flags;
5346         int i;
5347
5348         mutex_lock(&trace_types_lock);
5349         tracer_flags = tr->current_trace->flags->val;
5350         trace_opts = tr->current_trace->flags->opts;
5351
5352         for (i = 0; trace_options[i]; i++) {
5353                 if (tr->trace_flags & (1 << i))
5354                         seq_printf(m, "%s\n", trace_options[i]);
5355                 else
5356                         seq_printf(m, "no%s\n", trace_options[i]);
5357         }
5358
5359         for (i = 0; trace_opts[i].name; i++) {
5360                 if (tracer_flags & trace_opts[i].bit)
5361                         seq_printf(m, "%s\n", trace_opts[i].name);
5362                 else
5363                         seq_printf(m, "no%s\n", trace_opts[i].name);
5364         }
5365         mutex_unlock(&trace_types_lock);
5366
5367         return 0;
5368 }
5369
5370 static int __set_tracer_option(struct trace_array *tr,
5371                                struct tracer_flags *tracer_flags,
5372                                struct tracer_opt *opts, int neg)
5373 {
5374         struct tracer *trace = tracer_flags->trace;
5375         int ret;
5376
5377         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5378         if (ret)
5379                 return ret;
5380
5381         if (neg)
5382                 tracer_flags->val &= ~opts->bit;
5383         else
5384                 tracer_flags->val |= opts->bit;
5385         return 0;
5386 }
5387
5388 /* Try to assign a tracer specific option */
5389 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5390 {
5391         struct tracer *trace = tr->current_trace;
5392         struct tracer_flags *tracer_flags = trace->flags;
5393         struct tracer_opt *opts = NULL;
5394         int i;
5395
5396         for (i = 0; tracer_flags->opts[i].name; i++) {
5397                 opts = &tracer_flags->opts[i];
5398
5399                 if (strcmp(cmp, opts->name) == 0)
5400                         return __set_tracer_option(tr, trace->flags, opts, neg);
5401         }
5402
5403         return -EINVAL;
5404 }
5405
5406 /* Some tracers require overwrite to stay enabled */
5407 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5408 {
5409         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5410                 return -1;
5411
5412         return 0;
5413 }
5414
5415 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5416 {
5417         int *map;
5418
5419         if ((mask == TRACE_ITER_RECORD_TGID) ||
5420             (mask == TRACE_ITER_RECORD_CMD))
5421                 lockdep_assert_held(&event_mutex);
5422
5423         /* do nothing if flag is already set */
5424         if (!!(tr->trace_flags & mask) == !!enabled)
5425                 return 0;
5426
5427         /* Give the tracer a chance to approve the change */
5428         if (tr->current_trace->flag_changed)
5429                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5430                         return -EINVAL;
5431
5432         if (enabled)
5433                 tr->trace_flags |= mask;
5434         else
5435                 tr->trace_flags &= ~mask;
5436
5437         if (mask == TRACE_ITER_RECORD_CMD)
5438                 trace_event_enable_cmd_record(enabled);
5439
5440         if (mask == TRACE_ITER_RECORD_TGID) {
5441                 if (!tgid_map) {
5442                         tgid_map_max = pid_max;
5443                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5444                                        GFP_KERNEL);
5445
5446                         /*
5447                          * Pairs with smp_load_acquire() in
5448                          * trace_find_tgid_ptr() to ensure that if it observes
5449                          * the tgid_map we just allocated then it also observes
5450                          * the corresponding tgid_map_max value.
5451                          */
5452                         smp_store_release(&tgid_map, map);
5453                 }
5454                 if (!tgid_map) {
5455                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5456                         return -ENOMEM;
5457                 }
5458
5459                 trace_event_enable_tgid_record(enabled);
5460         }
5461
5462         if (mask == TRACE_ITER_EVENT_FORK)
5463                 trace_event_follow_fork(tr, enabled);
5464
5465         if (mask == TRACE_ITER_FUNC_FORK)
5466                 ftrace_pid_follow_fork(tr, enabled);
5467
5468         if (mask == TRACE_ITER_OVERWRITE) {
5469                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5470 #ifdef CONFIG_TRACER_MAX_TRACE
5471                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5472 #endif
5473         }
5474
5475         if (mask == TRACE_ITER_PRINTK) {
5476                 trace_printk_start_stop_comm(enabled);
5477                 trace_printk_control(enabled);
5478         }
5479
5480         return 0;
5481 }
5482
5483 int trace_set_options(struct trace_array *tr, char *option)
5484 {
5485         char *cmp;
5486         int neg = 0;
5487         int ret;
5488         size_t orig_len = strlen(option);
5489         int len;
5490
5491         cmp = strstrip(option);
5492
5493         len = str_has_prefix(cmp, "no");
5494         if (len)
5495                 neg = 1;
5496
5497         cmp += len;
5498
5499         mutex_lock(&event_mutex);
5500         mutex_lock(&trace_types_lock);
5501
5502         ret = match_string(trace_options, -1, cmp);
5503         /* If the name did not match a core option, try the tracer-specific options */
5504         if (ret < 0)
5505                 ret = set_tracer_option(tr, cmp, neg);
5506         else
5507                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5508
5509         mutex_unlock(&trace_types_lock);
5510         mutex_unlock(&event_mutex);
5511
5512         /*
5513          * If the first trailing whitespace is replaced with '\0' by strstrip,
5514          * turn it back into a space.
5515          */
5516         if (orig_len > strlen(option))
5517                 option[strlen(option)] = ' ';
5518
5519         return ret;
5520 }
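/*
 * Example:
 *
 *   echo print-parent   > trace_options   # set a core trace option
 *   echo noprint-parent > trace_options   # the "no" prefix clears it
 *
 * Names that are not core options are tried against the current tracer's
 * private flags via set_tracer_option().
 */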
5521
5522 static void __init apply_trace_boot_options(void)
5523 {
5524         char *buf = trace_boot_options_buf;
5525         char *option;
5526
5527         while (true) {
5528                 option = strsep(&buf, ",");
5529
5530                 if (!option)
5531                         break;
5532
5533                 if (*option)
5534                         trace_set_options(&global_trace, option);
5535
5536                 /* Put back the comma to allow this to be called again */
5537                 if (buf)
5538                         *(buf - 1) = ',';
5539         }
5540 }
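/*
 * trace_boot_options_buf holds the comma-separated list from the
 * "trace_options=" kernel command line, e.g.:
 *
 *   trace_options=sym-addr,noprint-parent
 *
 * The comma clobbered by strsep() is restored so the buffer can be
 * parsed again if this is ever re-run.
 */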
5541
5542 static ssize_t
5543 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5544                         size_t cnt, loff_t *ppos)
5545 {
5546         struct seq_file *m = filp->private_data;
5547         struct trace_array *tr = m->private;
5548         char buf[64];
5549         int ret;
5550
5551         if (cnt >= sizeof(buf))
5552                 return -EINVAL;
5553
5554         if (copy_from_user(buf, ubuf, cnt))
5555                 return -EFAULT;
5556
5557         buf[cnt] = 0;
5558
5559         ret = trace_set_options(tr, buf);
5560         if (ret < 0)
5561                 return ret;
5562
5563         *ppos += cnt;
5564
5565         return cnt;
5566 }
5567
5568 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5569 {
5570         struct trace_array *tr = inode->i_private;
5571         int ret;
5572
5573         ret = tracing_check_open_get_tr(tr);
5574         if (ret)
5575                 return ret;
5576
5577         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5578         if (ret < 0)
5579                 trace_array_put(tr);
5580
5581         return ret;
5582 }
5583
5584 static const struct file_operations tracing_iter_fops = {
5585         .open           = tracing_trace_options_open,
5586         .read           = seq_read,
5587         .llseek         = seq_lseek,
5588         .release        = tracing_single_release_tr,
5589         .write          = tracing_trace_options_write,
5590 };
5591
5592 static const char readme_msg[] =
5593         "tracing mini-HOWTO:\n\n"
5594         "# echo 0 > tracing_on : quick way to disable tracing\n"
5595         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5596         " Important files:\n"
5597         "  trace\t\t\t- The static contents of the buffer\n"
5598         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5599         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5600         "  current_tracer\t- shows/sets the tracer in use (function, latency tracers, ...)\n"
5601         "  available_tracers\t- list of configured tracers for current_tracer\n"
5602         "  error_log\t- error log for failed commands (that support it)\n"
5603         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5604         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5605         "  trace_clock\t\t- change the clock used to order events\n"
5606         "       local:   Per cpu clock but may not be synced across CPUs\n"
5607         "      global:   Synced across CPUs but slows tracing down.\n"
5608         "     counter:   Not a clock, but just an increment\n"
5609         "      uptime:   Jiffy counter from time of boot\n"
5610         "        perf:   Same clock that perf events use\n"
5611 #ifdef CONFIG_X86_64
5612         "     x86-tsc:   TSC cycle counter\n"
5613 #endif
5614         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5615         "       delta:   Delta difference against a buffer-wide timestamp\n"
5616         "    absolute:   Absolute (standalone) timestamp\n"
5617         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5618         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5619         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5620         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5621         "\t\t\t  Remove sub-buffer with rmdir\n"
5622         "  trace_options\t\t- Set format or modify how tracing happens\n"
5623         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5624         "\t\t\t  option name\n"
5625         "  saved_cmdlines_size\t- echo the number of commands to keep in the comm-pid list\n"
5626 #ifdef CONFIG_DYNAMIC_FTRACE
5627         "\n  available_filter_functions - list of functions that can be filtered on\n"
5628         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5629         "\t\t\t  functions\n"
5630         "\t     accepts: func_full_name or glob-matching-pattern\n"
5631         "\t     modules: Can select a group via module\n"
5632         "\t      Format: :mod:<module-name>\n"
5633         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5634         "\t    triggers: a command to perform when function is hit\n"
5635         "\t      Format: <function>:<trigger>[:count]\n"
5636         "\t     trigger: traceon, traceoff\n"
5637         "\t\t      enable_event:<system>:<event>\n"
5638         "\t\t      disable_event:<system>:<event>\n"
5639 #ifdef CONFIG_STACKTRACE
5640         "\t\t      stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643         "\t\t      snapshot\n"
5644 #endif
5645         "\t\t      dump\n"
5646         "\t\t      cpudump\n"
5647         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5648         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5649         "\t     The first one will disable tracing every time do_fault is hit\n"
5650         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5651         "\t       The first time do_trap is hit and it disables tracing, the\n"
5652         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5653         "\t       the counter will not decrement. It only decrements when the\n"
5654         "\t       trigger did work\n"
5655         "\t     To remove trigger without count:\n"
5656         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5657         "\t     To remove trigger with a count:\n"
5658         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5659         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5660         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5661         "\t    modules: Can select a group via module command :mod:\n"
5662         "\t    Does not accept triggers\n"
5663 #endif /* CONFIG_DYNAMIC_FTRACE */
5664 #ifdef CONFIG_FUNCTION_TRACER
5665         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5666         "\t\t    (function)\n"
5667         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5668         "\t\t    (function)\n"
5669 #endif
5670 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5671         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5672         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5673         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5674 #endif
5675 #ifdef CONFIG_TRACER_SNAPSHOT
5676         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5677         "\t\t\t  snapshot buffer. Read the contents for more\n"
5678         "\t\t\t  information\n"
5679 #endif
5680 #ifdef CONFIG_STACK_TRACER
5681         "  stack_trace\t\t- Shows the max stack trace when active\n"
5682         "  stack_max_size\t- Shows current max stack size that was traced\n"
5683         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5684         "\t\t\t  new trace)\n"
5685 #ifdef CONFIG_DYNAMIC_FTRACE
5686         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5687         "\t\t\t  traces\n"
5688 #endif
5689 #endif /* CONFIG_STACK_TRACER */
5690 #ifdef CONFIG_DYNAMIC_EVENTS
5691         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5692         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5693 #endif
5694 #ifdef CONFIG_KPROBE_EVENTS
5695         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5696         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5697 #endif
5698 #ifdef CONFIG_UPROBE_EVENTS
5699         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5700         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5701 #endif
5702 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5703     defined(CONFIG_FPROBE_EVENTS)
5704         "\t  accepts: event-definitions (one definition per line)\n"
5705 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5706         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5707         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5708 #endif
5709 #ifdef CONFIG_FPROBE_EVENTS
5710         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5711         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5712 #endif
5713 #ifdef CONFIG_HIST_TRIGGERS
5714         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5715 #endif
5716         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5717         "\t           -:[<group>/][<event>]\n"
5718 #ifdef CONFIG_KPROBE_EVENTS
5719         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5720   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5721 #endif
5722 #ifdef CONFIG_UPROBE_EVENTS
5723   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5724 #endif
5725         "\t     args: <name>=fetcharg[:type]\n"
5726         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5727 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5728 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5729         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5730         "\t           <argname>[->field[->field|.field...]],\n"
5731 #else
5732         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5733 #endif
5734 #else
5735         "\t           $stack<index>, $stack, $retval, $comm,\n"
5736 #endif
5737         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5738         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5739         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5740         "\t           symstr, <type>\\[<array-size>\\]\n"
5741 #ifdef CONFIG_HIST_TRIGGERS
5742         "\t    field: <stype> <name>;\n"
5743         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5744         "\t           [unsigned] char/int/long\n"
5745 #endif
5746         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5747         "\t            of the <attached-group>/<attached-event>.\n"
5748 #endif
5749         "  events/\t\t- Directory containing all trace event subsystems:\n"
5750         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5751         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5752         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5753         "\t\t\t  events\n"
5754         "      filter\t\t- If set, only events passing filter are traced\n"
5755         "  events/<system>/<event>/\t- Directory containing control files for\n"
5756         "\t\t\t  <event>:\n"
5757         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5758         "      filter\t\t- If set, only events passing filter are traced\n"
5759         "      trigger\t\t- If set, a command to perform when event is hit\n"
5760         "\t    Format: <trigger>[:count][if <filter>]\n"
5761         "\t   trigger: traceon, traceoff\n"
5762         "\t            enable_event:<system>:<event>\n"
5763         "\t            disable_event:<system>:<event>\n"
5764 #ifdef CONFIG_HIST_TRIGGERS
5765         "\t            enable_hist:<system>:<event>\n"
5766         "\t            disable_hist:<system>:<event>\n"
5767 #endif
5768 #ifdef CONFIG_STACKTRACE
5769         "\t\t    stacktrace\n"
5770 #endif
5771 #ifdef CONFIG_TRACER_SNAPSHOT
5772         "\t\t    snapshot\n"
5773 #endif
5774 #ifdef CONFIG_HIST_TRIGGERS
5775         "\t\t    hist (see below)\n"
5776 #endif
5777         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5778         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5779         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5780         "\t                  events/block/block_unplug/trigger\n"
5781         "\t   The first disables tracing every time block_unplug is hit.\n"
5782         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5783         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5784         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
5785         "\t   Like function triggers, the counter is only decremented if it\n"
5786         "\t    enabled or disabled tracing.\n"
5787         "\t   To remove a trigger without a count:\n"
5788         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5789         "\t   To remove a trigger with a count:\n"
5790         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5791         "\t   Filters can be ignored when removing a trigger.\n"
5792 #ifdef CONFIG_HIST_TRIGGERS
5793         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5794         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5795         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5796         "\t            [:values=<field1[,field2,...]>]\n"
5797         "\t            [:sort=<field1[,field2,...]>]\n"
5798         "\t            [:size=#entries]\n"
5799         "\t            [:pause][:continue][:clear]\n"
5800         "\t            [:name=histname1]\n"
5801         "\t            [:nohitcount]\n"
5802         "\t            [:<handler>.<action>]\n"
5803         "\t            [if <filter>]\n\n"
5804         "\t    Note, special fields can be used as well:\n"
5805         "\t            common_timestamp - to record current timestamp\n"
5806         "\t            common_cpu - to record the CPU the event happened on\n"
5807         "\n"
5808         "\t    A hist trigger variable can be:\n"
5809         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5810         "\t        - a reference to another variable e.g. y=$x,\n"
5811         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5812         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5813         "\n"
5814         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5815         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5816         "\t    variable reference, field or numeric literal.\n"
5817         "\n"
5818         "\t    When a matching event is hit, an entry is added to a hash\n"
5819         "\t    table using the key(s) and value(s) named, and the value of a\n"
5820         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5821         "\t    correspond to fields in the event's format description.  Keys\n"
5822         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5823         "\t    Compound keys consisting of up to two fields can be specified\n"
5824         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5825         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5826         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5827         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5828         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5829         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5830         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5831         "\t    its histogram data will be shared with other triggers of the\n"
5832         "\t    same name, and trigger hits will update this common data.\n\n"
5833         "\t    Reading the 'hist' file for the event will dump the hash\n"
5834         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5835         "\t    triggers attached to an event, there will be a table for each\n"
5836         "\t    trigger in the output.  The table displayed for a named\n"
5837         "\t    trigger will be the same as any other instance having the\n"
5838         "\t    same name.  The default format used to display a given field\n"
5839         "\t    can be modified by appending any of the following modifiers\n"
5840         "\t    to the field name, as applicable:\n\n"
5841         "\t            .hex        display a number as a hex value\n"
5842         "\t            .sym        display an address as a symbol\n"
5843         "\t            .sym-offset display an address as a symbol and offset\n"
5844         "\t            .execname   display a common_pid as a program name\n"
5845         "\t            .syscall    display a syscall id as a syscall name\n"
5846         "\t            .log2       display log2 value rather than raw number\n"
5847         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5848         "\t            .usecs      display a common_timestamp in microseconds\n"
5849         "\t            .percent    display a number as a percentage value\n"
5850         "\t            .graph      display a bar-graph of a value\n\n"
5851         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5852         "\t    trigger or to start a hist trigger but not log any events\n"
5853         "\t    until told to do so.  'continue' can be used to start or\n"
5854         "\t    restart a paused hist trigger.\n\n"
5855         "\t    The 'clear' parameter will clear the contents of a running\n"
5856         "\t    hist trigger and leave its current paused/active state\n"
5857         "\t    unchanged.\n\n"
5858         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5859         "\t    raw hitcount in the histogram.\n\n"
5860         "\t    The enable_hist and disable_hist triggers can be used to\n"
5861         "\t    have one event conditionally start and stop another event's\n"
5862         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5863         "\t    the enable_event and disable_event triggers.\n\n"
5864         "\t    Hist trigger handlers and actions are executed whenever\n"
5865         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5866         "\t        <handler>.<action>\n\n"
5867         "\t    The available handlers are:\n\n"
5868         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5869         "\t        onmax(var)               - invoke if var exceeds current max\n"
5870         "\t        onchange(var)            - invoke action if var changes\n\n"
5871         "\t    The available actions are:\n\n"
5872         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5873         "\t        save(field,...)                      - save current event fields\n"
5874 #ifdef CONFIG_TRACER_SNAPSHOT
5875         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5876 #endif
5877 #ifdef CONFIG_SYNTH_EVENTS
5878         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5879         "\t  Write into this file to define/undefine new synthetic events.\n"
5880         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5881 #endif
5882 #endif
5883 ;
5884
5885 static ssize_t
5886 tracing_readme_read(struct file *filp, char __user *ubuf,
5887                        size_t cnt, loff_t *ppos)
5888 {
5889         return simple_read_from_buffer(ubuf, cnt, ppos,
5890                                         readme_msg, strlen(readme_msg));
5891 }
5892
5893 static const struct file_operations tracing_readme_fops = {
5894         .open           = tracing_open_generic,
5895         .read           = tracing_readme_read,
5896         .llseek         = generic_file_llseek,
5897 };
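/* The readme_msg text above is exposed read-only as the "README" file. */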
5898
5899 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5900 {
5901         int pid = ++(*pos);
5902
5903         return trace_find_tgid_ptr(pid);
5904 }
5905
5906 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5907 {
5908         int pid = *pos;
5909
5910         return trace_find_tgid_ptr(pid);
5911 }
5912
5913 static void saved_tgids_stop(struct seq_file *m, void *v)
5914 {
5915 }
5916
5917 static int saved_tgids_show(struct seq_file *m, void *v)
5918 {
5919         int *entry = (int *)v;
5920         int pid = entry - tgid_map;
5921         int tgid = *entry;
5922
5923         if (tgid == 0)
5924                 return SEQ_SKIP;
5925
5926         seq_printf(m, "%d %d\n", pid, tgid);
5927         return 0;
5928 }
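/*
 * The "saved_tgids" file is one "<pid> <tgid>" pair per line; slots whose
 * tgid is still 0 (never recorded) are skipped.
 */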
5929
5930 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5931         .start          = saved_tgids_start,
5932         .stop           = saved_tgids_stop,
5933         .next           = saved_tgids_next,
5934         .show           = saved_tgids_show,
5935 };
5936
5937 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5938 {
5939         int ret;
5940
5941         ret = tracing_check_open_get_tr(NULL);
5942         if (ret)
5943                 return ret;
5944
5945         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5946 }
5947
5948
5949 static const struct file_operations tracing_saved_tgids_fops = {
5950         .open           = tracing_saved_tgids_open,
5951         .read           = seq_read,
5952         .llseek         = seq_lseek,
5953         .release        = seq_release,
5954 };
5955
5956 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5957 {
5958         unsigned int *ptr = v;
5959
5960         if (*pos || m->count)
5961                 ptr++;
5962
5963         (*pos)++;
5964
5965         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5966              ptr++) {
5967                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5968                         continue;
5969
5970                 return ptr;
5971         }
5972
5973         return NULL;
5974 }
5975
5976 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5977 {
5978         void *v;
5979         loff_t l = 0;
5980
5981         preempt_disable();
5982         arch_spin_lock(&trace_cmdline_lock);
5983
5984         v = &savedcmd->map_cmdline_to_pid[0];
5985         while (l <= *pos) {
5986                 v = saved_cmdlines_next(m, v, &l);
5987                 if (!v)
5988                         return NULL;
5989         }
5990
5991         return v;
5992 }
5993
5994 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5995 {
5996         arch_spin_unlock(&trace_cmdline_lock);
5997         preempt_enable();
5998 }
5999
6000 static int saved_cmdlines_show(struct seq_file *m, void *v)
6001 {
6002         char buf[TASK_COMM_LEN];
6003         unsigned int *pid = v;
6004
6005         __trace_find_cmdline(*pid, buf);
6006         seq_printf(m, "%d %s\n", *pid, buf);
6007         return 0;
6008 }
6009
6010 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6011         .start          = saved_cmdlines_start,
6012         .next           = saved_cmdlines_next,
6013         .stop           = saved_cmdlines_stop,
6014         .show           = saved_cmdlines_show,
6015 };
6016
6017 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6018 {
6019         int ret;
6020
6021         ret = tracing_check_open_get_tr(NULL);
6022         if (ret)
6023                 return ret;
6024
6025         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6026 }
6027
6028 static const struct file_operations tracing_saved_cmdlines_fops = {
6029         .open           = tracing_saved_cmdlines_open,
6030         .read           = seq_read,
6031         .llseek         = seq_lseek,
6032         .release        = seq_release,
6033 };
6034
6035 static ssize_t
6036 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6037                                  size_t cnt, loff_t *ppos)
6038 {
6039         char buf[64];
6040         int r;
6041
6042         preempt_disable();
6043         arch_spin_lock(&trace_cmdline_lock);
6044         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6045         arch_spin_unlock(&trace_cmdline_lock);
6046         preempt_enable();
6047
6048         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6049 }
6050
6051 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6052 {
6053         kfree(s->saved_cmdlines);
6054         kfree(s->map_cmdline_to_pid);
6055         kfree(s);
6056 }
6057
6058 static int tracing_resize_saved_cmdlines(unsigned int val)
6059 {
6060         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6061
6062         s = kmalloc(sizeof(*s), GFP_KERNEL);
6063         if (!s)
6064                 return -ENOMEM;
6065
6066         if (allocate_cmdlines_buffer(val, s) < 0) {
6067                 kfree(s);
6068                 return -ENOMEM;
6069         }
6070
6071         preempt_disable();
6072         arch_spin_lock(&trace_cmdline_lock);
6073         savedcmd_temp = savedcmd;
6074         savedcmd = s;
6075         arch_spin_unlock(&trace_cmdline_lock);
6076         preempt_enable();
6077         free_saved_cmdlines_buffer(savedcmd_temp);
6078
6079         return 0;
6080 }
6081
6082 static ssize_t
6083 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6084                                   size_t cnt, loff_t *ppos)
6085 {
6086         unsigned long val;
6087         int ret;
6088
6089         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6090         if (ret)
6091                 return ret;
6092
6093         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
6094         if (!val || val > PID_MAX_DEFAULT)
6095                 return -EINVAL;
6096
6097         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6098         if (ret < 0)
6099                 return ret;
6100
6101         *ppos += cnt;
6102
6103         return cnt;
6104 }
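/*
 * Example:
 *
 *   echo 1024 > saved_cmdlines_size    # keep 1024 comm/pid entries
 *
 * Values from 1 to PID_MAX_DEFAULT are accepted; the old buffer is swapped
 * out under trace_cmdline_lock and freed afterwards.
 */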
6105
6106 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6107         .open           = tracing_open_generic,
6108         .read           = tracing_saved_cmdlines_size_read,
6109         .write          = tracing_saved_cmdlines_size_write,
6110 };
6111
6112 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6113 static union trace_eval_map_item *
6114 update_eval_map(union trace_eval_map_item *ptr)
6115 {
6116         if (!ptr->map.eval_string) {
6117                 if (ptr->tail.next) {
6118                         ptr = ptr->tail.next;
6119                         /* Set ptr to the next real item (skip head) */
6120                         ptr++;
6121                 } else
6122                         return NULL;
6123         }
6124         return ptr;
6125 }
6126
6127 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6128 {
6129         union trace_eval_map_item *ptr = v;
6130
6131         /*
6132          * Paranoid! If ptr points to end, we don't want to increment past it.
6133          * This really should never happen.
6134          */
6135         (*pos)++;
6136         ptr = update_eval_map(ptr);
6137         if (WARN_ON_ONCE(!ptr))
6138                 return NULL;
6139
6140         ptr++;
6141         ptr = update_eval_map(ptr);
6142
6143         return ptr;
6144 }
6145
6146 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6147 {
6148         union trace_eval_map_item *v;
6149         loff_t l = 0;
6150
6151         mutex_lock(&trace_eval_mutex);
6152
6153         v = trace_eval_maps;
6154         if (v)
6155                 v++;
6156
6157         while (v && l < *pos) {
6158                 v = eval_map_next(m, v, &l);
6159         }
6160
6161         return v;
6162 }
6163
6164 static void eval_map_stop(struct seq_file *m, void *v)
6165 {
6166         mutex_unlock(&trace_eval_mutex);
6167 }
6168
6169 static int eval_map_show(struct seq_file *m, void *v)
6170 {
6171         union trace_eval_map_item *ptr = v;
6172
6173         seq_printf(m, "%s %ld (%s)\n",
6174                    ptr->map.eval_string, ptr->map.eval_value,
6175                    ptr->map.system);
6176
6177         return 0;
6178 }
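/*
 * Each line of the "eval_map" file produced by eval_map_show() has the
 * form "<eval_string> <value> (<system>)", for example something like:
 *
 *   SOME_ENUM_NAME 42 (some_subsystem)
 *
 * (The actual entries depend on which events and enums the kernel exports.)
 */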
6179
6180 static const struct seq_operations tracing_eval_map_seq_ops = {
6181         .start          = eval_map_start,
6182         .next           = eval_map_next,
6183         .stop           = eval_map_stop,
6184         .show           = eval_map_show,
6185 };
6186
6187 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6188 {
6189         int ret;
6190
6191         ret = tracing_check_open_get_tr(NULL);
6192         if (ret)
6193                 return ret;
6194
6195         return seq_open(filp, &tracing_eval_map_seq_ops);
6196 }
6197
6198 static const struct file_operations tracing_eval_map_fops = {
6199         .open           = tracing_eval_map_open,
6200         .read           = seq_read,
6201         .llseek         = seq_lseek,
6202         .release        = seq_release,
6203 };
6204
6205 static inline union trace_eval_map_item *
6206 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6207 {
6208         /* Return tail of array given the head */
6209         return ptr + ptr->head.length + 1;
6210 }
6211
6212 static void
6213 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6214                            int len)
6215 {
6216         struct trace_eval_map **stop;
6217         struct trace_eval_map **map;
6218         union trace_eval_map_item *map_array;
6219         union trace_eval_map_item *ptr;
6220
6221         stop = start + len;
6222
6223         /*
6224          * The trace_eval_maps list contains the maps plus a head and a
6225          * tail item, where the head holds the module and the length of
6226          * the array, and the tail holds a pointer to the next list.
6227          */
6228         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6229         if (!map_array) {
6230                 pr_warn("Unable to allocate trace eval mapping\n");
6231                 return;
6232         }
6233
6234         mutex_lock(&trace_eval_mutex);
6235
6236         if (!trace_eval_maps)
6237                 trace_eval_maps = map_array;
6238         else {
6239                 ptr = trace_eval_maps;
6240                 for (;;) {
6241                         ptr = trace_eval_jmp_to_tail(ptr);
6242                         if (!ptr->tail.next)
6243                                 break;
6244                         ptr = ptr->tail.next;
6245
6246                 }
6247                 ptr->tail.next = map_array;
6248         }
6249         map_array->head.mod = mod;
6250         map_array->head.length = len;
6251         map_array++;
6252
6253         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6254                 map_array->map = **map;
6255                 map_array++;
6256         }
6257         memset(map_array, 0, sizeof(*map_array));
6258
6259         mutex_unlock(&trace_eval_mutex);
6260 }
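/*
 * The map_array laid out by trace_insert_eval_map_file() above therefore
 * looks like (len == 3 shown as an example):
 *
 *   [ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail ]
 *
 * trace_eval_jmp_to_tail() relies on this layout: ptr + length + 1 lands
 * on the tail item, whose ->tail.next (NULL until a later module is added)
 * links to the next per-module array.
 */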
6261
6262 static void trace_create_eval_file(struct dentry *d_tracer)
6263 {
6264         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6265                           NULL, &tracing_eval_map_fops);
6266 }
6267
6268 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6269 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6270 static inline void trace_insert_eval_map_file(struct module *mod,
6271                               struct trace_eval_map **start, int len) { }
6272 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6273
6274 static void trace_insert_eval_map(struct module *mod,
6275                                   struct trace_eval_map **start, int len)
6276 {
6277         struct trace_eval_map **map;
6278
6279         if (len <= 0)
6280                 return;
6281
6282         map = start;
6283
6284         trace_event_eval_update(map, len);
6285
6286         trace_insert_eval_map_file(mod, start, len);
6287 }
6288
6289 static ssize_t
6290 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6291                        size_t cnt, loff_t *ppos)
6292 {
6293         struct trace_array *tr = filp->private_data;
6294         char buf[MAX_TRACER_SIZE+2];
6295         int r;
6296
6297         mutex_lock(&trace_types_lock);
6298         r = sprintf(buf, "%s\n", tr->current_trace->name);
6299         mutex_unlock(&trace_types_lock);
6300
6301         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6302 }
6303
6304 int tracer_init(struct tracer *t, struct trace_array *tr)
6305 {
6306         tracing_reset_online_cpus(&tr->array_buffer);
6307         return t->init(tr);
6308 }
6309
6310 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6311 {
6312         int cpu;
6313
6314         for_each_tracing_cpu(cpu)
6315                 per_cpu_ptr(buf->data, cpu)->entries = val;
6316 }
6317
6318 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6319 {
6320         if (cpu == RING_BUFFER_ALL_CPUS) {
6321                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6322         } else {
6323                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6324         }
6325 }
6326
6327 #ifdef CONFIG_TRACER_MAX_TRACE
6328 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6329 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6330                                         struct array_buffer *size_buf, int cpu_id)
6331 {
6332         int cpu, ret = 0;
6333
6334         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6335                 for_each_tracing_cpu(cpu) {
6336                         ret = ring_buffer_resize(trace_buf->buffer,
6337                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6338                         if (ret < 0)
6339                                 break;
6340                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6341                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6342                 }
6343         } else {
6344                 ret = ring_buffer_resize(trace_buf->buffer,
6345                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6346                 if (ret == 0)
6347                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6348                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6349         }
6350
6351         return ret;
6352 }
6353 #endif /* CONFIG_TRACER_MAX_TRACE */
6354
6355 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6356                                         unsigned long size, int cpu)
6357 {
6358         int ret;
6359
6360         /*
6361          * If the kernel or the user changes the size of the ring buffer,
6362          * we use the size that was given and can forget about
6363          * expanding it later.
6364          */
6365         trace_set_ring_buffer_expanded(tr);
6366
6367         /* May be called before buffers are initialized */
6368         if (!tr->array_buffer.buffer)
6369                 return 0;
6370
6371         /* Do not allow tracing while resizing ring buffer */
6372         tracing_stop_tr(tr);
6373
6374         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6375         if (ret < 0)
6376                 goto out_start;
6377
6378 #ifdef CONFIG_TRACER_MAX_TRACE
6379         if (!tr->allocated_snapshot)
6380                 goto out;
6381
6382         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6383         if (ret < 0) {
6384                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6385                                                      &tr->array_buffer, cpu);
6386                 if (r < 0) {
6387                         /*
6388                          * AARGH! We are left with different
6389                          * size max buffer!!!!
6390                          * The max buffer is our "snapshot" buffer.
6391                          * When a tracer needs a snapshot (one of the
6392                          * latency tracers), it swaps the max buffer
6393                          * with the saved snapshot. We succeeded in
6394                          * updating the size of the main buffer, but failed to
6395                          * update the size of the max buffer. But when we tried
6396                          * to reset the main buffer to the original size, we
6397                          * failed there too. This is very unlikely to
6398                          * happen, but if it does, warn and kill all
6399                          * tracing.
6400                          */
6401                         WARN_ON(1);
6402                         tracing_disabled = 1;
6403                 }
6404                 goto out_start;
6405         }
6406
6407         update_buffer_entries(&tr->max_buffer, cpu);
6408
6409  out:
6410 #endif /* CONFIG_TRACER_MAX_TRACE */
6411
6412         update_buffer_entries(&tr->array_buffer, cpu);
6413  out_start:
6414         tracing_start_tr(tr);
6415         return ret;
6416 }
6417
6418 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6419                                   unsigned long size, int cpu_id)
6420 {
6421         int ret;
6422
6423         mutex_lock(&trace_types_lock);
6424
6425         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6426                 /* make sure this cpu is enabled in the mask */
6427                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6428                         ret = -EINVAL;
6429                         goto out;
6430                 }
6431         }
6432
6433         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6434         if (ret < 0)
6435                 ret = -ENOMEM;
6436
6437 out:
6438         mutex_unlock(&trace_types_lock);
6439
6440         return ret;
6441 }
6442
6443
6444 /**
6445  * tracing_update_buffers - used by tracing facility to expand ring buffers
6446  * @tr: The tracing instance
6447  *
6448  * To save memory when tracing is never used on a system that has it
6449  * configured in, the ring buffers are set to a minimum size. Once a
6450  * user starts to use the tracing facility, they need to grow to
6451  * their default size.
6452  *
6453  * This function is to be called when a tracer is about to be used.
6454  */
6455 int tracing_update_buffers(struct trace_array *tr)
6456 {
6457         int ret = 0;
6458
6459         mutex_lock(&trace_types_lock);
6460         if (!tr->ring_buffer_expanded)
6461                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6462                                                 RING_BUFFER_ALL_CPUS);
6463         mutex_unlock(&trace_types_lock);
6464
6465         return ret;
6466 }
6467
6468 struct trace_option_dentry;
6469
6470 static void
6471 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6472
6473 /*
6474  * Used to clear out the tracer before deletion of an instance.
6475  * Must have trace_types_lock held.
6476  */
6477 static void tracing_set_nop(struct trace_array *tr)
6478 {
6479         if (tr->current_trace == &nop_trace)
6480                 return;
6481
6482         tr->current_trace->enabled--;
6483
6484         if (tr->current_trace->reset)
6485                 tr->current_trace->reset(tr);
6486
6487         tr->current_trace = &nop_trace;
6488 }
6489
6490 static bool tracer_options_updated;
6491
6492 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6493 {
6494         /* Only enable if the directory has been created already. */
6495         if (!tr->dir)
6496                 return;
6497
6498         /* Only create trace option files after update_tracer_options has finished */
6499         if (!tracer_options_updated)
6500                 return;
6501
6502         create_trace_option_files(tr, t);
6503 }
6504
6505 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6506 {
6507         struct tracer *t;
6508 #ifdef CONFIG_TRACER_MAX_TRACE
6509         bool had_max_tr;
6510 #endif
6511         int ret = 0;
6512
6513         mutex_lock(&trace_types_lock);
6514
6515         if (!tr->ring_buffer_expanded) {
6516                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6517                                                 RING_BUFFER_ALL_CPUS);
6518                 if (ret < 0)
6519                         goto out;
6520                 ret = 0;
6521         }
6522
6523         for (t = trace_types; t; t = t->next) {
6524                 if (strcmp(t->name, buf) == 0)
6525                         break;
6526         }
6527         if (!t) {
6528                 ret = -EINVAL;
6529                 goto out;
6530         }
6531         if (t == tr->current_trace)
6532                 goto out;
6533
6534 #ifdef CONFIG_TRACER_SNAPSHOT
6535         if (t->use_max_tr) {
6536                 local_irq_disable();
6537                 arch_spin_lock(&tr->max_lock);
6538                 if (tr->cond_snapshot)
6539                         ret = -EBUSY;
6540                 arch_spin_unlock(&tr->max_lock);
6541                 local_irq_enable();
6542                 if (ret)
6543                         goto out;
6544         }
6545 #endif
6546         /* Some tracers are not allowed to be enabled from the kernel command line */
6547         if (system_state < SYSTEM_RUNNING && t->noboot) {
6548                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6549                         t->name);
6550                 goto out;
6551         }
6552
6553         /* Some tracers are only allowed for the top level buffer */
6554         if (!trace_ok_for_array(t, tr)) {
6555                 ret = -EINVAL;
6556                 goto out;
6557         }
6558
6559         /* If trace pipe files are being read, we can't change the tracer */
6560         if (tr->trace_ref) {
6561                 ret = -EBUSY;
6562                 goto out;
6563         }
6564
6565         trace_branch_disable();
6566
6567         tr->current_trace->enabled--;
6568
6569         if (tr->current_trace->reset)
6570                 tr->current_trace->reset(tr);
6571
6572 #ifdef CONFIG_TRACER_MAX_TRACE
6573         had_max_tr = tr->current_trace->use_max_tr;
6574
6575         /* Current trace needs to be nop_trace before synchronize_rcu */
6576         tr->current_trace = &nop_trace;
6577
6578         if (had_max_tr && !t->use_max_tr) {
6579                 /*
6580                  * We need to make sure that update_max_tr() sees that
6581                  * current_trace changed to nop_trace to keep it from
6582                  * swapping the buffers after we resize it.
6583                  * update_max_tr() is called with interrupts disabled,
6584                  * so synchronize_rcu() is sufficient.
6585                  */
6586                 synchronize_rcu();
6587                 free_snapshot(tr);
6588         }
6589
6590         if (t->use_max_tr && !tr->allocated_snapshot) {
6591                 ret = tracing_alloc_snapshot_instance(tr);
6592                 if (ret < 0)
6593                         goto out;
6594         }
6595 #else
6596         tr->current_trace = &nop_trace;
6597 #endif
6598
6599         if (t->init) {
6600                 ret = tracer_init(t, tr);
6601                 if (ret)
6602                         goto out;
6603         }
6604
6605         tr->current_trace = t;
6606         tr->current_trace->enabled++;
6607         trace_branch_enable(tr);
6608  out:
6609         mutex_unlock(&trace_types_lock);
6610
6611         return ret;
6612 }
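/*
 * Example: tracing_set_tracer() is what ultimately runs when a tracer name
 * is written to the "current_tracer" file in tracefs (via
 * tracing_set_trace_write() below). Assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   # cat /sys/kernel/tracing/current_tracer
 *   nop
 *   # echo function > /sys/kernel/tracing/current_tracer
 *
 * (the second step assumes the function tracer is configured in).
 * An unknown tracer name makes the write fail with -EINVAL.
 */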
6613
6614 static ssize_t
6615 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6616                         size_t cnt, loff_t *ppos)
6617 {
6618         struct trace_array *tr = filp->private_data;
6619         char buf[MAX_TRACER_SIZE+1];
6620         char *name;
6621         size_t ret;
6622         int err;
6623
6624         ret = cnt;
6625
6626         if (cnt > MAX_TRACER_SIZE)
6627                 cnt = MAX_TRACER_SIZE;
6628
6629         if (copy_from_user(buf, ubuf, cnt))
6630                 return -EFAULT;
6631
6632         buf[cnt] = 0;
6633
6634         name = strim(buf);
6635
6636         err = tracing_set_tracer(tr, name);
6637         if (err)
6638                 return err;
6639
6640         *ppos += ret;
6641
6642         return ret;
6643 }
6644
6645 static ssize_t
6646 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6647                    size_t cnt, loff_t *ppos)
6648 {
6649         char buf[64];
6650         int r;
6651
6652         r = snprintf(buf, sizeof(buf), "%ld\n",
6653                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6654         if (r > sizeof(buf))
6655                 r = sizeof(buf);
6656         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6657 }
6658
6659 static ssize_t
6660 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6661                     size_t cnt, loff_t *ppos)
6662 {
6663         unsigned long val;
6664         int ret;
6665
6666         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6667         if (ret)
6668                 return ret;
6669
6670         *ptr = val * 1000;
6671
6672         return cnt;
6673 }
6674
6675 static ssize_t
6676 tracing_thresh_read(struct file *filp, char __user *ubuf,
6677                     size_t cnt, loff_t *ppos)
6678 {
6679         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6680 }
6681
6682 static ssize_t
6683 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6684                      size_t cnt, loff_t *ppos)
6685 {
6686         struct trace_array *tr = filp->private_data;
6687         int ret;
6688
6689         mutex_lock(&trace_types_lock);
6690         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6691         if (ret < 0)
6692                 goto out;
6693
6694         if (tr->current_trace->update_thresh) {
6695                 ret = tr->current_trace->update_thresh(tr);
6696                 if (ret < 0)
6697                         goto out;
6698         }
6699
6700         ret = cnt;
6701 out:
6702         mutex_unlock(&trace_types_lock);
6703
6704         return ret;
6705 }
6706
6707 #ifdef CONFIG_TRACER_MAX_TRACE
6708
6709 static ssize_t
6710 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6711                      size_t cnt, loff_t *ppos)
6712 {
6713         struct trace_array *tr = filp->private_data;
6714
6715         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6716 }
6717
6718 static ssize_t
6719 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6720                       size_t cnt, loff_t *ppos)
6721 {
6722         struct trace_array *tr = filp->private_data;
6723
6724         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6725 }
6726
6727 #endif
6728
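/*
 * open_pipe_on_cpu()/close_pipe_on_cpu() keep pipe readers exclusive:
 * either a single "all CPUs" reader owns every bit of tr->pipe_cpumask,
 * or per-CPU readers each own their own bit. A conflicting open gets
 * -EBUSY.
 */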
6729 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6730 {
6731         if (cpu == RING_BUFFER_ALL_CPUS) {
6732                 if (cpumask_empty(tr->pipe_cpumask)) {
6733                         cpumask_setall(tr->pipe_cpumask);
6734                         return 0;
6735                 }
6736         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6737                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6738                 return 0;
6739         }
6740         return -EBUSY;
6741 }
6742
6743 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6744 {
6745         if (cpu == RING_BUFFER_ALL_CPUS) {
6746                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6747                 cpumask_clear(tr->pipe_cpumask);
6748         } else {
6749                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6750                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6751         }
6752 }
6753
6754 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6755 {
6756         struct trace_array *tr = inode->i_private;
6757         struct trace_iterator *iter;
6758         int cpu;
6759         int ret;
6760
6761         ret = tracing_check_open_get_tr(tr);
6762         if (ret)
6763                 return ret;
6764
6765         mutex_lock(&trace_types_lock);
6766         cpu = tracing_get_cpu(inode);
6767         ret = open_pipe_on_cpu(tr, cpu);
6768         if (ret)
6769                 goto fail_pipe_on_cpu;
6770
6771         /* create a buffer to store the information to pass to userspace */
6772         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6773         if (!iter) {
6774                 ret = -ENOMEM;
6775                 goto fail_alloc_iter;
6776         }
6777
6778         trace_seq_init(&iter->seq);
6779         iter->trace = tr->current_trace;
6780
6781         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6782                 ret = -ENOMEM;
6783                 goto fail;
6784         }
6785
6786         /* trace pipe does not show start of buffer */
6787         cpumask_setall(iter->started);
6788
6789         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6790                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6791
6792         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6793         if (trace_clocks[tr->clock_id].in_ns)
6794                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6795
6796         iter->tr = tr;
6797         iter->array_buffer = &tr->array_buffer;
6798         iter->cpu_file = cpu;
6799         mutex_init(&iter->mutex);
6800         filp->private_data = iter;
6801
6802         if (iter->trace->pipe_open)
6803                 iter->trace->pipe_open(iter);
6804
6805         nonseekable_open(inode, filp);
6806
6807         tr->trace_ref++;
6808
6809         mutex_unlock(&trace_types_lock);
6810         return ret;
6811
6812 fail:
6813         kfree(iter);
6814 fail_alloc_iter:
6815         close_pipe_on_cpu(tr, cpu);
6816 fail_pipe_on_cpu:
6817         __trace_array_put(tr);
6818         mutex_unlock(&trace_types_lock);
6819         return ret;
6820 }
6821
6822 static int tracing_release_pipe(struct inode *inode, struct file *file)
6823 {
6824         struct trace_iterator *iter = file->private_data;
6825         struct trace_array *tr = inode->i_private;
6826
6827         mutex_lock(&trace_types_lock);
6828
6829         tr->trace_ref--;
6830
6831         if (iter->trace->pipe_close)
6832                 iter->trace->pipe_close(iter);
6833         close_pipe_on_cpu(tr, iter->cpu_file);
6834         mutex_unlock(&trace_types_lock);
6835
6836         free_trace_iter_content(iter);
6837         kfree(iter);
6838
6839         trace_array_put(tr);
6840
6841         return 0;
6842 }
6843
6844 static __poll_t
6845 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6846 {
6847         struct trace_array *tr = iter->tr;
6848
6849         /* Iterators are static, they should be filled or empty */
6850         if (trace_buffer_iter(iter, iter->cpu_file))
6851                 return EPOLLIN | EPOLLRDNORM;
6852
6853         if (tr->trace_flags & TRACE_ITER_BLOCK)
6854                 /*
6855                  * Always select as readable when in blocking mode
6856                  */
6857                 return EPOLLIN | EPOLLRDNORM;
6858         else
6859                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6860                                              filp, poll_table, iter->tr->buffer_percent);
6861 }
6862
6863 static __poll_t
6864 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6865 {
6866         struct trace_iterator *iter = filp->private_data;
6867
6868         return trace_poll(iter, filp, poll_table);
6869 }
6870
6871 /* Must be called with iter->mutex held. */
6872 static int tracing_wait_pipe(struct file *filp)
6873 {
6874         struct trace_iterator *iter = filp->private_data;
6875         int ret;
6876
6877         while (trace_empty(iter)) {
6878
6879                 if ((filp->f_flags & O_NONBLOCK)) {
6880                         return -EAGAIN;
6881                 }
6882
6883                 /*
6884                  * We block while the buffer is empty. If tracing is
6885                  * disabled but we have never read anything, keep
6886                  * blocking: this allows a user to cat this file and then
6887                  * enable tracing. But after we have read something, we
6888                  * give an EOF when tracing is disabled again.
6889                  *
6890                  * iter->pos will be 0 if we haven't read anything.
6891                  */
6892                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6893                         break;
6894
6895                 mutex_unlock(&iter->mutex);
6896
6897                 ret = wait_on_pipe(iter, 0);
6898
6899                 mutex_lock(&iter->mutex);
6900
6901                 if (ret)
6902                         return ret;
6903         }
6904
6905         return 1;
6906 }
6907
6908 /*
6909  * Consumer reader.
6910  */
6911 static ssize_t
6912 tracing_read_pipe(struct file *filp, char __user *ubuf,
6913                   size_t cnt, loff_t *ppos)
6914 {
6915         struct trace_iterator *iter = filp->private_data;
6916         ssize_t sret;
6917
6918         /*
6919          * Avoid more than one consumer on a single file descriptor.
6920          * This is just a matter of trace coherency; the ring buffer itself
6921          * is protected.
6922          */
6923         mutex_lock(&iter->mutex);
6924
6925         /* return any leftover data */
6926         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6927         if (sret != -EBUSY)
6928                 goto out;
6929
6930         trace_seq_init(&iter->seq);
6931
6932         if (iter->trace->read) {
6933                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6934                 if (sret)
6935                         goto out;
6936         }
6937
6938 waitagain:
6939         sret = tracing_wait_pipe(filp);
6940         if (sret <= 0)
6941                 goto out;
6942
6943         /* stop when tracing is finished */
6944         if (trace_empty(iter)) {
6945                 sret = 0;
6946                 goto out;
6947         }
6948
6949         if (cnt >= PAGE_SIZE)
6950                 cnt = PAGE_SIZE - 1;
6951
6952         /* reset all but tr, trace, and overruns */
6953         trace_iterator_reset(iter);
6954         cpumask_clear(iter->started);
6955         trace_seq_init(&iter->seq);
6956
6957         trace_event_read_lock();
6958         trace_access_lock(iter->cpu_file);
6959         while (trace_find_next_entry_inc(iter) != NULL) {
6960                 enum print_line_t ret;
6961                 int save_len = iter->seq.seq.len;
6962
6963                 ret = print_trace_line(iter);
6964                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6965                         /*
6966                          * If one print_trace_line() fills the entire trace_seq in one shot,
6967                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6968                          * In this case, we need to consume it; otherwise, the loop will peek
6969                          * at this event again next time, resulting in an infinite loop.
6970                          */
6971                         if (save_len == 0) {
6972                                 iter->seq.full = 0;
6973                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6974                                 trace_consume(iter);
6975                                 break;
6976                         }
6977
6978                         /* In other cases, don't print partial lines */
6979                         iter->seq.seq.len = save_len;
6980                         break;
6981                 }
6982                 if (ret != TRACE_TYPE_NO_CONSUME)
6983                         trace_consume(iter);
6984
6985                 if (trace_seq_used(&iter->seq) >= cnt)
6986                         break;
6987
6988                 /*
6989                  * If the full flag is set, we reached the trace_seq buffer
6990                  * size and should have left via the partial-output condition
6991                  * above. One of the trace_seq_* functions is not used properly.
6992                  */
6993                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6994                           iter->ent->type);
6995         }
6996         trace_access_unlock(iter->cpu_file);
6997         trace_event_read_unlock();
6998
6999         /* Now copy what we have to the user */
7000         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7001         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7002                 trace_seq_init(&iter->seq);
7003
7004         /*
7005          * If there was nothing to send to the user, despite having consumed
7006          * trace entries, go back and wait for more entries.
7007          */
7008         if (sret == -EBUSY)
7009                 goto waitagain;
7010
7011 out:
7012         mutex_unlock(&iter->mutex);
7013
7014         return sret;
7015 }
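/*
 * Example: tracing_read_pipe() is the consuming read behind the
 * "trace_pipe" file. Unlike "trace", reading it removes events from the
 * ring buffer, and an empty buffer blocks (or returns -EAGAIN with
 * O_NONBLOCK, as tracing_wait_pipe() above shows):
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *   (blocks until events arrive, then streams and consumes them)
 */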
7016
7017 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7018                                      unsigned int idx)
7019 {
7020         __free_page(spd->pages[idx]);
7021 }
7022
7023 static size_t
7024 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7025 {
7026         size_t count;
7027         int save_len;
7028         int ret;
7029
7030         /* Seq buffer is page-sized, exactly what we need. */
7031         for (;;) {
7032                 save_len = iter->seq.seq.len;
7033                 ret = print_trace_line(iter);
7034
7035                 if (trace_seq_has_overflowed(&iter->seq)) {
7036                         iter->seq.seq.len = save_len;
7037                         break;
7038                 }
7039
7040                 /*
7041                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
7042                  * should only be returned if iter->seq overflowed. But
7043                  * check it anyway to be safe.
7044                  */
7045                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7046                         iter->seq.seq.len = save_len;
7047                         break;
7048                 }
7049
7050                 count = trace_seq_used(&iter->seq) - save_len;
7051                 if (rem < count) {
7052                         rem = 0;
7053                         iter->seq.seq.len = save_len;
7054                         break;
7055                 }
7056
7057                 if (ret != TRACE_TYPE_NO_CONSUME)
7058                         trace_consume(iter);
7059                 rem -= count;
7060                 if (!trace_find_next_entry_inc(iter))   {
7061                         rem = 0;
7062                         iter->ent = NULL;
7063                         break;
7064                 }
7065         }
7066
7067         return rem;
7068 }
7069
7070 static ssize_t tracing_splice_read_pipe(struct file *filp,
7071                                         loff_t *ppos,
7072                                         struct pipe_inode_info *pipe,
7073                                         size_t len,
7074                                         unsigned int flags)
7075 {
7076         struct page *pages_def[PIPE_DEF_BUFFERS];
7077         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7078         struct trace_iterator *iter = filp->private_data;
7079         struct splice_pipe_desc spd = {
7080                 .pages          = pages_def,
7081                 .partial        = partial_def,
7082                 .nr_pages       = 0, /* This gets updated below. */
7083                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7084                 .ops            = &default_pipe_buf_ops,
7085                 .spd_release    = tracing_spd_release_pipe,
7086         };
7087         ssize_t ret;
7088         size_t rem;
7089         unsigned int i;
7090
7091         if (splice_grow_spd(pipe, &spd))
7092                 return -ENOMEM;
7093
7094         mutex_lock(&iter->mutex);
7095
7096         if (iter->trace->splice_read) {
7097                 ret = iter->trace->splice_read(iter, filp,
7098                                                ppos, pipe, len, flags);
7099                 if (ret)
7100                         goto out_err;
7101         }
7102
7103         ret = tracing_wait_pipe(filp);
7104         if (ret <= 0)
7105                 goto out_err;
7106
7107         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7108                 ret = -EFAULT;
7109                 goto out_err;
7110         }
7111
7112         trace_event_read_lock();
7113         trace_access_lock(iter->cpu_file);
7114
7115         /* Fill as many pages as possible. */
7116         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7117                 spd.pages[i] = alloc_page(GFP_KERNEL);
7118                 if (!spd.pages[i])
7119                         break;
7120
7121                 rem = tracing_fill_pipe_page(rem, iter);
7122
7123                 /* Copy the data into the page, so we can start over. */
7124                 ret = trace_seq_to_buffer(&iter->seq,
7125                                           page_address(spd.pages[i]),
7126                                           trace_seq_used(&iter->seq));
7127                 if (ret < 0) {
7128                         __free_page(spd.pages[i]);
7129                         break;
7130                 }
7131                 spd.partial[i].offset = 0;
7132                 spd.partial[i].len = trace_seq_used(&iter->seq);
7133
7134                 trace_seq_init(&iter->seq);
7135         }
7136
7137         trace_access_unlock(iter->cpu_file);
7138         trace_event_read_unlock();
7139         mutex_unlock(&iter->mutex);
7140
7141         spd.nr_pages = i;
7142
7143         if (i)
7144                 ret = splice_to_pipe(pipe, &spd);
7145         else
7146                 ret = 0;
7147 out:
7148         splice_shrink_spd(&spd);
7149         return ret;
7150
7151 out_err:
7152         mutex_unlock(&iter->mutex);
7153         goto out;
7154 }
7155
7156 static ssize_t
7157 tracing_entries_read(struct file *filp, char __user *ubuf,
7158                      size_t cnt, loff_t *ppos)
7159 {
7160         struct inode *inode = file_inode(filp);
7161         struct trace_array *tr = inode->i_private;
7162         int cpu = tracing_get_cpu(inode);
7163         char buf[64];
7164         int r = 0;
7165         ssize_t ret;
7166
7167         mutex_lock(&trace_types_lock);
7168
7169         if (cpu == RING_BUFFER_ALL_CPUS) {
7170                 int cpu, buf_size_same;
7171                 unsigned long size;
7172
7173                 size = 0;
7174                 buf_size_same = 1;
7175                 /* check if all cpu sizes are same */
7176                 for_each_tracing_cpu(cpu) {
7177                         /* fill in the size from first enabled cpu */
7178                         if (size == 0)
7179                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7180                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7181                                 buf_size_same = 0;
7182                                 break;
7183                         }
7184                 }
7185
7186                 if (buf_size_same) {
7187                         if (!tr->ring_buffer_expanded)
7188                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7189                                             size >> 10,
7190                                             trace_buf_size >> 10);
7191                         else
7192                                 r = sprintf(buf, "%lu\n", size >> 10);
7193                 } else
7194                         r = sprintf(buf, "X\n");
7195         } else
7196                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7197
7198         mutex_unlock(&trace_types_lock);
7199
7200         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7201         return ret;
7202 }
7203
7204 static ssize_t
7205 tracing_entries_write(struct file *filp, const char __user *ubuf,
7206                       size_t cnt, loff_t *ppos)
7207 {
7208         struct inode *inode = file_inode(filp);
7209         struct trace_array *tr = inode->i_private;
7210         unsigned long val;
7211         int ret;
7212
7213         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7214         if (ret)
7215                 return ret;
7216
7217         /* must have at least 1 entry */
7218         if (!val)
7219                 return -EINVAL;
7220
7221         /* value is in KB */
7222         val <<= 10;
7223         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7224         if (ret < 0)
7225                 return ret;
7226
7227         *ppos += cnt;
7228
7229         return cnt;
7230 }
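/*
 * Example: tracing_entries_read()/write() above back the per-instance
 * "buffer_size_kb" file (and its per_cpu/cpuN/ variants, selected via
 * tracing_get_cpu()). Values are in KiB per CPU:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   4096
 *
 * Before the first expansion, the read shows the current size followed by
 * "(expanded: ...)" to note the default size it will grow to once tracing
 * is actually used.
 */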
7231
7232 static ssize_t
7233 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7234                                 size_t cnt, loff_t *ppos)
7235 {
7236         struct trace_array *tr = filp->private_data;
7237         char buf[64];
7238         int r, cpu;
7239         unsigned long size = 0, expanded_size = 0;
7240
7241         mutex_lock(&trace_types_lock);
7242         for_each_tracing_cpu(cpu) {
7243                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7244                 if (!tr->ring_buffer_expanded)
7245                         expanded_size += trace_buf_size >> 10;
7246         }
7247         if (tr->ring_buffer_expanded)
7248                 r = sprintf(buf, "%lu\n", size);
7249         else
7250                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7251         mutex_unlock(&trace_types_lock);
7252
7253         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7254 }
7255
7256 static ssize_t
7257 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7258                           size_t cnt, loff_t *ppos)
7259 {
7260         /*
7261          * There is no need to read what the user has written; this function
7262          * just makes sure that there is no error when "echo" is used.
7263          */
7264
7265         *ppos += cnt;
7266
7267         return cnt;
7268 }
7269
7270 static int
7271 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7272 {
7273         struct trace_array *tr = inode->i_private;
7274
7275         /* disable tracing ? */
7276         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7277                 tracer_tracing_off(tr);
7278         /* resize the ring buffer to 0 */
7279         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7280
7281         trace_array_put(tr);
7282
7283         return 0;
7284 }
7285
7286 static ssize_t
7287 tracing_mark_write(struct file *filp, const char __user *ubuf,
7288                                         size_t cnt, loff_t *fpos)
7289 {
7290         struct trace_array *tr = filp->private_data;
7291         struct ring_buffer_event *event;
7292         enum event_trigger_type tt = ETT_NONE;
7293         struct trace_buffer *buffer;
7294         struct print_entry *entry;
7295         ssize_t written;
7296         int size;
7297         int len;
7298
7299 /* Used in tracing_mark_raw_write() as well */
7300 #define FAULTED_STR "<faulted>"
7301 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7302
7303         if (tracing_disabled)
7304                 return -EINVAL;
7305
7306         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7307                 return -EINVAL;
7308
7309         if (cnt > TRACE_BUF_SIZE)
7310                 cnt = TRACE_BUF_SIZE;
7311
7312         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7313
7314         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7315
7316         /* If less than "<faulted>", then make sure we can still add that */
7317         if (cnt < FAULTED_SIZE)
7318                 size += FAULTED_SIZE - cnt;
7319
7320         buffer = tr->array_buffer.buffer;
7321         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7322                                             tracing_gen_ctx());
7323         if (unlikely(!event))
7324                 /* Ring buffer disabled, return as if not open for write */
7325                 return -EBADF;
7326
7327         entry = ring_buffer_event_data(event);
7328         entry->ip = _THIS_IP_;
7329
7330         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7331         if (len) {
7332                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7333                 cnt = FAULTED_SIZE;
7334                 written = -EFAULT;
7335         } else
7336                 written = cnt;
7337
7338         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7339                 /* do not add \n before testing triggers, but add \0 */
7340                 entry->buf[cnt] = '\0';
7341                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7342         }
7343
7344         if (entry->buf[cnt - 1] != '\n') {
7345                 entry->buf[cnt] = '\n';
7346                 entry->buf[cnt + 1] = '\0';
7347         } else
7348                 entry->buf[cnt] = '\0';
7349
7350         if (static_branch_unlikely(&trace_marker_exports_enabled))
7351                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7352         __buffer_unlock_commit(buffer, event);
7353
7354         if (tt)
7355                 event_triggers_post_call(tr->trace_marker_file, tt);
7356
7357         return written;
7358 }
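/*
 * Example: tracing_mark_write() services writes to the "trace_marker"
 * file, which lets user space inject a TRACE_PRINT event into the ring
 * buffer:
 *
 *   # echo "hit the interesting spot" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace output as a print event; a trailing
 * newline is added if the write did not include one.
 */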
7359
7360 /* Limit it for now to 3K (including tag) */
7361 #define RAW_DATA_MAX_SIZE (1024*3)
7362
7363 static ssize_t
7364 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7365                                         size_t cnt, loff_t *fpos)
7366 {
7367         struct trace_array *tr = filp->private_data;
7368         struct ring_buffer_event *event;
7369         struct trace_buffer *buffer;
7370         struct raw_data_entry *entry;
7371         ssize_t written;
7372         int size;
7373         int len;
7374
7375 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7376
7377         if (tracing_disabled)
7378                 return -EINVAL;
7379
7380         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7381                 return -EINVAL;
7382
7383         /* The marker must at least have a tag id */
7384         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7385                 return -EINVAL;
7386
7387         if (cnt > TRACE_BUF_SIZE)
7388                 cnt = TRACE_BUF_SIZE;
7389
7390         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7391
7392         size = sizeof(*entry) + cnt;
7393         if (cnt < FAULT_SIZE_ID)
7394                 size += FAULT_SIZE_ID - cnt;
7395
7396         buffer = tr->array_buffer.buffer;
7397         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7398                                             tracing_gen_ctx());
7399         if (!event)
7400                 /* Ring buffer disabled, return as if not open for write */
7401                 return -EBADF;
7402
7403         entry = ring_buffer_event_data(event);
7404
7405         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7406         if (len) {
7407                 entry->id = -1;
7408                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7409                 written = -EFAULT;
7410         } else
7411                 written = cnt;
7412
7413         __buffer_unlock_commit(buffer, event);
7414
7415         return written;
7416 }
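/*
 * Example: tracing_mark_raw_write() backs the "trace_marker_raw" file.
 * The payload must start with a tag id (an unsigned int) followed by raw
 * binary data. A minimal user-space sketch (names and values purely
 * illustrative):
 *
 *   struct {
 *           unsigned int    id;
 *           char            data[8];
 *   } rec = { .id = 0x1234, .data = "rawdata" };
 *   write(fd, &rec, sizeof(rec));      /\* fd open on trace_marker_raw *\/
 */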
7417
7418 static int tracing_clock_show(struct seq_file *m, void *v)
7419 {
7420         struct trace_array *tr = m->private;
7421         int i;
7422
7423         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7424                 seq_printf(m,
7425                         "%s%s%s%s", i ? " " : "",
7426                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7427                         i == tr->clock_id ? "]" : "");
7428         seq_putc(m, '\n');
7429
7430         return 0;
7431 }
7432
7433 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7434 {
7435         int i;
7436
7437         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7438                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7439                         break;
7440         }
7441         if (i == ARRAY_SIZE(trace_clocks))
7442                 return -EINVAL;
7443
7444         mutex_lock(&trace_types_lock);
7445
7446         tr->clock_id = i;
7447
7448         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7449
7450         /*
7451          * New clock may not be consistent with the previous clock.
7452          * Reset the buffer so that it doesn't have incomparable timestamps.
7453          */
7454         tracing_reset_online_cpus(&tr->array_buffer);
7455
7456 #ifdef CONFIG_TRACER_MAX_TRACE
7457         if (tr->max_buffer.buffer)
7458                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7459         tracing_reset_online_cpus(&tr->max_buffer);
7460 #endif
7461
7462         mutex_unlock(&trace_types_lock);
7463
7464         return 0;
7465 }
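/*
 * Example: tracing_set_clock() is reached through writes to the
 * "trace_clock" file; tracing_clock_show() above brackets the active
 * clock when the file is read:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter ...
 *   # echo global > /sys/kernel/tracing/trace_clock
 *
 * (The exact list of clocks comes from the trace_clocks[] array.)
 * Switching clocks resets the buffers, since old and new timestamps
 * would not be comparable.
 */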
7466
7467 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7468                                    size_t cnt, loff_t *fpos)
7469 {
7470         struct seq_file *m = filp->private_data;
7471         struct trace_array *tr = m->private;
7472         char buf[64];
7473         const char *clockstr;
7474         int ret;
7475
7476         if (cnt >= sizeof(buf))
7477                 return -EINVAL;
7478
7479         if (copy_from_user(buf, ubuf, cnt))
7480                 return -EFAULT;
7481
7482         buf[cnt] = 0;
7483
7484         clockstr = strstrip(buf);
7485
7486         ret = tracing_set_clock(tr, clockstr);
7487         if (ret)
7488                 return ret;
7489
7490         *fpos += cnt;
7491
7492         return cnt;
7493 }
7494
7495 static int tracing_clock_open(struct inode *inode, struct file *file)
7496 {
7497         struct trace_array *tr = inode->i_private;
7498         int ret;
7499
7500         ret = tracing_check_open_get_tr(tr);
7501         if (ret)
7502                 return ret;
7503
7504         ret = single_open(file, tracing_clock_show, inode->i_private);
7505         if (ret < 0)
7506                 trace_array_put(tr);
7507
7508         return ret;
7509 }
7510
7511 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7512 {
7513         struct trace_array *tr = m->private;
7514
7515         mutex_lock(&trace_types_lock);
7516
7517         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7518                 seq_puts(m, "delta [absolute]\n");
7519         else
7520                 seq_puts(m, "[delta] absolute\n");
7521
7522         mutex_unlock(&trace_types_lock);
7523
7524         return 0;
7525 }
7526
7527 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7528 {
7529         struct trace_array *tr = inode->i_private;
7530         int ret;
7531
7532         ret = tracing_check_open_get_tr(tr);
7533         if (ret)
7534                 return ret;
7535
7536         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7537         if (ret < 0)
7538                 trace_array_put(tr);
7539
7540         return ret;
7541 }
7542
7543 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7544 {
7545         if (rbe == this_cpu_read(trace_buffered_event))
7546                 return ring_buffer_time_stamp(buffer);
7547
7548         return ring_buffer_event_time_stamp(buffer, rbe);
7549 }
7550
7551 /*
7552  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7553  */
7554 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7555 {
7556         int ret = 0;
7557
7558         mutex_lock(&trace_types_lock);
7559
7560         if (set && tr->no_filter_buffering_ref++)
7561                 goto out;
7562
7563         if (!set) {
7564                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7565                         ret = -EINVAL;
7566                         goto out;
7567                 }
7568
7569                 --tr->no_filter_buffering_ref;
7570         }
7571  out:
7572         mutex_unlock(&trace_types_lock);
7573
7574         return ret;
7575 }
7576
7577 struct ftrace_buffer_info {
7578         struct trace_iterator   iter;
7579         void                    *spare;
7580         unsigned int            spare_cpu;
7581         unsigned int            read;
7582 };
7583
7584 #ifdef CONFIG_TRACER_SNAPSHOT
7585 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7586 {
7587         struct trace_array *tr = inode->i_private;
7588         struct trace_iterator *iter;
7589         struct seq_file *m;
7590         int ret;
7591
7592         ret = tracing_check_open_get_tr(tr);
7593         if (ret)
7594                 return ret;
7595
7596         if (file->f_mode & FMODE_READ) {
7597                 iter = __tracing_open(inode, file, true);
7598                 if (IS_ERR(iter))
7599                         ret = PTR_ERR(iter);
7600         } else {
7601                 /* Writes still need the seq_file to hold the private data */
7602                 ret = -ENOMEM;
7603                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7604                 if (!m)
7605                         goto out;
7606                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7607                 if (!iter) {
7608                         kfree(m);
7609                         goto out;
7610                 }
7611                 ret = 0;
7612
7613                 iter->tr = tr;
7614                 iter->array_buffer = &tr->max_buffer;
7615                 iter->cpu_file = tracing_get_cpu(inode);
7616                 m->private = iter;
7617                 file->private_data = m;
7618         }
7619 out:
7620         if (ret < 0)
7621                 trace_array_put(tr);
7622
7623         return ret;
7624 }
7625
7626 static void tracing_swap_cpu_buffer(void *tr)
7627 {
7628         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7629 }
7630
7631 static ssize_t
7632 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7633                        loff_t *ppos)
7634 {
7635         struct seq_file *m = filp->private_data;
7636         struct trace_iterator *iter = m->private;
7637         struct trace_array *tr = iter->tr;
7638         unsigned long val;
7639         int ret;
7640
7641         ret = tracing_update_buffers(tr);
7642         if (ret < 0)
7643                 return ret;
7644
7645         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7646         if (ret)
7647                 return ret;
7648
7649         mutex_lock(&trace_types_lock);
7650
7651         if (tr->current_trace->use_max_tr) {
7652                 ret = -EBUSY;
7653                 goto out;
7654         }
7655
7656         local_irq_disable();
7657         arch_spin_lock(&tr->max_lock);
7658         if (tr->cond_snapshot)
7659                 ret = -EBUSY;
7660         arch_spin_unlock(&tr->max_lock);
7661         local_irq_enable();
7662         if (ret)
7663                 goto out;
7664
7665         switch (val) {
7666         case 0:
7667                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7668                         ret = -EINVAL;
7669                         break;
7670                 }
7671                 if (tr->allocated_snapshot)
7672                         free_snapshot(tr);
7673                 break;
7674         case 1:
7675 /* Only allow per-cpu swap if the ring buffer supports it */
7676 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7677                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7678                         ret = -EINVAL;
7679                         break;
7680                 }
7681 #endif
7682                 if (tr->allocated_snapshot)
7683                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7684                                         &tr->array_buffer, iter->cpu_file);
7685                 else
7686                         ret = tracing_alloc_snapshot_instance(tr);
7687                 if (ret < 0)
7688                         break;
7689                 /* Now, we're going to swap */
7690                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7691                         local_irq_disable();
7692                         update_max_tr(tr, current, smp_processor_id(), NULL);
7693                         local_irq_enable();
7694                 } else {
7695                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7696                                                  (void *)tr, 1);
7697                 }
7698                 break;
7699         default:
7700                 if (tr->allocated_snapshot) {
7701                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7702                                 tracing_reset_online_cpus(&tr->max_buffer);
7703                         else
7704                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7705                 }
7706                 break;
7707         }
7708
7709         if (ret >= 0) {
7710                 *ppos += cnt;
7711                 ret = cnt;
7712         }
7713 out:
7714         mutex_unlock(&trace_types_lock);
7715         return ret;
7716 }
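/*
 * Example: tracing_snapshot_write() implements the value protocol of the
 * "snapshot" file, matching the switch above:
 *
 *   # echo 0 > /sys/kernel/tracing/snapshot   (free the snapshot buffer)
 *   # echo 1 > /sys/kernel/tracing/snapshot   (allocate if needed and take
 *                                              a snapshot now)
 *   # echo 2 > /sys/kernel/tracing/snapshot   (clear the snapshot contents)
 *
 * Any value other than 0 or 1 just clears the snapshot buffer, as the
 * default case shows.
 */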
7717
7718 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7719 {
7720         struct seq_file *m = file->private_data;
7721         int ret;
7722
7723         ret = tracing_release(inode, file);
7724
7725         if (file->f_mode & FMODE_READ)
7726                 return ret;
7727
7728         /* If write only, the seq_file is just a stub */
7729         if (m)
7730                 kfree(m->private);
7731         kfree(m);
7732
7733         return 0;
7734 }
7735
7736 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7737 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7738                                     size_t count, loff_t *ppos);
7739 static int tracing_buffers_release(struct inode *inode, struct file *file);
7740 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7741                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7742
7743 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7744 {
7745         struct ftrace_buffer_info *info;
7746         int ret;
7747
7748         /* The following checks for tracefs lockdown */
7749         ret = tracing_buffers_open(inode, filp);
7750         if (ret < 0)
7751                 return ret;
7752
7753         info = filp->private_data;
7754
7755         if (info->iter.trace->use_max_tr) {
7756                 tracing_buffers_release(inode, filp);
7757                 return -EBUSY;
7758         }
7759
7760         info->iter.snapshot = true;
7761         info->iter.array_buffer = &info->iter.tr->max_buffer;
7762
7763         return ret;
7764 }
7765
7766 #endif /* CONFIG_TRACER_SNAPSHOT */
7767
7768
7769 static const struct file_operations tracing_thresh_fops = {
7770         .open           = tracing_open_generic,
7771         .read           = tracing_thresh_read,
7772         .write          = tracing_thresh_write,
7773         .llseek         = generic_file_llseek,
7774 };
7775
7776 #ifdef CONFIG_TRACER_MAX_TRACE
7777 static const struct file_operations tracing_max_lat_fops = {
7778         .open           = tracing_open_generic_tr,
7779         .read           = tracing_max_lat_read,
7780         .write          = tracing_max_lat_write,
7781         .llseek         = generic_file_llseek,
7782         .release        = tracing_release_generic_tr,
7783 };
7784 #endif
7785
7786 static const struct file_operations set_tracer_fops = {
7787         .open           = tracing_open_generic_tr,
7788         .read           = tracing_set_trace_read,
7789         .write          = tracing_set_trace_write,
7790         .llseek         = generic_file_llseek,
7791         .release        = tracing_release_generic_tr,
7792 };
7793
7794 static const struct file_operations tracing_pipe_fops = {
7795         .open           = tracing_open_pipe,
7796         .poll           = tracing_poll_pipe,
7797         .read           = tracing_read_pipe,
7798         .splice_read    = tracing_splice_read_pipe,
7799         .release        = tracing_release_pipe,
7800         .llseek         = no_llseek,
7801 };
7802
7803 static const struct file_operations tracing_entries_fops = {
7804         .open           = tracing_open_generic_tr,
7805         .read           = tracing_entries_read,
7806         .write          = tracing_entries_write,
7807         .llseek         = generic_file_llseek,
7808         .release        = tracing_release_generic_tr,
7809 };
7810
7811 static const struct file_operations tracing_total_entries_fops = {
7812         .open           = tracing_open_generic_tr,
7813         .read           = tracing_total_entries_read,
7814         .llseek         = generic_file_llseek,
7815         .release        = tracing_release_generic_tr,
7816 };
7817
7818 static const struct file_operations tracing_free_buffer_fops = {
7819         .open           = tracing_open_generic_tr,
7820         .write          = tracing_free_buffer_write,
7821         .release        = tracing_free_buffer_release,
7822 };
7823
7824 static const struct file_operations tracing_mark_fops = {
7825         .open           = tracing_mark_open,
7826         .write          = tracing_mark_write,
7827         .release        = tracing_release_generic_tr,
7828 };
7829
7830 static const struct file_operations tracing_mark_raw_fops = {
7831         .open           = tracing_mark_open,
7832         .write          = tracing_mark_raw_write,
7833         .release        = tracing_release_generic_tr,
7834 };
7835
7836 static const struct file_operations trace_clock_fops = {
7837         .open           = tracing_clock_open,
7838         .read           = seq_read,
7839         .llseek         = seq_lseek,
7840         .release        = tracing_single_release_tr,
7841         .write          = tracing_clock_write,
7842 };
7843
7844 static const struct file_operations trace_time_stamp_mode_fops = {
7845         .open           = tracing_time_stamp_mode_open,
7846         .read           = seq_read,
7847         .llseek         = seq_lseek,
7848         .release        = tracing_single_release_tr,
7849 };
7850
7851 #ifdef CONFIG_TRACER_SNAPSHOT
7852 static const struct file_operations snapshot_fops = {
7853         .open           = tracing_snapshot_open,
7854         .read           = seq_read,
7855         .write          = tracing_snapshot_write,
7856         .llseek         = tracing_lseek,
7857         .release        = tracing_snapshot_release,
7858 };
7859
7860 static const struct file_operations snapshot_raw_fops = {
7861         .open           = snapshot_raw_open,
7862         .read           = tracing_buffers_read,
7863         .release        = tracing_buffers_release,
7864         .splice_read    = tracing_buffers_splice_read,
7865         .llseek         = no_llseek,
7866 };
7867
7868 #endif /* CONFIG_TRACER_SNAPSHOT */
7869
7870 /*
7871  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7872  * @filp: The active open file structure
7873  * @ubuf: The user space buffer containing the value to write
7874  * @cnt: The number of bytes to read from the user buffer
7875  * @ppos: The current "file" position
7876  *
7877  * This function implements the write interface for a struct trace_min_max_param.
7878  * The filp->private_data must point to a trace_min_max_param structure that
7879  * defines where to write the value, the min and the max acceptable values,
7880  * and a lock to protect the write.
7881  */
7882 static ssize_t
7883 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7884 {
7885         struct trace_min_max_param *param = filp->private_data;
7886         u64 val;
7887         int err;
7888
7889         if (!param)
7890                 return -EFAULT;
7891
7892         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7893         if (err)
7894                 return err;
7895
7896         if (param->lock)
7897                 mutex_lock(param->lock);
7898
7899         if (param->min && val < *param->min)
7900                 err = -EINVAL;
7901
7902         if (param->max && val > *param->max)
7903                 err = -EINVAL;
7904
7905         if (!err)
7906                 *param->val = val;
7907
7908         if (param->lock)
7909                 mutex_unlock(param->lock);
7910
7911         if (err)
7912                 return err;
7913
7914         return cnt;
7915 }
7916
7917 /*
7918  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7919  * @filp: The active open file structure
7920  * @ubuf: The userspace provided buffer to read value into
7921  * @cnt: The maximum number of bytes to read
7922  * @ppos: The current "file" position
7923  *
7924  * This function implements the read interface for a struct trace_min_max_param.
7925  * The filp->private_data must point to a trace_min_max_param struct with valid
7926  * data.
7927  */
7928 static ssize_t
7929 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7930 {
7931         struct trace_min_max_param *param = filp->private_data;
7932         char buf[U64_STR_SIZE];
7933         int len;
7934         u64 val;
7935
7936         if (!param)
7937                 return -EFAULT;
7938
7939         val = *param->val;
7940
7941         if (cnt > sizeof(buf))
7942                 cnt = sizeof(buf);
7943
7944         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7945
7946         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7947 }
7948
7949 const struct file_operations trace_min_max_fops = {
7950         .open           = tracing_open_generic,
7951         .read           = trace_min_max_read,
7952         .write          = trace_min_max_write,
7953 };
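
/*
 * Illustrative sketch (not part of this file): wiring a u64 tunable to
 * trace_min_max_fops.  The names my_val, my_min, my_max, my_param and the
 * "my_tunable" file are hypothetical; the fields correspond to the ->val,
 * ->min, ->max and optional ->lock pointers dereferenced above.
 *
 *	static u64 my_val, my_min, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= NULL,		// optional; taken around the update when set
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_tunable", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */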
7954
7955 #define TRACING_LOG_ERRS_MAX    8
7956 #define TRACING_LOG_LOC_MAX     128
7957
7958 #define CMD_PREFIX "  Command: "
7959
7960 struct err_info {
7961         const char      **errs; /* ptr to loc-specific array of err strings */
7962         u8              type;   /* index into errs -> specific err string */
7963         u16             pos;    /* caret position */
7964         u64             ts;
7965 };
7966
7967 struct tracing_log_err {
7968         struct list_head        list;
7969         struct err_info         info;
7970         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7971         char                    *cmd;                     /* what caused err */
7972 };
7973
7974 static DEFINE_MUTEX(tracing_err_log_lock);
7975
7976 static struct tracing_log_err *alloc_tracing_log_err(int len)
7977 {
7978         struct tracing_log_err *err;
7979
7980         err = kzalloc(sizeof(*err), GFP_KERNEL);
7981         if (!err)
7982                 return ERR_PTR(-ENOMEM);
7983
7984         err->cmd = kzalloc(len, GFP_KERNEL);
7985         if (!err->cmd) {
7986                 kfree(err);
7987                 return ERR_PTR(-ENOMEM);
7988         }
7989
7990         return err;
7991 }
7992
7993 static void free_tracing_log_err(struct tracing_log_err *err)
7994 {
7995         kfree(err->cmd);
7996         kfree(err);
7997 }
7998
7999 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8000                                                    int len)
8001 {
8002         struct tracing_log_err *err;
8003         char *cmd;
8004
8005         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8006                 err = alloc_tracing_log_err(len);
8007                 if (PTR_ERR(err) != -ENOMEM)
8008                         tr->n_err_log_entries++;
8009
8010                 return err;
8011         }
8012         cmd = kzalloc(len, GFP_KERNEL);
8013         if (!cmd)
8014                 return ERR_PTR(-ENOMEM);
8015         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8016         kfree(err->cmd);
8017         err->cmd = cmd;
8018         list_del(&err->list);
8019
8020         return err;
8021 }
8022
8023 /**
8024  * err_pos - find the position of a string within a command for error careting
8025  * @cmd: The tracing command that caused the error
8026  * @str: The string to position the caret at within @cmd
8027  *
8028  * Finds the position of the first occurrence of @str within @cmd.  The
8029  * return value can be passed to tracing_log_err() for caret placement
8030  * within @cmd.
8031  *
8032  * Returns the index within @cmd of the first occurrence of @str or 0
8033  * if @str was not found.
8034  */
8035 unsigned int err_pos(char *cmd, const char *str)
8036 {
8037         char *found;
8038
8039         if (WARN_ON(!strlen(cmd)))
8040                 return 0;
8041
8042         found = strstr(cmd, str);
8043         if (found)
8044                 return found - cmd;
8045
8046         return 0;
8047 }
8048
8049 /**
8050  * tracing_log_err - write an error to the tracing error log
8051  * @tr: The associated trace array for the error (NULL for top level array)
8052  * @loc: A string describing where the error occurred
8053  * @cmd: The tracing command that caused the error
8054  * @errs: The array of loc-specific static error strings
8055  * @type: The index into errs[], which produces the specific static err string
8056  * @pos: The position the caret should be placed in the cmd
8057  *
8058  * Writes an error into tracing/error_log of the form:
8059  *
8060  * <loc>: error: <text>
8061  *   Command: <cmd>
8062  *              ^
8063  *
8064  * tracing/error_log is a small log file containing the last
8065  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8066  * unless there has been a tracing error, and the error log can be
8067  * cleared and its memory freed by writing the empty string to it in
8068  * truncation mode, i.e. echo > tracing/error_log.
8069  *
8070  * NOTE: the @errs array along with the @type param are used to
8071  * produce a static error string - this string is not copied and saved
8072  * when the error is logged - only a pointer to it is saved.  See
8073  * existing callers for examples of how static strings are typically
8074  * defined for use with tracing_log_err().
8075  */
8076 void tracing_log_err(struct trace_array *tr,
8077                      const char *loc, const char *cmd,
8078                      const char **errs, u8 type, u16 pos)
8079 {
8080         struct tracing_log_err *err;
8081         int len = 0;
8082
8083         if (!tr)
8084                 tr = &global_trace;
8085
8086         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8087
8088         mutex_lock(&tracing_err_log_lock);
8089         err = get_tracing_log_err(tr, len);
8090         if (PTR_ERR(err) == -ENOMEM) {
8091                 mutex_unlock(&tracing_err_log_lock);
8092                 return;
8093         }
8094
8095         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8096         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8097
8098         err->info.errs = errs;
8099         err->info.type = type;
8100         err->info.pos = pos;
8101         err->info.ts = local_clock();
8102
8103         list_add_tail(&err->list, &tr->err_log);
8104         mutex_unlock(&tracing_err_log_lock);
8105 }
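
/*
 * Illustrative sketch (not from this file) of the caller-side pattern the
 * note above refers to.  The names foo_errs, FOO_ERR_UNKNOWN_ARG, "foo"
 * and bad_token are hypothetical.
 *
 *	static const char *foo_errs[] = {
 *		"Unrecognized argument",
 *		"Too many arguments",
 *	};
 *	enum { FOO_ERR_UNKNOWN_ARG, FOO_ERR_TOO_MANY };
 *
 *	tracing_log_err(tr, "foo", cmd, foo_errs, FOO_ERR_UNKNOWN_ARG,
 *			err_pos(cmd, bad_token));
 *
 * Only a pointer to the selected string is stored, so the @errs array must
 * have static storage duration.
 */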
8106
8107 static void clear_tracing_err_log(struct trace_array *tr)
8108 {
8109         struct tracing_log_err *err, *next;
8110
8111         mutex_lock(&tracing_err_log_lock);
8112         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8113                 list_del(&err->list);
8114                 free_tracing_log_err(err);
8115         }
8116
8117         tr->n_err_log_entries = 0;
8118         mutex_unlock(&tracing_err_log_lock);
8119 }
8120
8121 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8122 {
8123         struct trace_array *tr = m->private;
8124
8125         mutex_lock(&tracing_err_log_lock);
8126
8127         return seq_list_start(&tr->err_log, *pos);
8128 }
8129
8130 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8131 {
8132         struct trace_array *tr = m->private;
8133
8134         return seq_list_next(v, &tr->err_log, pos);
8135 }
8136
8137 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8138 {
8139         mutex_unlock(&tracing_err_log_lock);
8140 }
8141
8142 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8143 {
8144         u16 i;
8145
8146         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8147                 seq_putc(m, ' ');
8148         for (i = 0; i < pos; i++)
8149                 seq_putc(m, ' ');
8150         seq_puts(m, "^\n");
8151 }
8152
8153 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8154 {
8155         struct tracing_log_err *err = v;
8156
8157         if (err) {
8158                 const char *err_text = err->info.errs[err->info.type];
8159                 u64 sec = err->info.ts;
8160                 u32 nsec;
8161
8162                 nsec = do_div(sec, NSEC_PER_SEC);
8163                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8164                            err->loc, err_text);
8165                 seq_printf(m, "%s", err->cmd);
8166                 tracing_err_log_show_pos(m, err->info.pos);
8167         }
8168
8169         return 0;
8170 }
8171
8172 static const struct seq_operations tracing_err_log_seq_ops = {
8173         .start  = tracing_err_log_seq_start,
8174         .next   = tracing_err_log_seq_next,
8175         .stop   = tracing_err_log_seq_stop,
8176         .show   = tracing_err_log_seq_show
8177 };
8178
8179 static int tracing_err_log_open(struct inode *inode, struct file *file)
8180 {
8181         struct trace_array *tr = inode->i_private;
8182         int ret = 0;
8183
8184         ret = tracing_check_open_get_tr(tr);
8185         if (ret)
8186                 return ret;
8187
8188         /* If this file was opened for write, then erase contents */
8189         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8190                 clear_tracing_err_log(tr);
8191
8192         if (file->f_mode & FMODE_READ) {
8193                 ret = seq_open(file, &tracing_err_log_seq_ops);
8194                 if (!ret) {
8195                         struct seq_file *m = file->private_data;
8196                         m->private = tr;
8197                 } else {
8198                         trace_array_put(tr);
8199                 }
8200         }
8201         return ret;
8202 }
8203
8204 static ssize_t tracing_err_log_write(struct file *file,
8205                                      const char __user *buffer,
8206                                      size_t count, loff_t *ppos)
8207 {
8208         return count;
8209 }
8210
8211 static int tracing_err_log_release(struct inode *inode, struct file *file)
8212 {
8213         struct trace_array *tr = inode->i_private;
8214
8215         trace_array_put(tr);
8216
8217         if (file->f_mode & FMODE_READ)
8218                 seq_release(inode, file);
8219
8220         return 0;
8221 }
8222
8223 static const struct file_operations tracing_err_log_fops = {
8224         .open           = tracing_err_log_open,
8225         .write          = tracing_err_log_write,
8226         .read           = seq_read,
8227         .llseek         = tracing_lseek,
8228         .release        = tracing_err_log_release,
8229 };
8230
8231 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8232 {
8233         struct trace_array *tr = inode->i_private;
8234         struct ftrace_buffer_info *info;
8235         int ret;
8236
8237         ret = tracing_check_open_get_tr(tr);
8238         if (ret)
8239                 return ret;
8240
8241         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8242         if (!info) {
8243                 trace_array_put(tr);
8244                 return -ENOMEM;
8245         }
8246
8247         mutex_lock(&trace_types_lock);
8248
8249         info->iter.tr           = tr;
8250         info->iter.cpu_file     = tracing_get_cpu(inode);
8251         info->iter.trace        = tr->current_trace;
8252         info->iter.array_buffer = &tr->array_buffer;
8253         info->spare             = NULL;
8254         /* Force reading ring buffer for first read */
8255         info->read              = (unsigned int)-1;
8256
8257         filp->private_data = info;
8258
8259         tr->trace_ref++;
8260
8261         mutex_unlock(&trace_types_lock);
8262
8263         ret = nonseekable_open(inode, filp);
8264         if (ret < 0)
8265                 trace_array_put(tr);
8266
8267         return ret;
8268 }
8269
8270 static __poll_t
8271 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8272 {
8273         struct ftrace_buffer_info *info = filp->private_data;
8274         struct trace_iterator *iter = &info->iter;
8275
8276         return trace_poll(iter, filp, poll_table);
8277 }
8278
8279 static ssize_t
8280 tracing_buffers_read(struct file *filp, char __user *ubuf,
8281                      size_t count, loff_t *ppos)
8282 {
8283         struct ftrace_buffer_info *info = filp->private_data;
8284         struct trace_iterator *iter = &info->iter;
8285         ssize_t ret = 0;
8286         ssize_t size;
8287
8288         if (!count)
8289                 return 0;
8290
8291 #ifdef CONFIG_TRACER_MAX_TRACE
8292         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8293                 return -EBUSY;
8294 #endif
8295
8296         if (!info->spare) {
8297                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8298                                                           iter->cpu_file);
8299                 if (IS_ERR(info->spare)) {
8300                         ret = PTR_ERR(info->spare);
8301                         info->spare = NULL;
8302                 } else {
8303                         info->spare_cpu = iter->cpu_file;
8304                 }
8305         }
8306         if (!info->spare)
8307                 return ret;
8308
8309         /* Do we have previous read data to read? */
8310         if (info->read < PAGE_SIZE)
8311                 goto read;
8312
8313  again:
8314         trace_access_lock(iter->cpu_file);
8315         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8316                                     &info->spare,
8317                                     count,
8318                                     iter->cpu_file, 0);
8319         trace_access_unlock(iter->cpu_file);
8320
8321         if (ret < 0) {
8322                 if (trace_empty(iter)) {
8323                         if ((filp->f_flags & O_NONBLOCK))
8324                                 return -EAGAIN;
8325
8326                         ret = wait_on_pipe(iter, 0);
8327                         if (ret)
8328                                 return ret;
8329
8330                         goto again;
8331                 }
8332                 return 0;
8333         }
8334
8335         info->read = 0;
8336  read:
8337         size = PAGE_SIZE - info->read;
8338         if (size > count)
8339                 size = count;
8340
8341         ret = copy_to_user(ubuf, info->spare + info->read, size);
8342         if (ret == size)
8343                 return -EFAULT;
8344
8345         size -= ret;
8346
8347         *ppos += size;
8348         info->read += size;
8349
8350         return size;
8351 }
8352
8353 static int tracing_buffers_release(struct inode *inode, struct file *file)
8354 {
8355         struct ftrace_buffer_info *info = file->private_data;
8356         struct trace_iterator *iter = &info->iter;
8357
8358         mutex_lock(&trace_types_lock);
8359
8360         iter->tr->trace_ref--;
8361
8362         __trace_array_put(iter->tr);
8363
8364         iter->wait_index++;
8365         /* Make sure the waiters see the new wait_index */
8366         smp_wmb();
8367
8368         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8369
8370         if (info->spare)
8371                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8372                                            info->spare_cpu, info->spare);
8373         kvfree(info);
8374
8375         mutex_unlock(&trace_types_lock);
8376
8377         return 0;
8378 }
8379
8380 struct buffer_ref {
8381         struct trace_buffer     *buffer;
8382         void                    *page;
8383         int                     cpu;
8384         refcount_t              refcount;
8385 };
8386
8387 static void buffer_ref_release(struct buffer_ref *ref)
8388 {
8389         if (!refcount_dec_and_test(&ref->refcount))
8390                 return;
8391         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8392         kfree(ref);
8393 }
8394
8395 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8396                                     struct pipe_buffer *buf)
8397 {
8398         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8399
8400         buffer_ref_release(ref);
8401         buf->private = 0;
8402 }
8403
8404 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8405                                 struct pipe_buffer *buf)
8406 {
8407         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8408
8409         if (refcount_read(&ref->refcount) > INT_MAX/2)
8410                 return false;
8411
8412         refcount_inc(&ref->refcount);
8413         return true;
8414 }
8415
8416 /* Pipe buffer operations for a ring buffer page reference. */
8417 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8418         .release                = buffer_pipe_buf_release,
8419         .get                    = buffer_pipe_buf_get,
8420 };
8421
8422 /*
8423  * Callback from splice_to_pipe(), if we need to release some pages
8424  * at the end of the spd in case we errored out while filling the pipe.
8425  */
8426 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8427 {
8428         struct buffer_ref *ref =
8429                 (struct buffer_ref *)spd->partial[i].private;
8430
8431         buffer_ref_release(ref);
8432         spd->partial[i].private = 0;
8433 }
8434
8435 static ssize_t
8436 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8437                             struct pipe_inode_info *pipe, size_t len,
8438                             unsigned int flags)
8439 {
8440         struct ftrace_buffer_info *info = file->private_data;
8441         struct trace_iterator *iter = &info->iter;
8442         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8443         struct page *pages_def[PIPE_DEF_BUFFERS];
8444         struct splice_pipe_desc spd = {
8445                 .pages          = pages_def,
8446                 .partial        = partial_def,
8447                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8448                 .ops            = &buffer_pipe_buf_ops,
8449                 .spd_release    = buffer_spd_release,
8450         };
8451         struct buffer_ref *ref;
8452         int entries, i;
8453         ssize_t ret = 0;
8454
8455 #ifdef CONFIG_TRACER_MAX_TRACE
8456         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8457                 return -EBUSY;
8458 #endif
8459
8460         if (*ppos & (PAGE_SIZE - 1))
8461                 return -EINVAL;
8462
8463         if (len & (PAGE_SIZE - 1)) {
8464                 if (len < PAGE_SIZE)
8465                         return -EINVAL;
8466                 len &= PAGE_MASK;
8467         }
8468
8469         if (splice_grow_spd(pipe, &spd))
8470                 return -ENOMEM;
8471
8472  again:
8473         trace_access_lock(iter->cpu_file);
8474         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8475
8476         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8477                 struct page *page;
8478                 int r;
8479
8480                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8481                 if (!ref) {
8482                         ret = -ENOMEM;
8483                         break;
8484                 }
8485
8486                 refcount_set(&ref->refcount, 1);
8487                 ref->buffer = iter->array_buffer->buffer;
8488                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8489                 if (IS_ERR(ref->page)) {
8490                         ret = PTR_ERR(ref->page);
8491                         ref->page = NULL;
8492                         kfree(ref);
8493                         break;
8494                 }
8495                 ref->cpu = iter->cpu_file;
8496
8497                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8498                                           len, iter->cpu_file, 1);
8499                 if (r < 0) {
8500                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8501                                                    ref->page);
8502                         kfree(ref);
8503                         break;
8504                 }
8505
8506                 page = virt_to_page(ref->page);
8507
8508                 spd.pages[i] = page;
8509                 spd.partial[i].len = PAGE_SIZE;
8510                 spd.partial[i].offset = 0;
8511                 spd.partial[i].private = (unsigned long)ref;
8512                 spd.nr_pages++;
8513                 *ppos += PAGE_SIZE;
8514
8515                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8516         }
8517
8518         trace_access_unlock(iter->cpu_file);
8519         spd.nr_pages = i;
8520
8521         /* did we read anything? */
8522         if (!spd.nr_pages) {
8523                 long wait_index;
8524
8525                 if (ret)
8526                         goto out;
8527
8528                 ret = -EAGAIN;
8529                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8530                         goto out;
8531
8532                 wait_index = READ_ONCE(iter->wait_index);
8533
8534                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8535                 if (ret)
8536                         goto out;
8537
8538                 /* No need to wait after waking up when tracing is off */
8539                 if (!tracer_tracing_is_on(iter->tr))
8540                         goto out;
8541
8542                 /* Make sure we see the new wait_index */
8543                 smp_rmb();
8544                 if (wait_index != iter->wait_index)
8545                         goto out;
8546
8547                 goto again;
8548         }
8549
8550         ret = splice_to_pipe(pipe, &spd);
8551 out:
8552         splice_shrink_spd(&spd);
8553
8554         return ret;
8555 }
8556
8557 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8558 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8559 {
8560         struct ftrace_buffer_info *info = file->private_data;
8561         struct trace_iterator *iter = &info->iter;
8562
8563         if (cmd)
8564                 return -ENOIOCTLCMD;
8565
8566         mutex_lock(&trace_types_lock);
8567
8568         iter->wait_index++;
8569         /* Make sure the waiters see the new wait_index */
8570         smp_wmb();
8571
8572         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8573
8574         mutex_unlock(&trace_types_lock);
8575         return 0;
8576 }
8577
8578 static const struct file_operations tracing_buffers_fops = {
8579         .open           = tracing_buffers_open,
8580         .read           = tracing_buffers_read,
8581         .poll           = tracing_buffers_poll,
8582         .release        = tracing_buffers_release,
8583         .splice_read    = tracing_buffers_splice_read,
8584         .unlocked_ioctl = tracing_buffers_ioctl,
8585         .llseek         = no_llseek,
8586 };
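
/*
 * Illustrative user-space sketch (not part of this file): consuming raw
 * ring-buffer pages from a per-CPU trace_pipe_raw file.  It assumes tracefs
 * is mounted at /sys/kernel/tracing, CPU 0 is of interest and the page size
 * is 4 KiB.
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	char page[4096];
 *	ssize_t r = read(fd, page, sizeof(page));   // binary sub-buffer data
 *
 *	ioctl(fd, 0);	// cmd 0: wake up any readers blocked on this buffer
 */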
8587
8588 static ssize_t
8589 tracing_stats_read(struct file *filp, char __user *ubuf,
8590                    size_t count, loff_t *ppos)
8591 {
8592         struct inode *inode = file_inode(filp);
8593         struct trace_array *tr = inode->i_private;
8594         struct array_buffer *trace_buf = &tr->array_buffer;
8595         int cpu = tracing_get_cpu(inode);
8596         struct trace_seq *s;
8597         unsigned long cnt;
8598         unsigned long long t;
8599         unsigned long usec_rem;
8600
8601         s = kmalloc(sizeof(*s), GFP_KERNEL);
8602         if (!s)
8603                 return -ENOMEM;
8604
8605         trace_seq_init(s);
8606
8607         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8608         trace_seq_printf(s, "entries: %ld\n", cnt);
8609
8610         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8611         trace_seq_printf(s, "overrun: %ld\n", cnt);
8612
8613         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8614         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8615
8616         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8617         trace_seq_printf(s, "bytes: %ld\n", cnt);
8618
8619         if (trace_clocks[tr->clock_id].in_ns) {
8620                 /* local or global for trace_clock */
8621                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8622                 usec_rem = do_div(t, USEC_PER_SEC);
8623                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8624                                                                 t, usec_rem);
8625
8626                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8627                 usec_rem = do_div(t, USEC_PER_SEC);
8628                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8629         } else {
8630                 /* counter or tsc mode for trace_clock */
8631                 trace_seq_printf(s, "oldest event ts: %llu\n",
8632                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8633
8634                 trace_seq_printf(s, "now ts: %llu\n",
8635                                 ring_buffer_time_stamp(trace_buf->buffer));
8636         }
8637
8638         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8639         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8640
8641         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8642         trace_seq_printf(s, "read events: %ld\n", cnt);
8643
8644         count = simple_read_from_buffer(ubuf, count, ppos,
8645                                         s->buffer, trace_seq_used(s));
8646
8647         kfree(s);
8648
8649         return count;
8650 }
8651
8652 static const struct file_operations tracing_stats_fops = {
8653         .open           = tracing_open_generic_tr,
8654         .read           = tracing_stats_read,
8655         .llseek         = generic_file_llseek,
8656         .release        = tracing_release_generic_tr,
8657 };
8658
8659 #ifdef CONFIG_DYNAMIC_FTRACE
8660
8661 static ssize_t
8662 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8663                   size_t cnt, loff_t *ppos)
8664 {
8665         ssize_t ret;
8666         char *buf;
8667         int r;
8668
8669         /* 256 should be plenty to hold the amount needed */
8670         buf = kmalloc(256, GFP_KERNEL);
8671         if (!buf)
8672                 return -ENOMEM;
8673
8674         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8675                       ftrace_update_tot_cnt,
8676                       ftrace_number_of_pages,
8677                       ftrace_number_of_groups);
8678
8679         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8680         kfree(buf);
8681         return ret;
8682 }
8683
8684 static const struct file_operations tracing_dyn_info_fops = {
8685         .open           = tracing_open_generic,
8686         .read           = tracing_read_dyn_info,
8687         .llseek         = generic_file_llseek,
8688 };
8689 #endif /* CONFIG_DYNAMIC_FTRACE */
8690
8691 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8692 static void
8693 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8694                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8695                 void *data)
8696 {
8697         tracing_snapshot_instance(tr);
8698 }
8699
8700 static void
8701 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8702                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8703                       void *data)
8704 {
8705         struct ftrace_func_mapper *mapper = data;
8706         long *count = NULL;
8707
8708         if (mapper)
8709                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8710
8711         if (count) {
8712
8713                 if (*count <= 0)
8714                         return;
8715
8716                 (*count)--;
8717         }
8718
8719         tracing_snapshot_instance(tr);
8720 }
8721
8722 static int
8723 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8724                       struct ftrace_probe_ops *ops, void *data)
8725 {
8726         struct ftrace_func_mapper *mapper = data;
8727         long *count = NULL;
8728
8729         seq_printf(m, "%ps:", (void *)ip);
8730
8731         seq_puts(m, "snapshot");
8732
8733         if (mapper)
8734                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8735
8736         if (count)
8737                 seq_printf(m, ":count=%ld\n", *count);
8738         else
8739                 seq_puts(m, ":unlimited\n");
8740
8741         return 0;
8742 }
8743
8744 static int
8745 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8746                      unsigned long ip, void *init_data, void **data)
8747 {
8748         struct ftrace_func_mapper *mapper = *data;
8749
8750         if (!mapper) {
8751                 mapper = allocate_ftrace_func_mapper();
8752                 if (!mapper)
8753                         return -ENOMEM;
8754                 *data = mapper;
8755         }
8756
8757         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8758 }
8759
8760 static void
8761 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8762                      unsigned long ip, void *data)
8763 {
8764         struct ftrace_func_mapper *mapper = data;
8765
8766         if (!ip) {
8767                 if (!mapper)
8768                         return;
8769                 free_ftrace_func_mapper(mapper, NULL);
8770                 return;
8771         }
8772
8773         ftrace_func_mapper_remove_ip(mapper, ip);
8774 }
8775
8776 static struct ftrace_probe_ops snapshot_probe_ops = {
8777         .func                   = ftrace_snapshot,
8778         .print                  = ftrace_snapshot_print,
8779 };
8780
8781 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8782         .func                   = ftrace_count_snapshot,
8783         .print                  = ftrace_snapshot_print,
8784         .init                   = ftrace_snapshot_init,
8785         .free                   = ftrace_snapshot_free,
8786 };
8787
8788 static int
8789 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8790                                char *glob, char *cmd, char *param, int enable)
8791 {
8792         struct ftrace_probe_ops *ops;
8793         void *count = (void *)-1;
8794         char *number;
8795         int ret;
8796
8797         if (!tr)
8798                 return -ENODEV;
8799
8800         /* hash funcs only work with set_ftrace_filter */
8801         if (!enable)
8802                 return -EINVAL;
8803
8804         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8805
8806         if (glob[0] == '!')
8807                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8808
8809         if (!param)
8810                 goto out_reg;
8811
8812         number = strsep(&param, ":");
8813
8814         if (!strlen(number))
8815                 goto out_reg;
8816
8817         /*
8818          * We use the callback data field (which is a pointer)
8819          * as our counter.
8820          */
8821         ret = kstrtoul(number, 0, (unsigned long *)&count);
8822         if (ret)
8823                 return ret;
8824
8825  out_reg:
8826         ret = tracing_alloc_snapshot_instance(tr);
8827         if (ret < 0)
8828                 goto out;
8829
8830         ret = register_ftrace_function_probe(glob, tr, ops, count);
8831
8832  out:
8833         return ret < 0 ? ret : 0;
8834 }
8835
8836 static struct ftrace_func_command ftrace_snapshot_cmd = {
8837         .name                   = "snapshot",
8838         .func                   = ftrace_trace_snapshot_callback,
8839 };
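
/*
 * Usage sketch (illustrative): the "snapshot" command is attached to
 * functions through set_ftrace_filter; "some_function" stands in for any
 * traceable kernel function.
 *
 *	echo 'some_function:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'some_function:snapshot:3' > set_ftrace_filter	# only the first 3 hits
 *	echo '!some_function:snapshot' > set_ftrace_filter	# remove the probe
 */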
8840
8841 static __init int register_snapshot_cmd(void)
8842 {
8843         return register_ftrace_command(&ftrace_snapshot_cmd);
8844 }
8845 #else
8846 static inline __init int register_snapshot_cmd(void) { return 0; }
8847 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8848
8849 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8850 {
8851         if (WARN_ON(!tr->dir))
8852                 return ERR_PTR(-ENODEV);
8853
8854         /* Top directory uses NULL as the parent */
8855         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8856                 return NULL;
8857
8858         /* All sub buffers have a descriptor */
8859         return tr->dir;
8860 }
8861
8862 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8863 {
8864         struct dentry *d_tracer;
8865
8866         if (tr->percpu_dir)
8867                 return tr->percpu_dir;
8868
8869         d_tracer = tracing_get_dentry(tr);
8870         if (IS_ERR(d_tracer))
8871                 return NULL;
8872
8873         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8874
8875         MEM_FAIL(!tr->percpu_dir,
8876                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8877
8878         return tr->percpu_dir;
8879 }
8880
8881 static struct dentry *
8882 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8883                       void *data, long cpu, const struct file_operations *fops)
8884 {
8885         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8886
8887         if (ret) /* See tracing_get_cpu() */
8888                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8889         return ret;
8890 }
8891
8892 static void
8893 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8894 {
8895         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8896         struct dentry *d_cpu;
8897         char cpu_dir[30]; /* 30 characters should be more than enough */
8898
8899         if (!d_percpu)
8900                 return;
8901
8902         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8903         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8904         if (!d_cpu) {
8905                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8906                 return;
8907         }
8908
8909         /* per cpu trace_pipe */
8910         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8911                                 tr, cpu, &tracing_pipe_fops);
8912
8913         /* per cpu trace */
8914         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8915                                 tr, cpu, &tracing_fops);
8916
8917         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8918                                 tr, cpu, &tracing_buffers_fops);
8919
8920         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8921                                 tr, cpu, &tracing_stats_fops);
8922
8923         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8924                                 tr, cpu, &tracing_entries_fops);
8925
8926 #ifdef CONFIG_TRACER_SNAPSHOT
8927         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8928                                 tr, cpu, &snapshot_fops);
8929
8930         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8931                                 tr, cpu, &snapshot_raw_fops);
8932 #endif
8933 }
8934
8935 #ifdef CONFIG_FTRACE_SELFTEST
8936 /* Let selftest have access to static functions in this file */
8937 #include "trace_selftest.c"
8938 #endif
8939
8940 static ssize_t
8941 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8942                         loff_t *ppos)
8943 {
8944         struct trace_option_dentry *topt = filp->private_data;
8945         char *buf;
8946
8947         if (topt->flags->val & topt->opt->bit)
8948                 buf = "1\n";
8949         else
8950                 buf = "0\n";
8951
8952         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8953 }
8954
8955 static ssize_t
8956 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8957                          loff_t *ppos)
8958 {
8959         struct trace_option_dentry *topt = filp->private_data;
8960         unsigned long val;
8961         int ret;
8962
8963         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8964         if (ret)
8965                 return ret;
8966
8967         if (val != 0 && val != 1)
8968                 return -EINVAL;
8969
8970         if (!!(topt->flags->val & topt->opt->bit) != val) {
8971                 mutex_lock(&trace_types_lock);
8972                 ret = __set_tracer_option(topt->tr, topt->flags,
8973                                           topt->opt, !val);
8974                 mutex_unlock(&trace_types_lock);
8975                 if (ret)
8976                         return ret;
8977         }
8978
8979         *ppos += cnt;
8980
8981         return cnt;
8982 }
8983
8984 static int tracing_open_options(struct inode *inode, struct file *filp)
8985 {
8986         struct trace_option_dentry *topt = inode->i_private;
8987         int ret;
8988
8989         ret = tracing_check_open_get_tr(topt->tr);
8990         if (ret)
8991                 return ret;
8992
8993         filp->private_data = inode->i_private;
8994         return 0;
8995 }
8996
8997 static int tracing_release_options(struct inode *inode, struct file *file)
8998 {
8999         struct trace_option_dentry *topt = file->private_data;
9000
9001         trace_array_put(topt->tr);
9002         return 0;
9003 }
9004
9005 static const struct file_operations trace_options_fops = {
9006         .open = tracing_open_options,
9007         .read = trace_options_read,
9008         .write = trace_options_write,
9009         .llseek = generic_file_llseek,
9010         .release = tracing_release_options,
9011 };
9012
9013 /*
9014  * In order to pass in both the trace_array descriptor as well as the index
9015  * to the flag that the trace option file represents, the trace_array
9016  * has a character array of trace_flags_index[], which holds the index
9017  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9018  * The address of this character array is passed to the flag option file
9019  * The address of the array element for a given flag is passed to that
9020  * flag's option file read/write callbacks.
9021  * In order to extract both the index and the trace_array descriptor,
9022  * get_tr_index() uses the following algorithm.
9023  *
9024  *   idx = *ptr;
9025  *
9026  * Since the pointer points at index[idx] (remember index[1] == 1),
9027  * dereferencing it yields the index value directly.
9028  *
9029  * Then, to get the trace_array descriptor, subtract that index from
9030  * the pointer to reach the start of the index array itself:
9031  *
9032  *   ptr - idx == &index[0]
9033  *
9034  * Then a simple container_of() from that pointer gets us to the
9035  * trace_array descriptor.
9036  */
9037 static void get_tr_index(void *data, struct trace_array **ptr,
9038                          unsigned int *pindex)
9039 {
9040         *pindex = *(unsigned char *)data;
9041
9042         *ptr = container_of(data - *pindex, struct trace_array,
9043                             trace_flags_index);
9044 }
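
/*
 * Worked example (illustrative): if data points at &tr->trace_flags_index[3],
 * then *data == 3, so data - 3 is &tr->trace_flags_index[0], and
 * container_of() on that address with member trace_flags_index recovers the
 * enclosing trace_array.
 */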
9045
9046 static ssize_t
9047 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9048                         loff_t *ppos)
9049 {
9050         void *tr_index = filp->private_data;
9051         struct trace_array *tr;
9052         unsigned int index;
9053         char *buf;
9054
9055         get_tr_index(tr_index, &tr, &index);
9056
9057         if (tr->trace_flags & (1 << index))
9058                 buf = "1\n";
9059         else
9060                 buf = "0\n";
9061
9062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9063 }
9064
9065 static ssize_t
9066 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9067                          loff_t *ppos)
9068 {
9069         void *tr_index = filp->private_data;
9070         struct trace_array *tr;
9071         unsigned int index;
9072         unsigned long val;
9073         int ret;
9074
9075         get_tr_index(tr_index, &tr, &index);
9076
9077         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9078         if (ret)
9079                 return ret;
9080
9081         if (val != 0 && val != 1)
9082                 return -EINVAL;
9083
9084         mutex_lock(&event_mutex);
9085         mutex_lock(&trace_types_lock);
9086         ret = set_tracer_flag(tr, 1 << index, val);
9087         mutex_unlock(&trace_types_lock);
9088         mutex_unlock(&event_mutex);
9089
9090         if (ret < 0)
9091                 return ret;
9092
9093         *ppos += cnt;
9094
9095         return cnt;
9096 }
9097
9098 static const struct file_operations trace_options_core_fops = {
9099         .open = tracing_open_generic,
9100         .read = trace_options_core_read,
9101         .write = trace_options_core_write,
9102         .llseek = generic_file_llseek,
9103 };
9104
9105 struct dentry *trace_create_file(const char *name,
9106                                  umode_t mode,
9107                                  struct dentry *parent,
9108                                  void *data,
9109                                  const struct file_operations *fops)
9110 {
9111         struct dentry *ret;
9112
9113         ret = tracefs_create_file(name, mode, parent, data, fops);
9114         if (!ret)
9115                 pr_warn("Could not create tracefs '%s' entry\n", name);
9116
9117         return ret;
9118 }
9119
9120
9121 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9122 {
9123         struct dentry *d_tracer;
9124
9125         if (tr->options)
9126                 return tr->options;
9127
9128         d_tracer = tracing_get_dentry(tr);
9129         if (IS_ERR(d_tracer))
9130                 return NULL;
9131
9132         tr->options = tracefs_create_dir("options", d_tracer);
9133         if (!tr->options) {
9134                 pr_warn("Could not create tracefs directory 'options'\n");
9135                 return NULL;
9136         }
9137
9138         return tr->options;
9139 }
9140
9141 static void
9142 create_trace_option_file(struct trace_array *tr,
9143                          struct trace_option_dentry *topt,
9144                          struct tracer_flags *flags,
9145                          struct tracer_opt *opt)
9146 {
9147         struct dentry *t_options;
9148
9149         t_options = trace_options_init_dentry(tr);
9150         if (!t_options)
9151                 return;
9152
9153         topt->flags = flags;
9154         topt->opt = opt;
9155         topt->tr = tr;
9156
9157         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9158                                         t_options, topt, &trace_options_fops);
9159
9160 }
9161
9162 static void
9163 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9164 {
9165         struct trace_option_dentry *topts;
9166         struct trace_options *tr_topts;
9167         struct tracer_flags *flags;
9168         struct tracer_opt *opts;
9169         int cnt;
9170         int i;
9171
9172         if (!tracer)
9173                 return;
9174
9175         flags = tracer->flags;
9176
9177         if (!flags || !flags->opts)
9178                 return;
9179
9180         /*
9181          * If this is an instance, only create flags for tracers
9182          * the instance may have.
9183          */
9184         if (!trace_ok_for_array(tracer, tr))
9185                 return;
9186
9187         for (i = 0; i < tr->nr_topts; i++) {
9188                 /* Make sure there's no duplicate flags. */
9189                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9190                         return;
9191         }
9192
9193         opts = flags->opts;
9194
9195         for (cnt = 0; opts[cnt].name; cnt++)
9196                 ;
9197
9198         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9199         if (!topts)
9200                 return;
9201
9202         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9203                             GFP_KERNEL);
9204         if (!tr_topts) {
9205                 kfree(topts);
9206                 return;
9207         }
9208
9209         tr->topts = tr_topts;
9210         tr->topts[tr->nr_topts].tracer = tracer;
9211         tr->topts[tr->nr_topts].topts = topts;
9212         tr->nr_topts++;
9213
9214         for (cnt = 0; opts[cnt].name; cnt++) {
9215                 create_trace_option_file(tr, &topts[cnt], flags,
9216                                          &opts[cnt]);
9217                 MEM_FAIL(topts[cnt].entry == NULL,
9218                           "Failed to create trace option: %s",
9219                           opts[cnt].name);
9220         }
9221 }
9222
9223 static struct dentry *
9224 create_trace_option_core_file(struct trace_array *tr,
9225                               const char *option, long index)
9226 {
9227         struct dentry *t_options;
9228
9229         t_options = trace_options_init_dentry(tr);
9230         if (!t_options)
9231                 return NULL;
9232
9233         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9234                                  (void *)&tr->trace_flags_index[index],
9235                                  &trace_options_core_fops);
9236 }
9237
9238 static void create_trace_options_dir(struct trace_array *tr)
9239 {
9240         struct dentry *t_options;
9241         bool top_level = tr == &global_trace;
9242         int i;
9243
9244         t_options = trace_options_init_dentry(tr);
9245         if (!t_options)
9246                 return;
9247
9248         for (i = 0; trace_options[i]; i++) {
9249                 if (top_level ||
9250                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9251                         create_trace_option_core_file(tr, trace_options[i], i);
9252         }
9253 }
9254
9255 static ssize_t
9256 rb_simple_read(struct file *filp, char __user *ubuf,
9257                size_t cnt, loff_t *ppos)
9258 {
9259         struct trace_array *tr = filp->private_data;
9260         char buf[64];
9261         int r;
9262
9263         r = tracer_tracing_is_on(tr);
9264         r = sprintf(buf, "%d\n", r);
9265
9266         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9267 }
9268
9269 static ssize_t
9270 rb_simple_write(struct file *filp, const char __user *ubuf,
9271                 size_t cnt, loff_t *ppos)
9272 {
9273         struct trace_array *tr = filp->private_data;
9274         struct trace_buffer *buffer = tr->array_buffer.buffer;
9275         unsigned long val;
9276         int ret;
9277
9278         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9279         if (ret)
9280                 return ret;
9281
9282         if (buffer) {
9283                 mutex_lock(&trace_types_lock);
9284                 if (!!val == tracer_tracing_is_on(tr)) {
9285                         val = 0; /* do nothing */
9286                 } else if (val) {
9287                         tracer_tracing_on(tr);
9288                         if (tr->current_trace->start)
9289                                 tr->current_trace->start(tr);
9290                 } else {
9291                         tracer_tracing_off(tr);
9292                         if (tr->current_trace->stop)
9293                                 tr->current_trace->stop(tr);
9294                         /* Wake up any waiters */
9295                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9296                 }
9297                 mutex_unlock(&trace_types_lock);
9298         }
9299
9300         (*ppos)++;
9301
9302         return cnt;
9303 }
9304
9305 static const struct file_operations rb_simple_fops = {
9306         .open           = tracing_open_generic_tr,
9307         .read           = rb_simple_read,
9308         .write          = rb_simple_write,
9309         .release        = tracing_release_generic_tr,
9310         .llseek         = default_llseek,
9311 };
9312
9313 static ssize_t
9314 buffer_percent_read(struct file *filp, char __user *ubuf,
9315                     size_t cnt, loff_t *ppos)
9316 {
9317         struct trace_array *tr = filp->private_data;
9318         char buf[64];
9319         int r;
9320
9321         r = tr->buffer_percent;
9322         r = sprintf(buf, "%d\n", r);
9323
9324         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9325 }
9326
9327 static ssize_t
9328 buffer_percent_write(struct file *filp, const char __user *ubuf,
9329                      size_t cnt, loff_t *ppos)
9330 {
9331         struct trace_array *tr = filp->private_data;
9332         unsigned long val;
9333         int ret;
9334
9335         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9336         if (ret)
9337                 return ret;
9338
9339         if (val > 100)
9340                 return -EINVAL;
9341
9342         tr->buffer_percent = val;
9343
9344         (*ppos)++;
9345
9346         return cnt;
9347 }
9348
9349 static const struct file_operations buffer_percent_fops = {
9350         .open           = tracing_open_generic_tr,
9351         .read           = buffer_percent_read,
9352         .write          = buffer_percent_write,
9353         .release        = tracing_release_generic_tr,
9354         .llseek         = default_llseek,
9355 };
9356
9357 static struct dentry *trace_instance_dir;
9358
9359 static void
9360 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9361
9362 static int
9363 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9364 {
9365         enum ring_buffer_flags rb_flags;
9366
9367         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9368
9369         buf->tr = tr;
9370
9371         buf->buffer = ring_buffer_alloc(size, rb_flags);
9372         if (!buf->buffer)
9373                 return -ENOMEM;
9374
9375         buf->data = alloc_percpu(struct trace_array_cpu);
9376         if (!buf->data) {
9377                 ring_buffer_free(buf->buffer);
9378                 buf->buffer = NULL;
9379                 return -ENOMEM;
9380         }
9381
9382         /* Allocate the first page for all buffers */
9383         set_buffer_entries(&tr->array_buffer,
9384                            ring_buffer_size(tr->array_buffer.buffer, 0));
9385
9386         return 0;
9387 }
9388
9389 static void free_trace_buffer(struct array_buffer *buf)
9390 {
9391         if (buf->buffer) {
9392                 ring_buffer_free(buf->buffer);
9393                 buf->buffer = NULL;
9394                 free_percpu(buf->data);
9395                 buf->data = NULL;
9396         }
9397 }
9398
9399 static int allocate_trace_buffers(struct trace_array *tr, int size)
9400 {
9401         int ret;
9402
9403         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9404         if (ret)
9405                 return ret;
9406
9407 #ifdef CONFIG_TRACER_MAX_TRACE
9408         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9409                                     allocate_snapshot ? size : 1);
9410         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9411                 free_trace_buffer(&tr->array_buffer);
9412                 return -ENOMEM;
9413         }
9414         tr->allocated_snapshot = allocate_snapshot;
9415
9416         allocate_snapshot = false;
9417 #endif
9418
9419         return 0;
9420 }
9421
9422 static void free_trace_buffers(struct trace_array *tr)
9423 {
9424         if (!tr)
9425                 return;
9426
9427         free_trace_buffer(&tr->array_buffer);
9428
9429 #ifdef CONFIG_TRACER_MAX_TRACE
9430         free_trace_buffer(&tr->max_buffer);
9431 #endif
9432 }
9433
9434 static void init_trace_flags_index(struct trace_array *tr)
9435 {
9436         int i;
9437
9438         /* Used by the trace options files */
9439         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9440                 tr->trace_flags_index[i] = i;
9441 }
9442
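/*
 * Create the tracer specific option files for every registered tracer
 * in @tr's tracefs directory.  trace_types_lock must be held; external
 * callers go through update_tracer_options() below.
 */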
9443 static void __update_tracer_options(struct trace_array *tr)
9444 {
9445         struct tracer *t;
9446
9447         for (t = trace_types; t; t = t->next)
9448                 add_tracer_options(tr, t);
9449 }
9450
9451 static void update_tracer_options(struct trace_array *tr)
9452 {
9453         mutex_lock(&trace_types_lock);
9454         tracer_options_updated = true;
9455         __update_tracer_options(tr);
9456         mutex_unlock(&trace_types_lock);
9457 }
9458
9459 /* Must have trace_types_lock held */
9460 struct trace_array *trace_array_find(const char *instance)
9461 {
9462         struct trace_array *tr, *found = NULL;
9463
9464         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9465                 if (tr->name && strcmp(tr->name, instance) == 0) {
9466                         found = tr;
9467                         break;
9468                 }
9469         }
9470
9471         return found;
9472 }
9473
9474 struct trace_array *trace_array_find_get(const char *instance)
9475 {
9476         struct trace_array *tr;
9477
9478         mutex_lock(&trace_types_lock);
9479         tr = trace_array_find(instance);
9480         if (tr)
9481                 tr->ref++;
9482         mutex_unlock(&trace_types_lock);
9483
9484         return tr;
9485 }
9486
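/*
 * Create the tracefs directory for instance @tr and populate it with
 * the per instance event and control files.  The directory is removed
 * again if the event files cannot be added.
 */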
9487 static int trace_array_create_dir(struct trace_array *tr)
9488 {
9489         int ret;
9490
9491         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9492         if (!tr->dir)
9493                 return -EINVAL;
9494
9495         ret = event_trace_add_tracer(tr->dir, tr);
9496         if (ret) {
9497                 tracefs_remove(tr->dir);
9498                 return ret;
9499         }
9500
9501         init_tracer_tracefs(tr, tr->dir);
9502         __update_tracer_options(tr);
9503
9504         return ret;
9505 }
9506
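/*
 * Allocate and set up a new trace_array (a tracing instance): trace
 * buffers, cpumasks, default flags, the nop tracer and, if the
 * "instances" directory already exists, its tracefs directory.  The new
 * instance starts with a reference count of 1.  Returns an ERR_PTR()
 * on failure.  event_mutex and trace_types_lock must be held.
 */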
9507 static struct trace_array *trace_array_create(const char *name)
9508 {
9509         struct trace_array *tr;
9510         int ret;
9511
9512         ret = -ENOMEM;
9513         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9514         if (!tr)
9515                 return ERR_PTR(ret);
9516
9517         tr->name = kstrdup(name, GFP_KERNEL);
9518         if (!tr->name)
9519                 goto out_free_tr;
9520
9521         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9522                 goto out_free_tr;
9523
9524         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9525                 goto out_free_tr;
9526
9527         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9528
9529         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9530
9531         raw_spin_lock_init(&tr->start_lock);
9532
9533         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9534
9535         tr->current_trace = &nop_trace;
9536
9537         INIT_LIST_HEAD(&tr->systems);
9538         INIT_LIST_HEAD(&tr->events);
9539         INIT_LIST_HEAD(&tr->hist_vars);
9540         INIT_LIST_HEAD(&tr->err_log);
9541
9542         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9543                 goto out_free_tr;
9544
9545         /* The ring buffer is expanded by default */
9546         trace_set_ring_buffer_expanded(tr);
9547
9548         if (ftrace_allocate_ftrace_ops(tr) < 0)
9549                 goto out_free_tr;
9550
9551         ftrace_init_trace_array(tr);
9552
9553         init_trace_flags_index(tr);
9554
9555         if (trace_instance_dir) {
9556                 ret = trace_array_create_dir(tr);
9557                 if (ret)
9558                         goto out_free_tr;
9559         } else
9560                 __trace_early_add_events(tr);
9561
9562         list_add(&tr->list, &ftrace_trace_arrays);
9563
9564         tr->ref++;
9565
9566         return tr;
9567
9568  out_free_tr:
9569         ftrace_free_ftrace_ops(tr);
9570         free_trace_buffers(tr);
9571         free_cpumask_var(tr->pipe_cpumask);
9572         free_cpumask_var(tr->tracing_cpumask);
9573         kfree(tr->name);
9574         kfree(tr);
9575
9576         return ERR_PTR(ret);
9577 }
9578
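/*
 * tracefs callback for creating an instance directory, e.g.:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *
 * Fails with -EEXIST if an instance with that name already exists.
 * instance_rmdir() below is the counterpart for rmdir.
 */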
9579 static int instance_mkdir(const char *name)
9580 {
9581         struct trace_array *tr;
9582         int ret;
9583
9584         mutex_lock(&event_mutex);
9585         mutex_lock(&trace_types_lock);
9586
9587         ret = -EEXIST;
9588         if (trace_array_find(name))
9589                 goto out_unlock;
9590
9591         tr = trace_array_create(name);
9592
9593         ret = PTR_ERR_OR_ZERO(tr);
9594
9595 out_unlock:
9596         mutex_unlock(&trace_types_lock);
9597         mutex_unlock(&event_mutex);
9598         return ret;
9599 }
9600
9601 /**
9602  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9603  * @name: The name of the trace array to be looked up/created.
9604  *
9605  * Returns a pointer to the trace array with the given name, creating it
9606  * if it does not already exist.  Returns NULL if it cannot be created.
9607  *
9608  * NOTE: This function increments the reference counter associated with the
9609  * trace array returned. This makes sure it cannot be freed while in use.
9610  * Use trace_array_put() once the trace array is no longer needed.
9611  * If the trace_array is to be freed, trace_array_destroy() needs to
9612  * be called after the trace_array_put(), or simply let user space delete
9613  * it from the tracefs instances directory. But until the
9614  * trace_array_put() is called, user space cannot delete it.
9615  *
9616  */
9617 struct trace_array *trace_array_get_by_name(const char *name)
9618 {
9619         struct trace_array *tr;
9620
9621         mutex_lock(&event_mutex);
9622         mutex_lock(&trace_types_lock);
9623
9624         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9625                 if (tr->name && strcmp(tr->name, name) == 0)
9626                         goto out_unlock;
9627         }
9628
9629         tr = trace_array_create(name);
9630
9631         if (IS_ERR(tr))
9632                 tr = NULL;
9633 out_unlock:
9634         if (tr)
9635                 tr->ref++;
9636
9637         mutex_unlock(&trace_types_lock);
9638         mutex_unlock(&event_mutex);
9639         return tr;
9640 }
9641 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
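/*
 * Minimal usage sketch for a module (the instance name and message are
 * illustrative only):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example");
 *	if (tr) {
 *		trace_array_init_printk(tr);
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);
 *	}
 *
 * See samples/ftrace/sample-trace-array.c for a complete example.
 */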
9642
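/*
 * Tear down one instance: fails with -EBUSY while it is still
 * referenced or its buffers are in use, otherwise unlinks it from
 * ftrace_trace_arrays and frees everything trace_array_create() set
 * up.  event_mutex and trace_types_lock must be held.
 */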
9643 static int __remove_instance(struct trace_array *tr)
9644 {
9645         int i;
9646
9647         /* Reference counter for a newly created trace array = 1. */
9648         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9649                 return -EBUSY;
9650
9651         list_del(&tr->list);
9652
9653         /* Disable all the flags that were enabled coming in */
9654         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9655                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9656                         set_tracer_flag(tr, 1 << i, 0);
9657         }
9658
9659         tracing_set_nop(tr);
9660         clear_ftrace_function_probes(tr);
9661         event_trace_del_tracer(tr);
9662         ftrace_clear_pids(tr);
9663         ftrace_destroy_function_files(tr);
9664         tracefs_remove(tr->dir);
9665         free_percpu(tr->last_func_repeats);
9666         free_trace_buffers(tr);
9667         clear_tracing_err_log(tr);
9668
9669         for (i = 0; i < tr->nr_topts; i++) {
9670                 kfree(tr->topts[i].topts);
9671         }
9672         kfree(tr->topts);
9673
9674         free_cpumask_var(tr->pipe_cpumask);
9675         free_cpumask_var(tr->tracing_cpumask);
9676         kfree(tr->name);
9677         kfree(tr);
9678
9679         return 0;
9680 }
9681
9682 int trace_array_destroy(struct trace_array *this_tr)
9683 {
9684         struct trace_array *tr;
9685         int ret;
9686
9687         if (!this_tr)
9688                 return -EINVAL;
9689
9690         mutex_lock(&event_mutex);
9691         mutex_lock(&trace_types_lock);
9692
9693         ret = -ENODEV;
9694
9695         /* Make sure the trace array exists before destroying it. */
9696         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9697                 if (tr == this_tr) {
9698                         ret = __remove_instance(tr);
9699                         break;
9700                 }
9701         }
9702
9703         mutex_unlock(&trace_types_lock);
9704         mutex_unlock(&event_mutex);
9705
9706         return ret;
9707 }
9708 EXPORT_SYMBOL_GPL(trace_array_destroy);
9709
9710 static int instance_rmdir(const char *name)
9711 {
9712         struct trace_array *tr;
9713         int ret;
9714
9715         mutex_lock(&event_mutex);
9716         mutex_lock(&trace_types_lock);
9717
9718         ret = -ENODEV;
9719         tr = trace_array_find(name);
9720         if (tr)
9721                 ret = __remove_instance(tr);
9722
9723         mutex_unlock(&trace_types_lock);
9724         mutex_unlock(&event_mutex);
9725
9726         return ret;
9727 }
9728
9729 static __init void create_trace_instances(struct dentry *d_tracer)
9730 {
9731         struct trace_array *tr;
9732
9733         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9734                                                          instance_mkdir,
9735                                                          instance_rmdir);
9736         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9737                 return;
9738
9739         mutex_lock(&event_mutex);
9740         mutex_lock(&trace_types_lock);
9741
9742         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9743                 if (!tr->name)
9744                         continue;
9745                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9746                              "Failed to create instance directory\n"))
9747                         break;
9748         }
9749
9750         mutex_unlock(&trace_types_lock);
9751         mutex_unlock(&event_mutex);
9752 }
9753
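/*
 * Populate @d_tracer with the standard control files for @tr ("trace",
 * "trace_pipe", "tracing_on", "buffer_size_kb", ...), the options
 * directory, the per CPU directories and the function tracer files.
 * Used for both the top level tracing directory and every instance.
 */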
9754 static void
9755 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9756 {
9757         int cpu;
9758
9759         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9760                         tr, &show_traces_fops);
9761
9762         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9763                         tr, &set_tracer_fops);
9764
9765         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9766                           tr, &tracing_cpumask_fops);
9767
9768         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9769                           tr, &tracing_iter_fops);
9770
9771         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9772                           tr, &tracing_fops);
9773
9774         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9775                           tr, &tracing_pipe_fops);
9776
9777         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9778                           tr, &tracing_entries_fops);
9779
9780         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9781                           tr, &tracing_total_entries_fops);
9782
9783         trace_create_file("free_buffer", 0200, d_tracer,
9784                           tr, &tracing_free_buffer_fops);
9785
9786         trace_create_file("trace_marker", 0220, d_tracer,
9787                           tr, &tracing_mark_fops);
9788
9789         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9790
9791         trace_create_file("trace_marker_raw", 0220, d_tracer,
9792                           tr, &tracing_mark_raw_fops);
9793
9794         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9795                           &trace_clock_fops);
9796
9797         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9798                           tr, &rb_simple_fops);
9799
9800         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9801                           &trace_time_stamp_mode_fops);
9802
9803         tr->buffer_percent = 50;
9804
9805         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9806                         tr, &buffer_percent_fops);
9807
9808         create_trace_options_dir(tr);
9809
9810 #ifdef CONFIG_TRACER_MAX_TRACE
9811         trace_create_maxlat_file(tr, d_tracer);
9812 #endif
9813
9814         if (ftrace_create_function_files(tr, d_tracer))
9815                 MEM_FAIL(1, "Could not allocate function filter files");
9816
9817 #ifdef CONFIG_TRACER_SNAPSHOT
9818         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9819                           tr, &snapshot_fops);
9820 #endif
9821
9822         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9823                           tr, &tracing_err_log_fops);
9824
9825         for_each_tracing_cpu(cpu)
9826                 tracing_init_tracefs_percpu(tr, cpu);
9827
9828         ftrace_init_tracefs(tr, d_tracer);
9829 }
9830
9831 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9832 {
9833         struct vfsmount *mnt;
9834         struct file_system_type *type;
9835
9836         /*
9837          * To maintain backward compatibility for tools that mount
9838          * debugfs to get to the tracing facility, tracefs is automatically
9839          * mounted to the debugfs/tracing directory.
9840          */
9841         type = get_fs_type("tracefs");
9842         if (!type)
9843                 return NULL;
9844         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9845         put_filesystem(type);
9846         if (IS_ERR(mnt))
9847                 return NULL;
9848         mntget(mnt);
9849
9850         return mnt;
9851 }
9852
9853 /**
9854  * tracing_init_dentry - initialize top level trace array
9855  *
9856  * This is called when creating files or directories in the tracing
9857  * directory. It is called via fs_initcall() from the boot up code and
9858  * returns 0 on success or a negative error code on failure.
9859  */
9860 int tracing_init_dentry(void)
9861 {
9862         struct trace_array *tr = &global_trace;
9863
9864         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9865                 pr_warn("Tracing disabled due to lockdown\n");
9866                 return -EPERM;
9867         }
9868
9869         /* The top level trace array uses NULL as parent */
9870         if (tr->dir)
9871                 return 0;
9872
9873         if (WARN_ON(!tracefs_initialized()))
9874                 return -ENODEV;
9875
9876         /*
9877          * As there may still be users that expect the tracing
9878          * files to exist in debugfs/tracing, we must automount
9879          * the tracefs file system there, so older tools still
9880          * work with the newer kernel.
9881          */
9882         tr->dir = debugfs_create_automount("tracing", NULL,
9883                                            trace_automount, NULL);
9884
9885         return 0;
9886 }
9887
9888 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9889 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9890
9891 static struct workqueue_struct *eval_map_wq __initdata;
9892 static struct work_struct eval_map_work __initdata;
9893 static struct work_struct tracerfs_init_work __initdata;
9894
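/*
 * Eval maps translate the enum/sizeof() symbols used in event print
 * formats into their numeric values.  Inserting the core kernel's maps
 * can take a while, so it is pushed to a workqueue at boot when one can
 * be allocated (see trace_eval_init() below).
 */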
9895 static void __init eval_map_work_func(struct work_struct *work)
9896 {
9897         int len;
9898
9899         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9900         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9901 }
9902
9903 static int __init trace_eval_init(void)
9904 {
9905         INIT_WORK(&eval_map_work, eval_map_work_func);
9906
9907         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9908         if (!eval_map_wq) {
9909                 pr_err("Unable to allocate eval_map_wq\n");
9910                 /* Do work here */
9911                 eval_map_work_func(&eval_map_work);
9912                 return -ENOMEM;
9913         }
9914
9915         queue_work(eval_map_wq, &eval_map_work);
9916         return 0;
9917 }
9918
9919 subsys_initcall(trace_eval_init);
9920
9921 static int __init trace_eval_sync(void)
9922 {
9923         /* Make sure the eval map updates are finished */
9924         if (eval_map_wq)
9925                 destroy_workqueue(eval_map_wq);
9926         return 0;
9927 }
9928
9929 late_initcall_sync(trace_eval_sync);
9930
9931
9932 #ifdef CONFIG_MODULES
9933 static void trace_module_add_evals(struct module *mod)
9934 {
9935         if (!mod->num_trace_evals)
9936                 return;
9937
9938         /*
9939          * Modules with bad taint do not have events created; do
9940          * not bother with their eval maps either.
9941          */
9942         if (trace_module_has_bad_taint(mod))
9943                 return;
9944
9945         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9946 }
9947
9948 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9949 static void trace_module_remove_evals(struct module *mod)
9950 {
9951         union trace_eval_map_item *map;
9952         union trace_eval_map_item **last = &trace_eval_maps;
9953
9954         if (!mod->num_trace_evals)
9955                 return;
9956
9957         mutex_lock(&trace_eval_mutex);
9958
9959         map = trace_eval_maps;
9960
9961         while (map) {
9962                 if (map->head.mod == mod)
9963                         break;
9964                 map = trace_eval_jmp_to_tail(map);
9965                 last = &map->tail.next;
9966                 map = map->tail.next;
9967         }
9968         if (!map)
9969                 goto out;
9970
9971         *last = trace_eval_jmp_to_tail(map)->tail.next;
9972         kfree(map);
9973  out:
9974         mutex_unlock(&trace_eval_mutex);
9975 }
9976 #else
9977 static inline void trace_module_remove_evals(struct module *mod) { }
9978 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9979
9980 static int trace_module_notify(struct notifier_block *self,
9981                                unsigned long val, void *data)
9982 {
9983         struct module *mod = data;
9984
9985         switch (val) {
9986         case MODULE_STATE_COMING:
9987                 trace_module_add_evals(mod);
9988                 break;
9989         case MODULE_STATE_GOING:
9990                 trace_module_remove_evals(mod);
9991                 break;
9992         }
9993
9994         return NOTIFY_OK;
9995 }
9996
9997 static struct notifier_block trace_module_nb = {
9998         .notifier_call = trace_module_notify,
9999         .priority = 0,
10000 };
10001 #endif /* CONFIG_MODULES */
10002
10003 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10004 {
10005
10006         event_trace_init();
10007
10008         init_tracer_tracefs(&global_trace, NULL);
10009         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10010
10011         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10012                         &global_trace, &tracing_thresh_fops);
10013
10014         trace_create_file("README", TRACE_MODE_READ, NULL,
10015                         NULL, &tracing_readme_fops);
10016
10017         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10018                         NULL, &tracing_saved_cmdlines_fops);
10019
10020         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10021                           NULL, &tracing_saved_cmdlines_size_fops);
10022
10023         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10024                         NULL, &tracing_saved_tgids_fops);
10025
10026         trace_create_eval_file(NULL);
10027
10028 #ifdef CONFIG_MODULES
10029         register_module_notifier(&trace_module_nb);
10030 #endif
10031
10032 #ifdef CONFIG_DYNAMIC_FTRACE
10033         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10034                         NULL, &tracing_dyn_info_fops);
10035 #endif
10036
10037         create_trace_instances(NULL);
10038
10039         update_tracer_options(&global_trace);
10040 }
10041
10042 static __init int tracer_init_tracefs(void)
10043 {
10044         int ret;
10045
10046         trace_access_lock_init();
10047
10048         ret = tracing_init_dentry();
10049         if (ret)
10050                 return 0;
10051
10052         if (eval_map_wq) {
10053                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10054                 queue_work(eval_map_wq, &tracerfs_init_work);
10055         } else {
10056                 tracer_init_tracefs_work_func(NULL);
10057         }
10058
10059         rv_init_interface();
10060
10061         return 0;
10062 }
10063
10064 fs_initcall(tracer_init_tracefs);
10065
10066 static int trace_die_panic_handler(struct notifier_block *self,
10067                                 unsigned long ev, void *unused);
10068
10069 static struct notifier_block trace_panic_notifier = {
10070         .notifier_call = trace_die_panic_handler,
10071         .priority = INT_MAX - 1,
10072 };
10073
10074 static struct notifier_block trace_die_notifier = {
10075         .notifier_call = trace_die_panic_handler,
10076         .priority = INT_MAX - 1,
10077 };
10078
10079 /*
10080  * The idea is to execute the following die/panic callback early, in order
10081  * to avoid showing irrelevant information in the trace (like other panic
10082  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10083  * warnings get disabled (to prevent potential log flooding).
10084  */
10085 static int trace_die_panic_handler(struct notifier_block *self,
10086                                 unsigned long ev, void *unused)
10087 {
10088         if (!ftrace_dump_on_oops)
10089                 return NOTIFY_DONE;
10090
10091         /* The die notifier requires DIE_OOPS to trigger */
10092         if (self == &trace_die_notifier && ev != DIE_OOPS)
10093                 return NOTIFY_DONE;
10094
10095         ftrace_dump(ftrace_dump_on_oops);
10096
10097         return NOTIFY_DONE;
10098 }
10099
10100 /*
10101  * printk is set to a max of 1024; we really don't need it that big.
10102  * Nothing should be printing 1000 characters anyway.
10103  */
10104 #define TRACE_MAX_PRINT         1000
10105
10106 /*
10107  * Define here KERN_TRACE so that we have one place to modify
10108  * it if we decide to change what log level the ftrace dump
10109  * should be at.
10110  */
10111 #define KERN_TRACE              KERN_EMERG
10112
10113 void
10114 trace_printk_seq(struct trace_seq *s)
10115 {
10116         /* Probably should print a warning here. */
10117         if (s->seq.len >= TRACE_MAX_PRINT)
10118                 s->seq.len = TRACE_MAX_PRINT;
10119
10120         /*
10121          * More paranoid code. Although the buffer size is set to
10122          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10123          * an extra layer of protection.
10124          */
10125         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10126                 s->seq.len = s->seq.size - 1;
10127
10128         /* Should already be NUL terminated, but we are paranoid. */
10129         s->buffer[s->seq.len] = 0;
10130
10131         printk(KERN_TRACE "%s", s->buffer);
10132
10133         trace_seq_init(s);
10134 }
10135
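/*
 * Set up an iterator over the global trace buffer for dumping.  No
 * memory is allocated here; the static temp and fmt buffers are used,
 * so this is safe to call from the oops/panic path.
 */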
10136 void trace_init_global_iter(struct trace_iterator *iter)
10137 {
10138         iter->tr = &global_trace;
10139         iter->trace = iter->tr->current_trace;
10140         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10141         iter->array_buffer = &global_trace.array_buffer;
10142
10143         if (iter->trace && iter->trace->open)
10144                 iter->trace->open(iter);
10145
10146         /* Annotate start of buffers if we had overruns */
10147         if (ring_buffer_overruns(iter->array_buffer->buffer))
10148                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10149
10150         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10151         if (trace_clocks[iter->tr->clock_id].in_ns)
10152                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10153
10154         /* Cannot use kmalloc for iter.temp and iter.fmt */
10155         iter->temp = static_temp_buf;
10156         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10157         iter->fmt = static_fmt_buf;
10158         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10159 }
10160
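/*
 * Dump the ring buffer(s) to the console via printk(KERN_TRACE), used
 * on oops/panic and for sysrq-z.  Tracing is turned off first and only
 * one dump may run at a time.
 */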
10161 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10162 {
10163         /* use static because iter can be a bit big for the stack */
10164         static struct trace_iterator iter;
10165         static atomic_t dump_running;
10166         struct trace_array *tr = &global_trace;
10167         unsigned int old_userobj;
10168         unsigned long flags;
10169         int cnt = 0, cpu;
10170
10171         /* Only allow one dump user at a time. */
10172         if (atomic_inc_return(&dump_running) != 1) {
10173                 atomic_dec(&dump_running);
10174                 return;
10175         }
10176
10177         /*
10178          * Always turn off tracing when we dump.
10179          * We don't need to show trace output of what happens
10180          * between multiple crashes.
10181          *
10182          * If the user does a sysrq-z, then they can re-enable
10183          * tracing with echo 1 > tracing_on.
10184          */
10185         tracing_off();
10186
10187         local_irq_save(flags);
10188
10189         /* Simulate the iterator */
10190         trace_init_global_iter(&iter);
10191
10192         for_each_tracing_cpu(cpu) {
10193                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10194         }
10195
10196         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10197
10198         /* don't look at user memory in panic mode */
10199         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10200
10201         switch (oops_dump_mode) {
10202         case DUMP_ALL:
10203                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10204                 break;
10205         case DUMP_ORIG:
10206                 iter.cpu_file = raw_smp_processor_id();
10207                 break;
10208         case DUMP_NONE:
10209                 goto out_enable;
10210         default:
10211                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10212                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10213         }
10214
10215         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10216
10217         /* Did function tracer already get disabled? */
10218         if (ftrace_is_dead()) {
10219                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10220                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10221         }
10222
10223         /*
10224          * We need to stop all tracing on all CPUS to read
10225          * the next buffer. This is a bit expensive, but is
10226          * not done often. We fill all that we can read,
10227          * and then release the locks again.
10228          */
10229
10230         while (!trace_empty(&iter)) {
10231
10232                 if (!cnt)
10233                         printk(KERN_TRACE "---------------------------------\n");
10234
10235                 cnt++;
10236
10237                 trace_iterator_reset(&iter);
10238                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10239
10240                 if (trace_find_next_entry_inc(&iter) != NULL) {
10241                         int ret;
10242
10243                         ret = print_trace_line(&iter);
10244                         if (ret != TRACE_TYPE_NO_CONSUME)
10245                                 trace_consume(&iter);
10246                 }
10247                 touch_nmi_watchdog();
10248
10249                 trace_printk_seq(&iter.seq);
10250         }
10251
10252         if (!cnt)
10253                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10254         else
10255                 printk(KERN_TRACE "---------------------------------\n");
10256
10257  out_enable:
10258         tr->trace_flags |= old_userobj;
10259
10260         for_each_tracing_cpu(cpu) {
10261                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10262         }
10263         atomic_dec(&dump_running);
10264         local_irq_restore(flags);
10265 }
10266 EXPORT_SYMBOL_GPL(ftrace_dump);
10267
10268 #define WRITE_BUFSIZE  4096
10269
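/*
 * Helper for command style tracefs files (such as kprobe_events):
 * copy the user buffer in WRITE_BUFSIZE chunks, split it into lines,
 * strip '#' comments and hand each resulting command to @createfn.
 * Returns the number of bytes consumed or a negative error.
 */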
10270 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10271                                 size_t count, loff_t *ppos,
10272                                 int (*createfn)(const char *))
10273 {
10274         char *kbuf, *buf, *tmp;
10275         int ret = 0;
10276         size_t done = 0;
10277         size_t size;
10278
10279         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10280         if (!kbuf)
10281                 return -ENOMEM;
10282
10283         while (done < count) {
10284                 size = count - done;
10285
10286                 if (size >= WRITE_BUFSIZE)
10287                         size = WRITE_BUFSIZE - 1;
10288
10289                 if (copy_from_user(kbuf, buffer + done, size)) {
10290                         ret = -EFAULT;
10291                         goto out;
10292                 }
10293                 kbuf[size] = '\0';
10294                 buf = kbuf;
10295                 do {
10296                         tmp = strchr(buf, '\n');
10297                         if (tmp) {
10298                                 *tmp = '\0';
10299                                 size = tmp - buf + 1;
10300                         } else {
10301                                 size = strlen(buf);
10302                                 if (done + size < count) {
10303                                         if (buf != kbuf)
10304                                                 break;
10305                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10306                                         pr_warn("Line length is too long: Should be less than %d\n",
10307                                                 WRITE_BUFSIZE - 2);
10308                                         ret = -EINVAL;
10309                                         goto out;
10310                                 }
10311                         }
10312                         done += size;
10313
10314                         /* Remove comments */
10315                         tmp = strchr(buf, '#');
10316
10317                         if (tmp)
10318                                 *tmp = '\0';
10319
10320                         ret = createfn(buf);
10321                         if (ret)
10322                                 goto out;
10323                         buf += size;
10324
10325                 } while (done < count);
10326         }
10327         ret = done;
10328
10329 out:
10330         kfree(kbuf);
10331
10332         return ret;
10333 }
10334
10335 #ifdef CONFIG_TRACER_MAX_TRACE
10336 __init static bool tr_needs_alloc_snapshot(const char *name)
10337 {
10338         char *test;
10339         int len = strlen(name);
10340         bool ret;
10341
10342         if (!boot_snapshot_index)
10343                 return false;
10344
10345         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10346             boot_snapshot_info[len] == '\t')
10347                 return true;
10348
10349         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10350         if (!test)
10351                 return false;
10352
10353         sprintf(test, "\t%s\t", name);
10354         ret = strstr(boot_snapshot_info, test) != NULL;
10355         kfree(test);
10356         return ret;
10357 }
10358
10359 __init static void do_allocate_snapshot(const char *name)
10360 {
10361         if (!tr_needs_alloc_snapshot(name))
10362                 return;
10363
10364         /*
10365          * When allocate_snapshot is set, the next call to
10366          * allocate_trace_buffers() (called by trace_array_get_by_name())
10367          * will allocate the snapshot buffer. That will also clear
10368          * this flag.
10369          */
10370         allocate_snapshot = true;
10371 }
10372 #else
10373 static inline void do_allocate_snapshot(const char *name) { }
10374 #endif
10375
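/*
 * Handle the "trace_instance=" boot parameter.  boot_instance_info is
 * a tab separated list of "name[,event,...]" entries; create each
 * named instance and enable the listed events on it.
 */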
10376 __init static void enable_instances(void)
10377 {
10378         struct trace_array *tr;
10379         char *curr_str;
10380         char *str;
10381         char *tok;
10382
10383         /* A tab is always appended */
10384         boot_instance_info[boot_instance_index - 1] = '\0';
10385         str = boot_instance_info;
10386
10387         while ((curr_str = strsep(&str, "\t"))) {
10388
10389                 tok = strsep(&curr_str, ",");
10390
10391                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10392                         do_allocate_snapshot(tok);
10393
10394                 tr = trace_array_get_by_name(tok);
10395                 if (!tr) {
10396                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10397                         continue;
10398                 }
10399                 /* Allow user space to delete it */
10400                 trace_array_put(tr);
10401
10402                 while ((tok = strsep(&curr_str, ","))) {
10403                         early_enable_events(tr, tok, true);
10404                 }
10405         }
10406 }
10407
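/*
 * Early initialization of the tracing facility: allocate the global
 * trace buffers and their support structures, register the nop tracer
 * and hook up the panic/die notifiers.  Called from early_trace_init()
 * before tracefs is available.
 */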
10408 __init static int tracer_alloc_buffers(void)
10409 {
10410         int ring_buf_size;
10411         int ret = -ENOMEM;
10412
10414         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10415                 pr_warn("Tracing disabled due to lockdown\n");
10416                 return -EPERM;
10417         }
10418
10419         /*
10420          * Make sure we don't accidentally add more trace options
10421          * than we have bits for.
10422          */
10423         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10424
10425         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10426                 goto out;
10427
10428         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10429                 goto out_free_buffer_mask;
10430
10431         /* Only allocate trace_printk buffers if a trace_printk exists */
10432         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10433                 /* Must be called before global_trace.buffer is allocated */
10434                 trace_printk_init_buffers();
10435
10436         /* To save memory, keep the ring buffer size to its minimum */
10437         if (global_trace.ring_buffer_expanded)
10438                 ring_buf_size = trace_buf_size;
10439         else
10440                 ring_buf_size = 1;
10441
10442         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10443         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10444
10445         raw_spin_lock_init(&global_trace.start_lock);
10446
10447         /*
10448          * The prepare callback allocates some memory for the ring buffer. We
10449          * don't free the buffer if the CPU goes down. If we were to free
10450          * the buffer, then the user would lose any trace that was in the
10451          * buffer. The memory will be removed once the "instance" is removed.
10452          */
10453         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10454                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10455                                       NULL);
10456         if (ret < 0)
10457                 goto out_free_cpumask;
10458         /* Used for event triggers */
10459         ret = -ENOMEM;
10460         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10461         if (!temp_buffer)
10462                 goto out_rm_hp_state;
10463
10464         if (trace_create_savedcmd() < 0)
10465                 goto out_free_temp_buffer;
10466
10467         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10468                 goto out_free_savedcmd;
10469
10470         /* TODO: make the number of buffers hot pluggable with CPUS */
10471         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10472                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10473                 goto out_free_pipe_cpumask;
10474         }
10475         if (global_trace.buffer_disabled)
10476                 tracing_off();
10477
10478         if (trace_boot_clock) {
10479                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10480                 if (ret < 0)
10481                         pr_warn("Trace clock %s not defined, going back to default\n",
10482                                 trace_boot_clock);
10483         }
10484
10485         /*
10486          * register_tracer() might reference current_trace, so it
10487          * needs to be set before we register anything. This is
10488          * just a bootstrap of current_trace anyway.
10489          */
10490         global_trace.current_trace = &nop_trace;
10491
10492         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10493
10494         ftrace_init_global_array_ops(&global_trace);
10495
10496         init_trace_flags_index(&global_trace);
10497
10498         register_tracer(&nop_trace);
10499
10500         /* Function tracing may start here (via kernel command line) */
10501         init_function_trace();
10502
10503         /* All seems OK, enable tracing */
10504         tracing_disabled = 0;
10505
10506         atomic_notifier_chain_register(&panic_notifier_list,
10507                                        &trace_panic_notifier);
10508
10509         register_die_notifier(&trace_die_notifier);
10510
10511         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10512
10513         INIT_LIST_HEAD(&global_trace.systems);
10514         INIT_LIST_HEAD(&global_trace.events);
10515         INIT_LIST_HEAD(&global_trace.hist_vars);
10516         INIT_LIST_HEAD(&global_trace.err_log);
10517         list_add(&global_trace.list, &ftrace_trace_arrays);
10518
10519         apply_trace_boot_options();
10520
10521         register_snapshot_cmd();
10522
10523         test_can_verify();
10524
10525         return 0;
10526
10527 out_free_pipe_cpumask:
10528         free_cpumask_var(global_trace.pipe_cpumask);
10529 out_free_savedcmd:
10530         free_saved_cmdlines_buffer(savedcmd);
10531 out_free_temp_buffer:
10532         ring_buffer_free(temp_buffer);
10533 out_rm_hp_state:
10534         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10535 out_free_cpumask:
10536         free_cpumask_var(global_trace.tracing_cpumask);
10537 out_free_buffer_mask:
10538         free_cpumask_var(tracing_buffer_mask);
10539 out:
10540         return ret;
10541 }
10542
10543 void __init ftrace_boot_snapshot(void)
10544 {
10545 #ifdef CONFIG_TRACER_MAX_TRACE
10546         struct trace_array *tr;
10547
10548         if (!snapshot_at_boot)
10549                 return;
10550
10551         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10552                 if (!tr->allocated_snapshot)
10553                         continue;
10554
10555                 tracing_snapshot_instance(tr);
10556                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10557         }
10558 #endif
10559 }
10560
10561 void __init early_trace_init(void)
10562 {
10563         if (tracepoint_printk) {
10564                 tracepoint_print_iter =
10565                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10566                 if (MEM_FAIL(!tracepoint_print_iter,
10567                              "Failed to allocate trace iterator\n"))
10568                         tracepoint_printk = 0;
10569                 else
10570                         static_key_enable(&tracepoint_printk_key.key);
10571         }
10572         tracer_alloc_buffers();
10573
10574         init_events();
10575 }
10576
10577 void __init trace_init(void)
10578 {
10579         trace_event_init();
10580
10581         if (boot_instance_index)
10582                 enable_instances();
10583 }
10584
10585 __init static void clear_boot_tracer(void)
10586 {
10587         /*
10588          * The default bootup tracer string lives in an init section.
10589          * This function is called from a late initcall. If the boot
10590          * tracer was never registered, clear the pointer to prevent a
10591          * later registration from accessing the memory that is about
10592          * to be freed.
10593          */
10594         if (!default_bootup_tracer)
10595                 return;
10596
10597         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10598                default_bootup_tracer);
10599         default_bootup_tracer = NULL;
10600 }
10601
10602 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10603 __init static void tracing_set_default_clock(void)
10604 {
10605         /* sched_clock_stable() is determined in late_initcall */
10606         if (!trace_boot_clock && !sched_clock_stable()) {
10607                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10608                         pr_warn("Can not set tracing clock due to lockdown\n");
10609                         return;
10610                 }
10611
10612                 printk(KERN_WARNING
10613                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10614                        "If you want to keep using the local clock, then add:\n"
10615                        "  \"trace_clock=local\"\n"
10616                        "on the kernel command line\n");
10617                 tracing_set_clock(&global_trace, "global");
10618         }
10619 }
10620 #else
10621 static inline void tracing_set_default_clock(void) { }
10622 #endif
10623
10624 __init static int late_trace_init(void)
10625 {
10626         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10627                 static_key_disable(&tracepoint_printk_key.key);
10628                 tracepoint_printk = 0;
10629         }
10630
10631         tracing_set_default_clock();
10632         clear_boot_tracer();
10633         return 0;
10634 }
10635
10636 late_initcall_sync(late_trace_init);