1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring-buffer to count the
61  * entries inserted during the selftest, although some concurrent
62  * insertions into the ring-buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
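
/*
 * Illustrative layout (a sketch based on the comment above, for N maps
 * saved from one module):
 *
 *   trace_eval_maps --> [ head ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *                          |                                         |
 *                          |  head.length = N                        |  tail.next -->
 *                          +- head.mod = owning module (or NULL)     +- next saved array
 *                                                                       (or NULL if last)
 */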
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193
194 static int __init set_cmdline_ftrace(char *str)
195 {
196         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197         default_bootup_tracer = bootup_tracer_buf;
198         /* We are using ftrace early, expand it */
199         trace_set_ring_buffer_expanded(NULL);
200         return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206         if (*str++ != '=' || !*str || !strcmp("1", str)) {
207                 ftrace_dump_on_oops = DUMP_ALL;
208                 return 1;
209         }
210
211         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212                 ftrace_dump_on_oops = DUMP_ORIG;
213                 return 1;
214         }
215
216         return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
219
220 static int __init stop_trace_on_warning(char *str)
221 {
222         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223                 __disable_trace_on_warning = 1;
224         return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227
228 static int __init boot_alloc_snapshot(char *str)
229 {
230         char *slot = boot_snapshot_info + boot_snapshot_index;
231         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232         int ret;
233
234         if (str[0] == '=') {
235                 str++;
236                 if (strlen(str) >= left)
237                         return -1;
238
239                 ret = snprintf(slot, left, "%s\t", str);
240                 boot_snapshot_index += ret;
241         } else {
242                 allocate_snapshot = true;
243                 /* We also need the main ring buffer expanded */
244                 trace_set_ring_buffer_expanded(NULL);
245         }
246         return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249
250
251 static int __init boot_snapshot(char *str)
252 {
253         snapshot_at_boot = true;
254         boot_alloc_snapshot(str);
255         return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258
259
260 static int __init boot_instance(char *str)
261 {
262         char *slot = boot_instance_info + boot_instance_index;
263         int left = sizeof(boot_instance_info) - boot_instance_index;
264         int ret;
265
266         if (strlen(str) >= left)
267                 return -1;
268
269         ret = snprintf(slot, left, "%s\t", str);
270         boot_instance_index += ret;
271
272         return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275
276
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278
279 static int __init set_trace_boot_options(char *str)
280 {
281         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282         return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288
289 static int __init set_trace_boot_clock(char *str)
290 {
291         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292         trace_boot_clock = trace_boot_clock_buf;
293         return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296
297 static int __init set_tracepoint_printk(char *str)
298 {
299         /* Ignore the "tp_printk_stop_on_boot" param */
300         if (*str == '_')
301                 return 0;
302
303         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304                 tracepoint_printk = 1;
305         return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311         tracepoint_printk_stop_on_boot = true;
312         return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
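
/*
 * Illustrative kernel command line combining several of the boot parameters
 * parsed above (see Documentation/admin-guide/kernel-parameters.txt for the
 * authoritative descriptions); the values here are examples only:
 *
 *   ftrace=function ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *   trace_options=stacktrace trace_clock=global tp_printk
 *   trace_instance=foo alloc_snapshot
 */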
315
316 unsigned long long ns2usecs(u64 nsec)
317 {
318         nsec += 500;
319         do_div(nsec, 1000);
320         return nsec;
321 }
322
323 static void
324 trace_process_export(struct trace_export *export,
325                struct ring_buffer_event *event, int flag)
326 {
327         struct trace_entry *entry;
328         unsigned int size = 0;
329
330         if (export->flags & flag) {
331                 entry = ring_buffer_event_data(event);
332                 size = ring_buffer_event_length(event);
333                 export->write(export, entry, size);
334         }
335 }
336
337 static DEFINE_MUTEX(ftrace_export_lock);
338
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347         if (export->flags & TRACE_EXPORT_FUNCTION)
348                 static_branch_inc(&trace_function_exports_enabled);
349
350         if (export->flags & TRACE_EXPORT_EVENT)
351                 static_branch_inc(&trace_event_exports_enabled);
352
353         if (export->flags & TRACE_EXPORT_MARKER)
354                 static_branch_inc(&trace_marker_exports_enabled);
355 }
356
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359         if (export->flags & TRACE_EXPORT_FUNCTION)
360                 static_branch_dec(&trace_function_exports_enabled);
361
362         if (export->flags & TRACE_EXPORT_EVENT)
363                 static_branch_dec(&trace_event_exports_enabled);
364
365         if (export->flags & TRACE_EXPORT_MARKER)
366                 static_branch_dec(&trace_marker_exports_enabled);
367 }
368
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371         struct trace_export *export;
372
373         preempt_disable_notrace();
374
375         export = rcu_dereference_raw_check(ftrace_exports_list);
376         while (export) {
377                 trace_process_export(export, event, flag);
378                 export = rcu_dereference_raw_check(export->next);
379         }
380
381         preempt_enable_notrace();
382 }
383
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387         rcu_assign_pointer(export->next, *list);
388         /*
389          * We are adding the export to the list, but another
390          * CPU might be walking that list. We need to make sure
391          * the export->next pointer is valid before another CPU sees
392          * the export pointer included in the list.
393          */
394         rcu_assign_pointer(*list, export);
395 }
396
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400         struct trace_export **p;
401
402         for (p = list; *p != NULL; p = &(*p)->next)
403                 if (*p == export)
404                         break;
405
406         if (*p != export)
407                 return -1;
408
409         rcu_assign_pointer(*p, (*p)->next);
410
411         return 0;
412 }
413
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417         ftrace_exports_enable(export);
418
419         add_trace_export(list, export);
420 }
421
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425         int ret;
426
427         ret = rm_trace_export(list, export);
428         ftrace_exports_disable(export);
429
430         return ret;
431 }
432
433 int register_ftrace_export(struct trace_export *export)
434 {
435         if (WARN_ON_ONCE(!export->write))
436                 return -1;
437
438         mutex_lock(&ftrace_export_lock);
439
440         add_ftrace_export(&ftrace_exports_list, export);
441
442         mutex_unlock(&ftrace_export_lock);
443
444         return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450         int ret;
451
452         mutex_lock(&ftrace_export_lock);
453
454         ret = rm_ftrace_export(&ftrace_exports_list, export);
455
456         mutex_unlock(&ftrace_export_lock);
457
458         return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
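
/*
 * Minimal usage sketch for the export hooks above (hypothetical module code,
 * not part of this file), assuming the struct trace_export definition from
 * <linux/trace.h>: a ->write() callback plus a ->flags mask selecting which
 * classes of events get forwarded.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes of trace data\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	err = register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */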
461
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS                                             \
464         (FUNCTION_DEFAULT_FLAGS |                                       \
465          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
466          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
467          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
468          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
469          TRACE_ITER_HASH_PTR)
470
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
473                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484         .trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489         if (!tr)
490                 tr = &global_trace;
491         tr->ring_buffer_expanded = true;
492 }
493
494 LIST_HEAD(ftrace_trace_arrays);
495
496 int trace_array_get(struct trace_array *this_tr)
497 {
498         struct trace_array *tr;
499         int ret = -ENODEV;
500
501         mutex_lock(&trace_types_lock);
502         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503                 if (tr == this_tr) {
504                         tr->ref++;
505                         ret = 0;
506                         break;
507                 }
508         }
509         mutex_unlock(&trace_types_lock);
510
511         return ret;
512 }
513
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516         WARN_ON(!this_tr->ref);
517         this_tr->ref--;
518 }
519
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531         if (!this_tr)
532                 return;
533
534         mutex_lock(&trace_types_lock);
535         __trace_array_put(this_tr);
536         mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542         int ret;
543
544         ret = security_locked_down(LOCKDOWN_TRACEFS);
545         if (ret)
546                 return ret;
547
548         if (tracing_disabled)
549                 return -ENODEV;
550
551         if (tr && trace_array_get(tr) < 0)
552                 return -ENODEV;
553
554         return 0;
555 }
556
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558                               struct trace_buffer *buffer,
559                               struct ring_buffer_event *event)
560 {
561         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562             !filter_match_preds(call->filter, rec)) {
563                 __trace_event_discard_commit(buffer, event);
564                 return 1;
565         }
566
567         return 0;
568 }
569
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580         return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595                        struct trace_pid_list *filtered_no_pids,
596                        struct task_struct *task)
597 {
598         /*
599          * If filtered_no_pids is not empty, and the task's pid is listed
600          * in filtered_no_pids, then return true.
601          * Otherwise, if filtered_pids is empty, that means we can
602          * trace all tasks. If it has content, then only trace pids
603          * within filtered_pids.
604          */
605
606         return (filtered_pids &&
607                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608                 (filtered_no_pids &&
609                  trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625                                   struct task_struct *self,
626                                   struct task_struct *task)
627 {
628         if (!pid_list)
629                 return;
630
631         /* For forks, we only add if the forking task is listed */
632         if (self) {
633                 if (!trace_find_filtered_pid(pid_list, self->pid))
634                         return;
635         }
636
637         /* "self" is set for forks, and NULL for exits */
638         if (self)
639                 trace_pid_list_set(pid_list, task->pid);
640         else
641                 trace_pid_list_clear(pid_list, task->pid);
642 }
643
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658         long pid = (unsigned long)v;
659         unsigned int next;
660
661         (*pos)++;
662
663         /* pid already is +1 of the actual previous bit */
664         if (trace_pid_list_next(pid_list, pid, &next) < 0)
665                 return NULL;
666
667         pid = next;
668
669         /* Return pid + 1 to allow zero to be represented */
670         return (void *)(pid + 1);
671 }
672
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686         unsigned long pid;
687         unsigned int first;
688         loff_t l = 0;
689
690         if (trace_pid_list_first(pid_list, &first) < 0)
691                 return NULL;
692
693         pid = first;
694
695         /* Return pid + 1 so that zero can be the exit value */
696         for (pid++; pid && l < *pos;
697              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698                 ;
699         return (void *)pid;
700 }
701
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712         unsigned long pid = (unsigned long)v - 1;
713
714         seq_printf(m, "%lu\n", pid);
715         return 0;
716 }
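
/*
 * Schematic sketch (hypothetical names) of how the three helpers above are
 * typically wired into a seq_file interface; real users such as the
 * set_event_pid file additionally take the appropriate locks around the
 * pid list:
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pids_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */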
717
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE            127
720
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722                     struct trace_pid_list **new_pid_list,
723                     const char __user *ubuf, size_t cnt)
724 {
725         struct trace_pid_list *pid_list;
726         struct trace_parser parser;
727         unsigned long val;
728         int nr_pids = 0;
729         ssize_t read = 0;
730         ssize_t ret;
731         loff_t pos;
732         pid_t pid;
733
734         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735                 return -ENOMEM;
736
737         /*
738          * Always recreate a new array. The write is an all or nothing
739          * operation: a new array is always created when the user adds
740          * new pids, and if the operation fails, the current list is
741          * not modified.
742          */
743         pid_list = trace_pid_list_alloc();
744         if (!pid_list) {
745                 trace_parser_put(&parser);
746                 return -ENOMEM;
747         }
748
749         if (filtered_pids) {
750                 /* copy the current bits to the new max */
751                 ret = trace_pid_list_first(filtered_pids, &pid);
752                 while (!ret) {
753                         trace_pid_list_set(pid_list, pid);
754                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755                         nr_pids++;
756                 }
757         }
758
759         ret = 0;
760         while (cnt > 0) {
761
762                 pos = 0;
763
764                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
765                 if (ret < 0)
766                         break;
767
768                 read += ret;
769                 ubuf += ret;
770                 cnt -= ret;
771
772                 if (!trace_parser_loaded(&parser))
773                         break;
774
775                 ret = -EINVAL;
776                 if (kstrtoul(parser.buffer, 0, &val))
777                         break;
778
779                 pid = (pid_t)val;
780
781                 if (trace_pid_list_set(pid_list, pid) < 0) {
782                         ret = -1;
783                         break;
784                 }
785                 nr_pids++;
786
787                 trace_parser_clear(&parser);
788                 ret = 0;
789         }
790         trace_parser_put(&parser);
791
792         if (ret < 0) {
793                 trace_pid_list_free(pid_list);
794                 return ret;
795         }
796
797         if (!nr_pids) {
798                 /* Cleared the list of pids */
799                 trace_pid_list_free(pid_list);
800                 pid_list = NULL;
801         }
802
803         *new_pid_list = pid_list;
804
805         return read;
806 }
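
/*
 * For reference, the tracefs files built on top of this helper (for example
 * set_event_pid) behave roughly as follows from user space:
 *
 *   echo 123 456 > /sys/kernel/tracing/set_event_pid    # replace the filter
 *   echo 789 >> /sys/kernel/tracing/set_event_pid       # append to it
 *   echo > /sys/kernel/tracing/set_event_pid            # clear the filter
 */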
807
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810         u64 ts;
811
812         /* Early boot up does not have a buffer yet */
813         if (!buf->buffer)
814                 return trace_clock_local();
815
816         ts = ring_buffer_time_stamp(buf->buffer);
817         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819         return ts;
820 }
821
822 u64 ftrace_now(int cpu)
823 {
824         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * Shows if the global trace has been enabled or not. It uses the
832  * mirror flag "buffer_disabled" so it can be used in fast paths, such as
833  * by the irqsoff tracer. But it may be inaccurate due to races. If you
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838         /*
839          * For quick access (irqsoff uses this in fast path), just
840          * return the mirror variable of the state of the ring buffer.
841          * It's a little racy, but we don't really care.
842          */
843         smp_rmb();
844         return !global_trace.buffer_disabled;
845 }
846
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If the dump on oops happens, it will be much appreciated
854  * to not have to wait for all that output. In any case, this is
855  * configurable at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer            *trace_types __read_mostly;
863
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868
869 /*
870  * Serialize access to the ring buffer
871  *
872  * The ring buffer serializes readers, but that is only low level protection.
873  * The validity of the events (which are returned by ring_buffer_peek() etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow other processes to
877  * consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not a reader page) in the ring buffer, and this page will be rewritten
880  *      by the events producer.
881  *   B) the page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to the system.
883  *
884  * These primitives allow multiple processes to access different cpu ring
885  * buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
895 static inline void trace_access_lock(int cpu)
896 {
897         if (cpu == RING_BUFFER_ALL_CPUS) {
898                 /* gain it for accessing the whole ring buffer. */
899                 down_write(&all_cpu_access_lock);
900         } else {
901                 /* gain it for accessing a cpu ring buffer. */
902
903                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904                 down_read(&all_cpu_access_lock);
905
906                 /* Secondly block other access to this @cpu ring buffer. */
907                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908         }
909 }
910
911 static inline void trace_access_unlock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 up_write(&all_cpu_access_lock);
915         } else {
916                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917                 up_read(&all_cpu_access_lock);
918         }
919 }
920
921 static inline void trace_access_lock_init(void)
922 {
923         int cpu;
924
925         for_each_possible_cpu(cpu)
926                 mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928
929 #else
930
931 static DEFINE_MUTEX(access_lock);
932
933 static inline void trace_access_lock(int cpu)
934 {
935         (void)cpu;
936         mutex_lock(&access_lock);
937 }
938
939 static inline void trace_access_unlock(int cpu)
940 {
941         (void)cpu;
942         mutex_unlock(&access_lock);
943 }
944
945 static inline void trace_access_lock_init(void)
946 {
947 }
948
949 #endif
950
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953                                  unsigned int trace_ctx,
954                                  int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956                                       struct trace_buffer *buffer,
957                                       unsigned int trace_ctx,
958                                       int skip, struct pt_regs *regs);
959
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962                                         unsigned int trace_ctx,
963                                         int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967                                       struct trace_buffer *buffer,
968                                       unsigned long trace_ctx,
969                                       int skip, struct pt_regs *regs)
970 {
971 }
972
973 #endif
974
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977                   int type, unsigned int trace_ctx)
978 {
979         struct trace_entry *ent = ring_buffer_event_data(event);
980
981         tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986                           int type,
987                           unsigned long len,
988                           unsigned int trace_ctx)
989 {
990         struct ring_buffer_event *event;
991
992         event = ring_buffer_lock_reserve(buffer, len);
993         if (event != NULL)
994                 trace_event_setup(event, type, trace_ctx);
995
996         return event;
997 }
998
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001         if (tr->array_buffer.buffer)
1002                 ring_buffer_record_on(tr->array_buffer.buffer);
1003         /*
1004          * This flag is looked at when buffers haven't been allocated
1005          * yet, or by some tracers (like irqsoff), that just want to
1006          * know if the ring buffer has been disabled, but it can handle
1007          * races where it gets disabled but we still do a record.
1008          * As the check is in the fast path of the tracers, it is more
1009          * important to be fast than accurate.
1010          */
1011         tr->buffer_disabled = 0;
1012         /* Make the flag seen by readers */
1013         smp_wmb();
1014 }
1015
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024         tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032         __this_cpu_write(trace_taskinfo_save, true);
1033
1034         /* If this is the temp buffer, we need to commit fully */
1035         if (this_cpu_read(trace_buffered_event) == event) {
1036                 /* Length is in event->array[0] */
1037                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038                 /* Release the temp buffer */
1039                 this_cpu_dec(trace_buffered_event_cnt);
1040                 /* ring_buffer_unlock_commit() enables preemption */
1041                 preempt_enable_notrace();
1042         } else
1043                 ring_buffer_unlock_commit(buffer);
1044 }
1045
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047                        const char *str, int size)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct print_entry *entry;
1052         unsigned int trace_ctx;
1053         int alloc;
1054
1055         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056                 return 0;
1057
1058         if (unlikely(tracing_selftest_running && tr == &global_trace))
1059                 return 0;
1060
1061         if (unlikely(tracing_disabled))
1062                 return 0;
1063
1064         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066         trace_ctx = tracing_gen_ctx();
1067         buffer = tr->array_buffer.buffer;
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070                                             trace_ctx);
1071         if (!event) {
1072                 size = 0;
1073                 goto out;
1074         }
1075
1076         entry = ring_buffer_event_data(event);
1077         entry->ip = ip;
1078
1079         memcpy(&entry->buf, str, size);
1080
1081         /* Add a newline if necessary */
1082         if (entry->buf[size - 1] != '\n') {
1083                 entry->buf[size] = '\n';
1084                 entry->buf[size + 1] = '\0';
1085         } else
1086                 entry->buf[size] = '\0';
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091         ring_buffer_nest_end(buffer);
1092         return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:    The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104         return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
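
/*
 * Note: callers normally use the trace_puts() macro rather than calling
 * __trace_puts() directly; the macro passes _THIS_IP_ as @ip and, for
 * compile-time constant strings, uses __trace_bputs() below so that only the
 * string's address has to be recorded in the ring buffer.
 */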
1107
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:    The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115         struct ring_buffer_event *event;
1116         struct trace_buffer *buffer;
1117         struct bputs_entry *entry;
1118         unsigned int trace_ctx;
1119         int size = sizeof(struct bputs_entry);
1120         int ret = 0;
1121
1122         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123                 return 0;
1124
1125         if (unlikely(tracing_selftest_running || tracing_disabled))
1126                 return 0;
1127
1128         trace_ctx = tracing_gen_ctx();
1129         buffer = global_trace.array_buffer.buffer;
1130
1131         ring_buffer_nest_start(buffer);
1132         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133                                             trace_ctx);
1134         if (!event)
1135                 goto out;
1136
1137         entry = ring_buffer_event_data(event);
1138         entry->ip                       = ip;
1139         entry->str                      = str;
1140
1141         __buffer_unlock_commit(buffer, event);
1142         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144         ret = 1;
1145  out:
1146         ring_buffer_nest_end(buffer);
1147         return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153                                            void *cond_data)
1154 {
1155         struct tracer *tracer = tr->current_trace;
1156         unsigned long flags;
1157
1158         if (in_nmi()) {
1159                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161                 return;
1162         }
1163
1164         if (!tr->allocated_snapshot) {
1165                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167                 tracer_tracing_off(tr);
1168                 return;
1169         }
1170
1171         /* Note, snapshot can not be used when the tracer uses it */
1172         if (tracer->use_max_tr) {
1173                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175                 return;
1176         }
1177
1178         local_irq_save(flags);
1179         update_max_tr(tr, current, smp_processor_id(), cond_data);
1180         local_irq_restore(flags);
1181 }
1182
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185         tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204         struct trace_array *tr = &global_trace;
1205
1206         tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:         The tracing instance to snapshot
1213  * @cond_data:  The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225         tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:         The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245         void *cond_data = NULL;
1246
1247         local_irq_disable();
1248         arch_spin_lock(&tr->max_lock);
1249
1250         if (tr->cond_snapshot)
1251                 cond_data = tr->cond_snapshot->cond_data;
1252
1253         arch_spin_unlock(&tr->max_lock);
1254         local_irq_enable();
1255
1256         return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261                                         struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266         int order;
1267         int ret;
1268
1269         if (!tr->allocated_snapshot) {
1270
1271                 /* Make the snapshot buffer have the same order as main buffer */
1272                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1273                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1274                 if (ret < 0)
1275                         return ret;
1276
1277                 /* allocate spare buffer */
1278                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1279                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1280                 if (ret < 0)
1281                         return ret;
1282
1283                 tr->allocated_snapshot = true;
1284         }
1285
1286         return 0;
1287 }
1288
1289 static void free_snapshot(struct trace_array *tr)
1290 {
1291         /*
1292          * We don't free the ring buffer; instead, we resize it because
1293          * the max_tr ring buffer has some state (e.g. ring->clock) and
1294          * we want to preserve it.
1295          */
1296         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1297         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1298         set_buffer_entries(&tr->max_buffer, 1);
1299         tracing_reset_online_cpus(&tr->max_buffer);
1300         tr->allocated_snapshot = false;
1301 }
1302
1303 /**
1304  * tracing_alloc_snapshot - allocate snapshot buffer.
1305  *
1306  * This only allocates the snapshot buffer if it isn't already
1307  * allocated - it doesn't also take a snapshot.
1308  *
1309  * This is meant to be used in cases where the snapshot buffer needs
1310  * to be set up for events that can't sleep but need to be able to
1311  * trigger a snapshot.
1312  */
1313 int tracing_alloc_snapshot(void)
1314 {
1315         struct trace_array *tr = &global_trace;
1316         int ret;
1317
1318         ret = tracing_alloc_snapshot_instance(tr);
1319         WARN_ON(ret < 0);
1320
1321         return ret;
1322 }
1323 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1324
1325 /**
1326  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1327  *
1328  * This is similar to tracing_snapshot(), but it will allocate the
1329  * snapshot buffer if it isn't already allocated. Use this only
1330  * where it is safe to sleep, as the allocation may sleep.
1331  *
1332  * This causes a swap between the snapshot buffer and the current live
1333  * tracing buffer. You can use this to take snapshots of the live
1334  * trace when some condition is triggered, but continue to trace.
1335  */
1336 void tracing_snapshot_alloc(void)
1337 {
1338         int ret;
1339
1340         ret = tracing_alloc_snapshot();
1341         if (ret < 0)
1342                 return;
1343
1344         tracing_snapshot();
1345 }
1346 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
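
/*
 * The same functionality is available from user space via the tracefs
 * "snapshot" file (see Documentation/trace/ftrace.rst), roughly:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed and take a snapshot
 *   cat /sys/kernel/tracing/snapshot        # read the snapshotted trace
 *   echo 2 > /sys/kernel/tracing/snapshot   # clear the snapshot buffer
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */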
1347
1348 /**
1349  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1350  * @tr:         The tracing instance
1351  * @cond_data:  User data to associate with the snapshot
1352  * @update:     Implementation of the cond_snapshot update function
1353  *
1354  * Check whether the conditional snapshot for the given instance has
1355  * already been enabled, or if the current tracer is already using a
1356  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1357  * save the cond_data and update function inside.
1358  *
1359  * Returns 0 if successful, error otherwise.
1360  */
1361 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1362                                  cond_update_fn_t update)
1363 {
1364         struct cond_snapshot *cond_snapshot;
1365         int ret = 0;
1366
1367         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1368         if (!cond_snapshot)
1369                 return -ENOMEM;
1370
1371         cond_snapshot->cond_data = cond_data;
1372         cond_snapshot->update = update;
1373
1374         mutex_lock(&trace_types_lock);
1375
1376         ret = tracing_alloc_snapshot_instance(tr);
1377         if (ret)
1378                 goto fail_unlock;
1379
1380         if (tr->current_trace->use_max_tr) {
1381                 ret = -EBUSY;
1382                 goto fail_unlock;
1383         }
1384
1385         /*
1386          * The cond_snapshot can only change to NULL without the
1387          * trace_types_lock. We don't care if we race with it going
1388          * to NULL, but we want to make sure that it's not set to
1389          * something other than NULL when we get here, which we can
1390          * do safely with only holding the trace_types_lock and not
1391          * having to take the max_lock.
1392          */
1393         if (tr->cond_snapshot) {
1394                 ret = -EBUSY;
1395                 goto fail_unlock;
1396         }
1397
1398         local_irq_disable();
1399         arch_spin_lock(&tr->max_lock);
1400         tr->cond_snapshot = cond_snapshot;
1401         arch_spin_unlock(&tr->max_lock);
1402         local_irq_enable();
1403
1404         mutex_unlock(&trace_types_lock);
1405
1406         return ret;
1407
1408  fail_unlock:
1409         mutex_unlock(&trace_types_lock);
1410         kfree(cond_snapshot);
1411         return ret;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1414
1415 /**
1416  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1417  * @tr:         The tracing instance
1418  *
1419  * Check whether the conditional snapshot for the given instance is
1420  * enabled; if so, free the cond_snapshot associated with it,
1421  * otherwise return -EINVAL.
1422  *
1423  * Returns 0 if successful, error otherwise.
1424  */
1425 int tracing_snapshot_cond_disable(struct trace_array *tr)
1426 {
1427         int ret = 0;
1428
1429         local_irq_disable();
1430         arch_spin_lock(&tr->max_lock);
1431
1432         if (!tr->cond_snapshot)
1433                 ret = -EINVAL;
1434         else {
1435                 kfree(tr->cond_snapshot);
1436                 tr->cond_snapshot = NULL;
1437         }
1438
1439         arch_spin_unlock(&tr->max_lock);
1440         local_irq_enable();
1441
1442         return ret;
1443 }
1444 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
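
/*
 * Rough usage sketch for conditional snapshots (hypothetical caller code;
 * the in-tree user is the histogram trigger snapshot action). The update()
 * callback decides, based on the cond_data passed to tracing_snapshot_cond(),
 * whether the swap actually happens:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		int *val = cond_data;
 *
 *		return *val > 100;	// only snapshot the interesting cases
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, NULL, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &value);	// value is an int supplied by the caller
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */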
1445 #else
1446 void tracing_snapshot(void)
1447 {
1448         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1449 }
1450 EXPORT_SYMBOL_GPL(tracing_snapshot);
1451 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1452 {
1453         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1456 int tracing_alloc_snapshot(void)
1457 {
1458         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1459         return -ENODEV;
1460 }
1461 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1462 void tracing_snapshot_alloc(void)
1463 {
1464         /* Give warning */
1465         tracing_snapshot();
1466 }
1467 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1468 void *tracing_cond_snapshot_data(struct trace_array *tr)
1469 {
1470         return NULL;
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1473 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1474 {
1475         return -ENODEV;
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1478 int tracing_snapshot_cond_disable(struct trace_array *tr)
1479 {
1480         return false;
1481 }
1482 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1483 #define free_snapshot(tr)       do { } while (0)
1484 #endif /* CONFIG_TRACER_SNAPSHOT */
1485
1486 void tracer_tracing_off(struct trace_array *tr)
1487 {
1488         if (tr->array_buffer.buffer)
1489                 ring_buffer_record_off(tr->array_buffer.buffer);
1490         /*
1491          * This flag is looked at when buffers haven't been allocated
1492          * yet, or by some tracers (like irqsoff), that just want to
1493          * know if the ring buffer has been disabled, but it can handle
1494          * races where it gets disabled but we still do a record.
1495          * As the check is in the fast path of the tracers, it is more
1496          * important to be fast than accurate.
1497          */
1498         tr->buffer_disabled = 1;
1499         /* Make the flag seen by readers */
1500         smp_wmb();
1501 }
1502
1503 /**
1504  * tracing_off - turn off tracing buffers
1505  *
1506  * This function stops the tracing buffers from recording data.
1507  * It does not disable any overhead the tracers themselves may
1508  * be causing. This function simply causes all recording to
1509  * the ring buffers to fail.
1510  */
1511 void tracing_off(void)
1512 {
1513         tracer_tracing_off(&global_trace);
1514 }
1515 EXPORT_SYMBOL_GPL(tracing_off);
1516
1517 void disable_trace_on_warning(void)
1518 {
1519         if (__disable_trace_on_warning) {
1520                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1521                         "Disabling tracing due to warning\n");
1522                 tracing_off();
1523         }
1524 }
1525
1526 /**
1527  * tracer_tracing_is_on - show real state of ring buffer enabled
1528  * @tr : the trace array to know if ring buffer is enabled
1529  *
1530  * Shows real state of the ring buffer if it is enabled or not.
1531  */
1532 bool tracer_tracing_is_on(struct trace_array *tr)
1533 {
1534         if (tr->array_buffer.buffer)
1535                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1536         return !tr->buffer_disabled;
1537 }
1538
1539 /**
1540  * tracing_is_on - show state of ring buffers enabled
1541  */
1542 int tracing_is_on(void)
1543 {
1544         return tracer_tracing_is_on(&global_trace);
1545 }
1546 EXPORT_SYMBOL_GPL(tracing_is_on);
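
/*
 * A common in-kernel debugging pattern using the on/off hooks above (a sketch,
 * assuming a caller-defined condition): let tracing run until the code detects
 * the problem, then freeze the ring buffer so the events leading up to it can
 * be read later from the "trace" file:
 *
 *	if (suspicious_condition) {
 *		trace_printk("hit the bad case, stopping trace\n");
 *		tracing_off();
 *	}
 */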
1547
1548 static int __init set_buf_size(char *str)
1549 {
1550         unsigned long buf_size;
1551
1552         if (!str)
1553                 return 0;
1554         buf_size = memparse(str, &str);
1555         /*
1556          * nr_entries can not be zero and the startup
1557          * tests require some buffer space. Therefore
1558          * ensure we have at least 4096 bytes of buffer.
1559          */
1560         trace_buf_size = max(4096UL, buf_size);
1561         return 1;
1562 }
1563 __setup("trace_buf_size=", set_buf_size);
1564
1565 static int __init set_tracing_thresh(char *str)
1566 {
1567         unsigned long threshold;
1568         int ret;
1569
1570         if (!str)
1571                 return 0;
1572         ret = kstrtoul(str, 0, &threshold);
1573         if (ret < 0)
1574                 return 0;
1575         tracing_thresh = threshold * 1000;
1576         return 1;
1577 }
1578 __setup("tracing_thresh=", set_tracing_thresh);
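
/*
 * Example: booting with "tracing_thresh=200" makes the tracers that honor
 * tracing_thresh (e.g. the latency tracers and function_graph) record only
 * events above 200 microseconds; the value is stored internally in
 * nanoseconds, hence the multiplication by 1000 above.
 */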
1579
1580 unsigned long nsecs_to_usecs(unsigned long nsecs)
1581 {
1582         return nsecs / 1000;
1583 }
1584
1585 /*
1586  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1587  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1588  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1589  * of strings in the order that the evals (enum) were defined.
1590  */
1591 #undef C
1592 #define C(a, b) b
1593
1594 /* These must match the bit positions in trace_iterator_flags */
1595 static const char *trace_options[] = {
1596         TRACE_FLAGS
1597         NULL
1598 };
1599
1600 static struct {
1601         u64 (*func)(void);
1602         const char *name;
1603         int in_ns;              /* is this clock in nanoseconds? */
1604 } trace_clocks[] = {
1605         { trace_clock_local,            "local",        1 },
1606         { trace_clock_global,           "global",       1 },
1607         { trace_clock_counter,          "counter",      0 },
1608         { trace_clock_jiffies,          "uptime",       0 },
1609         { trace_clock,                  "perf",         1 },
1610         { ktime_get_mono_fast_ns,       "mono",         1 },
1611         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1612         { ktime_get_boot_fast_ns,       "boot",         1 },
1613         { ktime_get_tai_fast_ns,        "tai",          1 },
1614         ARCH_TRACE_CLOCKS
1615 };
1616
1617 bool trace_clock_in_ns(struct trace_array *tr)
1618 {
1619         if (trace_clocks[tr->clock_id].in_ns)
1620                 return true;
1621
1622         return false;
1623 }
1624
1625 /*
1626  * trace_parser_get_init - gets the buffer for trace parser
1627  */
1628 int trace_parser_get_init(struct trace_parser *parser, int size)
1629 {
1630         memset(parser, 0, sizeof(*parser));
1631
1632         parser->buffer = kmalloc(size, GFP_KERNEL);
1633         if (!parser->buffer)
1634                 return 1;
1635
1636         parser->size = size;
1637         return 0;
1638 }
1639
1640 /*
1641  * trace_parser_put - frees the buffer for trace parser
1642  */
1643 void trace_parser_put(struct trace_parser *parser)
1644 {
1645         kfree(parser->buffer);
1646         parser->buffer = NULL;
1647 }
1648
1649 /*
1650  * trace_get_user - reads the user input string separated by space
1651  * (matched by isspace(ch))
1652  *
1653  * For each string found the 'struct trace_parser' is updated,
1654  * and the function returns.
1655  *
1656  * Returns number of bytes read.
1657  *
1658  * See kernel/trace/trace.h for 'struct trace_parser' details.
1659  */
1660 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1661         size_t cnt, loff_t *ppos)
1662 {
1663         char ch;
1664         size_t read = 0;
1665         ssize_t ret;
1666
1667         if (!*ppos)
1668                 trace_parser_clear(parser);
1669
1670         ret = get_user(ch, ubuf++);
1671         if (ret)
1672                 goto out;
1673
1674         read++;
1675         cnt--;
1676
1677         /*
1678          * If the parser did not finish with the last write, continue
1679          * reading the user input without skipping spaces.
1680          */
1681         if (!parser->cont) {
1682                 /* skip white space */
1683                 while (cnt && isspace(ch)) {
1684                         ret = get_user(ch, ubuf++);
1685                         if (ret)
1686                                 goto out;
1687                         read++;
1688                         cnt--;
1689                 }
1690
1691                 parser->idx = 0;
1692
1693                 /* only spaces were written */
1694                 if (isspace(ch) || !ch) {
1695                         *ppos += read;
1696                         ret = read;
1697                         goto out;
1698                 }
1699         }
1700
1701         /* read the non-space input */
1702         while (cnt && !isspace(ch) && ch) {
1703                 if (parser->idx < parser->size - 1)
1704                         parser->buffer[parser->idx++] = ch;
1705                 else {
1706                         ret = -EINVAL;
1707                         goto out;
1708                 }
1709                 ret = get_user(ch, ubuf++);
1710                 if (ret)
1711                         goto out;
1712                 read++;
1713                 cnt--;
1714         }
1715
1716         /* We either got finished input or we have to wait for another call. */
1717         if (isspace(ch) || !ch) {
1718                 parser->buffer[parser->idx] = 0;
1719                 parser->cont = false;
1720         } else if (parser->idx < parser->size - 1) {
1721                 parser->cont = true;
1722                 parser->buffer[parser->idx++] = ch;
1723                 /* Make sure the parsed string always terminates with '\0'. */
1724                 parser->buffer[parser->idx] = 0;
1725         } else {
1726                 ret = -EINVAL;
1727                 goto out;
1728         }
1729
1730         *ppos += read;
1731         ret = read;
1732
1733 out:
1734         return ret;
1735 }
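
/*
 * Typical caller pattern (an illustrative sketch only; apply_one_word()
 * is a hypothetical helper standing in for whatever the write() handler
 * does with each parsed word):
 *
 *      struct trace_parser parser;
 *      ssize_t read;
 *
 *      if (trace_parser_get_init(&parser, 128))
 *              return -ENOMEM;
 *
 *      read = trace_get_user(&parser, ubuf, cnt, ppos);
 *      if (read >= 0 && trace_parser_loaded(&parser) &&
 *          !trace_parser_cont(&parser))
 *              apply_one_word(parser.buffer);
 *      trace_parser_put(&parser);
 *
 * A word cut short by the end of a write sets parser->cont, and the next
 * write continues filling the same buffer.
 */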
1736
1737 /* TODO add a seq_buf_to_buffer() */
1738 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1739 {
1740         int len;
1741
1742         if (trace_seq_used(s) <= s->readpos)
1743                 return -EBUSY;
1744
1745         len = trace_seq_used(s) - s->readpos;
1746         if (cnt > len)
1747                 cnt = len;
1748         memcpy(buf, s->buffer + s->readpos, cnt);
1749
1750         s->readpos += cnt;
1751         return cnt;
1752 }
1753
1754 unsigned long __read_mostly     tracing_thresh;
1755
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 static const struct file_operations tracing_max_lat_fops;
1758
1759 #ifdef LATENCY_FS_NOTIFY
1760
1761 static struct workqueue_struct *fsnotify_wq;
1762
1763 static void latency_fsnotify_workfn(struct work_struct *work)
1764 {
1765         struct trace_array *tr = container_of(work, struct trace_array,
1766                                               fsnotify_work);
1767         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1768 }
1769
1770 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1771 {
1772         struct trace_array *tr = container_of(iwork, struct trace_array,
1773                                               fsnotify_irqwork);
1774         queue_work(fsnotify_wq, &tr->fsnotify_work);
1775 }
1776
1777 static void trace_create_maxlat_file(struct trace_array *tr,
1778                                      struct dentry *d_tracer)
1779 {
1780         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1781         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1782         tr->d_max_latency = trace_create_file("tracing_max_latency",
1783                                               TRACE_MODE_WRITE,
1784                                               d_tracer, tr,
1785                                               &tracing_max_lat_fops);
1786 }
1787
1788 __init static int latency_fsnotify_init(void)
1789 {
1790         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1791                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1792         if (!fsnotify_wq) {
1793                 pr_err("Unable to allocate tr_max_lat_wq\n");
1794                 return -ENOMEM;
1795         }
1796         return 0;
1797 }
1798
1799 late_initcall_sync(latency_fsnotify_init);
1800
1801 void latency_fsnotify(struct trace_array *tr)
1802 {
1803         if (!fsnotify_wq)
1804                 return;
1805         /*
1806          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1807          * possible that we are called from __schedule() or do_idle(), which
1808          * could cause a deadlock.
1809          */
1810         irq_work_queue(&tr->fsnotify_irqwork);
1811 }
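
/*
 * Summary of the deferral above (illustrative): latency_fsnotify() only
 * queues irq_work; latency_fsnotify_workfn_irq() then runs in hard irq
 * context and queues the work item; latency_fsnotify_workfn() finally
 * calls fsnotify_inode() from process context on fsnotify_wq. A user
 * space watcher of tracing_max_latency thus sees an FS_MODIFY event
 * without fsnotify ever being called from the scheduler paths.
 */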
1812
1813 #else /* !LATENCY_FS_NOTIFY */
1814
1815 #define trace_create_maxlat_file(tr, d_tracer)                          \
1816         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1817                           d_tracer, tr, &tracing_max_lat_fops)
1818
1819 #endif
1820
1821 /*
1822  * Copy the new maximum trace into the separate maximum-trace
1823  * structure. (this way the maximum trace is permanently saved,
1824  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1825  */
1826 static void
1827 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1828 {
1829         struct array_buffer *trace_buf = &tr->array_buffer;
1830         struct array_buffer *max_buf = &tr->max_buffer;
1831         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1832         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1833
1834         max_buf->cpu = cpu;
1835         max_buf->time_start = data->preempt_timestamp;
1836
1837         max_data->saved_latency = tr->max_latency;
1838         max_data->critical_start = data->critical_start;
1839         max_data->critical_end = data->critical_end;
1840
1841         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1842         max_data->pid = tsk->pid;
1843         /*
1844          * If tsk == current, then use current_uid(), as that does not use
1845          * RCU. The irq tracer can be called out of RCU scope.
1846          */
1847         if (tsk == current)
1848                 max_data->uid = current_uid();
1849         else
1850                 max_data->uid = task_uid(tsk);
1851
1852         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1853         max_data->policy = tsk->policy;
1854         max_data->rt_priority = tsk->rt_priority;
1855
1856         /* record this task's comm */
1857         tracing_record_cmdline(tsk);
1858         latency_fsnotify(tr);
1859 }
1860
1861 /**
1862  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1863  * @tr: tracer
1864  * @tsk: the task with the latency
1865  * @cpu: The cpu that initiated the trace.
1866  * @cond_data: User data associated with a conditional snapshot
1867  *
1868  * Flip the buffers between the @tr and the max_tr and record information
1869  * about which task was the cause of this latency.
1870  */
1871 void
1872 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1873               void *cond_data)
1874 {
1875         if (tr->stop_count)
1876                 return;
1877
1878         WARN_ON_ONCE(!irqs_disabled());
1879
1880         if (!tr->allocated_snapshot) {
1881                 /* Only the nop tracer should hit this when disabling */
1882                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1883                 return;
1884         }
1885
1886         arch_spin_lock(&tr->max_lock);
1887
1888         /* Inherit the recordable setting from array_buffer */
1889         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1890                 ring_buffer_record_on(tr->max_buffer.buffer);
1891         else
1892                 ring_buffer_record_off(tr->max_buffer.buffer);
1893
1894 #ifdef CONFIG_TRACER_SNAPSHOT
1895         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1896                 arch_spin_unlock(&tr->max_lock);
1897                 return;
1898         }
1899 #endif
1900         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1901
1902         __update_max_tr(tr, tsk, cpu);
1903
1904         arch_spin_unlock(&tr->max_lock);
1905
1906         /* Any waiters on the old snapshot buffer need to wake up */
1907         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1908 }
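
/*
 * Illustrative caller sketch (roughly how a latency tracer uses this;
 * the real callers are the latency tracers): with interrupts disabled
 * and a new maximum just measured,
 *
 *      if (delta > tr->max_latency) {
 *              tr->max_latency = delta;
 *              update_max_tr(tr, current, smp_processor_id(), NULL);
 *      }
 *
 * swaps the hot buffer into max_buffer so the worst-case trace survives,
 * while tracing continues in the buffer that used to be the snapshot.
 */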
1909
1910 /**
1911  * update_max_tr_single - only copy one trace over, and reset the rest
1912  * @tr: tracer
1913  * @tsk: task with the latency
1914  * @cpu: the cpu of the buffer to copy.
1915  *
1916  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1917  */
1918 void
1919 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1920 {
1921         int ret;
1922
1923         if (tr->stop_count)
1924                 return;
1925
1926         WARN_ON_ONCE(!irqs_disabled());
1927         if (!tr->allocated_snapshot) {
1928                 /* Only the nop tracer should hit this when disabling */
1929                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1930                 return;
1931         }
1932
1933         arch_spin_lock(&tr->max_lock);
1934
1935         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1936
1937         if (ret == -EBUSY) {
1938                 /*
1939                  * We failed to swap the buffer due to a commit taking
1940                  * place on this CPU. We fail to record, but we reset
1941                  * the max trace buffer (no one writes directly to it)
1942                  * and flag that it failed.
1943                  * Another reason is that a resize is in progress.
1944                  */
1945                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1946                         "Failed to swap buffers due to commit or resize in progress\n");
1947         }
1948
1949         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1950
1951         __update_max_tr(tr, tsk, cpu);
1952         arch_spin_unlock(&tr->max_lock);
1953 }
1954
1955 #endif /* CONFIG_TRACER_MAX_TRACE */
1956
1957 static int wait_on_pipe(struct trace_iterator *iter, int full)
1958 {
1959         int ret;
1960
1961         /* Iterators are static, they should be filled or empty */
1962         if (trace_buffer_iter(iter, iter->cpu_file))
1963                 return 0;
1964
1965         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1966
1967 #ifdef CONFIG_TRACER_MAX_TRACE
1968         /*
1969          * Make sure this is still the snapshot buffer, as if a snapshot were
1970          * to happen, this would now be the main buffer.
1971          */
1972         if (iter->snapshot)
1973                 iter->array_buffer = &iter->tr->max_buffer;
1974 #endif
1975         return ret;
1976 }
1977
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979 static bool selftests_can_run;
1980
1981 struct trace_selftests {
1982         struct list_head                list;
1983         struct tracer                   *type;
1984 };
1985
1986 static LIST_HEAD(postponed_selftests);
1987
1988 static int save_selftest(struct tracer *type)
1989 {
1990         struct trace_selftests *selftest;
1991
1992         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1993         if (!selftest)
1994                 return -ENOMEM;
1995
1996         selftest->type = type;
1997         list_add(&selftest->list, &postponed_selftests);
1998         return 0;
1999 }
2000
2001 static int run_tracer_selftest(struct tracer *type)
2002 {
2003         struct trace_array *tr = &global_trace;
2004         struct tracer *saved_tracer = tr->current_trace;
2005         int ret;
2006
2007         if (!type->selftest || tracing_selftest_disabled)
2008                 return 0;
2009
2010         /*
2011          * If a tracer registers early in boot up (before scheduling is
2012          * initialized and such), then do not run its selftests yet.
2013          * Instead, run it a little later in the boot process.
2014          */
2015         if (!selftests_can_run)
2016                 return save_selftest(type);
2017
2018         if (!tracing_is_on()) {
2019                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2020                         type->name);
2021                 return 0;
2022         }
2023
2024         /*
2025          * Run a selftest on this tracer.
2026          * Here we reset the trace buffer, and set the current
2027          * tracer to be this tracer. The tracer can then run some
2028          * internal tracing to verify that everything is in order.
2029          * If we fail, we do not register this tracer.
2030          */
2031         tracing_reset_online_cpus(&tr->array_buffer);
2032
2033         tr->current_trace = type;
2034
2035 #ifdef CONFIG_TRACER_MAX_TRACE
2036         if (type->use_max_tr) {
2037                 /* If we expanded the buffers, make sure the max is expanded too */
2038                 if (tr->ring_buffer_expanded)
2039                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2040                                            RING_BUFFER_ALL_CPUS);
2041                 tr->allocated_snapshot = true;
2042         }
2043 #endif
2044
2045         /* the test is responsible for initializing and enabling */
2046         pr_info("Testing tracer %s: ", type->name);
2047         ret = type->selftest(type, tr);
2048         /* the test is responsible for resetting too */
2049         tr->current_trace = saved_tracer;
2050         if (ret) {
2051                 printk(KERN_CONT "FAILED!\n");
2052                 /* Add the warning after printing 'FAILED' */
2053                 WARN_ON(1);
2054                 return -1;
2055         }
2056         /* Only reset on passing, to avoid touching corrupted buffers */
2057         tracing_reset_online_cpus(&tr->array_buffer);
2058
2059 #ifdef CONFIG_TRACER_MAX_TRACE
2060         if (type->use_max_tr) {
2061                 tr->allocated_snapshot = false;
2062
2063                 /* Shrink the max buffer again */
2064                 if (tr->ring_buffer_expanded)
2065                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2066                                            RING_BUFFER_ALL_CPUS);
2067         }
2068 #endif
2069
2070         printk(KERN_CONT "PASSED\n");
2071         return 0;
2072 }
2073
2074 static int do_run_tracer_selftest(struct tracer *type)
2075 {
2076         int ret;
2077
2078         /*
2079          * Tests can take a long time, especially if they are run one after the
2080          * other, as does happen during bootup when all the tracers are
2081          * registered. This could cause the soft lockup watchdog to trigger.
2082          */
2083         cond_resched();
2084
2085         tracing_selftest_running = true;
2086         ret = run_tracer_selftest(type);
2087         tracing_selftest_running = false;
2088
2089         return ret;
2090 }
2091
2092 static __init int init_trace_selftests(void)
2093 {
2094         struct trace_selftests *p, *n;
2095         struct tracer *t, **last;
2096         int ret;
2097
2098         selftests_can_run = true;
2099
2100         mutex_lock(&trace_types_lock);
2101
2102         if (list_empty(&postponed_selftests))
2103                 goto out;
2104
2105         pr_info("Running postponed tracer tests:\n");
2106
2107         tracing_selftest_running = true;
2108         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2109                 /* This loop can take minutes when sanitizers are enabled, so
2110                  * let's make sure we allow RCU processing.
2111                  */
2112                 cond_resched();
2113                 ret = run_tracer_selftest(p->type);
2114                 /* If the test fails, then warn and remove from available_tracers */
2115                 if (ret < 0) {
2116                         WARN(1, "tracer: %s failed selftest, disabling\n",
2117                              p->type->name);
2118                         last = &trace_types;
2119                         for (t = trace_types; t; t = t->next) {
2120                                 if (t == p->type) {
2121                                         *last = t->next;
2122                                         break;
2123                                 }
2124                                 last = &t->next;
2125                         }
2126                 }
2127                 list_del(&p->list);
2128                 kfree(p);
2129         }
2130         tracing_selftest_running = false;
2131
2132  out:
2133         mutex_unlock(&trace_types_lock);
2134
2135         return 0;
2136 }
2137 core_initcall(init_trace_selftests);
2138 #else
2139 static inline int run_tracer_selftest(struct tracer *type)
2140 {
2141         return 0;
2142 }
2143 static inline int do_run_tracer_selftest(struct tracer *type)
2144 {
2145         return 0;
2146 }
2147 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2148
2149 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2150
2151 static void __init apply_trace_boot_options(void);
2152
2153 /**
2154  * register_tracer - register a tracer with the ftrace system.
2155  * @type: the plugin for the tracer
2156  *
2157  * Register a new plugin tracer.
2158  */
2159 int __init register_tracer(struct tracer *type)
2160 {
2161         struct tracer *t;
2162         int ret = 0;
2163
2164         if (!type->name) {
2165                 pr_info("Tracer must have a name\n");
2166                 return -1;
2167         }
2168
2169         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2170                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2171                 return -1;
2172         }
2173
2174         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2175                 pr_warn("Can not register tracer %s due to lockdown\n",
2176                            type->name);
2177                 return -EPERM;
2178         }
2179
2180         mutex_lock(&trace_types_lock);
2181
2182         for (t = trace_types; t; t = t->next) {
2183                 if (strcmp(type->name, t->name) == 0) {
2184                         /* already found */
2185                         pr_info("Tracer %s already registered\n",
2186                                 type->name);
2187                         ret = -1;
2188                         goto out;
2189                 }
2190         }
2191
2192         if (!type->set_flag)
2193                 type->set_flag = &dummy_set_flag;
2194         if (!type->flags) {
2195                 /* Allocate a dummy tracer_flags */
2196                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2197                 if (!type->flags) {
2198                         ret = -ENOMEM;
2199                         goto out;
2200                 }
2201                 type->flags->val = 0;
2202                 type->flags->opts = dummy_tracer_opt;
2203         } else
2204                 if (!type->flags->opts)
2205                         type->flags->opts = dummy_tracer_opt;
2206
2207         /* store the tracer for __set_tracer_option */
2208         type->flags->trace = type;
2209
2210         ret = do_run_tracer_selftest(type);
2211         if (ret < 0)
2212                 goto out;
2213
2214         type->next = trace_types;
2215         trace_types = type;
2216         add_tracer_options(&global_trace, type);
2217
2218  out:
2219         mutex_unlock(&trace_types_lock);
2220
2221         if (ret || !default_bootup_tracer)
2222                 goto out_unlock;
2223
2224         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2225                 goto out_unlock;
2226
2227         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2228         /* Do we want this tracer to start on bootup? */
2229         tracing_set_tracer(&global_trace, type->name);
2230         default_bootup_tracer = NULL;
2231
2232         apply_trace_boot_options();
2233
2234         /* Disable other selftests, since running this tracer will break them. */
2235         disable_tracing_selftest("running a tracer");
2236
2237  out_unlock:
2238         return ret;
2239 }
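
/*
 * Minimal registration sketch (illustrative only; "example_tracer" and
 * its callbacks are hypothetical, real tracers live in trace_*.c):
 *
 *      static struct tracer example_tracer __read_mostly = {
 *              .name   = "example",
 *              .init   = example_tracer_init,
 *              .reset  = example_tracer_reset,
 *      };
 *
 *      static int __init init_example_tracer(void)
 *      {
 *              return register_tracer(&example_tracer);
 *      }
 *      core_initcall(init_example_tracer);
 *
 * On success "example" shows up in available_tracers and can be written
 * to current_tracer.
 */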
2240
2241 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2242 {
2243         struct trace_buffer *buffer = buf->buffer;
2244
2245         if (!buffer)
2246                 return;
2247
2248         ring_buffer_record_disable(buffer);
2249
2250         /* Make sure all commits have finished */
2251         synchronize_rcu();
2252         ring_buffer_reset_cpu(buffer, cpu);
2253
2254         ring_buffer_record_enable(buffer);
2255 }
2256
2257 void tracing_reset_online_cpus(struct array_buffer *buf)
2258 {
2259         struct trace_buffer *buffer = buf->buffer;
2260
2261         if (!buffer)
2262                 return;
2263
2264         ring_buffer_record_disable(buffer);
2265
2266         /* Make sure all commits have finished */
2267         synchronize_rcu();
2268
2269         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2270
2271         ring_buffer_reset_online_cpus(buffer);
2272
2273         ring_buffer_record_enable(buffer);
2274 }
2275
2276 /* Must have trace_types_lock held */
2277 void tracing_reset_all_online_cpus_unlocked(void)
2278 {
2279         struct trace_array *tr;
2280
2281         lockdep_assert_held(&trace_types_lock);
2282
2283         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2284                 if (!tr->clear_trace)
2285                         continue;
2286                 tr->clear_trace = false;
2287                 tracing_reset_online_cpus(&tr->array_buffer);
2288 #ifdef CONFIG_TRACER_MAX_TRACE
2289                 tracing_reset_online_cpus(&tr->max_buffer);
2290 #endif
2291         }
2292 }
2293
2294 void tracing_reset_all_online_cpus(void)
2295 {
2296         mutex_lock(&trace_types_lock);
2297         tracing_reset_all_online_cpus_unlocked();
2298         mutex_unlock(&trace_types_lock);
2299 }
2300
2301 /*
2302  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2303  * is the tgid last observed corresponding to pid=i.
2304  */
2305 static int *tgid_map;
2306
2307 /* The maximum valid index into tgid_map. */
2308 static size_t tgid_map_max;
2309
2310 #define SAVED_CMDLINES_DEFAULT 128
2311 #define NO_CMDLINE_MAP UINT_MAX
2312 /*
2313  * Preemption must be disabled before acquiring trace_cmdline_lock.
2314  * The various trace_arrays' max_lock must be acquired in a context
2315  * where interrupts are disabled.
2316  */
2317 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2318 struct saved_cmdlines_buffer {
2319         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2320         unsigned *map_cmdline_to_pid;
2321         unsigned cmdline_num;
2322         int cmdline_idx;
2323         char *saved_cmdlines;
2324 };
2325 static struct saved_cmdlines_buffer *savedcmd;
2326
2327 static inline char *get_saved_cmdlines(int idx)
2328 {
2329         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2330 }
2331
2332 static inline void set_cmdline(int idx, const char *cmdline)
2333 {
2334         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2335 }
2336
2337 static int allocate_cmdlines_buffer(unsigned int val,
2338                                     struct saved_cmdlines_buffer *s)
2339 {
2340         s->map_cmdline_to_pid = kmalloc_array(val,
2341                                               sizeof(*s->map_cmdline_to_pid),
2342                                               GFP_KERNEL);
2343         if (!s->map_cmdline_to_pid)
2344                 return -ENOMEM;
2345
2346         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2347         if (!s->saved_cmdlines) {
2348                 kfree(s->map_cmdline_to_pid);
2349                 return -ENOMEM;
2350         }
2351
2352         s->cmdline_idx = 0;
2353         s->cmdline_num = val;
2354         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2355                sizeof(s->map_pid_to_cmdline));
2356         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2357                val * sizeof(*s->map_cmdline_to_pid));
2358
2359         return 0;
2360 }
2361
2362 static int trace_create_savedcmd(void)
2363 {
2364         int ret;
2365
2366         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2367         if (!savedcmd)
2368                 return -ENOMEM;
2369
2370         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2371         if (ret < 0) {
2372                 kfree(savedcmd);
2373                 savedcmd = NULL;
2374                 return -ENOMEM;
2375         }
2376
2377         return 0;
2378 }
2379
2380 int is_tracing_stopped(void)
2381 {
2382         return global_trace.stop_count;
2383 }
2384
2385 static void tracing_start_tr(struct trace_array *tr)
2386 {
2387         struct trace_buffer *buffer;
2388         unsigned long flags;
2389
2390         if (tracing_disabled)
2391                 return;
2392
2393         raw_spin_lock_irqsave(&tr->start_lock, flags);
2394         if (--tr->stop_count) {
2395                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2396                         /* Someone screwed up their debugging */
2397                         tr->stop_count = 0;
2398                 }
2399                 goto out;
2400         }
2401
2402         /* Prevent the buffers from switching */
2403         arch_spin_lock(&tr->max_lock);
2404
2405         buffer = tr->array_buffer.buffer;
2406         if (buffer)
2407                 ring_buffer_record_enable(buffer);
2408
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410         buffer = tr->max_buffer.buffer;
2411         if (buffer)
2412                 ring_buffer_record_enable(buffer);
2413 #endif
2414
2415         arch_spin_unlock(&tr->max_lock);
2416
2417  out:
2418         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2419 }
2420
2421 /**
2422  * tracing_start - quick start of the tracer
2423  *
2424  * If tracing is enabled but was stopped by tracing_stop,
2425  * this will start the tracer back up.
2426  */
2427 void tracing_start(void)
2429 {
2430         return tracing_start_tr(&global_trace);
2431 }
2432
2433 static void tracing_stop_tr(struct trace_array *tr)
2434 {
2435         struct trace_buffer *buffer;
2436         unsigned long flags;
2437
2438         raw_spin_lock_irqsave(&tr->start_lock, flags);
2439         if (tr->stop_count++)
2440                 goto out;
2441
2442         /* Prevent the buffers from switching */
2443         arch_spin_lock(&tr->max_lock);
2444
2445         buffer = tr->array_buffer.buffer;
2446         if (buffer)
2447                 ring_buffer_record_disable(buffer);
2448
2449 #ifdef CONFIG_TRACER_MAX_TRACE
2450         buffer = tr->max_buffer.buffer;
2451         if (buffer)
2452                 ring_buffer_record_disable(buffer);
2453 #endif
2454
2455         arch_spin_unlock(&tr->max_lock);
2456
2457  out:
2458         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2459 }
2460
2461 /**
2462  * tracing_stop - quick stop of the tracer
2463  *
2464  * Light weight way to stop tracing. Use in conjunction with
2465  * tracing_start.
2466  */
2467 void tracing_stop(void)
2468 {
2469         return tracing_stop_tr(&global_trace);
2470 }
2471
2472 static int trace_save_cmdline(struct task_struct *tsk)
2473 {
2474         unsigned tpid, idx;
2475
2476         /* treat recording of idle task as a success */
2477         if (!tsk->pid)
2478                 return 1;
2479
2480         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2481
2482         /*
2483          * It's not the end of the world if we don't get
2484          * the lock, but we also don't want to spin
2485          * nor do we want to disable interrupts,
2486          * so if we miss here, then better luck next time.
2487          *
2488          * This is called from within the scheduler and wakeup paths, so
2489          * interrupts had better be disabled and the run queue lock held.
2490          */
2491         lockdep_assert_preemption_disabled();
2492         if (!arch_spin_trylock(&trace_cmdline_lock))
2493                 return 0;
2494
2495         idx = savedcmd->map_pid_to_cmdline[tpid];
2496         if (idx == NO_CMDLINE_MAP) {
2497                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2498
2499                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2500                 savedcmd->cmdline_idx = idx;
2501         }
2502
2503         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2504         set_cmdline(idx, tsk->comm);
2505
2506         arch_spin_unlock(&trace_cmdline_lock);
2507
2508         return 1;
2509 }
2510
2511 static void __trace_find_cmdline(int pid, char comm[])
2512 {
2513         unsigned map;
2514         int tpid;
2515
2516         if (!pid) {
2517                 strcpy(comm, "<idle>");
2518                 return;
2519         }
2520
2521         if (WARN_ON_ONCE(pid < 0)) {
2522                 strcpy(comm, "<XXX>");
2523                 return;
2524         }
2525
2526         tpid = pid & (PID_MAX_DEFAULT - 1);
2527         map = savedcmd->map_pid_to_cmdline[tpid];
2528         if (map != NO_CMDLINE_MAP) {
2529                 tpid = savedcmd->map_cmdline_to_pid[map];
2530                 if (tpid == pid) {
2531                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2532                         return;
2533                 }
2534         }
2535         strcpy(comm, "<...>");
2536 }
2537
2538 void trace_find_cmdline(int pid, char comm[])
2539 {
2540         preempt_disable();
2541         arch_spin_lock(&trace_cmdline_lock);
2542
2543         __trace_find_cmdline(pid, comm);
2544
2545         arch_spin_unlock(&trace_cmdline_lock);
2546         preempt_enable();
2547 }
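
/*
 * Worked example of the mapping (illustrative): with the default
 * PID_MAX_DEFAULT of 0x8000, pid 40000 hashes to slot 40000 & 0x7fff ==
 * 7232 of map_pid_to_cmdline. If a different pid later claims slot 7232,
 * the reverse check against map_cmdline_to_pid in __trace_find_cmdline()
 * no longer matches and the lookup reports "<...>" rather than a stale
 * comm.
 */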
2548
2549 static int *trace_find_tgid_ptr(int pid)
2550 {
2551         /*
2552          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2553          * if we observe a non-NULL tgid_map then we also observe the correct
2554          * tgid_map_max.
2555          */
2556         int *map = smp_load_acquire(&tgid_map);
2557
2558         if (unlikely(!map || pid > tgid_map_max))
2559                 return NULL;
2560
2561         return &map[pid];
2562 }
2563
2564 int trace_find_tgid(int pid)
2565 {
2566         int *ptr = trace_find_tgid_ptr(pid);
2567
2568         return ptr ? *ptr : 0;
2569 }
2570
2571 static int trace_save_tgid(struct task_struct *tsk)
2572 {
2573         int *ptr;
2574
2575         /* treat recording of idle task as a success */
2576         if (!tsk->pid)
2577                 return 1;
2578
2579         ptr = trace_find_tgid_ptr(tsk->pid);
2580         if (!ptr)
2581                 return 0;
2582
2583         *ptr = tsk->tgid;
2584         return 1;
2585 }
2586
2587 static bool tracing_record_taskinfo_skip(int flags)
2588 {
2589         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2590                 return true;
2591         if (!__this_cpu_read(trace_taskinfo_save))
2592                 return true;
2593         return false;
2594 }
2595
2596 /**
2597  * tracing_record_taskinfo - record the task info of a task
2598  *
2599  * @task:  task to record
2600  * @flags: TRACE_RECORD_CMDLINE for recording comm
2601  *         TRACE_RECORD_TGID for recording tgid
2602  */
2603 void tracing_record_taskinfo(struct task_struct *task, int flags)
2604 {
2605         bool done;
2606
2607         if (tracing_record_taskinfo_skip(flags))
2608                 return;
2609
2610         /*
2611          * Record as much task information as possible. If some fail, continue
2612          * to try to record the others.
2613          */
2614         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2615         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2616
2617         /* If recording any information failed, retry soon. */
2618         if (!done)
2619                 return;
2620
2621         __this_cpu_write(trace_taskinfo_save, false);
2622 }
2623
2624 /**
2625  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2626  *
2627  * @prev: previous task during sched_switch
2628  * @next: next task during sched_switch
2629  * @flags: TRACE_RECORD_CMDLINE for recording comm
2630  *         TRACE_RECORD_TGID for recording tgid
2631  */
2632 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2633                                           struct task_struct *next, int flags)
2634 {
2635         bool done;
2636
2637         if (tracing_record_taskinfo_skip(flags))
2638                 return;
2639
2640         /*
2641          * Record as much task information as possible. If some fail, continue
2642          * to try to record the others.
2643          */
2644         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2645         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2646         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2647         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2648
2649         /* If recording any information failed, retry soon. */
2650         if (!done)
2651                 return;
2652
2653         __this_cpu_write(trace_taskinfo_save, false);
2654 }
2655
2656 /* Helpers to record a specific task information */
2657 void tracing_record_cmdline(struct task_struct *task)
2658 {
2659         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2660 }
2661
2662 void tracing_record_tgid(struct task_struct *task)
2663 {
2664         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2665 }
2666
2667 /*
2668  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2669  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2670  * simplifies those functions and keeps them in sync.
2671  */
2672 enum print_line_t trace_handle_return(struct trace_seq *s)
2673 {
2674         return trace_seq_has_overflowed(s) ?
2675                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2676 }
2677 EXPORT_SYMBOL_GPL(trace_handle_return);
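
/*
 * Typical use in a trace_event output callback (illustrative sketch; the
 * callback name and format string are made up):
 *
 *      static enum print_line_t example_output(struct trace_iterator *iter,
 *                                              int flags,
 *                                              struct trace_event *event)
 *      {
 *              struct trace_seq *s = &iter->seq;
 *
 *              trace_seq_printf(s, "example: value=%d\n", 0);
 *              return trace_handle_return(s);
 *      }
 */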
2678
2679 static unsigned short migration_disable_value(void)
2680 {
2681 #if defined(CONFIG_SMP)
2682         return current->migration_disabled;
2683 #else
2684         return 0;
2685 #endif
2686 }
2687
2688 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2689 {
2690         unsigned int trace_flags = irqs_status;
2691         unsigned int pc;
2692
2693         pc = preempt_count();
2694
2695         if (pc & NMI_MASK)
2696                 trace_flags |= TRACE_FLAG_NMI;
2697         if (pc & HARDIRQ_MASK)
2698                 trace_flags |= TRACE_FLAG_HARDIRQ;
2699         if (in_serving_softirq())
2700                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2701         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2702                 trace_flags |= TRACE_FLAG_BH_OFF;
2703
2704         if (tif_need_resched())
2705                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2706         if (test_preempt_need_resched())
2707                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2708         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2709                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2710 }
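
/*
 * Worked example of the packing above (illustrative): a return value of
 * 0x00010032 carries the status flags 0x0001 in the upper 16 bits, a
 * migration-disable depth of 3 in bits 4-7 and a preemption depth of 2
 * in the low nibble; both depths saturate at 0xf.
 */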
2711
2712 struct ring_buffer_event *
2713 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2714                           int type,
2715                           unsigned long len,
2716                           unsigned int trace_ctx)
2717 {
2718         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2719 }
2720
2721 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2722 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2723 static int trace_buffered_event_ref;
2724
2725 /**
2726  * trace_buffered_event_enable - enable buffering events
2727  *
2728  * When events are being filtered, it is quicker to use a temporary
2729  * buffer to write the event data into if there's a likely chance
2730  * that it will not be committed. Discarding an event from the ring
2731  * buffer is not as fast as committing one, and is much slower than
2732  * copying the data first and then committing the copy.
2733  *
2734  * When an event is to be filtered, allocate per cpu buffers to
2735  * write the event data into, and if the event is filtered and discarded
2736  * it is simply dropped, otherwise, the entire data is to be committed
2737  * in one shot.
2738  */
2739 void trace_buffered_event_enable(void)
2740 {
2741         struct ring_buffer_event *event;
2742         struct page *page;
2743         int cpu;
2744
2745         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2746
2747         if (trace_buffered_event_ref++)
2748                 return;
2749
2750         for_each_tracing_cpu(cpu) {
2751                 page = alloc_pages_node(cpu_to_node(cpu),
2752                                         GFP_KERNEL | __GFP_NORETRY, 0);
2753                 /* This is just an optimization and can handle failures */
2754                 if (!page) {
2755                         pr_err("Failed to allocate event buffer\n");
2756                         break;
2757                 }
2758
2759                 event = page_address(page);
2760                 memset(event, 0, sizeof(*event));
2761
2762                 per_cpu(trace_buffered_event, cpu) = event;
2763
2764                 preempt_disable();
2765                 if (cpu == smp_processor_id() &&
2766                     __this_cpu_read(trace_buffered_event) !=
2767                     per_cpu(trace_buffered_event, cpu))
2768                         WARN_ON_ONCE(1);
2769                 preempt_enable();
2770         }
2771 }
2772
2773 static void enable_trace_buffered_event(void *data)
2774 {
2775         /* Probably not needed, but do it anyway */
2776         smp_rmb();
2777         this_cpu_dec(trace_buffered_event_cnt);
2778 }
2779
2780 static void disable_trace_buffered_event(void *data)
2781 {
2782         this_cpu_inc(trace_buffered_event_cnt);
2783 }
2784
2785 /**
2786  * trace_buffered_event_disable - disable buffering events
2787  *
2788  * When a filter is removed, it is faster to not use the buffered
2789  * events, and to commit directly into the ring buffer. Free up
2790  * the temp buffers when there are no more users. This requires
2791  * special synchronization with current events.
2792  */
2793 void trace_buffered_event_disable(void)
2794 {
2795         int cpu;
2796
2797         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2798
2799         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2800                 return;
2801
2802         if (--trace_buffered_event_ref)
2803                 return;
2804
2805         /* For each CPU, set the buffer as used. */
2806         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2807                          NULL, true);
2808
2809         /* Wait for all current users to finish */
2810         synchronize_rcu();
2811
2812         for_each_tracing_cpu(cpu) {
2813                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2814                 per_cpu(trace_buffered_event, cpu) = NULL;
2815         }
2816
2817         /*
2818          * Wait for all CPUs that potentially started checking if they can use
2819          * their event buffer only after the previous synchronize_rcu() call and
2820          * they still read a valid pointer from trace_buffered_event. They must
2821          * not see a cleared trace_buffered_event_cnt, or they could wrongly
2822          * decide to use the pointed-to buffer, which has now been freed.
2823          */
2824         synchronize_rcu();
2825
2826         /* For each CPU, relinquish the buffer */
2827         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2828                          true);
2829 }
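
/*
 * Illustrative pairing (sketch): both functions must be called with
 * event_mutex held and must balance, e.g.
 *
 *      mutex_lock(&event_mutex);
 *      trace_buffered_event_enable();
 *      ...
 *      trace_buffered_event_disable();
 *      mutex_unlock(&event_mutex);
 *
 * Only the first enable allocates the per-CPU pages and only the last
 * disable frees them; intermediate calls just move
 * trace_buffered_event_ref.
 */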
2830
2831 static struct trace_buffer *temp_buffer;
2832
2833 struct ring_buffer_event *
2834 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2835                           struct trace_event_file *trace_file,
2836                           int type, unsigned long len,
2837                           unsigned int trace_ctx)
2838 {
2839         struct ring_buffer_event *entry;
2840         struct trace_array *tr = trace_file->tr;
2841         int val;
2842
2843         *current_rb = tr->array_buffer.buffer;
2844
2845         if (!tr->no_filter_buffering_ref &&
2846             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2847                 preempt_disable_notrace();
2848                 /*
2849                  * Filtering is on, so try to use the per cpu buffer first.
2850                  * This buffer will simulate a ring_buffer_event,
2851                  * where the type_len is zero and the array[0] will
2852                  * hold the full length.
2853                  * (see include/linux/ring_buffer.h for details on
2854                  *  how the ring_buffer_event is structured).
2855                  *
2856                  * Using a temp buffer during filtering and copying it
2857                  * on a matched filter is quicker than writing directly
2858                  * into the ring buffer and then discarding it when
2859                  * it doesn't match. That is because the discard
2860                  * requires several atomic operations to get right.
2861                  * Copying on a match and doing nothing on a failed match
2862                  * is still quicker than skipping the copy on a match but
2863                  * then having to discard from the ring buffer on a failed match.
2864                  */
2865                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2866                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2867
2868                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2869
2870                         /*
2871                          * Preemption is disabled, but interrupts and NMIs
2872                          * can still come in now. If that happens after
2873                          * the above increment, then it will have to go
2874                          * back to the old method of allocating the event
2875                          * on the ring buffer, and if the filter fails, it
2876                          * will have to call ring_buffer_discard_commit()
2877                          * to remove it.
2878                          *
2879                          * Need to also check the unlikely case that the
2880                          * length is bigger than the temp buffer size.
2881                          * If that happens, then the reserve is pretty much
2882                          * guaranteed to fail, as the ring buffer currently
2883                          * only allows events less than a page. But that may
2884                          * change in the future, so let the ring buffer reserve
2885                          * handle the failure in that case.
2886                          */
2887                         if (val == 1 && likely(len <= max_len)) {
2888                                 trace_event_setup(entry, type, trace_ctx);
2889                                 entry->array[0] = len;
2890                                 /* Return with preemption disabled */
2891                                 return entry;
2892                         }
2893                         this_cpu_dec(trace_buffered_event_cnt);
2894                 }
2895                 /* __trace_buffer_lock_reserve() disables preemption */
2896                 preempt_enable_notrace();
2897         }
2898
2899         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2900                                             trace_ctx);
2901         /*
2902          * If tracing is off, but we have triggers enabled,
2903          * we still need to look at the event data. Use the temp_buffer
2904          * to store the trace event for the trigger to use. It's recursion
2905          * safe and will not be recorded anywhere.
2906          */
2907         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2908                 *current_rb = temp_buffer;
2909                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2910                                                     trace_ctx);
2911         }
2912         return entry;
2913 }
2914 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
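
/*
 * Worked layout of the "simulated" event above (illustrative): the
 * per-CPU page was zeroed by trace_buffered_event_enable(), so type_len
 * stays 0, array[0] is set to the payload length, and
 * ring_buffer_event_data() therefore returns &entry->array[1] as the
 * payload - exactly what it would do for an extended-length data event
 * in the real ring buffer.
 */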
2915
2916 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2917 static DEFINE_MUTEX(tracepoint_printk_mutex);
2918
2919 static void output_printk(struct trace_event_buffer *fbuffer)
2920 {
2921         struct trace_event_call *event_call;
2922         struct trace_event_file *file;
2923         struct trace_event *event;
2924         unsigned long flags;
2925         struct trace_iterator *iter = tracepoint_print_iter;
2926
2927         /* We should never get here if iter is NULL */
2928         if (WARN_ON_ONCE(!iter))
2929                 return;
2930
2931         event_call = fbuffer->trace_file->event_call;
2932         if (!event_call || !event_call->event.funcs ||
2933             !event_call->event.funcs->trace)
2934                 return;
2935
2936         file = fbuffer->trace_file;
2937         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2938             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2939              !filter_match_preds(file->filter, fbuffer->entry)))
2940                 return;
2941
2942         event = &fbuffer->trace_file->event_call->event;
2943
2944         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2945         trace_seq_init(&iter->seq);
2946         iter->ent = fbuffer->entry;
2947         event_call->event.funcs->trace(iter, 0, event);
2948         trace_seq_putc(&iter->seq, 0);
2949         printk("%s", iter->seq.buffer);
2950
2951         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2952 }
2953
2954 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2955                              void *buffer, size_t *lenp,
2956                              loff_t *ppos)
2957 {
2958         int save_tracepoint_printk;
2959         int ret;
2960
2961         mutex_lock(&tracepoint_printk_mutex);
2962         save_tracepoint_printk = tracepoint_printk;
2963
2964         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2965
2966         /*
2967          * This will force exiting early, as tracepoint_printk
2968          * is always zero when tracepoint_print_iter is not allocated.
2969          */
2970         if (!tracepoint_print_iter)
2971                 tracepoint_printk = 0;
2972
2973         if (save_tracepoint_printk == tracepoint_printk)
2974                 goto out;
2975
2976         if (tracepoint_printk)
2977                 static_key_enable(&tracepoint_printk_key.key);
2978         else
2979                 static_key_disable(&tracepoint_printk_key.key);
2980
2981  out:
2982         mutex_unlock(&tracepoint_printk_mutex);
2983
2984         return ret;
2985 }
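
/*
 * Example (illustrative): this handler backs the kernel.tracepoint_printk
 * sysctl, so writing 1 to /proc/sys/kernel/tracepoint_printk flips the
 * static key and mirrors enabled trace events to printk, provided
 * tracepoint_print_iter was allocated at boot (the "tp_printk" command
 * line option); writing 0 disables the mirroring again.
 */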
2986
2987 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2988 {
2989         enum event_trigger_type tt = ETT_NONE;
2990         struct trace_event_file *file = fbuffer->trace_file;
2991
2992         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2993                         fbuffer->entry, &tt))
2994                 goto discard;
2995
2996         if (static_key_false(&tracepoint_printk_key.key))
2997                 output_printk(fbuffer);
2998
2999         if (static_branch_unlikely(&trace_event_exports_enabled))
3000                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3001
3002         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3003                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3004
3005 discard:
3006         if (tt)
3007                 event_triggers_post_call(file, tt);
3008
3009 }
3010 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3011
3012 /*
3013  * Skip 3:
3014  *
3015  *   trace_buffer_unlock_commit_regs()
3016  *   trace_event_buffer_commit()
3017  *   trace_event_raw_event_xxx()
3018  */
3019 # define STACK_SKIP 3
3020
3021 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3022                                      struct trace_buffer *buffer,
3023                                      struct ring_buffer_event *event,
3024                                      unsigned int trace_ctx,
3025                                      struct pt_regs *regs)
3026 {
3027         __buffer_unlock_commit(buffer, event);
3028
3029         /*
3030          * If regs is not set, then skip the necessary functions.
3031          * Note, we can still get here via blktrace, wakeup tracer
3032          * and mmiotrace, but that's ok if they lose a function or
3033          * two. They are not that meaningful.
3034          */
3035         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3036         ftrace_trace_userstack(tr, buffer, trace_ctx);
3037 }
3038
3039 /*
3040  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3041  */
3042 void
3043 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3044                                    struct ring_buffer_event *event)
3045 {
3046         __buffer_unlock_commit(buffer, event);
3047 }
3048
3049 void
3050 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3051                parent_ip, unsigned int trace_ctx)
3052 {
3053         struct trace_event_call *call = &event_function;
3054         struct trace_buffer *buffer = tr->array_buffer.buffer;
3055         struct ring_buffer_event *event;
3056         struct ftrace_entry *entry;
3057
3058         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3059                                             trace_ctx);
3060         if (!event)
3061                 return;
3062         entry   = ring_buffer_event_data(event);
3063         entry->ip                       = ip;
3064         entry->parent_ip                = parent_ip;
3065
3066         if (!call_filter_check_discard(call, entry, buffer, event)) {
3067                 if (static_branch_unlikely(&trace_function_exports_enabled))
3068                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3069                 __buffer_unlock_commit(buffer, event);
3070         }
3071 }
3072
3073 #ifdef CONFIG_STACKTRACE
3074
3075 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3076 #define FTRACE_KSTACK_NESTING   4
3077
3078 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3079
3080 struct ftrace_stack {
3081         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3082 };
3083
3084
3085 struct ftrace_stacks {
3086         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3087 };
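
/*
 * Worked sizing example (illustrative, 4 KiB pages, 64-bit): each nesting
 * context gets PAGE_SIZE / 4 == 1024 call slots, so one struct
 * ftrace_stack is 1024 * sizeof(unsigned long) == 8 KiB and the per-CPU
 * struct ftrace_stacks weighs in at 4 * 8 KiB == 32 KiB.
 */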
3088
3089 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3090 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3091
3092 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3093                                  unsigned int trace_ctx,
3094                                  int skip, struct pt_regs *regs)
3095 {
3096         struct trace_event_call *call = &event_kernel_stack;
3097         struct ring_buffer_event *event;
3098         unsigned int size, nr_entries;
3099         struct ftrace_stack *fstack;
3100         struct stack_entry *entry;
3101         int stackidx;
3102
3103         /*
3104          * Add one, for this function and the call to stack_trace_save().
3105          * If regs is set, then these functions will not be in the way.
3106          */
3107 #ifndef CONFIG_UNWINDER_ORC
3108         if (!regs)
3109                 skip++;
3110 #endif
3111
3112         preempt_disable_notrace();
3113
3114         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3115
3116         /* This should never happen. If it does, yell once and skip */
3117         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3118                 goto out;
3119
3120         /*
3121          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3122          * interrupt will either see the value pre increment or post
3123          * increment. If the interrupt happens pre increment it will have
3124          * restored the counter when it returns.  We just need a barrier to
3125          * keep gcc from moving things around.
3126          */
3127         barrier();
3128
3129         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3130         size = ARRAY_SIZE(fstack->calls);
3131
3132         if (regs) {
3133                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3134                                                    size, skip);
3135         } else {
3136                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3137         }
3138
3139         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3140                                     struct_size(entry, caller, nr_entries),
3141                                     trace_ctx);
3142         if (!event)
3143                 goto out;
3144         entry = ring_buffer_event_data(event);
3145
3146         entry->size = nr_entries;
3147         memcpy(&entry->caller, fstack->calls,
3148                flex_array_size(entry, caller, nr_entries));
3149
3150         if (!call_filter_check_discard(call, entry, buffer, event))
3151                 __buffer_unlock_commit(buffer, event);
3152
3153  out:
3154         /* Again, don't let gcc optimize things here */
3155         barrier();
3156         __this_cpu_dec(ftrace_stack_reserve);
3157         preempt_enable_notrace();
3158
3159 }
3160
3161 static inline void ftrace_trace_stack(struct trace_array *tr,
3162                                       struct trace_buffer *buffer,
3163                                       unsigned int trace_ctx,
3164                                       int skip, struct pt_regs *regs)
3165 {
3166         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3167                 return;
3168
3169         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3170 }
3171
3172 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3173                    int skip)
3174 {
3175         struct trace_buffer *buffer = tr->array_buffer.buffer;
3176
3177         if (rcu_is_watching()) {
3178                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3179                 return;
3180         }
3181
3182         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3183                 return;
3184
3185         /*
3186          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3187          * but if the above rcu_is_watching() failed, then the NMI
3188          * triggered someplace critical, and ct_irq_enter() should
3189          * not be called from NMI.
3190          */
3191         if (unlikely(in_nmi()))
3192                 return;
3193
3194         ct_irq_enter_irqson();
3195         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3196         ct_irq_exit_irqson();
3197 }
3198
3199 /**
3200  * trace_dump_stack - record a stack back trace in the trace buffer
3201  * @skip: Number of functions to skip (helper handlers)
3202  */
3203 void trace_dump_stack(int skip)
3204 {
3205         if (tracing_disabled || tracing_selftest_running)
3206                 return;
3207
3208 #ifndef CONFIG_UNWINDER_ORC
3209         /* Skip 1 to skip this function. */
3210         skip++;
3211 #endif
3212         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3213                              tracing_gen_ctx(), skip, NULL);
3214 }
3215 EXPORT_SYMBOL_GPL(trace_dump_stack);
3216
3217 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3218 static DEFINE_PER_CPU(int, user_stack_count);
3219
3220 static void
3221 ftrace_trace_userstack(struct trace_array *tr,
3222                        struct trace_buffer *buffer, unsigned int trace_ctx)
3223 {
3224         struct trace_event_call *call = &event_user_stack;
3225         struct ring_buffer_event *event;
3226         struct userstack_entry *entry;
3227
3228         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3229                 return;
3230
3231         /*
3232          * NMIs cannot handle page faults, even with fixups.
3233          * Saving the user stack can (and often does) fault.
3234          */
3235         if (unlikely(in_nmi()))
3236                 return;
3237
3238         /*
3239          * prevent recursion, since the user stack tracing may
3240          * trigger other kernel events.
3241          */
3242         preempt_disable();
3243         if (__this_cpu_read(user_stack_count))
3244                 goto out;
3245
3246         __this_cpu_inc(user_stack_count);
3247
3248         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3249                                             sizeof(*entry), trace_ctx);
3250         if (!event)
3251                 goto out_drop_count;
3252         entry   = ring_buffer_event_data(event);
3253
3254         entry->tgid             = current->tgid;
3255         memset(&entry->caller, 0, sizeof(entry->caller));
3256
3257         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3258         if (!call_filter_check_discard(call, entry, buffer, event))
3259                 __buffer_unlock_commit(buffer, event);
3260
3261  out_drop_count:
3262         __this_cpu_dec(user_stack_count);
3263  out:
3264         preempt_enable();
3265 }
3266 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3267 static void ftrace_trace_userstack(struct trace_array *tr,
3268                                    struct trace_buffer *buffer,
3269                                    unsigned int trace_ctx)
3270 {
3271 }
3272 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3273
3274 #endif /* CONFIG_STACKTRACE */
3275
3276 static inline void
3277 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3278                           unsigned long long delta)
3279 {
3280         entry->bottom_delta_ts = delta & U32_MAX;
3281         entry->top_delta_ts = (delta >> 32);
3282 }
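
/*
 * The 64-bit delta is stored split across two narrower fields to keep
 * the event compact. The read side puts it back together roughly as
 * below (see FUNC_REPEATS_GET_DELTA_TS() in trace.h, assuming that
 * helper):
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */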
3283
3284 void trace_last_func_repeats(struct trace_array *tr,
3285                              struct trace_func_repeats *last_info,
3286                              unsigned int trace_ctx)
3287 {
3288         struct trace_buffer *buffer = tr->array_buffer.buffer;
3289         struct func_repeats_entry *entry;
3290         struct ring_buffer_event *event;
3291         u64 delta;
3292
3293         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3294                                             sizeof(*entry), trace_ctx);
3295         if (!event)
3296                 return;
3297
3298         delta = ring_buffer_event_time_stamp(buffer, event) -
3299                 last_info->ts_last_call;
3300
3301         entry = ring_buffer_event_data(event);
3302         entry->ip = last_info->ip;
3303         entry->parent_ip = last_info->parent_ip;
3304         entry->count = last_info->count;
3305         func_repeats_set_delta_ts(entry, delta);
3306
3307         __buffer_unlock_commit(buffer, event);
3308 }
3309
3310 /* created for use with alloc_percpu */
3311 struct trace_buffer_struct {
3312         int nesting;
3313         char buffer[4][TRACE_BUF_SIZE];
3314 };
3315
3316 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3317
3318 /*
3319  * This allows for lockless recording.  If we're nested too deeply, then
3320  * this returns NULL.
3321  */
3322 static char *get_trace_buf(void)
3323 {
3324         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3325
3326         if (!trace_percpu_buffer || buffer->nesting >= 4)
3327                 return NULL;
3328
3329         buffer->nesting++;
3330
3331         /* Interrupts must see nesting incremented before we use the buffer */
3332         barrier();
3333         return &buffer->buffer[buffer->nesting - 1][0];
3334 }
3335
3336 static void put_trace_buf(void)
3337 {
3338         /* Don't let the decrement of nesting leak before this */
3339         barrier();
3340         this_cpu_dec(trace_percpu_buffer->nesting);
3341 }
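
/*
 * The intended calling pattern, as a minimal sketch mirroring the
 * users further down (callers keep preemption disabled across the
 * pair, and the "copy into an event" step stands in for whatever the
 * caller does with the buffer):
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *		... copy tbuffer into a ring buffer event ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * The four per-CPU buffers line up with the possible nesting of
 * contexts (task, softirq, hardirq, NMI), so an interrupt that fires
 * while a lower context holds its buffer simply uses the next slot.
 */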
3342
3343 static int alloc_percpu_trace_buffer(void)
3344 {
3345         struct trace_buffer_struct __percpu *buffers;
3346
3347         if (trace_percpu_buffer)
3348                 return 0;
3349
3350         buffers = alloc_percpu(struct trace_buffer_struct);
3351         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3352                 return -ENOMEM;
3353
3354         trace_percpu_buffer = buffers;
3355         return 0;
3356 }
3357
3358 static int buffers_allocated;
3359
3360 void trace_printk_init_buffers(void)
3361 {
3362         if (buffers_allocated)
3363                 return;
3364
3365         if (alloc_percpu_trace_buffer())
3366                 return;
3367
3368         /* trace_printk() is for debug use only. Don't use it in production. */
3369
3370         pr_warn("\n");
3371         pr_warn("**********************************************************\n");
3372         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3373         pr_warn("**                                                      **\n");
3374         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3375         pr_warn("**                                                      **\n");
3376         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3377         pr_warn("** unsafe for production use.                           **\n");
3378         pr_warn("**                                                      **\n");
3379         pr_warn("** If you see this message and you are not debugging    **\n");
3380         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3381         pr_warn("**                                                      **\n");
3382         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3383         pr_warn("**********************************************************\n");
3384
3385         /* Expand the buffers to set size */
3386         tracing_update_buffers(&global_trace);
3387
3388         buffers_allocated = 1;
3389
3390         /*
3391          * trace_printk_init_buffers() can be called by modules.
3392          * If that happens, then we need to start cmdline recording
3393          * directly here. If global_trace.array_buffer.buffer is already
3394          * allocated at this point, then this was called by module code.
3395          */
3396         if (global_trace.array_buffer.buffer)
3397                 tracing_start_cmdline_record();
3398 }
3399 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3400
3401 void trace_printk_start_comm(void)
3402 {
3403         /* Start tracing comms if trace printk is set */
3404         if (!buffers_allocated)
3405                 return;
3406         tracing_start_cmdline_record();
3407 }
3408
3409 static void trace_printk_start_stop_comm(int enabled)
3410 {
3411         if (!buffers_allocated)
3412                 return;
3413
3414         if (enabled)
3415                 tracing_start_cmdline_record();
3416         else
3417                 tracing_stop_cmdline_record();
3418 }
3419
3420 /**
3421  * trace_vbprintk - write binary msg to tracing buffer
3422  * @ip:    The address of the caller
3423  * @fmt:   The string format to write to the buffer
3424  * @args:  Arguments for @fmt
3425  */
3426 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3427 {
3428         struct trace_event_call *call = &event_bprint;
3429         struct ring_buffer_event *event;
3430         struct trace_buffer *buffer;
3431         struct trace_array *tr = &global_trace;
3432         struct bprint_entry *entry;
3433         unsigned int trace_ctx;
3434         char *tbuffer;
3435         int len = 0, size;
3436
3437         if (unlikely(tracing_selftest_running || tracing_disabled))
3438                 return 0;
3439
3440         /* Don't pollute graph traces with trace_vprintk internals */
3441         pause_graph_tracing();
3442
3443         trace_ctx = tracing_gen_ctx();
3444         preempt_disable_notrace();
3445
3446         tbuffer = get_trace_buf();
3447         if (!tbuffer) {
3448                 len = 0;
3449                 goto out_nobuffer;
3450         }
3451
3452         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3453
3454         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3455                 goto out_put;
3456
3457         size = sizeof(*entry) + sizeof(u32) * len;
3458         buffer = tr->array_buffer.buffer;
3459         ring_buffer_nest_start(buffer);
3460         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3461                                             trace_ctx);
3462         if (!event)
3463                 goto out;
3464         entry = ring_buffer_event_data(event);
3465         entry->ip                       = ip;
3466         entry->fmt                      = fmt;
3467
3468         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3469         if (!call_filter_check_discard(call, entry, buffer, event)) {
3470                 __buffer_unlock_commit(buffer, event);
3471                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3472         }
3473
3474 out:
3475         ring_buffer_nest_end(buffer);
3476 out_put:
3477         put_trace_buf();
3478
3479 out_nobuffer:
3480         preempt_enable_notrace();
3481         unpause_graph_tracing();
3482
3483         return len;
3484 }
3485 EXPORT_SYMBOL_GPL(trace_vbprintk);
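
/*
 * This is normally reached via trace_printk() when the format string
 * is a build-time constant: the macro in <linux/kernel.h> routes such
 * calls to __trace_bprintk(), which ends up here (a rough sketch of
 * the path, not the exact macro expansion):
 *
 *	trace_printk("read %d bytes\n", len);
 *		-> __trace_bprintk(_THIS_IP_, fmt, len)
 *		-> trace_vbprintk(ip, fmt, args)
 *
 * Only the fmt pointer and the vbin_printf()-packed arguments are
 * stored in the event; the text is rendered at read time (via
 * bstr_printf()), which keeps the fast path cheap.
 */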
3486
3487 __printf(3, 0)
3488 static int
3489 __trace_array_vprintk(struct trace_buffer *buffer,
3490                       unsigned long ip, const char *fmt, va_list args)
3491 {
3492         struct trace_event_call *call = &event_print;
3493         struct ring_buffer_event *event;
3494         int len = 0, size;
3495         struct print_entry *entry;
3496         unsigned int trace_ctx;
3497         char *tbuffer;
3498
3499         if (tracing_disabled)
3500                 return 0;
3501
3502         /* Don't pollute graph traces with trace_vprintk internals */
3503         pause_graph_tracing();
3504
3505         trace_ctx = tracing_gen_ctx();
3506         preempt_disable_notrace();
3507
3508
3509         tbuffer = get_trace_buf();
3510         if (!tbuffer) {
3511                 len = 0;
3512                 goto out_nobuffer;
3513         }
3514
3515         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3516
3517         size = sizeof(*entry) + len + 1;
3518         ring_buffer_nest_start(buffer);
3519         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3520                                             trace_ctx);
3521         if (!event)
3522                 goto out;
3523         entry = ring_buffer_event_data(event);
3524         entry->ip = ip;
3525
3526         memcpy(&entry->buf, tbuffer, len + 1);
3527         if (!call_filter_check_discard(call, entry, buffer, event)) {
3528                 __buffer_unlock_commit(buffer, event);
3529                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3530         }
3531
3532 out:
3533         ring_buffer_nest_end(buffer);
3534         put_trace_buf();
3535
3536 out_nobuffer:
3537         preempt_enable_notrace();
3538         unpause_graph_tracing();
3539
3540         return len;
3541 }
3542
3543 __printf(3, 0)
3544 int trace_array_vprintk(struct trace_array *tr,
3545                         unsigned long ip, const char *fmt, va_list args)
3546 {
3547         if (tracing_selftest_running && tr == &global_trace)
3548                 return 0;
3549
3550         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3551 }
3552
3553 /**
3554  * trace_array_printk - Print a message to a specific instance
3555  * @tr: The instance trace_array descriptor
3556  * @ip: The instruction pointer that this is called from.
3557  * @fmt: The format to print (printf format)
3558  *
3559  * If a subsystem sets up its own instance, it has the right to
3560  * printk strings into its tracing instance buffer using this
3561  * function. Note, this function will not write into the top level
3562  * buffer (use trace_printk() for that), as the top level buffer
3563  * should only contain events that can be individually disabled.
3564  * trace_printk() is only meant for debugging a kernel, and should
3565  * never be incorporated into normal use.
3566  *
3567  * trace_array_printk() can be used, as it will not add noise to the
3568  * top level tracing buffer.
3569  *
3570  * Note, trace_array_init_printk() must be called on @tr before this
3571  * can be used.
3572  */
3573 __printf(3, 0)
3574 int trace_array_printk(struct trace_array *tr,
3575                        unsigned long ip, const char *fmt, ...)
3576 {
3577         int ret;
3578         va_list ap;
3579
3580         if (!tr)
3581                 return -ENOENT;
3582
3583         /* This is only allowed for created instances */
3584         if (tr == &global_trace)
3585                 return 0;
3586
3587         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3588                 return 0;
3589
3590         va_start(ap, fmt);
3591         ret = trace_array_vprintk(tr, ip, fmt, ap);
3592         va_end(ap);
3593         return ret;
3594 }
3595 EXPORT_SYMBOL_GPL(trace_array_printk);
3596
3597 /**
3598  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3599  * @tr: The trace array to initialize the buffers for
3600  *
3601  * As trace_array_printk() only writes into instances, such calls are
3602  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3603  * before trace_array_printk() can be used on a trace_array.
3604  */
3605 int trace_array_init_printk(struct trace_array *tr)
3606 {
3607         if (!tr)
3608                 return -ENOENT;
3609
3610         /* This is only allowed for created instances */
3611         if (tr == &global_trace)
3612                 return -EINVAL;
3613
3614         return alloc_percpu_trace_buffer();
3615 }
3616 EXPORT_SYMBOL_GPL(trace_array_init_printk);
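
/*
 * A minimal usage sketch tying the two functions above together
 * (illustrative only; the instance name and message are made up, and
 * trace_array_get_by_name() is assumed to be how the caller obtained
 * its instance):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENODEV;
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *	trace_array_put(tr);
 *
 * The messages land only in the "my_subsys" instance buffer under
 * tracefs, never in the top level trace buffer.
 */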
3617
3618 __printf(3, 4)
3619 int trace_array_printk_buf(struct trace_buffer *buffer,
3620                            unsigned long ip, const char *fmt, ...)
3621 {
3622         int ret;
3623         va_list ap;
3624
3625         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3626                 return 0;
3627
3628         va_start(ap, fmt);
3629         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3630         va_end(ap);
3631         return ret;
3632 }
3633
3634 __printf(2, 0)
3635 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3636 {
3637         return trace_array_vprintk(&global_trace, ip, fmt, args);
3638 }
3639 EXPORT_SYMBOL_GPL(trace_vprintk);
3640
3641 static void trace_iterator_increment(struct trace_iterator *iter)
3642 {
3643         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3644
3645         iter->idx++;
3646         if (buf_iter)
3647                 ring_buffer_iter_advance(buf_iter);
3648 }
3649
3650 static struct trace_entry *
3651 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3652                 unsigned long *lost_events)
3653 {
3654         struct ring_buffer_event *event;
3655         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3656
3657         if (buf_iter) {
3658                 event = ring_buffer_iter_peek(buf_iter, ts);
3659                 if (lost_events)
3660                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3661                                 (unsigned long)-1 : 0;
3662         } else {
3663                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3664                                          lost_events);
3665         }
3666
3667         if (event) {
3668                 iter->ent_size = ring_buffer_event_length(event);
3669                 return ring_buffer_event_data(event);
3670         }
3671         iter->ent_size = 0;
3672         return NULL;
3673 }
3674
3675 static struct trace_entry *
3676 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3677                   unsigned long *missing_events, u64 *ent_ts)
3678 {
3679         struct trace_buffer *buffer = iter->array_buffer->buffer;
3680         struct trace_entry *ent, *next = NULL;
3681         unsigned long lost_events = 0, next_lost = 0;
3682         int cpu_file = iter->cpu_file;
3683         u64 next_ts = 0, ts;
3684         int next_cpu = -1;
3685         int next_size = 0;
3686         int cpu;
3687
3688         /*
3689          * If we are in a per_cpu trace file, don't bother iterating over
3690          * all CPUs; peek at that CPU directly.
3691          */
3692         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3693                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3694                         return NULL;
3695                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3696                 if (ent_cpu)
3697                         *ent_cpu = cpu_file;
3698
3699                 return ent;
3700         }
3701
3702         for_each_tracing_cpu(cpu) {
3703
3704                 if (ring_buffer_empty_cpu(buffer, cpu))
3705                         continue;
3706
3707                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3708
3709                 /*
3710                  * Pick the entry with the smallest timestamp:
3711                  */
3712                 if (ent && (!next || ts < next_ts)) {
3713                         next = ent;
3714                         next_cpu = cpu;
3715                         next_ts = ts;
3716                         next_lost = lost_events;
3717                         next_size = iter->ent_size;
3718                 }
3719         }
3720
3721         iter->ent_size = next_size;
3722
3723         if (ent_cpu)
3724                 *ent_cpu = next_cpu;
3725
3726         if (ent_ts)
3727                 *ent_ts = next_ts;
3728
3729         if (missing_events)
3730                 *missing_events = next_lost;
3731
3732         return next;
3733 }
3734
3735 #define STATIC_FMT_BUF_SIZE     128
3736 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3737
3738 char *trace_iter_expand_format(struct trace_iterator *iter)
3739 {
3740         char *tmp;
3741
3742         /*
3743          * iter->tr is NULL when used with tp_printk, which makes
3744          * this get called where it is not safe to call krealloc().
3745          */
3746         if (!iter->tr || iter->fmt == static_fmt_buf)
3747                 return NULL;
3748
3749         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3750                        GFP_KERNEL);
3751         if (tmp) {
3752                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3753                 iter->fmt = tmp;
3754         }
3755
3756         return tmp;
3757 }
3758
3759 /* Returns true if the string is safe to dereference from an event */
3760 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3761                            bool star, int len)
3762 {
3763         unsigned long addr = (unsigned long)str;
3764         struct trace_event *trace_event;
3765         struct trace_event_call *event;
3766
3767         /* Ignore strings with no length */
3768         if (star && !len)
3769                 return true;
3770
3771         /* OK if part of the event data */
3772         if ((addr >= (unsigned long)iter->ent) &&
3773             (addr < (unsigned long)iter->ent + iter->ent_size))
3774                 return true;
3775
3776         /* OK if part of the temp seq buffer */
3777         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3778             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3779                 return true;
3780
3781         /* Core rodata can not be freed */
3782         if (is_kernel_rodata(addr))
3783                 return true;
3784
3785         if (trace_is_tracepoint_string(str))
3786                 return true;
3787
3788         /*
3789          * Now this could be a module event, referencing core module
3790          * data, which is OK.
3791          */
3792         if (!iter->ent)
3793                 return false;
3794
3795         trace_event = ftrace_find_event(iter->ent->type);
3796         if (!trace_event)
3797                 return false;
3798
3799         event = container_of(trace_event, struct trace_event_call, event);
3800         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3801                 return false;
3802
3803         /* Would rather have rodata, but this will suffice */
3804         if (within_module_core(addr, event->module))
3805                 return true;
3806
3807         return false;
3808 }
3809
3810 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3811
3812 static int test_can_verify_check(const char *fmt, ...)
3813 {
3814         char buf[16];
3815         va_list ap;
3816         int ret;
3817
3818         /*
3819          * The verifier depends on vsnprintf() modifying the va_list
3820          * passed to it, which only happens when the va_list is passed
3821          * by reference. Some architectures (like x86_32) pass it by
3822          * value, which means that vsnprintf() does not modify the
3823          * caller's va_list; the verifier would then need to understand
3824          * every value that vsnprintf() can consume. If the va_list is
3825          * passed by value, the verifier is disabled.
3826          */
3827         va_start(ap, fmt);
3828         vsnprintf(buf, 16, "%d", ap);
3829         ret = va_arg(ap, int);
3830         va_end(ap);
3831
3832         return ret;
3833 }
3834
3835 static void test_can_verify(void)
3836 {
3837         if (!test_can_verify_check("%d %d", 0, 1)) {
3838                 pr_info("trace event string verifier disabled\n");
3839                 static_branch_inc(&trace_no_verify);
3840         }
3841 }
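
/*
 * A worked example of the probe above: where va_list is an array type
 * and is therefore effectively passed to vsnprintf() by reference, the
 * vsnprintf(buf, 16, "%d", ap) call consumes the first variadic
 * argument (0), so the following va_arg() returns 1 and
 * test_can_verify_check("%d %d", 0, 1) is non-zero, leaving the
 * verifier enabled. Where va_list is passed by value, the caller's ap
 * is untouched, va_arg() returns the first argument (0), and the
 * verifier is switched off.
 */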
3842
3843 /**
3844  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3845  * @iter: The iterator that holds the seq buffer and the event being printed
3846  * @fmt: The format used to print the event
3847  * @ap: The va_list holding the data to print from @fmt.
3848  *
3849  * This writes the data into the @iter->seq buffer using the data from
3850  * @fmt and @ap. If the format has a %s, then the source of the string
3851  * is examined to make sure it is safe to print, otherwise it will
3852  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3853  * pointer.
3854  */
3855 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3856                          va_list ap)
3857 {
3858         const char *p = fmt;
3859         const char *str;
3860         int i, j;
3861
3862         if (WARN_ON_ONCE(!fmt))
3863                 return;
3864
3865         if (static_branch_unlikely(&trace_no_verify))
3866                 goto print;
3867
3868         /* Don't bother checking when doing a ftrace_dump() */
3869         if (iter->fmt == static_fmt_buf)
3870                 goto print;
3871
3872         while (*p) {
3873                 bool star = false;
3874                 int len = 0;
3875
3876                 j = 0;
3877
3878                 /* We only care about %s and variants */
3879                 for (i = 0; p[i]; i++) {
3880                         if (i + 1 >= iter->fmt_size) {
3881                                 /*
3882                                  * If we can't expand the copy buffer,
3883                                  * just print it.
3884                                  */
3885                                 if (!trace_iter_expand_format(iter))
3886                                         goto print;
3887                         }
3888
3889                         if (p[i] == '\\' && p[i+1]) {
3890                                 i++;
3891                                 continue;
3892                         }
3893                         if (p[i] == '%') {
3894                                 /* Need to test cases like %08.*s */
3895                                 for (j = 1; p[i+j]; j++) {
3896                                         if (isdigit(p[i+j]) ||
3897                                             p[i+j] == '.')
3898                                                 continue;
3899                                         if (p[i+j] == '*') {
3900                                                 star = true;
3901                                                 continue;
3902                                         }
3903                                         break;
3904                                 }
3905                                 if (p[i+j] == 's')
3906                                         break;
3907                                 star = false;
3908                         }
3909                         j = 0;
3910                 }
3911                 /* If no %s found then just print normally */
3912                 if (!p[i])
3913                         break;
3914
3915                 /* Copy up to the %s, and print that */
3916                 strncpy(iter->fmt, p, i);
3917                 iter->fmt[i] = '\0';
3918                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3919
3920                 /*
3921                  * If iter->seq is full, the above call no longer guarantees
3922                  * that ap is in sync with fmt processing, and further calls
3923                  * to va_arg() can return wrong positional arguments.
3924                  *
3925                  * Ensure that ap is no longer used in this case.
3926                  */
3927                 if (iter->seq.full) {
3928                         p = "";
3929                         break;
3930                 }
3931
3932                 if (star)
3933                         len = va_arg(ap, int);
3934
3935                 /* The ap now points to the string data of the %s */
3936                 str = va_arg(ap, const char *);
3937
3938                 /*
3939                  * If you hit this warning, it is likely that the
3940                  * trace event in question used %s on a string that
3941                  * was saved at the time of the event, but may not be
3942                  * around when the trace is read. Use __string(),
3943                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3944                  * instead. See samples/trace_events/trace-events-sample.h
3945                  * for reference.
3946                  */
3947                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3948                               "fmt: '%s' current_buffer: '%s'",
3949                               fmt, seq_buf_str(&iter->seq.seq))) {
3950                         int ret;
3951
3952                         /* Try to safely read the string */
3953                         if (star) {
3954                                 if (len + 1 > iter->fmt_size)
3955                                         len = iter->fmt_size - 1;
3956                                 if (len < 0)
3957                                         len = 0;
3958                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3959                                 iter->fmt[len] = 0;
3960                                 star = false;
3961                         } else {
3962                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3963                                                                   iter->fmt_size);
3964                         }
3965                         if (ret < 0)
3966                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3967                         else
3968                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3969                                                  str, iter->fmt);
3970                         str = "[UNSAFE-MEMORY]";
3971                         strcpy(iter->fmt, "%s");
3972                 } else {
3973                         strncpy(iter->fmt, p + i, j + 1);
3974                         iter->fmt[j+1] = '\0';
3975                 }
3976                 if (star)
3977                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3978                 else
3979                         trace_seq_printf(&iter->seq, iter->fmt, str);
3980
3981                 p += i + j + 1;
3982         }
3983  print:
3984         if (*p)
3985                 trace_seq_vprintf(&iter->seq, p, ap);
3986 }
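
/*
 * A minimal sketch of the safe pattern the warning above refers to
 * (event and field names are made up; see
 * samples/trace_events/trace-events-sample.h for the real reference):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("%s", __get_str(name))
 *	);
 *
 * __string()/__assign_str() copy the string into the event record
 * itself, so the %s in TP_printk() always points inside iter->ent and
 * trace_safe_str() accepts it without this fallback path.
 */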
3987
3988 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3989 {
3990         const char *p, *new_fmt;
3991         char *q;
3992
3993         if (WARN_ON_ONCE(!fmt))
3994                 return fmt;
3995
3996         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3997                 return fmt;
3998
3999         p = fmt;
4000         new_fmt = q = iter->fmt;
4001         while (*p) {
4002                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4003                         if (!trace_iter_expand_format(iter))
4004                                 return fmt;
4005
4006                         q += iter->fmt - new_fmt;
4007                         new_fmt = iter->fmt;
4008                 }
4009
4010                 *q++ = *p++;
4011
4012                 /* Replace %p with %px */
4013                 if (p[-1] == '%') {
4014                         if (p[0] == '%') {
4015                                 *q++ = *p++;
4016                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4017                                 *q++ = *p++;
4018                                 *q++ = 'x';
4019                         }
4020                 }
4021         }
4022         *q = '\0';
4023
4024         return new_fmt;
4025 }
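
/*
 * For example (following the loop above): with pointer hashing
 * disabled, a format such as "ptr=%p func=%pS" is rewritten to
 * "ptr=%px func=%pS". Only a bare %p gains the 'x'; extended forms
 * like %pS are left alone because of the isalnum() check, and "%%"
 * is copied through untouched.
 */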
4026
4027 #define STATIC_TEMP_BUF_SIZE    128
4028 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4029
4030 /* Find the next real entry, without updating the iterator itself */
4031 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4032                                           int *ent_cpu, u64 *ent_ts)
4033 {
4034         /* __find_next_entry will reset ent_size */
4035         int ent_size = iter->ent_size;
4036         struct trace_entry *entry;
4037
4038         /*
4039          * If called from ftrace_dump(), then the iter->temp buffer
4040          * will be the static_temp_buf and not one created by kmalloc().
4041          * If the entry size is greater than the buffer, we cannot
4042          * save it. Just return NULL in that case. This is only
4043          * used to add markers when two consecutive events' time
4044          * stamps have a large delta. See trace_print_lat_context().
4045          */
4046         if (iter->temp == static_temp_buf &&
4047             STATIC_TEMP_BUF_SIZE < ent_size)
4048                 return NULL;
4049
4050         /*
4051          * __find_next_entry() may call peek_next_entry(), which may
4052          * call ring_buffer_peek(), which can leave the contents of
4053          * iter->ent undefined. Copy iter->ent now.
4054          */
4055         if (iter->ent && iter->ent != iter->temp) {
4056                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4057                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4058                         void *temp;
4059                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4060                         if (!temp)
4061                                 return NULL;
4062                         kfree(iter->temp);
4063                         iter->temp = temp;
4064                         iter->temp_size = iter->ent_size;
4065                 }
4066                 memcpy(iter->temp, iter->ent, iter->ent_size);
4067                 iter->ent = iter->temp;
4068         }
4069         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4070         /* Put back the original ent_size */
4071         iter->ent_size = ent_size;
4072
4073         return entry;
4074 }
4075
4076 /* Find the next real entry, and increment the iterator to the next entry */
4077 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4078 {
4079         iter->ent = __find_next_entry(iter, &iter->cpu,
4080                                       &iter->lost_events, &iter->ts);
4081
4082         if (iter->ent)
4083                 trace_iterator_increment(iter);
4084
4085         return iter->ent ? iter : NULL;
4086 }
4087
4088 static void trace_consume(struct trace_iterator *iter)
4089 {
4090         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4091                             &iter->lost_events);
4092 }
4093
4094 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4095 {
4096         struct trace_iterator *iter = m->private;
4097         int i = (int)*pos;
4098         void *ent;
4099
4100         WARN_ON_ONCE(iter->leftover);
4101
4102         (*pos)++;
4103
4104         /* can't go backwards */
4105         if (iter->idx > i)
4106                 return NULL;
4107
4108         if (iter->idx < 0)
4109                 ent = trace_find_next_entry_inc(iter);
4110         else
4111                 ent = iter;
4112
4113         while (ent && iter->idx < i)
4114                 ent = trace_find_next_entry_inc(iter);
4115
4116         iter->pos = *pos;
4117
4118         return ent;
4119 }
4120
4121 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4122 {
4123         struct ring_buffer_iter *buf_iter;
4124         unsigned long entries = 0;
4125         u64 ts;
4126
4127         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4128
4129         buf_iter = trace_buffer_iter(iter, cpu);
4130         if (!buf_iter)
4131                 return;
4132
4133         ring_buffer_iter_reset(buf_iter);
4134
4135         /*
4136          * With the max latency tracers, it is possible that a reset
4137          * never took place on a CPU. This is evident from the
4138          * timestamp being before the start of the buffer.
4139          */
4140         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4141                 if (ts >= iter->array_buffer->time_start)
4142                         break;
4143                 entries++;
4144                 ring_buffer_iter_advance(buf_iter);
4145         }
4146
4147         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4148 }
4149
4150 /*
4151  * The current tracer is copied to avoid taking a global lock
4152  * all around.
4153  */
4154 static void *s_start(struct seq_file *m, loff_t *pos)
4155 {
4156         struct trace_iterator *iter = m->private;
4157         struct trace_array *tr = iter->tr;
4158         int cpu_file = iter->cpu_file;
4159         void *p = NULL;
4160         loff_t l = 0;
4161         int cpu;
4162
4163         mutex_lock(&trace_types_lock);
4164         if (unlikely(tr->current_trace != iter->trace)) {
4165                 /* Close iter->trace before switching to the new current tracer */
4166                 if (iter->trace->close)
4167                         iter->trace->close(iter);
4168                 iter->trace = tr->current_trace;
4169                 /* Reopen the new current tracer */
4170                 if (iter->trace->open)
4171                         iter->trace->open(iter);
4172         }
4173         mutex_unlock(&trace_types_lock);
4174
4175 #ifdef CONFIG_TRACER_MAX_TRACE
4176         if (iter->snapshot && iter->trace->use_max_tr)
4177                 return ERR_PTR(-EBUSY);
4178 #endif
4179
4180         if (*pos != iter->pos) {
4181                 iter->ent = NULL;
4182                 iter->cpu = 0;
4183                 iter->idx = -1;
4184
4185                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4186                         for_each_tracing_cpu(cpu)
4187                                 tracing_iter_reset(iter, cpu);
4188                 } else
4189                         tracing_iter_reset(iter, cpu_file);
4190
4191                 iter->leftover = 0;
4192                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4193                         ;
4194
4195         } else {
4196                 /*
4197                  * If we overflowed the seq_file before, then we want
4198                  * to just reuse the trace_seq buffer again.
4199                  */
4200                 if (iter->leftover)
4201                         p = iter;
4202                 else {
4203                         l = *pos - 1;
4204                         p = s_next(m, p, &l);
4205                 }
4206         }
4207
4208         trace_event_read_lock();
4209         trace_access_lock(cpu_file);
4210         return p;
4211 }
4212
4213 static void s_stop(struct seq_file *m, void *p)
4214 {
4215         struct trace_iterator *iter = m->private;
4216
4217 #ifdef CONFIG_TRACER_MAX_TRACE
4218         if (iter->snapshot && iter->trace->use_max_tr)
4219                 return;
4220 #endif
4221
4222         trace_access_unlock(iter->cpu_file);
4223         trace_event_read_unlock();
4224 }
4225
4226 static void
4227 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4228                       unsigned long *entries, int cpu)
4229 {
4230         unsigned long count;
4231
4232         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4233         /*
4234          * If this buffer has skipped entries, then we hold all
4235          * entries for the trace and we need to ignore the
4236          * ones before the time stamp.
4237          */
4238         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4239                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4240                 /* total is the same as the entries */
4241                 *total = count;
4242         } else
4243                 *total = count +
4244                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4245         *entries = count;
4246 }
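
/*
 * A small worked example of the accounting above: if a CPU buffer
 * currently holds 800 readable entries and 200 older events were
 * overwritten (the overrun), then *entries is 800 while *total is
 * 1000, which is what print_event_info() reports as
 * "entries-in-buffer/entries-written".
 */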
4247
4248 static void
4249 get_total_entries(struct array_buffer *buf,
4250                   unsigned long *total, unsigned long *entries)
4251 {
4252         unsigned long t, e;
4253         int cpu;
4254
4255         *total = 0;
4256         *entries = 0;
4257
4258         for_each_tracing_cpu(cpu) {
4259                 get_total_entries_cpu(buf, &t, &e, cpu);
4260                 *total += t;
4261                 *entries += e;
4262         }
4263 }
4264
4265 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4266 {
4267         unsigned long total, entries;
4268
4269         if (!tr)
4270                 tr = &global_trace;
4271
4272         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4273
4274         return entries;
4275 }
4276
4277 unsigned long trace_total_entries(struct trace_array *tr)
4278 {
4279         unsigned long total, entries;
4280
4281         if (!tr)
4282                 tr = &global_trace;
4283
4284         get_total_entries(&tr->array_buffer, &total, &entries);
4285
4286         return entries;
4287 }
4288
4289 static void print_lat_help_header(struct seq_file *m)
4290 {
4291         seq_puts(m, "#                    _------=> CPU#            \n"
4292                     "#                   / _-----=> irqs-off/BH-disabled\n"
4293                     "#                  | / _----=> need-resched    \n"
4294                     "#                  || / _---=> hardirq/softirq \n"
4295                     "#                  ||| / _--=> preempt-depth   \n"
4296                     "#                  |||| / _-=> migrate-disable \n"
4297                     "#                  ||||| /     delay           \n"
4298                     "#  cmd     pid     |||||| time  |   caller     \n"
4299                     "#     \\   /        ||||||  \\    |    /       \n");
4300 }
4301
4302 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4303 {
4304         unsigned long total;
4305         unsigned long entries;
4306
4307         get_total_entries(buf, &total, &entries);
4308         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4309                    entries, total, num_online_cpus());
4310         seq_puts(m, "#\n");
4311 }
4312
4313 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4314                                    unsigned int flags)
4315 {
4316         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4317
4318         print_event_info(buf, m);
4319
4320         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4321         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4322 }
4323
4324 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4325                                        unsigned int flags)
4326 {
4327         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4328         static const char space[] = "            ";
4329         int prec = tgid ? 12 : 2;
4330
4331         print_event_info(buf, m);
4332
4333         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4334         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4335         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4336         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4337         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4338         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4339         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4340         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4341 }
4342
4343 void
4344 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4345 {
4346         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4347         struct array_buffer *buf = iter->array_buffer;
4348         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4349         struct tracer *type = iter->trace;
4350         unsigned long entries;
4351         unsigned long total;
4352         const char *name = type->name;
4353
4354         get_total_entries(buf, &total, &entries);
4355
4356         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4357                    name, UTS_RELEASE);
4358         seq_puts(m, "# -----------------------------------"
4359                  "---------------------------------\n");
4360         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4361                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4362                    nsecs_to_usecs(data->saved_latency),
4363                    entries,
4364                    total,
4365                    buf->cpu,
4366                    preempt_model_none()      ? "server" :
4367                    preempt_model_voluntary() ? "desktop" :
4368                    preempt_model_full()      ? "preempt" :
4369                    preempt_model_rt()        ? "preempt_rt" :
4370                    "unknown",
4371                    /* These are reserved for later use */
4372                    0, 0, 0, 0);
4373 #ifdef CONFIG_SMP
4374         seq_printf(m, " #P:%d)\n", num_online_cpus());
4375 #else
4376         seq_puts(m, ")\n");
4377 #endif
4378         seq_puts(m, "#    -----------------\n");
4379         seq_printf(m, "#    | task: %.16s-%d "
4380                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4381                    data->comm, data->pid,
4382                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4383                    data->policy, data->rt_priority);
4384         seq_puts(m, "#    -----------------\n");
4385
4386         if (data->critical_start) {
4387                 seq_puts(m, "#  => started at: ");
4388                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4389                 trace_print_seq(m, &iter->seq);
4390                 seq_puts(m, "\n#  => ended at:   ");
4391                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4392                 trace_print_seq(m, &iter->seq);
4393                 seq_puts(m, "\n#\n");
4394         }
4395
4396         seq_puts(m, "#\n");
4397 }
4398
4399 static void test_cpu_buff_start(struct trace_iterator *iter)
4400 {
4401         struct trace_seq *s = &iter->seq;
4402         struct trace_array *tr = iter->tr;
4403
4404         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4405                 return;
4406
4407         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4408                 return;
4409
4410         if (cpumask_available(iter->started) &&
4411             cpumask_test_cpu(iter->cpu, iter->started))
4412                 return;
4413
4414         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4415                 return;
4416
4417         if (cpumask_available(iter->started))
4418                 cpumask_set_cpu(iter->cpu, iter->started);
4419
4420         /* Don't print started cpu buffer for the first entry of the trace */
4421         if (iter->idx > 1)
4422                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4423                                 iter->cpu);
4424 }
4425
4426 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4427 {
4428         struct trace_array *tr = iter->tr;
4429         struct trace_seq *s = &iter->seq;
4430         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4431         struct trace_entry *entry;
4432         struct trace_event *event;
4433
4434         entry = iter->ent;
4435
4436         test_cpu_buff_start(iter);
4437
4438         event = ftrace_find_event(entry->type);
4439
4440         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4441                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4442                         trace_print_lat_context(iter);
4443                 else
4444                         trace_print_context(iter);
4445         }
4446
4447         if (trace_seq_has_overflowed(s))
4448                 return TRACE_TYPE_PARTIAL_LINE;
4449
4450         if (event) {
4451                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4452                         return print_event_fields(iter, event);
4453                 return event->funcs->trace(iter, sym_flags, event);
4454         }
4455
4456         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4457
4458         return trace_handle_return(s);
4459 }
4460
4461 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4462 {
4463         struct trace_array *tr = iter->tr;
4464         struct trace_seq *s = &iter->seq;
4465         struct trace_entry *entry;
4466         struct trace_event *event;
4467
4468         entry = iter->ent;
4469
4470         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4471                 trace_seq_printf(s, "%d %d %llu ",
4472                                  entry->pid, iter->cpu, iter->ts);
4473
4474         if (trace_seq_has_overflowed(s))
4475                 return TRACE_TYPE_PARTIAL_LINE;
4476
4477         event = ftrace_find_event(entry->type);
4478         if (event)
4479                 return event->funcs->raw(iter, 0, event);
4480
4481         trace_seq_printf(s, "%d ?\n", entry->type);
4482
4483         return trace_handle_return(s);
4484 }
4485
4486 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4487 {
4488         struct trace_array *tr = iter->tr;
4489         struct trace_seq *s = &iter->seq;
4490         unsigned char newline = '\n';
4491         struct trace_entry *entry;
4492         struct trace_event *event;
4493
4494         entry = iter->ent;
4495
4496         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4497                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4498                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4499                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4500                 if (trace_seq_has_overflowed(s))
4501                         return TRACE_TYPE_PARTIAL_LINE;
4502         }
4503
4504         event = ftrace_find_event(entry->type);
4505         if (event) {
4506                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4507                 if (ret != TRACE_TYPE_HANDLED)
4508                         return ret;
4509         }
4510
4511         SEQ_PUT_FIELD(s, newline);
4512
4513         return trace_handle_return(s);
4514 }
4515
4516 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4517 {
4518         struct trace_array *tr = iter->tr;
4519         struct trace_seq *s = &iter->seq;
4520         struct trace_entry *entry;
4521         struct trace_event *event;
4522
4523         entry = iter->ent;
4524
4525         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4526                 SEQ_PUT_FIELD(s, entry->pid);
4527                 SEQ_PUT_FIELD(s, iter->cpu);
4528                 SEQ_PUT_FIELD(s, iter->ts);
4529                 if (trace_seq_has_overflowed(s))
4530                         return TRACE_TYPE_PARTIAL_LINE;
4531         }
4532
4533         event = ftrace_find_event(entry->type);
4534         return event ? event->funcs->binary(iter, 0, event) :
4535                 TRACE_TYPE_HANDLED;
4536 }
4537
4538 int trace_empty(struct trace_iterator *iter)
4539 {
4540         struct ring_buffer_iter *buf_iter;
4541         int cpu;
4542
4543         /* If we are looking at one CPU buffer, only check that one */
4544         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4545                 cpu = iter->cpu_file;
4546                 buf_iter = trace_buffer_iter(iter, cpu);
4547                 if (buf_iter) {
4548                         if (!ring_buffer_iter_empty(buf_iter))
4549                                 return 0;
4550                 } else {
4551                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4552                                 return 0;
4553                 }
4554                 return 1;
4555         }
4556
4557         for_each_tracing_cpu(cpu) {
4558                 buf_iter = trace_buffer_iter(iter, cpu);
4559                 if (buf_iter) {
4560                         if (!ring_buffer_iter_empty(buf_iter))
4561                                 return 0;
4562                 } else {
4563                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4564                                 return 0;
4565                 }
4566         }
4567
4568         return 1;
4569 }
4570
4571 /*  Called with trace_event_read_lock() held. */
4572 enum print_line_t print_trace_line(struct trace_iterator *iter)
4573 {
4574         struct trace_array *tr = iter->tr;
4575         unsigned long trace_flags = tr->trace_flags;
4576         enum print_line_t ret;
4577
4578         if (iter->lost_events) {
4579                 if (iter->lost_events == (unsigned long)-1)
4580                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4581                                          iter->cpu);
4582                 else
4583                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4584                                          iter->cpu, iter->lost_events);
4585                 if (trace_seq_has_overflowed(&iter->seq))
4586                         return TRACE_TYPE_PARTIAL_LINE;
4587         }
4588
4589         if (iter->trace && iter->trace->print_line) {
4590                 ret = iter->trace->print_line(iter);
4591                 if (ret != TRACE_TYPE_UNHANDLED)
4592                         return ret;
4593         }
4594
4595         if (iter->ent->type == TRACE_BPUTS &&
4596                         trace_flags & TRACE_ITER_PRINTK &&
4597                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4598                 return trace_print_bputs_msg_only(iter);
4599
4600         if (iter->ent->type == TRACE_BPRINT &&
4601                         trace_flags & TRACE_ITER_PRINTK &&
4602                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4603                 return trace_print_bprintk_msg_only(iter);
4604
4605         if (iter->ent->type == TRACE_PRINT &&
4606                         trace_flags & TRACE_ITER_PRINTK &&
4607                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4608                 return trace_print_printk_msg_only(iter);
4609
4610         if (trace_flags & TRACE_ITER_BIN)
4611                 return print_bin_fmt(iter);
4612
4613         if (trace_flags & TRACE_ITER_HEX)
4614                 return print_hex_fmt(iter);
4615
4616         if (trace_flags & TRACE_ITER_RAW)
4617                 return print_raw_fmt(iter);
4618
4619         return print_trace_fmt(iter);
4620 }
4621
4622 void trace_latency_header(struct seq_file *m)
4623 {
4624         struct trace_iterator *iter = m->private;
4625         struct trace_array *tr = iter->tr;
4626
4627         /* print nothing if the buffers are empty */
4628         if (trace_empty(iter))
4629                 return;
4630
4631         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4632                 print_trace_header(m, iter);
4633
4634         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4635                 print_lat_help_header(m);
4636 }
4637
4638 void trace_default_header(struct seq_file *m)
4639 {
4640         struct trace_iterator *iter = m->private;
4641         struct trace_array *tr = iter->tr;
4642         unsigned long trace_flags = tr->trace_flags;
4643
4644         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4645                 return;
4646
4647         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4648                 /* print nothing if the buffers are empty */
4649                 if (trace_empty(iter))
4650                         return;
4651                 print_trace_header(m, iter);
4652                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4653                         print_lat_help_header(m);
4654         } else {
4655                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4656                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4657                                 print_func_help_header_irq(iter->array_buffer,
4658                                                            m, trace_flags);
4659                         else
4660                                 print_func_help_header(iter->array_buffer, m,
4661                                                        trace_flags);
4662                 }
4663         }
4664 }
4665
4666 static void test_ftrace_alive(struct seq_file *m)
4667 {
4668         if (!ftrace_is_dead())
4669                 return;
4670         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4671                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4672 }
4673
4674 #ifdef CONFIG_TRACER_MAX_TRACE
4675 static void show_snapshot_main_help(struct seq_file *m)
4676 {
4677         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4678                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4679                     "#                      Takes a snapshot of the main buffer.\n"
4680                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4681                     "#                      (Doesn't have to be '2'; works with any number that\n"
4682                     "#                       is not a '0' or '1')\n");
4683 }
4684
4685 static void show_snapshot_percpu_help(struct seq_file *m)
4686 {
4687         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4688 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4689         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4690                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4691 #else
4692         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4693                     "#                     Must use main snapshot file to allocate.\n");
4694 #endif
4695         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4696                     "#                      (Doesn't have to be '2'; works with any number that\n"
4697                     "#                       is not a '0' or '1')\n");
4698 }
4699
4700 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4701 {
4702         if (iter->tr->allocated_snapshot)
4703                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4704         else
4705                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4706
4707         seq_puts(m, "# Snapshot commands:\n");
4708         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4709                 show_snapshot_main_help(m);
4710         else
4711                 show_snapshot_percpu_help(m);
4712 }
4713 #else
4714 /* Should never be called */
4715 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4716 #endif
4717
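     /*
      * seq_file ->show() callback for the trace output. With no current
      * entry it prints the header (tracer name, snapshot help, or the
      * default column header). If a previous line overflowed the seq_file
      * buffer, the leftover data is flushed first; otherwise the next
      * trace entry is formatted into iter->seq and copied out.
      */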
4718 static int s_show(struct seq_file *m, void *v)
4719 {
4720         struct trace_iterator *iter = v;
4721         int ret;
4722
4723         if (iter->ent == NULL) {
4724                 if (iter->tr) {
4725                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4726                         seq_puts(m, "#\n");
4727                         test_ftrace_alive(m);
4728                 }
4729                 if (iter->snapshot && trace_empty(iter))
4730                         print_snapshot_help(m, iter);
4731                 else if (iter->trace && iter->trace->print_header)
4732                         iter->trace->print_header(m);
4733                 else
4734                         trace_default_header(m);
4735
4736         } else if (iter->leftover) {
4737                 /*
4738                  * If we filled the seq_file buffer earlier, we
4739                  * want to just show it now.
4740                  */
4741                 ret = trace_print_seq(m, &iter->seq);
4742
4743                 /* ret should this time be zero, but you never know */
4744                 iter->leftover = ret;
4745
4746         } else {
4747                 ret = print_trace_line(iter);
4748                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4749                         iter->seq.full = 0;
4750                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4751                 }
4752                 ret = trace_print_seq(m, &iter->seq);
4753                 /*
4754                  * If we overflow the seq_file buffer, then it will
4755                  * ask us for this data again at startup.
4756                  * Use that instead.
4757                  *  ret is 0 if seq_file write succeeded.
4758                  *        -1 otherwise.
4759                  */
4760                 iter->leftover = ret;
4761         }
4762
4763         return 0;
4764 }
4765
4766 /*
4767  * Should be used after trace_array_get(); trace_types_lock
4768  * ensures that i_cdev was already initialized.
4769  */
4770 static inline int tracing_get_cpu(struct inode *inode)
4771 {
4772         if (inode->i_cdev) /* See trace_create_cpu_file() */
4773                 return (long)inode->i_cdev - 1;
4774         return RING_BUFFER_ALL_CPUS;
4775 }
4776
4777 static const struct seq_operations tracer_seq_ops = {
4778         .start          = s_start,
4779         .next           = s_next,
4780         .stop           = s_stop,
4781         .show           = s_show,
4782 };
4783
4784 /*
4785  * Note, as iter itself can be allocated and freed in different
4786  * ways, this function is only used to free its content, and not
4787  * the iterator itself. The only requirement on all the allocations
4788  * is that they zero all fields (kzalloc), as freeing works with
4789  * either allocated content or NULL.
4790  */
4791 static void free_trace_iter_content(struct trace_iterator *iter)
4792 {
4793         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4794         if (iter->fmt != static_fmt_buf)
4795                 kfree(iter->fmt);
4796
4797         kfree(iter->temp);
4798         kfree(iter->buffer_iter);
4799         mutex_destroy(&iter->mutex);
4800         free_cpumask_var(iter->started);
4801 }
4802
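     /*
      * Build a trace_iterator for reading a trace buffer through seq_file:
      * allocate the iterator and its per-CPU ring buffer iterators, select
      * the main or max/snapshot buffer, optionally pause tracing (the
      * pause-on-trace option), and position every CPU iterator at the
      * start. Returns an ERR_PTR() on failure.
      */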
4803 static struct trace_iterator *
4804 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4805 {
4806         struct trace_array *tr = inode->i_private;
4807         struct trace_iterator *iter;
4808         int cpu;
4809
4810         if (tracing_disabled)
4811                 return ERR_PTR(-ENODEV);
4812
4813         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4814         if (!iter)
4815                 return ERR_PTR(-ENOMEM);
4816
4817         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4818                                     GFP_KERNEL);
4819         if (!iter->buffer_iter)
4820                 goto release;
4821
4822         /*
4823          * trace_find_next_entry() may need to save off iter->ent.
4824          * It will place it into the iter->temp buffer. As most
4825          * events are less than 128 bytes, allocate a buffer of that size.
4826          * If one is greater, then trace_find_next_entry() will
4827          * allocate a new buffer to adjust for the bigger iter->ent.
4828          * It's not critical if it fails to get allocated here.
4829          */
4830         iter->temp = kmalloc(128, GFP_KERNEL);
4831         if (iter->temp)
4832                 iter->temp_size = 128;
4833
4834         /*
4835          * trace_event_printf() may need to modify given format
4836          * string to replace %p with %px so that it shows real address
4837          * instead of a hash value. However, that is only needed for event
4838          * tracing; other tracers may not need it. Defer the allocation
4839          * until it is needed.
4840          */
4841         iter->fmt = NULL;
4842         iter->fmt_size = 0;
4843
4844         mutex_lock(&trace_types_lock);
4845         iter->trace = tr->current_trace;
4846
4847         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4848                 goto fail;
4849
4850         iter->tr = tr;
4851
4852 #ifdef CONFIG_TRACER_MAX_TRACE
4853         /* Currently only the top directory has a snapshot */
4854         if (tr->current_trace->print_max || snapshot)
4855                 iter->array_buffer = &tr->max_buffer;
4856         else
4857 #endif
4858                 iter->array_buffer = &tr->array_buffer;
4859         iter->snapshot = snapshot;
4860         iter->pos = -1;
4861         iter->cpu_file = tracing_get_cpu(inode);
4862         mutex_init(&iter->mutex);
4863
4864         /* Notify the tracer early; before we stop tracing. */
4865         if (iter->trace->open)
4866                 iter->trace->open(iter);
4867
4868         /* Annotate start of buffers if we had overruns */
4869         if (ring_buffer_overruns(iter->array_buffer->buffer))
4870                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4871
4872         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4873         if (trace_clocks[tr->clock_id].in_ns)
4874                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4875
4876         /*
4877          * If pause-on-trace is enabled, then stop the trace while
4878          * dumping, unless this is the "snapshot" file
4879          */
4880         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4881                 tracing_stop_tr(tr);
4882
4883         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4884                 for_each_tracing_cpu(cpu) {
4885                         iter->buffer_iter[cpu] =
4886                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4887                                                          cpu, GFP_KERNEL);
4888                 }
4889                 ring_buffer_read_prepare_sync();
4890                 for_each_tracing_cpu(cpu) {
4891                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4892                         tracing_iter_reset(iter, cpu);
4893                 }
4894         } else {
4895                 cpu = iter->cpu_file;
4896                 iter->buffer_iter[cpu] =
4897                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4898                                                  cpu, GFP_KERNEL);
4899                 ring_buffer_read_prepare_sync();
4900                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4901                 tracing_iter_reset(iter, cpu);
4902         }
4903
4904         mutex_unlock(&trace_types_lock);
4905
4906         return iter;
4907
4908  fail:
4909         mutex_unlock(&trace_types_lock);
4910         free_trace_iter_content(iter);
4911 release:
4912         seq_release_private(inode, file);
4913         return ERR_PTR(-ENOMEM);
4914 }
4915
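     /*
      * Generic open: only verify that tracing is usable (no trace_array
      * reference is taken) and stash the inode's private data for the
      * file's read/write handlers.
      */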
4916 int tracing_open_generic(struct inode *inode, struct file *filp)
4917 {
4918         int ret;
4919
4920         ret = tracing_check_open_get_tr(NULL);
4921         if (ret)
4922                 return ret;
4923
4924         filp->private_data = inode->i_private;
4925         return 0;
4926 }
4927
4928 bool tracing_is_disabled(void)
4929 {
4930         return (tracing_disabled) ? true : false;
4931 }
4932
4933 /*
4934  * Open and update trace_array ref count.
4935  * Must have the current trace_array passed to it.
4936  */
4937 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4938 {
4939         struct trace_array *tr = inode->i_private;
4940         int ret;
4941
4942         ret = tracing_check_open_get_tr(tr);
4943         if (ret)
4944                 return ret;
4945
4946         filp->private_data = inode->i_private;
4947
4948         return 0;
4949 }
4950
4951 /*
4952  * The private pointer of the inode is the trace_event_file.
4953  * Update the tr ref count associated to it.
4954  */
4955 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4956 {
4957         struct trace_event_file *file = inode->i_private;
4958         int ret;
4959
4960         ret = tracing_check_open_get_tr(file->tr);
4961         if (ret)
4962                 return ret;
4963
4964         mutex_lock(&event_mutex);
4965
4966         /* Fail if the file is marked for removal */
4967         if (file->flags & EVENT_FILE_FL_FREED) {
4968                 trace_array_put(file->tr);
4969                 ret = -ENODEV;
4970         } else {
4971                 event_file_get(file);
4972         }
4973
4974         mutex_unlock(&event_mutex);
4975         if (ret)
4976                 return ret;
4977
4978         filp->private_data = inode->i_private;
4979
4980         return 0;
4981 }
4982
4983 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4984 {
4985         struct trace_event_file *file = inode->i_private;
4986
4987         trace_array_put(file->tr);
4988         event_file_put(file);
4989
4990         return 0;
4991 }
4992
4993 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4994 {
4995         tracing_release_file_tr(inode, filp);
4996         return single_release(inode, filp);
4997 }
4998
4999 static int tracing_mark_open(struct inode *inode, struct file *filp)
5000 {
5001         stream_open(inode, filp);
5002         return tracing_open_generic_tr(inode, filp);
5003 }
5004
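     /*
      * Release for the trace output file: finish the per-CPU ring buffer
      * iterators, give the tracer its ->close() callback, restart tracing
      * if it was paused at open time, drop the trace_array reference and
      * free the iterator.
      */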
5005 static int tracing_release(struct inode *inode, struct file *file)
5006 {
5007         struct trace_array *tr = inode->i_private;
5008         struct seq_file *m = file->private_data;
5009         struct trace_iterator *iter;
5010         int cpu;
5011
5012         if (!(file->f_mode & FMODE_READ)) {
5013                 trace_array_put(tr);
5014                 return 0;
5015         }
5016
5017         /* Writes do not use seq_file */
5018         iter = m->private;
5019         mutex_lock(&trace_types_lock);
5020
5021         for_each_tracing_cpu(cpu) {
5022                 if (iter->buffer_iter[cpu])
5023                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5024         }
5025
5026         if (iter->trace && iter->trace->close)
5027                 iter->trace->close(iter);
5028
5029         if (!iter->snapshot && tr->stop_count)
5030                 /* re-enable tracing if it was previously enabled */
5031                 tracing_start_tr(tr);
5032
5033         __trace_array_put(tr);
5034
5035         mutex_unlock(&trace_types_lock);
5036
5037         free_trace_iter_content(iter);
5038         seq_release_private(inode, file);
5039
5040         return 0;
5041 }
5042
5043 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5044 {
5045         struct trace_array *tr = inode->i_private;
5046
5047         trace_array_put(tr);
5048         return 0;
5049 }
5050
5051 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5052 {
5053         struct trace_array *tr = inode->i_private;
5054
5055         trace_array_put(tr);
5056
5057         return single_release(inode, file);
5058 }
5059
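     /*
      * Open for the main trace output file. Opening for write with O_TRUNC
      * clears the buffer (a single CPU or all of them); opening for read
      * creates a full iterator via __tracing_open().
      */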
5060 static int tracing_open(struct inode *inode, struct file *file)
5061 {
5062         struct trace_array *tr = inode->i_private;
5063         struct trace_iterator *iter;
5064         int ret;
5065
5066         ret = tracing_check_open_get_tr(tr);
5067         if (ret)
5068                 return ret;
5069
5070         /* If this file was open for write, then erase contents */
5071         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5072                 int cpu = tracing_get_cpu(inode);
5073                 struct array_buffer *trace_buf = &tr->array_buffer;
5074
5075 #ifdef CONFIG_TRACER_MAX_TRACE
5076                 if (tr->current_trace->print_max)
5077                         trace_buf = &tr->max_buffer;
5078 #endif
5079
5080                 if (cpu == RING_BUFFER_ALL_CPUS)
5081                         tracing_reset_online_cpus(trace_buf);
5082                 else
5083                         tracing_reset_cpu(trace_buf, cpu);
5084         }
5085
5086         if (file->f_mode & FMODE_READ) {
5087                 iter = __tracing_open(inode, file, false);
5088                 if (IS_ERR(iter))
5089                         ret = PTR_ERR(iter);
5090                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5091                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5092         }
5093
5094         if (ret < 0)
5095                 trace_array_put(tr);
5096
5097         return ret;
5098 }
5099
5100 /*
5101  * Some tracers are not suitable for instance buffers.
5102  * A tracer is always available for the global array (toplevel)
5103  * or for an instance if it explicitly allows it.
5104  */
5105 static bool
5106 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5107 {
5108         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5109 }
5110
5111 /* Find the next tracer that this trace array may use */
5112 static struct tracer *
5113 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5114 {
5115         while (t && !trace_ok_for_array(t, tr))
5116                 t = t->next;
5117
5118         return t;
5119 }
5120
5121 static void *
5122 t_next(struct seq_file *m, void *v, loff_t *pos)
5123 {
5124         struct trace_array *tr = m->private;
5125         struct tracer *t = v;
5126
5127         (*pos)++;
5128
5129         if (t)
5130                 t = get_tracer_for_array(tr, t->next);
5131
5132         return t;
5133 }
5134
5135 static void *t_start(struct seq_file *m, loff_t *pos)
5136 {
5137         struct trace_array *tr = m->private;
5138         struct tracer *t;
5139         loff_t l = 0;
5140
5141         mutex_lock(&trace_types_lock);
5142
5143         t = get_tracer_for_array(tr, trace_types);
5144         for (; t && l < *pos; t = t_next(m, t, &l))
5145                         ;
5146
5147         return t;
5148 }
5149
5150 static void t_stop(struct seq_file *m, void *p)
5151 {
5152         mutex_unlock(&trace_types_lock);
5153 }
5154
5155 static int t_show(struct seq_file *m, void *v)
5156 {
5157         struct tracer *t = v;
5158
5159         if (!t)
5160                 return 0;
5161
5162         seq_puts(m, t->name);
5163         if (t->next)
5164                 seq_putc(m, ' ');
5165         else
5166                 seq_putc(m, '\n');
5167
5168         return 0;
5169 }
5170
5171 static const struct seq_operations show_traces_seq_ops = {
5172         .start          = t_start,
5173         .next           = t_next,
5174         .stop           = t_stop,
5175         .show           = t_show,
5176 };
5177
5178 static int show_traces_open(struct inode *inode, struct file *file)
5179 {
5180         struct trace_array *tr = inode->i_private;
5181         struct seq_file *m;
5182         int ret;
5183
5184         ret = tracing_check_open_get_tr(tr);
5185         if (ret)
5186                 return ret;
5187
5188         ret = seq_open(file, &show_traces_seq_ops);
5189         if (ret) {
5190                 trace_array_put(tr);
5191                 return ret;
5192         }
5193
5194         m = file->private_data;
5195         m->private = tr;
5196
5197         return 0;
5198 }
5199
5200 static int show_traces_release(struct inode *inode, struct file *file)
5201 {
5202         struct trace_array *tr = inode->i_private;
5203
5204         trace_array_put(tr);
5205         return seq_release(inode, file);
5206 }
5207
5208 static ssize_t
5209 tracing_write_stub(struct file *filp, const char __user *ubuf,
5210                    size_t count, loff_t *ppos)
5211 {
5212         return count;
5213 }
5214
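     /*
      * lseek handler shared by tracing files: defer to seq_lseek() for
      * readable opens, otherwise just reset the file position (write-only
      * opens do not go through seq_file).
      */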
5215 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5216 {
5217         int ret;
5218
5219         if (file->f_mode & FMODE_READ)
5220                 ret = seq_lseek(file, offset, whence);
5221         else
5222                 file->f_pos = ret = 0;
5223
5224         return ret;
5225 }
5226
5227 static const struct file_operations tracing_fops = {
5228         .open           = tracing_open,
5229         .read           = seq_read,
5230         .read_iter      = seq_read_iter,
5231         .splice_read    = copy_splice_read,
5232         .write          = tracing_write_stub,
5233         .llseek         = tracing_lseek,
5234         .release        = tracing_release,
5235 };
5236
5237 static const struct file_operations show_traces_fops = {
5238         .open           = show_traces_open,
5239         .read           = seq_read,
5240         .llseek         = seq_lseek,
5241         .release        = show_traces_release,
5242 };
5243
5244 static ssize_t
5245 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5246                      size_t count, loff_t *ppos)
5247 {
5248         struct trace_array *tr = file_inode(filp)->i_private;
5249         char *mask_str;
5250         int len;
5251
5252         len = snprintf(NULL, 0, "%*pb\n",
5253                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5254         mask_str = kmalloc(len, GFP_KERNEL);
5255         if (!mask_str)
5256                 return -ENOMEM;
5257
5258         len = snprintf(mask_str, len, "%*pb\n",
5259                        cpumask_pr_args(tr->tracing_cpumask));
5260         if (len >= count) {
5261                 count = -EINVAL;
5262                 goto out_err;
5263         }
5264         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5265
5266 out_err:
5267         kfree(mask_str);
5268
5269         return count;
5270 }
5271
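     /*
      * Apply a new tracing cpumask to @tr: CPUs dropped from the mask get
      * their per-CPU recording disabled (and their disabled counter
      * bumped), CPUs added to the mask are re-enabled. The flip is done
      * under tr->max_lock with interrupts off. From user space this is
      * driven by writing a hex mask, e.g. "echo 3 > tracing_cpumask" to
      * limit tracing to CPUs 0 and 1.
      */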
5272 int tracing_set_cpumask(struct trace_array *tr,
5273                         cpumask_var_t tracing_cpumask_new)
5274 {
5275         int cpu;
5276
5277         if (!tr)
5278                 return -EINVAL;
5279
5280         local_irq_disable();
5281         arch_spin_lock(&tr->max_lock);
5282         for_each_tracing_cpu(cpu) {
5283                 /*
5284                  * Increase/decrease the disabled counter if we are
5285                  * about to flip a bit in the cpumask:
5286                  */
5287                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5288                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5289                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5290                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5291 #ifdef CONFIG_TRACER_MAX_TRACE
5292                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5293 #endif
5294                 }
5295                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5296                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5297                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5298                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5299 #ifdef CONFIG_TRACER_MAX_TRACE
5300                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5301 #endif
5302                 }
5303         }
5304         arch_spin_unlock(&tr->max_lock);
5305         local_irq_enable();
5306
5307         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5308
5309         return 0;
5310 }
5311
5312 static ssize_t
5313 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5314                       size_t count, loff_t *ppos)
5315 {
5316         struct trace_array *tr = file_inode(filp)->i_private;
5317         cpumask_var_t tracing_cpumask_new;
5318         int err;
5319
5320         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5321                 return -ENOMEM;
5322
5323         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5324         if (err)
5325                 goto err_free;
5326
5327         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5328         if (err)
5329                 goto err_free;
5330
5331         free_cpumask_var(tracing_cpumask_new);
5332
5333         return count;
5334
5335 err_free:
5336         free_cpumask_var(tracing_cpumask_new);
5337
5338         return err;
5339 }
5340
5341 static const struct file_operations tracing_cpumask_fops = {
5342         .open           = tracing_open_generic_tr,
5343         .read           = tracing_cpumask_read,
5344         .write          = tracing_cpumask_write,
5345         .release        = tracing_release_generic_tr,
5346         .llseek         = generic_file_llseek,
5347 };
5348
5349 static int tracing_trace_options_show(struct seq_file *m, void *v)
5350 {
5351         struct tracer_opt *trace_opts;
5352         struct trace_array *tr = m->private;
5353         u32 tracer_flags;
5354         int i;
5355
5356         mutex_lock(&trace_types_lock);
5357         tracer_flags = tr->current_trace->flags->val;
5358         trace_opts = tr->current_trace->flags->opts;
5359
5360         for (i = 0; trace_options[i]; i++) {
5361                 if (tr->trace_flags & (1 << i))
5362                         seq_printf(m, "%s\n", trace_options[i]);
5363                 else
5364                         seq_printf(m, "no%s\n", trace_options[i]);
5365         }
5366
5367         for (i = 0; trace_opts[i].name; i++) {
5368                 if (tracer_flags & trace_opts[i].bit)
5369                         seq_printf(m, "%s\n", trace_opts[i].name);
5370                 else
5371                         seq_printf(m, "no%s\n", trace_opts[i].name);
5372         }
5373         mutex_unlock(&trace_types_lock);
5374
5375         return 0;
5376 }
5377
5378 static int __set_tracer_option(struct trace_array *tr,
5379                                struct tracer_flags *tracer_flags,
5380                                struct tracer_opt *opts, int neg)
5381 {
5382         struct tracer *trace = tracer_flags->trace;
5383         int ret;
5384
5385         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5386         if (ret)
5387                 return ret;
5388
5389         if (neg)
5390                 tracer_flags->val &= ~opts->bit;
5391         else
5392                 tracer_flags->val |= opts->bit;
5393         return 0;
5394 }
5395
5396 /* Try to assign a tracer specific option */
5397 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5398 {
5399         struct tracer *trace = tr->current_trace;
5400         struct tracer_flags *tracer_flags = trace->flags;
5401         struct tracer_opt *opts = NULL;
5402         int i;
5403
5404         for (i = 0; tracer_flags->opts[i].name; i++) {
5405                 opts = &tracer_flags->opts[i];
5406
5407                 if (strcmp(cmp, opts->name) == 0)
5408                         return __set_tracer_option(tr, trace->flags, opts, neg);
5409         }
5410
5411         return -EINVAL;
5412 }
5413
5414 /* Some tracers require overwrite to stay enabled */
5415 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5416 {
5417         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5418                 return -1;
5419
5420         return 0;
5421 }
5422
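     /*
      * Set or clear one core trace option bit on @tr. The current tracer
      * may veto the change via its ->flag_changed() callback. Options with
      * side effects (RECORD_CMD, RECORD_TGID, EVENT_FORK, FUNC_FORK,
      * OVERWRITE, PRINTK) also switch the corresponding machinery here.
      */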
5423 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5424 {
5425         int *map;
5426
5427         if ((mask == TRACE_ITER_RECORD_TGID) ||
5428             (mask == TRACE_ITER_RECORD_CMD))
5429                 lockdep_assert_held(&event_mutex);
5430
5431         /* do nothing if the flag already has the requested value */
5432         if (!!(tr->trace_flags & mask) == !!enabled)
5433                 return 0;
5434
5435         /* Give the tracer a chance to approve the change */
5436         if (tr->current_trace->flag_changed)
5437                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5438                         return -EINVAL;
5439
5440         if (enabled)
5441                 tr->trace_flags |= mask;
5442         else
5443                 tr->trace_flags &= ~mask;
5444
5445         if (mask == TRACE_ITER_RECORD_CMD)
5446                 trace_event_enable_cmd_record(enabled);
5447
5448         if (mask == TRACE_ITER_RECORD_TGID) {
5449                 if (!tgid_map) {
5450                         tgid_map_max = pid_max;
5451                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5452                                        GFP_KERNEL);
5453
5454                         /*
5455                          * Pairs with smp_load_acquire() in
5456                          * trace_find_tgid_ptr() to ensure that if it observes
5457                          * the tgid_map we just allocated then it also observes
5458                          * the corresponding tgid_map_max value.
5459                          */
5460                         smp_store_release(&tgid_map, map);
5461                 }
5462                 if (!tgid_map) {
5463                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5464                         return -ENOMEM;
5465                 }
5466
5467                 trace_event_enable_tgid_record(enabled);
5468         }
5469
5470         if (mask == TRACE_ITER_EVENT_FORK)
5471                 trace_event_follow_fork(tr, enabled);
5472
5473         if (mask == TRACE_ITER_FUNC_FORK)
5474                 ftrace_pid_follow_fork(tr, enabled);
5475
5476         if (mask == TRACE_ITER_OVERWRITE) {
5477                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5478 #ifdef CONFIG_TRACER_MAX_TRACE
5479                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5480 #endif
5481         }
5482
5483         if (mask == TRACE_ITER_PRINTK) {
5484                 trace_printk_start_stop_comm(enabled);
5485                 trace_printk_control(enabled);
5486         }
5487
5488         return 0;
5489 }
5490
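     /*
      * Parse one option token ("name" or "noname") and apply it: the core
      * trace_options are matched first, then the current tracer's private
      * options. Any '\0' that strstrip() wrote over trailing whitespace is
      * turned back into a space so the caller's buffer is left intact.
      */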
5491 int trace_set_options(struct trace_array *tr, char *option)
5492 {
5493         char *cmp;
5494         int neg = 0;
5495         int ret;
5496         size_t orig_len = strlen(option);
5497         int len;
5498
5499         cmp = strstrip(option);
5500
5501         len = str_has_prefix(cmp, "no");
5502         if (len)
5503                 neg = 1;
5504
5505         cmp += len;
5506
5507         mutex_lock(&event_mutex);
5508         mutex_lock(&trace_types_lock);
5509
5510         ret = match_string(trace_options, -1, cmp);
5511         /* If no option could be set, test the specific tracer options */
5512         if (ret < 0)
5513                 ret = set_tracer_option(tr, cmp, neg);
5514         else
5515                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5516
5517         mutex_unlock(&trace_types_lock);
5518         mutex_unlock(&event_mutex);
5519
5520         /*
5521          * If the first trailing whitespace is replaced with '\0' by strstrip,
5522          * turn it back into a space.
5523          */
5524         if (orig_len > strlen(option))
5525                 option[strlen(option)] = ' ';
5526
5527         return ret;
5528 }
5529
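     /*
      * Walk the comma-separated boot-time option string and apply each
      * entry to the global trace array, restoring the commas so the buffer
      * can be parsed again later.
      */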
5530 static void __init apply_trace_boot_options(void)
5531 {
5532         char *buf = trace_boot_options_buf;
5533         char *option;
5534
5535         while (true) {
5536                 option = strsep(&buf, ",");
5537
5538                 if (!option)
5539                         break;
5540
5541                 if (*option)
5542                         trace_set_options(&global_trace, option);
5543
5544                 /* Put back the comma to allow this to be called again */
5545                 if (buf)
5546                         *(buf - 1) = ',';
5547         }
5548 }
5549
5550 static ssize_t
5551 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5552                         size_t cnt, loff_t *ppos)
5553 {
5554         struct seq_file *m = filp->private_data;
5555         struct trace_array *tr = m->private;
5556         char buf[64];
5557         int ret;
5558
5559         if (cnt >= sizeof(buf))
5560                 return -EINVAL;
5561
5562         if (copy_from_user(buf, ubuf, cnt))
5563                 return -EFAULT;
5564
5565         buf[cnt] = 0;
5566
5567         ret = trace_set_options(tr, buf);
5568         if (ret < 0)
5569                 return ret;
5570
5571         *ppos += cnt;
5572
5573         return cnt;
5574 }
5575
5576 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5577 {
5578         struct trace_array *tr = inode->i_private;
5579         int ret;
5580
5581         ret = tracing_check_open_get_tr(tr);
5582         if (ret)
5583                 return ret;
5584
5585         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5586         if (ret < 0)
5587                 trace_array_put(tr);
5588
5589         return ret;
5590 }
5591
5592 static const struct file_operations tracing_iter_fops = {
5593         .open           = tracing_trace_options_open,
5594         .read           = seq_read,
5595         .llseek         = seq_lseek,
5596         .release        = tracing_single_release_tr,
5597         .write          = tracing_trace_options_write,
5598 };
5599
5600 static const char readme_msg[] =
5601         "tracing mini-HOWTO:\n\n"
5602         "# echo 0 > tracing_on : quick way to disable tracing\n"
5603         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5604         " Important files:\n"
5605         "  trace\t\t\t- The static contents of the buffer\n"
5606         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5607         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5608         "  current_tracer\t- function and latency tracers\n"
5609         "  available_tracers\t- list of configured tracers for current_tracer\n"
5610         "  error_log\t- error log for failed commands (that support it)\n"
5611         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5612         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5613         "  trace_clock\t\t- change the clock used to order events\n"
5614         "       local:   Per cpu clock but may not be synced across CPUs\n"
5615         "      global:   Synced across CPUs but slows tracing down.\n"
5616         "     counter:   Not a clock, but just an increment\n"
5617         "      uptime:   Jiffy counter from time of boot\n"
5618         "        perf:   Same clock that perf events use\n"
5619 #ifdef CONFIG_X86_64
5620         "     x86-tsc:   TSC cycle counter\n"
5621 #endif
5622         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5623         "       delta:   Delta difference against a buffer-wide timestamp\n"
5624         "    absolute:   Absolute (standalone) timestamp\n"
5625         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5626         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5627         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5628         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5629         "\t\t\t  Remove sub-buffer with rmdir\n"
5630         "  trace_options\t\t- Set format or modify how tracing happens\n"
5631         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5632         "\t\t\t  option name\n"
5633         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5634 #ifdef CONFIG_DYNAMIC_FTRACE
5635         "\n  available_filter_functions - list of functions that can be filtered on\n"
5636         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5637         "\t\t\t  functions\n"
5638         "\t     accepts: func_full_name or glob-matching-pattern\n"
5639         "\t     modules: Can select a group via module\n"
5640         "\t      Format: :mod:<module-name>\n"
5641         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5642         "\t    triggers: a command to perform when function is hit\n"
5643         "\t      Format: <function>:<trigger>[:count]\n"
5644         "\t     trigger: traceon, traceoff\n"
5645         "\t\t      enable_event:<system>:<event>\n"
5646         "\t\t      disable_event:<system>:<event>\n"
5647 #ifdef CONFIG_STACKTRACE
5648         "\t\t      stacktrace\n"
5649 #endif
5650 #ifdef CONFIG_TRACER_SNAPSHOT
5651         "\t\t      snapshot\n"
5652 #endif
5653         "\t\t      dump\n"
5654         "\t\t      cpudump\n"
5655         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5656         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5657         "\t     The first one will disable tracing every time do_fault is hit\n"
5658         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5659         "\t       The first time do_trap is hit and it disables tracing, the\n"
5660         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5661         "\t       the counter will not decrement. It only decrements when the\n"
5662         "\t       trigger did work\n"
5663         "\t     To remove trigger without count:\n"
5664         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5665         "\t     To remove trigger with a count:\n"
5666         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5667         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5668         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5669         "\t    modules: Can select a group via module command :mod:\n"
5670         "\t    Does not accept triggers\n"
5671 #endif /* CONFIG_DYNAMIC_FTRACE */
5672 #ifdef CONFIG_FUNCTION_TRACER
5673         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5674         "\t\t    (function)\n"
5675         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5676         "\t\t    (function)\n"
5677 #endif
5678 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5679         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5680         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5681         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5682 #endif
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5685         "\t\t\t  snapshot buffer. Read the contents for more\n"
5686         "\t\t\t  information\n"
5687 #endif
5688 #ifdef CONFIG_STACK_TRACER
5689         "  stack_trace\t\t- Shows the max stack trace when active\n"
5690         "  stack_max_size\t- Shows current max stack size that was traced\n"
5691         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5692         "\t\t\t  new trace)\n"
5693 #ifdef CONFIG_DYNAMIC_FTRACE
5694         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5695         "\t\t\t  traces\n"
5696 #endif
5697 #endif /* CONFIG_STACK_TRACER */
5698 #ifdef CONFIG_DYNAMIC_EVENTS
5699         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5700         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5701 #endif
5702 #ifdef CONFIG_KPROBE_EVENTS
5703         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5704         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5705 #endif
5706 #ifdef CONFIG_UPROBE_EVENTS
5707         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5708         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5709 #endif
5710 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5711     defined(CONFIG_FPROBE_EVENTS)
5712         "\t  accepts: event-definitions (one definition per line)\n"
5713 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5714         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5715         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5716 #endif
5717 #ifdef CONFIG_FPROBE_EVENTS
5718         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5719         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5720 #endif
5721 #ifdef CONFIG_HIST_TRIGGERS
5722         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5723 #endif
5724         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5725         "\t           -:[<group>/][<event>]\n"
5726 #ifdef CONFIG_KPROBE_EVENTS
5727         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5728         "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5729 #endif
5730 #ifdef CONFIG_UPROBE_EVENTS
5731         "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5732 #endif
5733         "\t     args: <name>=fetcharg[:type]\n"
5734         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5735 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5736 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5737         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5738         "\t           <argname>[->field[->field|.field...]],\n"
5739 #else
5740         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5741 #endif
5742 #else
5743         "\t           $stack<index>, $stack, $retval, $comm,\n"
5744 #endif
5745         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5746         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5747         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5748         "\t           symstr, <type>\\[<array-size>\\]\n"
5749 #ifdef CONFIG_HIST_TRIGGERS
5750         "\t    field: <stype> <name>;\n"
5751         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5752         "\t           [unsigned] char/int/long\n"
5753 #endif
5754         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5755         "\t            of the <attached-group>/<attached-event>.\n"
5756 #endif
5757         "  events/\t\t- Directory containing all trace event subsystems:\n"
5758         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5759         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5760         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5761         "\t\t\t  events\n"
5762         "      filter\t\t- If set, only events passing filter are traced\n"
5763         "  events/<system>/<event>/\t- Directory containing control files for\n"
5764         "\t\t\t  <event>:\n"
5765         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5766         "      filter\t\t- If set, only events passing filter are traced\n"
5767         "      trigger\t\t- If set, a command to perform when event is hit\n"
5768         "\t    Format: <trigger>[:count][if <filter>]\n"
5769         "\t   trigger: traceon, traceoff\n"
5770         "\t            enable_event:<system>:<event>\n"
5771         "\t            disable_event:<system>:<event>\n"
5772 #ifdef CONFIG_HIST_TRIGGERS
5773         "\t            enable_hist:<system>:<event>\n"
5774         "\t            disable_hist:<system>:<event>\n"
5775 #endif
5776 #ifdef CONFIG_STACKTRACE
5777         "\t\t    stacktrace\n"
5778 #endif
5779 #ifdef CONFIG_TRACER_SNAPSHOT
5780         "\t\t    snapshot\n"
5781 #endif
5782 #ifdef CONFIG_HIST_TRIGGERS
5783         "\t\t    hist (see below)\n"
5784 #endif
5785         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5786         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5787         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5788         "\t                  events/block/block_unplug/trigger\n"
5789         "\t   The first disables tracing every time block_unplug is hit.\n"
5790         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5791         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5792         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5793         "\t   Like function triggers, the counter is only decremented if it\n"
5794         "\t    enabled or disabled tracing.\n"
5795         "\t   To remove a trigger without a count:\n"
5796         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5797         "\t   To remove a trigger with a count:\n"
5798         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5799         "\t   Filters can be ignored when removing a trigger.\n"
5800 #ifdef CONFIG_HIST_TRIGGERS
5801         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5802         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5803         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5804         "\t            [:values=<field1[,field2,...]>]\n"
5805         "\t            [:sort=<field1[,field2,...]>]\n"
5806         "\t            [:size=#entries]\n"
5807         "\t            [:pause][:continue][:clear]\n"
5808         "\t            [:name=histname1]\n"
5809         "\t            [:nohitcount]\n"
5810         "\t            [:<handler>.<action>]\n"
5811         "\t            [if <filter>]\n\n"
5812         "\t    Note, special fields can be used as well:\n"
5813         "\t            common_timestamp - to record current timestamp\n"
5814         "\t            common_cpu - to record the CPU the event happened on\n"
5815         "\n"
5816         "\t    A hist trigger variable can be:\n"
5817         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5818         "\t        - a reference to another variable e.g. y=$x,\n"
5819         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5820         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5821         "\n"
5822         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5823         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5824         "\t    variable reference, field or numeric literal.\n"
5825         "\n"
5826         "\t    When a matching event is hit, an entry is added to a hash\n"
5827         "\t    table using the key(s) and value(s) named, and the value of a\n"
5828         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5829         "\t    correspond to fields in the event's format description.  Keys\n"
5830         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5831         "\t    Compound keys consisting of up to two fields can be specified\n"
5832         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5833         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5834         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5835         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5836         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5837         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5838         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5839         "\t    its histogram data will be shared with other triggers of the\n"
5840         "\t    same name, and trigger hits will update this common data.\n\n"
5841         "\t    Reading the 'hist' file for the event will dump the hash\n"
5842         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5843         "\t    triggers attached to an event, there will be a table for each\n"
5844         "\t    trigger in the output.  The table displayed for a named\n"
5845         "\t    trigger will be the same as any other instance having the\n"
5846         "\t    same name.  The default format used to display a given field\n"
5847         "\t    can be modified by appending any of the following modifiers\n"
5848         "\t    to the field name, as applicable:\n\n"
5849         "\t            .hex        display a number as a hex value\n"
5850         "\t            .sym        display an address as a symbol\n"
5851         "\t            .sym-offset display an address as a symbol and offset\n"
5852         "\t            .execname   display a common_pid as a program name\n"
5853         "\t            .syscall    display a syscall id as a syscall name\n"
5854         "\t            .log2       display log2 value rather than raw number\n"
5855         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5856         "\t            .usecs      display a common_timestamp in microseconds\n"
5857         "\t            .percent    display a number as a percentage value\n"
5858         "\t            .graph      display a bar-graph of a value\n\n"
5859         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5860         "\t    trigger or to start a hist trigger but not log any events\n"
5861         "\t    until told to do so.  'continue' can be used to start or\n"
5862         "\t    restart a paused hist trigger.\n\n"
5863         "\t    The 'clear' parameter will clear the contents of a running\n"
5864         "\t    hist trigger and leave its current paused/active state\n"
5865         "\t    unchanged.\n\n"
5866         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5867         "\t    raw hitcount in the histogram.\n\n"
5868         "\t    The enable_hist and disable_hist triggers can be used to\n"
5869         "\t    have one event conditionally start and stop another event's\n"
5870         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5871         "\t    the enable_event and disable_event triggers.\n\n"
5872         "\t    Hist trigger handlers and actions are executed whenever a\n"
5873         "\t    histogram entry is added or updated.  They take the form:\n\n"
5874         "\t        <handler>.<action>\n\n"
5875         "\t    The available handlers are:\n\n"
5876         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5877         "\t        onmax(var)               - invoke if var exceeds current max\n"
5878         "\t        onchange(var)            - invoke action if var changes\n\n"
5879         "\t    The available actions are:\n\n"
5880         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5881         "\t        save(field,...)                      - save current event fields\n"
5882 #ifdef CONFIG_TRACER_SNAPSHOT
5883         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5884 #endif
5885 #ifdef CONFIG_SYNTH_EVENTS
5886         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5887         "\t  Write into this file to define/undefine new synthetic events.\n"
5888         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5889 #endif
5890 #endif
5891 ;
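     /*
      * A minimal usage sketch of the interface documented above, assuming
      * tracefs is mounted at /sys/kernel/tracing:
      *
      *   echo function > current_tracer
      *   echo 1 > tracing_on
      *   cat trace_pipe
      *   echo 0 > tracing_on
      *   echo > trace        (clear the buffer)
      */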
5892
5893 static ssize_t
5894 tracing_readme_read(struct file *filp, char __user *ubuf,
5895                        size_t cnt, loff_t *ppos)
5896 {
5897         return simple_read_from_buffer(ubuf, cnt, ppos,
5898                                         readme_msg, strlen(readme_msg));
5899 }
5900
5901 static const struct file_operations tracing_readme_fops = {
5902         .open           = tracing_open_generic,
5903         .read           = tracing_readme_read,
5904         .llseek         = generic_file_llseek,
5905 };
5906
5907 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5908 {
5909         int pid = ++(*pos);
5910
5911         return trace_find_tgid_ptr(pid);
5912 }
5913
5914 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5915 {
5916         int pid = *pos;
5917
5918         return trace_find_tgid_ptr(pid);
5919 }
5920
5921 static void saved_tgids_stop(struct seq_file *m, void *v)
5922 {
5923 }
5924
5925 static int saved_tgids_show(struct seq_file *m, void *v)
5926 {
5927         int *entry = (int *)v;
5928         int pid = entry - tgid_map;
5929         int tgid = *entry;
5930
5931         if (tgid == 0)
5932                 return SEQ_SKIP;
5933
5934         seq_printf(m, "%d %d\n", pid, tgid);
5935         return 0;
5936 }
5937
5938 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5939         .start          = saved_tgids_start,
5940         .stop           = saved_tgids_stop,
5941         .next           = saved_tgids_next,
5942         .show           = saved_tgids_show,
5943 };
5944
5945 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5946 {
5947         int ret;
5948
5949         ret = tracing_check_open_get_tr(NULL);
5950         if (ret)
5951                 return ret;
5952
5953         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5954 }
5955
5956
5957 static const struct file_operations tracing_saved_tgids_fops = {
5958         .open           = tracing_saved_tgids_open,
5959         .read           = seq_read,
5960         .llseek         = seq_lseek,
5961         .release        = seq_release,
5962 };
5963
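     /*
      * seq_file iterator for saved_cmdlines: walk map_cmdline_to_pid and
      * skip unused slots, holding trace_cmdline_lock with preemption
      * disabled between start and stop.
      */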
5964 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5965 {
5966         unsigned int *ptr = v;
5967
5968         if (*pos || m->count)
5969                 ptr++;
5970
5971         (*pos)++;
5972
5973         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5974              ptr++) {
5975                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5976                         continue;
5977
5978                 return ptr;
5979         }
5980
5981         return NULL;
5982 }
5983
5984 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5985 {
5986         void *v;
5987         loff_t l = 0;
5988
5989         preempt_disable();
5990         arch_spin_lock(&trace_cmdline_lock);
5991
5992         v = &savedcmd->map_cmdline_to_pid[0];
5993         while (l <= *pos) {
5994                 v = saved_cmdlines_next(m, v, &l);
5995                 if (!v)
5996                         return NULL;
5997         }
5998
5999         return v;
6000 }
6001
6002 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6003 {
6004         arch_spin_unlock(&trace_cmdline_lock);
6005         preempt_enable();
6006 }
6007
6008 static int saved_cmdlines_show(struct seq_file *m, void *v)
6009 {
6010         char buf[TASK_COMM_LEN];
6011         unsigned int *pid = v;
6012
6013         __trace_find_cmdline(*pid, buf);
6014         seq_printf(m, "%d %s\n", *pid, buf);
6015         return 0;
6016 }
6017
6018 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6019         .start          = saved_cmdlines_start,
6020         .next           = saved_cmdlines_next,
6021         .stop           = saved_cmdlines_stop,
6022         .show           = saved_cmdlines_show,
6023 };
6024
6025 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6026 {
6027         int ret;
6028
6029         ret = tracing_check_open_get_tr(NULL);
6030         if (ret)
6031                 return ret;
6032
6033         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6034 }
6035
6036 static const struct file_operations tracing_saved_cmdlines_fops = {
6037         .open           = tracing_saved_cmdlines_open,
6038         .read           = seq_read,
6039         .llseek         = seq_lseek,
6040         .release        = seq_release,
6041 };
6042
6043 static ssize_t
6044 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6045                                  size_t cnt, loff_t *ppos)
6046 {
6047         char buf[64];
6048         int r;
6049
6050         preempt_disable();
6051         arch_spin_lock(&trace_cmdline_lock);
6052         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6053         arch_spin_unlock(&trace_cmdline_lock);
6054         preempt_enable();
6055
6056         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6057 }
6058
6059 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6060 {
6061         kfree(s->saved_cmdlines);
6062         kfree(s->map_cmdline_to_pid);
6063         kfree(s);
6064 }
6065
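     /*
      * Resize the saved_cmdlines buffer to @val entries: allocate the new
      * buffer first, swap the pointer under trace_cmdline_lock, then free
      * the old buffer outside the lock.
      */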
6066 static int tracing_resize_saved_cmdlines(unsigned int val)
6067 {
6068         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6069
6070         s = kmalloc(sizeof(*s), GFP_KERNEL);
6071         if (!s)
6072                 return -ENOMEM;
6073
6074         if (allocate_cmdlines_buffer(val, s) < 0) {
6075                 kfree(s);
6076                 return -ENOMEM;
6077         }
6078
6079         preempt_disable();
6080         arch_spin_lock(&trace_cmdline_lock);
6081         savedcmd_temp = savedcmd;
6082         savedcmd = s;
6083         arch_spin_unlock(&trace_cmdline_lock);
6084         preempt_enable();
6085         free_saved_cmdlines_buffer(savedcmd_temp);
6086
6087         return 0;
6088 }
6089
6090 static ssize_t
6091 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6092                                   size_t cnt, loff_t *ppos)
6093 {
6094         unsigned long val;
6095         int ret;
6096
6097         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6098         if (ret)
6099                 return ret;
6100
6101         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
6102         if (!val || val > PID_MAX_DEFAULT)
6103                 return -EINVAL;
6104
6105         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6106         if (ret < 0)
6107                 return ret;
6108
6109         *ppos += cnt;
6110
6111         return cnt;
6112 }
6113
6114 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6115         .open           = tracing_open_generic,
6116         .read           = tracing_saved_cmdlines_size_read,
6117         .write          = tracing_saved_cmdlines_size_write,
6118 };
6119
6120 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
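     /*
      * Helper for the eval_map seq_file iterator: when @ptr has landed on
      * a head/tail marker (no eval_string), follow the tail pointer to the
      * next array chunk and step past its head item.
      */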
6121 static union trace_eval_map_item *
6122 update_eval_map(union trace_eval_map_item *ptr)
6123 {
6124         if (!ptr->map.eval_string) {
6125                 if (ptr->tail.next) {
6126                         ptr = ptr->tail.next;
6127                         /* Set ptr to the next real item (skip head) */
6128                         ptr++;
6129                 } else
6130                         return NULL;
6131         }
6132         return ptr;
6133 }
6134
6135 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6136 {
6137         union trace_eval_map_item *ptr = v;
6138
6139         /*
6140          * Paranoid! If ptr points to end, we don't want to increment past it.
6141          * This really should never happen.
6142          */
6143         (*pos)++;
6144         ptr = update_eval_map(ptr);
6145         if (WARN_ON_ONCE(!ptr))
6146                 return NULL;
6147
6148         ptr++;
6149         ptr = update_eval_map(ptr);
6150
6151         return ptr;
6152 }
6153
6154 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6155 {
6156         union trace_eval_map_item *v;
6157         loff_t l = 0;
6158
6159         mutex_lock(&trace_eval_mutex);
6160
6161         v = trace_eval_maps;
6162         if (v)
6163                 v++;
6164
6165         while (v && l < *pos) {
6166                 v = eval_map_next(m, v, &l);
6167         }
6168
6169         return v;
6170 }
6171
6172 static void eval_map_stop(struct seq_file *m, void *v)
6173 {
6174         mutex_unlock(&trace_eval_mutex);
6175 }
6176
6177 static int eval_map_show(struct seq_file *m, void *v)
6178 {
6179         union trace_eval_map_item *ptr = v;
6180
6181         seq_printf(m, "%s %ld (%s)\n",
6182                    ptr->map.eval_string, ptr->map.eval_value,
6183                    ptr->map.system);
6184
6185         return 0;
6186 }
6187
6188 static const struct seq_operations tracing_eval_map_seq_ops = {
6189         .start          = eval_map_start,
6190         .next           = eval_map_next,
6191         .stop           = eval_map_stop,
6192         .show           = eval_map_show,
6193 };
6194
6195 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6196 {
6197         int ret;
6198
6199         ret = tracing_check_open_get_tr(NULL);
6200         if (ret)
6201                 return ret;
6202
6203         return seq_open(filp, &tracing_eval_map_seq_ops);
6204 }
6205
6206 static const struct file_operations tracing_eval_map_fops = {
6207         .open           = tracing_eval_map_open,
6208         .read           = seq_read,
6209         .llseek         = seq_lseek,
6210         .release        = seq_release,
6211 };
6212
6213 static inline union trace_eval_map_item *
6214 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6215 {
6216         /* Return tail of array given the head */
6217         return ptr + ptr->head.length + 1;
6218 }
6219
6220 static void
6221 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6222                            int len)
6223 {
6224         struct trace_eval_map **stop;
6225         struct trace_eval_map **map;
6226         union trace_eval_map_item *map_array;
6227         union trace_eval_map_item *ptr;
6228
6229         stop = start + len;
6230
6231         /*
6232          * The trace_eval_maps array contains the maps plus a head and a tail
6233          * item: the head holds the module and the array length, and the tail
6234          * holds a pointer to the next list (see the layout sketch below).
6235          */
6236         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6237         if (!map_array) {
6238                 pr_warn("Unable to allocate trace eval mapping\n");
6239                 return;
6240         }
6241
6242         mutex_lock(&trace_eval_mutex);
6243
6244         if (!trace_eval_maps)
6245                 trace_eval_maps = map_array;
6246         else {
6247                 ptr = trace_eval_maps;
6248                 for (;;) {
6249                         ptr = trace_eval_jmp_to_tail(ptr);
6250                         if (!ptr->tail.next)
6251                                 break;
6252                         ptr = ptr->tail.next;
6254                 }
6255                 ptr->tail.next = map_array;
6256         }
6257         map_array->head.mod = mod;
6258         map_array->head.length = len;
6259         map_array++;
6260
6261         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6262                 map_array->map = **map;
6263                 map_array++;
6264         }
6265         memset(map_array, 0, sizeof(*map_array));
6266
6267         mutex_unlock(&trace_eval_mutex);
6268 }
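
/*
 * Layout sketch of one map_array chunk allocated above, for a module
 * with N eval maps:
 *
 *   [ head: mod, length = N ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *
 * trace_eval_jmp_to_tail() steps from the head over the N maps to the
 * tail, and tail.next chains the per-module chunks into a single list.
 */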
6269
6270 static void trace_create_eval_file(struct dentry *d_tracer)
6271 {
6272         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6273                           NULL, &tracing_eval_map_fops);
6274 }
6275
6276 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6277 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6278 static inline void trace_insert_eval_map_file(struct module *mod,
6279                               struct trace_eval_map **start, int len) { }
6280 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6281
6282 static void trace_insert_eval_map(struct module *mod,
6283                                   struct trace_eval_map **start, int len)
6284 {
6285         struct trace_eval_map **map;
6286
6287         if (len <= 0)
6288                 return;
6289
6290         map = start;
6291
6292         trace_event_eval_update(map, len);
6293
6294         trace_insert_eval_map_file(mod, start, len);
6295 }
6296
6297 static ssize_t
6298 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6299                        size_t cnt, loff_t *ppos)
6300 {
6301         struct trace_array *tr = filp->private_data;
6302         char buf[MAX_TRACER_SIZE+2];
6303         int r;
6304
6305         mutex_lock(&trace_types_lock);
6306         r = sprintf(buf, "%s\n", tr->current_trace->name);
6307         mutex_unlock(&trace_types_lock);
6308
6309         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6310 }
6311
6312 int tracer_init(struct tracer *t, struct trace_array *tr)
6313 {
6314         tracing_reset_online_cpus(&tr->array_buffer);
6315         return t->init(tr);
6316 }
6317
6318 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6319 {
6320         int cpu;
6321
6322         for_each_tracing_cpu(cpu)
6323                 per_cpu_ptr(buf->data, cpu)->entries = val;
6324 }
6325
6326 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6327 {
6328         if (cpu == RING_BUFFER_ALL_CPUS) {
6329                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6330         } else {
6331                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6332         }
6333 }
6334
6335 #ifdef CONFIG_TRACER_MAX_TRACE
6336 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6337 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6338                                         struct array_buffer *size_buf, int cpu_id)
6339 {
6340         int cpu, ret = 0;
6341
6342         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6343                 for_each_tracing_cpu(cpu) {
6344                         ret = ring_buffer_resize(trace_buf->buffer,
6345                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6346                         if (ret < 0)
6347                                 break;
6348                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6349                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6350                 }
6351         } else {
6352                 ret = ring_buffer_resize(trace_buf->buffer,
6353                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6354                 if (ret == 0)
6355                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6356                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6357         }
6358
6359         return ret;
6360 }
6361 #endif /* CONFIG_TRACER_MAX_TRACE */
6362
6363 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6364                                         unsigned long size, int cpu)
6365 {
6366         int ret;
6367
6368         /*
6369          * If kernel or user changes the size of the ring buffer
6370          * we use the size that was given, and we can forget about
6371          * expanding it later.
6372          */
6373         trace_set_ring_buffer_expanded(tr);
6374
6375         /* May be called before buffers are initialized */
6376         if (!tr->array_buffer.buffer)
6377                 return 0;
6378
6379         /* Do not allow tracing while resizing ring buffer */
6380         tracing_stop_tr(tr);
6381
6382         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6383         if (ret < 0)
6384                 goto out_start;
6385
6386 #ifdef CONFIG_TRACER_MAX_TRACE
6387         if (!tr->allocated_snapshot)
6388                 goto out;
6389
6390         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6391         if (ret < 0) {
6392                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6393                                                      &tr->array_buffer, cpu);
6394                 if (r < 0) {
6395                         /*
6396                          * AARGH! We are left with a max buffer of a
6397                          * different size!
6398                          * The max buffer is our "snapshot" buffer.
6399                          * When a tracer needs a snapshot (one of the
6400                          * latency tracers), it swaps the max buffer
6401                          * with the saved snapshot. We succeeded in
6402                          * updating the size of the main buffer, but failed
6403                          * to update the size of the max buffer. And when we
6404                          * tried to reset the main buffer to the original
6405                          * size, we failed there too. This is very unlikely
6406                          * to happen, but if it does, warn and kill all
6407                          * tracing.
6408                          */
6409                         WARN_ON(1);
6410                         tracing_disabled = 1;
6411                 }
6412                 goto out_start;
6413         }
6414
6415         update_buffer_entries(&tr->max_buffer, cpu);
6416
6417  out:
6418 #endif /* CONFIG_TRACER_MAX_TRACE */
6419
6420         update_buffer_entries(&tr->array_buffer, cpu);
6421  out_start:
6422         tracing_start_tr(tr);
6423         return ret;
6424 }
6425
6426 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6427                                   unsigned long size, int cpu_id)
6428 {
6429         int ret;
6430
6431         mutex_lock(&trace_types_lock);
6432
6433         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6434                 /* make sure this CPU is enabled in the mask */
6435                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6436                         ret = -EINVAL;
6437                         goto out;
6438                 }
6439         }
6440
6441         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6442         if (ret < 0)
6443                 ret = -ENOMEM;
6444
6445 out:
6446         mutex_unlock(&trace_types_lock);
6447
6448         return ret;
6449 }
6450
6452 /**
6453  * tracing_update_buffers - used by tracing facility to expand ring buffers
6454  * @tr: The tracing instance
6455  *
6456  * To save memory when tracing is never used on a system that has it
6457  * configured in, the ring buffers are initially set to a minimum size.
6458  * Once a user starts to use the tracing facility, they need to grow
6459  * to their default size.
6460  *
6461  * This function is to be called when a tracer is about to be used.
6462  */
6463 int tracing_update_buffers(struct trace_array *tr)
6464 {
6465         int ret = 0;
6466
6467         mutex_lock(&trace_types_lock);
6468         if (!tr->ring_buffer_expanded)
6469                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6470                                                 RING_BUFFER_ALL_CPUS);
6471         mutex_unlock(&trace_types_lock);
6472
6473         return ret;
6474 }
6475
6476 struct trace_option_dentry;
6477
6478 static void
6479 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6480
6481 /*
6482  * Used to clear out the tracer before deletion of an instance.
6483  * Must have trace_types_lock held.
6484  */
6485 static void tracing_set_nop(struct trace_array *tr)
6486 {
6487         if (tr->current_trace == &nop_trace)
6488                 return;
6489
6490         tr->current_trace->enabled--;
6491
6492         if (tr->current_trace->reset)
6493                 tr->current_trace->reset(tr);
6494
6495         tr->current_trace = &nop_trace;
6496 }
6497
6498 static bool tracer_options_updated;
6499
6500 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6501 {
6502         /* Only enable if the directory has been created already. */
6503         if (!tr->dir)
6504                 return;
6505
6506         /* Only create trace option files after update_tracer_options finish */
6507         if (!tracer_options_updated)
6508                 return;
6509
6510         create_trace_option_files(tr, t);
6511 }
6512
6513 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6514 {
6515         struct tracer *t;
6516 #ifdef CONFIG_TRACER_MAX_TRACE
6517         bool had_max_tr;
6518 #endif
6519         int ret = 0;
6520
6521         mutex_lock(&trace_types_lock);
6522
6523         if (!tr->ring_buffer_expanded) {
6524                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6525                                                 RING_BUFFER_ALL_CPUS);
6526                 if (ret < 0)
6527                         goto out;
6528                 ret = 0;
6529         }
6530
6531         for (t = trace_types; t; t = t->next) {
6532                 if (strcmp(t->name, buf) == 0)
6533                         break;
6534         }
6535         if (!t) {
6536                 ret = -EINVAL;
6537                 goto out;
6538         }
6539         if (t == tr->current_trace)
6540                 goto out;
6541
6542 #ifdef CONFIG_TRACER_SNAPSHOT
6543         if (t->use_max_tr) {
6544                 local_irq_disable();
6545                 arch_spin_lock(&tr->max_lock);
6546                 if (tr->cond_snapshot)
6547                         ret = -EBUSY;
6548                 arch_spin_unlock(&tr->max_lock);
6549                 local_irq_enable();
6550                 if (ret)
6551                         goto out;
6552         }
6553 #endif
6554         /* Some tracers won't work on kernel command line */
6555         if (system_state < SYSTEM_RUNNING && t->noboot) {
6556                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6557                         t->name);
6558                 goto out;
6559         }
6560
6561         /* Some tracers are only allowed for the top level buffer */
6562         if (!trace_ok_for_array(t, tr)) {
6563                 ret = -EINVAL;
6564                 goto out;
6565         }
6566
6567         /* If trace pipe files are being read, we can't change the tracer */
6568         if (tr->trace_ref) {
6569                 ret = -EBUSY;
6570                 goto out;
6571         }
6572
6573         trace_branch_disable();
6574
6575         tr->current_trace->enabled--;
6576
6577         if (tr->current_trace->reset)
6578                 tr->current_trace->reset(tr);
6579
6580 #ifdef CONFIG_TRACER_MAX_TRACE
6581         had_max_tr = tr->current_trace->use_max_tr;
6582
6583         /* Current trace needs to be nop_trace before synchronize_rcu */
6584         tr->current_trace = &nop_trace;
6585
6586         if (had_max_tr && !t->use_max_tr) {
6587                 /*
6588                  * We need to make sure that the update_max_tr sees that
6589                  * current_trace changed to nop_trace to keep it from
6590                  * swapping the buffers after we resize it.
6591                  * The update_max_tr is called with interrupts disabled,
6592                  * so a synchronize_rcu() is sufficient.
6593                  */
6594                 synchronize_rcu();
6595                 free_snapshot(tr);
6596         }
6597
6598         if (t->use_max_tr && !tr->allocated_snapshot) {
6599                 ret = tracing_alloc_snapshot_instance(tr);
6600                 if (ret < 0)
6601                         goto out;
6602         }
6603 #else
6604         tr->current_trace = &nop_trace;
6605 #endif
6606
6607         if (t->init) {
6608                 ret = tracer_init(t, tr);
6609                 if (ret)
6610                         goto out;
6611         }
6612
6613         tr->current_trace = t;
6614         tr->current_trace->enabled++;
6615         trace_branch_enable(tr);
6616  out:
6617         mutex_unlock(&trace_types_lock);
6618
6619         return ret;
6620 }
6621
6622 static ssize_t
6623 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6624                         size_t cnt, loff_t *ppos)
6625 {
6626         struct trace_array *tr = filp->private_data;
6627         char buf[MAX_TRACER_SIZE+1];
6628         char *name;
6629         size_t ret;
6630         int err;
6631
6632         ret = cnt;
6633
6634         if (cnt > MAX_TRACER_SIZE)
6635                 cnt = MAX_TRACER_SIZE;
6636
6637         if (copy_from_user(buf, ubuf, cnt))
6638                 return -EFAULT;
6639
6640         buf[cnt] = 0;
6641
6642         name = strim(buf);
6643
6644         err = tracing_set_tracer(tr, name);
6645         if (err)
6646                 return err;
6647
6648         *ppos += ret;
6649
6650         return ret;
6651 }
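
/*
 * Usage sketch for the "current_tracer" tracefs file written above,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # cat /sys/kernel/tracing/current_tracer
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * Writing the name of a registered tracer switches to it via
 * tracing_set_tracer(); "function" is only available when the function
 * tracer is configured in, while "nop" always exists.
 */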
6652
6653 static ssize_t
6654 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6655                    size_t cnt, loff_t *ppos)
6656 {
6657         char buf[64];
6658         int r;
6659
6660         r = snprintf(buf, sizeof(buf), "%ld\n",
6661                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6662         if (r > sizeof(buf))
6663                 r = sizeof(buf);
6664         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6665 }
6666
6667 static ssize_t
6668 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6669                     size_t cnt, loff_t *ppos)
6670 {
6671         unsigned long val;
6672         int ret;
6673
6674         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6675         if (ret)
6676                 return ret;
6677
6678         *ptr = val * 1000;
6679
6680         return cnt;
6681 }
6682
6683 static ssize_t
6684 tracing_thresh_read(struct file *filp, char __user *ubuf,
6685                     size_t cnt, loff_t *ppos)
6686 {
6687         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6688 }
6689
6690 static ssize_t
6691 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6692                      size_t cnt, loff_t *ppos)
6693 {
6694         struct trace_array *tr = filp->private_data;
6695         int ret;
6696
6697         mutex_lock(&trace_types_lock);
6698         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6699         if (ret < 0)
6700                 goto out;
6701
6702         if (tr->current_trace->update_thresh) {
6703                 ret = tr->current_trace->update_thresh(tr);
6704                 if (ret < 0)
6705                         goto out;
6706         }
6707
6708         ret = cnt;
6709 out:
6710         mutex_unlock(&trace_types_lock);
6711
6712         return ret;
6713 }
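
/*
 * Usage sketch for the "tracing_thresh" tracefs file handled above,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * The value is given in microseconds and stored in nanoseconds by
 * tracing_nsecs_write(); tracers that honor it only record latencies
 * above the threshold, and 0 disables it.
 */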
6714
6715 #ifdef CONFIG_TRACER_MAX_TRACE
6716
6717 static ssize_t
6718 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6719                      size_t cnt, loff_t *ppos)
6720 {
6721         struct trace_array *tr = filp->private_data;
6722
6723         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6724 }
6725
6726 static ssize_t
6727 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6728                       size_t cnt, loff_t *ppos)
6729 {
6730         struct trace_array *tr = filp->private_data;
6731
6732         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6733 }
6734
6735 #endif
6736
6737 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6738 {
6739         if (cpu == RING_BUFFER_ALL_CPUS) {
6740                 if (cpumask_empty(tr->pipe_cpumask)) {
6741                         cpumask_setall(tr->pipe_cpumask);
6742                         return 0;
6743                 }
6744         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6745                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6746                 return 0;
6747         }
6748         return -EBUSY;
6749 }
6750
6751 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6752 {
6753         if (cpu == RING_BUFFER_ALL_CPUS) {
6754                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6755                 cpumask_clear(tr->pipe_cpumask);
6756         } else {
6757                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6758                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6759         }
6760 }
6761
6762 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6763 {
6764         struct trace_array *tr = inode->i_private;
6765         struct trace_iterator *iter;
6766         int cpu;
6767         int ret;
6768
6769         ret = tracing_check_open_get_tr(tr);
6770         if (ret)
6771                 return ret;
6772
6773         mutex_lock(&trace_types_lock);
6774         cpu = tracing_get_cpu(inode);
6775         ret = open_pipe_on_cpu(tr, cpu);
6776         if (ret)
6777                 goto fail_pipe_on_cpu;
6778
6779         /* create a buffer to store the information to pass to userspace */
6780         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6781         if (!iter) {
6782                 ret = -ENOMEM;
6783                 goto fail_alloc_iter;
6784         }
6785
6786         trace_seq_init(&iter->seq);
6787         iter->trace = tr->current_trace;
6788
6789         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6790                 ret = -ENOMEM;
6791                 goto fail;
6792         }
6793
6794         /* trace pipe does not show start of buffer */
6795         cpumask_setall(iter->started);
6796
6797         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6798                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6799
6800         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6801         if (trace_clocks[tr->clock_id].in_ns)
6802                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6803
6804         iter->tr = tr;
6805         iter->array_buffer = &tr->array_buffer;
6806         iter->cpu_file = cpu;
6807         mutex_init(&iter->mutex);
6808         filp->private_data = iter;
6809
6810         if (iter->trace->pipe_open)
6811                 iter->trace->pipe_open(iter);
6812
6813         nonseekable_open(inode, filp);
6814
6815         tr->trace_ref++;
6816
6817         mutex_unlock(&trace_types_lock);
6818         return ret;
6819
6820 fail:
6821         kfree(iter);
6822 fail_alloc_iter:
6823         close_pipe_on_cpu(tr, cpu);
6824 fail_pipe_on_cpu:
6825         __trace_array_put(tr);
6826         mutex_unlock(&trace_types_lock);
6827         return ret;
6828 }
6829
6830 static int tracing_release_pipe(struct inode *inode, struct file *file)
6831 {
6832         struct trace_iterator *iter = file->private_data;
6833         struct trace_array *tr = inode->i_private;
6834
6835         mutex_lock(&trace_types_lock);
6836
6837         tr->trace_ref--;
6838
6839         if (iter->trace->pipe_close)
6840                 iter->trace->pipe_close(iter);
6841         close_pipe_on_cpu(tr, iter->cpu_file);
6842         mutex_unlock(&trace_types_lock);
6843
6844         free_trace_iter_content(iter);
6845         kfree(iter);
6846
6847         trace_array_put(tr);
6848
6849         return 0;
6850 }
6851
6852 static __poll_t
6853 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6854 {
6855         struct trace_array *tr = iter->tr;
6856
6857         /* Iterators are static; they should be either filled or empty */
6858         if (trace_buffer_iter(iter, iter->cpu_file))
6859                 return EPOLLIN | EPOLLRDNORM;
6860
6861         if (tr->trace_flags & TRACE_ITER_BLOCK)
6862                 /*
6863                  * Always select as readable when in blocking mode
6864                  */
6865                 return EPOLLIN | EPOLLRDNORM;
6866         else
6867                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6868                                              filp, poll_table, iter->tr->buffer_percent);
6869 }
6870
6871 static __poll_t
6872 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6873 {
6874         struct trace_iterator *iter = filp->private_data;
6875
6876         return trace_poll(iter, filp, poll_table);
6877 }
6878
6879 /* Must be called with iter->mutex held. */
6880 static int tracing_wait_pipe(struct file *filp)
6881 {
6882         struct trace_iterator *iter = filp->private_data;
6883         int ret;
6884
6885         while (trace_empty(iter)) {
6886
6887                 if ((filp->f_flags & O_NONBLOCK)) {
6888                         return -EAGAIN;
6889                 }
6890
6891                 /*
6892                  * We block until we read something and tracing is disabled.
6893                  * We still block if tracing is disabled, but we have never
6894                  * read anything. This allows a user to cat this file, and
6895                  * then enable tracing. But after we have read something,
6896                  * we give an EOF when tracing is again disabled.
6897                  *
6898                  * iter->pos will be 0 if we haven't read anything.
6899                  */
6900                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6901                         break;
6902
6903                 mutex_unlock(&iter->mutex);
6904
6905                 ret = wait_on_pipe(iter, 0);
6906
6907                 mutex_lock(&iter->mutex);
6908
6909                 if (ret)
6910                         return ret;
6911         }
6912
6913         return 1;
6914 }
6915
6916 /*
6917  * Consumer reader.
6918  */
6919 static ssize_t
6920 tracing_read_pipe(struct file *filp, char __user *ubuf,
6921                   size_t cnt, loff_t *ppos)
6922 {
6923         struct trace_iterator *iter = filp->private_data;
6924         ssize_t sret;
6925
6926         /*
6927          * Avoid more than one consumer on a single file descriptor.
6928          * This is just a matter of trace coherency; the ring buffer itself
6929          * is protected.
6930          */
6931         mutex_lock(&iter->mutex);
6932
6933         /* return any leftover data */
6934         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6935         if (sret != -EBUSY)
6936                 goto out;
6937
6938         trace_seq_init(&iter->seq);
6939
6940         if (iter->trace->read) {
6941                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6942                 if (sret)
6943                         goto out;
6944         }
6945
6946 waitagain:
6947         sret = tracing_wait_pipe(filp);
6948         if (sret <= 0)
6949                 goto out;
6950
6951         /* stop when tracing is finished */
6952         if (trace_empty(iter)) {
6953                 sret = 0;
6954                 goto out;
6955         }
6956
6957         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6958                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6959
6960         /* reset all but tr, trace, and overruns */
6961         trace_iterator_reset(iter);
6962         cpumask_clear(iter->started);
6963         trace_seq_init(&iter->seq);
6964
6965         trace_event_read_lock();
6966         trace_access_lock(iter->cpu_file);
6967         while (trace_find_next_entry_inc(iter) != NULL) {
6968                 enum print_line_t ret;
6969                 int save_len = iter->seq.seq.len;
6970
6971                 ret = print_trace_line(iter);
6972                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6973                         /*
6974                          * If one print_trace_line() fills the entire trace_seq in one shot,
6975                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6976                          * In this case we need to consume it, otherwise the loop will peek
6977                          * this event next time, resulting in an infinite loop.
6978                          */
6979                         if (save_len == 0) {
6980                                 iter->seq.full = 0;
6981                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6982                                 trace_consume(iter);
6983                                 break;
6984                         }
6985
6986                         /* In other cases, don't print partial lines */
6987                         iter->seq.seq.len = save_len;
6988                         break;
6989                 }
6990                 if (ret != TRACE_TYPE_NO_CONSUME)
6991                         trace_consume(iter);
6992
6993                 if (trace_seq_used(&iter->seq) >= cnt)
6994                         break;
6995
6996                 /*
6997                  * Setting the full flag means we reached the trace_seq buffer
6998                  * size and should have left via the partial-output condition
6999                  * above. One of the trace_seq_* functions is not used properly.
7000                  */
7001                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7002                           iter->ent->type);
7003         }
7004         trace_access_unlock(iter->cpu_file);
7005         trace_event_read_unlock();
7006
7007         /* Now copy what we have to the user */
7008         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7009         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7010                 trace_seq_init(&iter->seq);
7011
7012         /*
7013          * If there was nothing to send to user, in spite of consuming trace
7014          * entries, go back to wait for more entries.
7015          */
7016         if (sret == -EBUSY)
7017                 goto waitagain;
7018
7019 out:
7020         mutex_unlock(&iter->mutex);
7021
7022         return sret;
7023 }
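
/*
 * Usage sketch for the consuming "trace_pipe" reader implemented above,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * Unlike the "trace" file, reads consume the events they return and
 * block until new events arrive (or return -EAGAIN with O_NONBLOCK);
 * per_cpu/cpuN/trace_pipe gives the same consuming view of one CPU.
 */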
7024
7025 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7026                                      unsigned int idx)
7027 {
7028         __free_page(spd->pages[idx]);
7029 }
7030
7031 static size_t
7032 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7033 {
7034         size_t count;
7035         int save_len;
7036         int ret;
7037
7038         /* Seq buffer is page-sized, exactly what we need. */
7039         for (;;) {
7040                 save_len = iter->seq.seq.len;
7041                 ret = print_trace_line(iter);
7042
7043                 if (trace_seq_has_overflowed(&iter->seq)) {
7044                         iter->seq.seq.len = save_len;
7045                         break;
7046                 }
7047
7048                 /*
7049                  * This should not be hit, because it should only
7050                  * be set if the iter->seq overflowed. But check it
7051                  * anyway to be safe.
7052                  */
7053                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7054                         iter->seq.seq.len = save_len;
7055                         break;
7056                 }
7057
7058                 count = trace_seq_used(&iter->seq) - save_len;
7059                 if (rem < count) {
7060                         rem = 0;
7061                         iter->seq.seq.len = save_len;
7062                         break;
7063                 }
7064
7065                 if (ret != TRACE_TYPE_NO_CONSUME)
7066                         trace_consume(iter);
7067                 rem -= count;
7068                 if (!trace_find_next_entry_inc(iter))   {
7069                         rem = 0;
7070                         iter->ent = NULL;
7071                         break;
7072                 }
7073         }
7074
7075         return rem;
7076 }
7077
7078 static ssize_t tracing_splice_read_pipe(struct file *filp,
7079                                         loff_t *ppos,
7080                                         struct pipe_inode_info *pipe,
7081                                         size_t len,
7082                                         unsigned int flags)
7083 {
7084         struct page *pages_def[PIPE_DEF_BUFFERS];
7085         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7086         struct trace_iterator *iter = filp->private_data;
7087         struct splice_pipe_desc spd = {
7088                 .pages          = pages_def,
7089                 .partial        = partial_def,
7090                 .nr_pages       = 0, /* This gets updated below. */
7091                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7092                 .ops            = &default_pipe_buf_ops,
7093                 .spd_release    = tracing_spd_release_pipe,
7094         };
7095         ssize_t ret;
7096         size_t rem;
7097         unsigned int i;
7098
7099         if (splice_grow_spd(pipe, &spd))
7100                 return -ENOMEM;
7101
7102         mutex_lock(&iter->mutex);
7103
7104         if (iter->trace->splice_read) {
7105                 ret = iter->trace->splice_read(iter, filp,
7106                                                ppos, pipe, len, flags);
7107                 if (ret)
7108                         goto out_err;
7109         }
7110
7111         ret = tracing_wait_pipe(filp);
7112         if (ret <= 0)
7113                 goto out_err;
7114
7115         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7116                 ret = -EFAULT;
7117                 goto out_err;
7118         }
7119
7120         trace_event_read_lock();
7121         trace_access_lock(iter->cpu_file);
7122
7123         /* Fill as many pages as possible. */
7124         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7125                 spd.pages[i] = alloc_page(GFP_KERNEL);
7126                 if (!spd.pages[i])
7127                         break;
7128
7129                 rem = tracing_fill_pipe_page(rem, iter);
7130
7131                 /* Copy the data into the page, so we can start over. */
7132                 ret = trace_seq_to_buffer(&iter->seq,
7133                                           page_address(spd.pages[i]),
7134                                           trace_seq_used(&iter->seq));
7135                 if (ret < 0) {
7136                         __free_page(spd.pages[i]);
7137                         break;
7138                 }
7139                 spd.partial[i].offset = 0;
7140                 spd.partial[i].len = trace_seq_used(&iter->seq);
7141
7142                 trace_seq_init(&iter->seq);
7143         }
7144
7145         trace_access_unlock(iter->cpu_file);
7146         trace_event_read_unlock();
7147         mutex_unlock(&iter->mutex);
7148
7149         spd.nr_pages = i;
7150
7151         if (i)
7152                 ret = splice_to_pipe(pipe, &spd);
7153         else
7154                 ret = 0;
7155 out:
7156         splice_shrink_spd(&spd);
7157         return ret;
7158
7159 out_err:
7160         mutex_unlock(&iter->mutex);
7161         goto out;
7162 }
7163
7164 static ssize_t
7165 tracing_entries_read(struct file *filp, char __user *ubuf,
7166                      size_t cnt, loff_t *ppos)
7167 {
7168         struct inode *inode = file_inode(filp);
7169         struct trace_array *tr = inode->i_private;
7170         int cpu = tracing_get_cpu(inode);
7171         char buf[64];
7172         int r = 0;
7173         ssize_t ret;
7174
7175         mutex_lock(&trace_types_lock);
7176
7177         if (cpu == RING_BUFFER_ALL_CPUS) {
7178                 int cpu, buf_size_same;
7179                 unsigned long size;
7180
7181                 size = 0;
7182                 buf_size_same = 1;
7183                 /* check if all cpu sizes are same */
7184                 for_each_tracing_cpu(cpu) {
7185                         /* fill in the size from first enabled cpu */
7186                         if (size == 0)
7187                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7188                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7189                                 buf_size_same = 0;
7190                                 break;
7191                         }
7192                 }
7193
7194                 if (buf_size_same) {
7195                         if (!tr->ring_buffer_expanded)
7196                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7197                                             size >> 10,
7198                                             trace_buf_size >> 10);
7199                         else
7200                                 r = sprintf(buf, "%lu\n", size >> 10);
7201                 } else
7202                         r = sprintf(buf, "X\n");
7203         } else
7204                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7205
7206         mutex_unlock(&trace_types_lock);
7207
7208         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7209         return ret;
7210 }
7211
7212 static ssize_t
7213 tracing_entries_write(struct file *filp, const char __user *ubuf,
7214                       size_t cnt, loff_t *ppos)
7215 {
7216         struct inode *inode = file_inode(filp);
7217         struct trace_array *tr = inode->i_private;
7218         unsigned long val;
7219         int ret;
7220
7221         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7222         if (ret)
7223                 return ret;
7224
7225         /* must have at least 1 entry */
7226         if (!val)
7227                 return -EINVAL;
7228
7229         /* value is in KB */
7230         val <<= 10;
7231         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7232         if (ret < 0)
7233                 return ret;
7234
7235         *ppos += cnt;
7236
7237         return cnt;
7238 }
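
/*
 * Usage sketch for the "buffer_size_kb" tracefs files backed by the
 * handlers above, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * Values are per-CPU sizes in KB; the top-level file resizes every CPU,
 * the per_cpu files resize a single one, and a read of "X" means the
 * CPUs currently have different sizes.
 */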
7239
7240 static ssize_t
7241 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7242                                 size_t cnt, loff_t *ppos)
7243 {
7244         struct trace_array *tr = filp->private_data;
7245         char buf[64];
7246         int r, cpu;
7247         unsigned long size = 0, expanded_size = 0;
7248
7249         mutex_lock(&trace_types_lock);
7250         for_each_tracing_cpu(cpu) {
7251                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7252                 if (!tr->ring_buffer_expanded)
7253                         expanded_size += trace_buf_size >> 10;
7254         }
7255         if (tr->ring_buffer_expanded)
7256                 r = sprintf(buf, "%lu\n", size);
7257         else
7258                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7259         mutex_unlock(&trace_types_lock);
7260
7261         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7262 }
7263
7264 static ssize_t
7265 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7266                           size_t cnt, loff_t *ppos)
7267 {
7268         /*
7269          * There is no need to read what the user has written; this function
7270          * is just here so that "echo" into this file does not return an error.
7271          */
7272
7273         *ppos += cnt;
7274
7275         return cnt;
7276 }
7277
7278 static int
7279 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7280 {
7281         struct trace_array *tr = inode->i_private;
7282
7283         /* disable tracing ? */
7284         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7285                 tracer_tracing_off(tr);
7286         /* resize the ring buffer to 0 */
7287         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7288
7289         trace_array_put(tr);
7290
7291         return 0;
7292 }
7293
7294 static ssize_t
7295 tracing_mark_write(struct file *filp, const char __user *ubuf,
7296                                         size_t cnt, loff_t *fpos)
7297 {
7298         struct trace_array *tr = filp->private_data;
7299         struct ring_buffer_event *event;
7300         enum event_trigger_type tt = ETT_NONE;
7301         struct trace_buffer *buffer;
7302         struct print_entry *entry;
7303         int meta_size;
7304         ssize_t written;
7305         size_t size;
7306         int len;
7307
7308 /* Used in tracing_mark_raw_write() as well */
7309 #define FAULTED_STR "<faulted>"
7310 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7311
7312         if (tracing_disabled)
7313                 return -EINVAL;
7314
7315         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7316                 return -EINVAL;
7317
7318         if ((ssize_t)cnt < 0)
7319                 return -EINVAL;
7320
7321         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7322  again:
7323         size = cnt + meta_size;
7324
7325         /* If less than "<faulted>", then make sure we can still add that */
7326         if (cnt < FAULTED_SIZE)
7327                 size += FAULTED_SIZE - cnt;
7328
7329         if (size > TRACE_SEQ_BUFFER_SIZE) {
7330                 cnt -= size - TRACE_SEQ_BUFFER_SIZE;
7331                 goto again;
7332         }
7333
7334         buffer = tr->array_buffer.buffer;
7335         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7336                                             tracing_gen_ctx());
7337         if (unlikely(!event)) {
7338                 /*
7339                  * If the size was greater than what was allowed, then
7340                  * make it smaller and try again.
7341                  */
7342                 if (size > ring_buffer_max_event_size(buffer)) {
7343                         /* A cnt < FAULTED_SIZE should never make size bigger than max */
7344                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7345                                 return -EBADF;
7346                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7347                         /* The above should only happen once */
7348                         if (WARN_ON_ONCE(cnt + meta_size == size))
7349                                 return -EBADF;
7350                         goto again;
7351                 }
7352
7353                 /* Ring buffer disabled, return as if not open for write */
7354                 return -EBADF;
7355         }
7356
7357         entry = ring_buffer_event_data(event);
7358         entry->ip = _THIS_IP_;
7359
7360         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7361         if (len) {
7362                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7363                 cnt = FAULTED_SIZE;
7364                 written = -EFAULT;
7365         } else
7366                 written = cnt;
7367
7368         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7369                 /* do not add \n before testing triggers, but add \0 */
7370                 entry->buf[cnt] = '\0';
7371                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7372         }
7373
7374         if (entry->buf[cnt - 1] != '\n') {
7375                 entry->buf[cnt] = '\n';
7376                 entry->buf[cnt + 1] = '\0';
7377         } else
7378                 entry->buf[cnt] = '\0';
7379
7380         if (static_branch_unlikely(&trace_marker_exports_enabled))
7381                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7382         __buffer_unlock_commit(buffer, event);
7383
7384         if (tt)
7385                 event_triggers_post_call(tr->trace_marker_file, tt);
7386
7387         return written;
7388 }
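
/*
 * Usage sketch for the "trace_marker" tracefs file written above,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * Each write becomes a TRACE_PRINT event in the ring buffer; writes
 * larger than the maximum event size are truncated, and a faulted
 * user copy is recorded as "<faulted>".
 */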
7389
7390 static ssize_t
7391 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7392                                         size_t cnt, loff_t *fpos)
7393 {
7394         struct trace_array *tr = filp->private_data;
7395         struct ring_buffer_event *event;
7396         struct trace_buffer *buffer;
7397         struct raw_data_entry *entry;
7398         ssize_t written;
7399         int size;
7400         int len;
7401
7402 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7403
7404         if (tracing_disabled)
7405                 return -EINVAL;
7406
7407         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7408                 return -EINVAL;
7409
7410         /* The marker must at least have a tag id */
7411         if (cnt < sizeof(unsigned int))
7412                 return -EINVAL;
7413
7414         size = sizeof(*entry) + cnt;
7415         if (cnt < FAULT_SIZE_ID)
7416                 size += FAULT_SIZE_ID - cnt;
7417
7418         buffer = tr->array_buffer.buffer;
7419
7420         if (size > ring_buffer_max_event_size(buffer))
7421                 return -EINVAL;
7422
7423         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7424                                             tracing_gen_ctx());
7425         if (!event)
7426                 /* Ring buffer disabled, return as if not open for write */
7427                 return -EBADF;
7428
7429         entry = ring_buffer_event_data(event);
7430
7431         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7432         if (len) {
7433                 entry->id = -1;
7434                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7435                 written = -EFAULT;
7436         } else
7437                 written = cnt;
7438
7439         __buffer_unlock_commit(buffer, event);
7440
7441         return written;
7442 }
7443
7444 static int tracing_clock_show(struct seq_file *m, void *v)
7445 {
7446         struct trace_array *tr = m->private;
7447         int i;
7448
7449         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7450                 seq_printf(m,
7451                         "%s%s%s%s", i ? " " : "",
7452                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7453                         i == tr->clock_id ? "]" : "");
7454         seq_putc(m, '\n');
7455
7456         return 0;
7457 }
7458
7459 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7460 {
7461         int i;
7462
7463         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7464                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7465                         break;
7466         }
7467         if (i == ARRAY_SIZE(trace_clocks))
7468                 return -EINVAL;
7469
7470         mutex_lock(&trace_types_lock);
7471
7472         tr->clock_id = i;
7473
7474         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7475
7476         /*
7477          * New clock may not be consistent with the previous clock.
7478          * Reset the buffer so that it doesn't have incomparable timestamps.
7479          */
7480         tracing_reset_online_cpus(&tr->array_buffer);
7481
7482 #ifdef CONFIG_TRACER_MAX_TRACE
7483         if (tr->max_buffer.buffer)
7484                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7485         tracing_reset_online_cpus(&tr->max_buffer);
7486 #endif
7487
7488         mutex_unlock(&trace_types_lock);
7489
7490         return 0;
7491 }
7492
7493 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7494                                    size_t cnt, loff_t *fpos)
7495 {
7496         struct seq_file *m = filp->private_data;
7497         struct trace_array *tr = m->private;
7498         char buf[64];
7499         const char *clockstr;
7500         int ret;
7501
7502         if (cnt >= sizeof(buf))
7503                 return -EINVAL;
7504
7505         if (copy_from_user(buf, ubuf, cnt))
7506                 return -EFAULT;
7507
7508         buf[cnt] = 0;
7509
7510         clockstr = strstrip(buf);
7511
7512         ret = tracing_set_clock(tr, clockstr);
7513         if (ret)
7514                 return ret;
7515
7516         *fpos += cnt;
7517
7518         return cnt;
7519 }
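
/*
 * Usage sketch for the "trace_clock" tracefs file handled above,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter ...
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * The bracketed name is the clock in use; writing another name from the
 * list switches to it and resets the buffers, since timestamps taken
 * with different clocks are not comparable.
 */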
7520
7521 static int tracing_clock_open(struct inode *inode, struct file *file)
7522 {
7523         struct trace_array *tr = inode->i_private;
7524         int ret;
7525
7526         ret = tracing_check_open_get_tr(tr);
7527         if (ret)
7528                 return ret;
7529
7530         ret = single_open(file, tracing_clock_show, inode->i_private);
7531         if (ret < 0)
7532                 trace_array_put(tr);
7533
7534         return ret;
7535 }
7536
7537 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7538 {
7539         struct trace_array *tr = m->private;
7540
7541         mutex_lock(&trace_types_lock);
7542
7543         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7544                 seq_puts(m, "delta [absolute]\n");
7545         else
7546                 seq_puts(m, "[delta] absolute\n");
7547
7548         mutex_unlock(&trace_types_lock);
7549
7550         return 0;
7551 }
7552
7553 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7554 {
7555         struct trace_array *tr = inode->i_private;
7556         int ret;
7557
7558         ret = tracing_check_open_get_tr(tr);
7559         if (ret)
7560                 return ret;
7561
7562         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7563         if (ret < 0)
7564                 trace_array_put(tr);
7565
7566         return ret;
7567 }
7568
7569 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7570 {
7571         if (rbe == this_cpu_read(trace_buffered_event))
7572                 return ring_buffer_time_stamp(buffer);
7573
7574         return ring_buffer_event_time_stamp(buffer, rbe);
7575 }
7576
7577 /*
7578  * Set or disable using the per-CPU trace_buffered_event when possible.
7579  */
7580 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7581 {
7582         int ret = 0;
7583
7584         mutex_lock(&trace_types_lock);
7585
7586         if (set && tr->no_filter_buffering_ref++)
7587                 goto out;
7588
7589         if (!set) {
7590                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7591                         ret = -EINVAL;
7592                         goto out;
7593                 }
7594
7595                 --tr->no_filter_buffering_ref;
7596         }
7597  out:
7598         mutex_unlock(&trace_types_lock);
7599
7600         return ret;
7601 }
7602
7603 struct ftrace_buffer_info {
7604         struct trace_iterator   iter;
7605         void                    *spare;
7606         unsigned int            spare_cpu;
7607         unsigned int            spare_size;
7608         unsigned int            read;
7609 };
7610
7611 #ifdef CONFIG_TRACER_SNAPSHOT
7612 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7613 {
7614         struct trace_array *tr = inode->i_private;
7615         struct trace_iterator *iter;
7616         struct seq_file *m;
7617         int ret;
7618
7619         ret = tracing_check_open_get_tr(tr);
7620         if (ret)
7621                 return ret;
7622
7623         if (file->f_mode & FMODE_READ) {
7624                 iter = __tracing_open(inode, file, true);
7625                 if (IS_ERR(iter))
7626                         ret = PTR_ERR(iter);
7627         } else {
7628                 /* Writes still need the seq_file to hold the private data */
7629                 ret = -ENOMEM;
7630                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7631                 if (!m)
7632                         goto out;
7633                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7634                 if (!iter) {
7635                         kfree(m);
7636                         goto out;
7637                 }
7638                 ret = 0;
7639
7640                 iter->tr = tr;
7641                 iter->array_buffer = &tr->max_buffer;
7642                 iter->cpu_file = tracing_get_cpu(inode);
7643                 m->private = iter;
7644                 file->private_data = m;
7645         }
7646 out:
7647         if (ret < 0)
7648                 trace_array_put(tr);
7649
7650         return ret;
7651 }
7652
7653 static void tracing_swap_cpu_buffer(void *tr)
7654 {
7655         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7656 }
7657
7658 static ssize_t
7659 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7660                        loff_t *ppos)
7661 {
7662         struct seq_file *m = filp->private_data;
7663         struct trace_iterator *iter = m->private;
7664         struct trace_array *tr = iter->tr;
7665         unsigned long val;
7666         int ret;
7667
7668         ret = tracing_update_buffers(tr);
7669         if (ret < 0)
7670                 return ret;
7671
7672         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7673         if (ret)
7674                 return ret;
7675
7676         mutex_lock(&trace_types_lock);
7677
7678         if (tr->current_trace->use_max_tr) {
7679                 ret = -EBUSY;
7680                 goto out;
7681         }
7682
7683         local_irq_disable();
7684         arch_spin_lock(&tr->max_lock);
7685         if (tr->cond_snapshot)
7686                 ret = -EBUSY;
7687         arch_spin_unlock(&tr->max_lock);
7688         local_irq_enable();
7689         if (ret)
7690                 goto out;
7691
7692         switch (val) {
7693         case 0:
7694                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7695                         ret = -EINVAL;
7696                         break;
7697                 }
7698                 if (tr->allocated_snapshot)
7699                         free_snapshot(tr);
7700                 break;
7701         case 1:
7702 /* Only allow per-cpu swap if the ring buffer supports it */
7703 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7704                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7705                         ret = -EINVAL;
7706                         break;
7707                 }
7708 #endif
7709                 if (tr->allocated_snapshot)
7710                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7711                                         &tr->array_buffer, iter->cpu_file);
7712                 else
7713                         ret = tracing_alloc_snapshot_instance(tr);
7714                 if (ret < 0)
7715                         break;
7716                 /* Now, we're going to swap */
7717                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7718                         local_irq_disable();
7719                         update_max_tr(tr, current, smp_processor_id(), NULL);
7720                         local_irq_enable();
7721                 } else {
7722                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7723                                                  (void *)tr, 1);
7724                 }
7725                 break;
7726         default:
7727                 if (tr->allocated_snapshot) {
7728                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7729                                 tracing_reset_online_cpus(&tr->max_buffer);
7730                         else
7731                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7732                 }
7733                 break;
7734         }
7735
7736         if (ret >= 0) {
7737                 *ppos += cnt;
7738                 ret = cnt;
7739         }
7740 out:
7741         mutex_unlock(&trace_types_lock);
7742         return ret;
7743 }
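
/*
 * Usage sketch for the "snapshot" tracefs file handled above, assuming
 * CONFIG_TRACER_SNAPSHOT is enabled and tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot   (allocate if needed and swap)
 *   # cat /sys/kernel/tracing/snapshot        (read the swapped-out buffer)
 *   # echo 2 > /sys/kernel/tracing/snapshot   (clear the snapshot buffer)
 *   # echo 0 > /sys/kernel/tracing/snapshot   (free the snapshot buffer)
 *
 * per_cpu/cpuN/snapshot swaps a single CPU buffer, which the '1' case
 * above only allows when CONFIG_RING_BUFFER_ALLOW_SWAP is set.
 */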
7744
7745 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7746 {
7747         struct seq_file *m = file->private_data;
7748         int ret;
7749
7750         ret = tracing_release(inode, file);
7751
7752         if (file->f_mode & FMODE_READ)
7753                 return ret;
7754
7755         /* If write only, the seq_file is just a stub */
7756         if (m)
7757                 kfree(m->private);
7758         kfree(m);
7759
7760         return 0;
7761 }
7762
7763 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7764 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7765                                     size_t count, loff_t *ppos);
7766 static int tracing_buffers_release(struct inode *inode, struct file *file);
7767 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7768                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7769
7770 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7771 {
7772         struct ftrace_buffer_info *info;
7773         int ret;
7774
7775         /* The following checks for tracefs lockdown */
7776         ret = tracing_buffers_open(inode, filp);
7777         if (ret < 0)
7778                 return ret;
7779
7780         info = filp->private_data;
7781
7782         if (info->iter.trace->use_max_tr) {
7783                 tracing_buffers_release(inode, filp);
7784                 return -EBUSY;
7785         }
7786
7787         info->iter.snapshot = true;
7788         info->iter.array_buffer = &info->iter.tr->max_buffer;
7789
7790         return ret;
7791 }
7792
7793 #endif /* CONFIG_TRACER_SNAPSHOT */
7794
7795
7796 static const struct file_operations tracing_thresh_fops = {
7797         .open           = tracing_open_generic,
7798         .read           = tracing_thresh_read,
7799         .write          = tracing_thresh_write,
7800         .llseek         = generic_file_llseek,
7801 };
7802
7803 #ifdef CONFIG_TRACER_MAX_TRACE
7804 static const struct file_operations tracing_max_lat_fops = {
7805         .open           = tracing_open_generic_tr,
7806         .read           = tracing_max_lat_read,
7807         .write          = tracing_max_lat_write,
7808         .llseek         = generic_file_llseek,
7809         .release        = tracing_release_generic_tr,
7810 };
7811 #endif
7812
7813 static const struct file_operations set_tracer_fops = {
7814         .open           = tracing_open_generic_tr,
7815         .read           = tracing_set_trace_read,
7816         .write          = tracing_set_trace_write,
7817         .llseek         = generic_file_llseek,
7818         .release        = tracing_release_generic_tr,
7819 };
7820
7821 static const struct file_operations tracing_pipe_fops = {
7822         .open           = tracing_open_pipe,
7823         .poll           = tracing_poll_pipe,
7824         .read           = tracing_read_pipe,
7825         .splice_read    = tracing_splice_read_pipe,
7826         .release        = tracing_release_pipe,
7827         .llseek         = no_llseek,
7828 };
7829
7830 static const struct file_operations tracing_entries_fops = {
7831         .open           = tracing_open_generic_tr,
7832         .read           = tracing_entries_read,
7833         .write          = tracing_entries_write,
7834         .llseek         = generic_file_llseek,
7835         .release        = tracing_release_generic_tr,
7836 };
7837
7838 static const struct file_operations tracing_total_entries_fops = {
7839         .open           = tracing_open_generic_tr,
7840         .read           = tracing_total_entries_read,
7841         .llseek         = generic_file_llseek,
7842         .release        = tracing_release_generic_tr,
7843 };
7844
7845 static const struct file_operations tracing_free_buffer_fops = {
7846         .open           = tracing_open_generic_tr,
7847         .write          = tracing_free_buffer_write,
7848         .release        = tracing_free_buffer_release,
7849 };
7850
7851 static const struct file_operations tracing_mark_fops = {
7852         .open           = tracing_mark_open,
7853         .write          = tracing_mark_write,
7854         .release        = tracing_release_generic_tr,
7855 };
7856
7857 static const struct file_operations tracing_mark_raw_fops = {
7858         .open           = tracing_mark_open,
7859         .write          = tracing_mark_raw_write,
7860         .release        = tracing_release_generic_tr,
7861 };
7862
7863 static const struct file_operations trace_clock_fops = {
7864         .open           = tracing_clock_open,
7865         .read           = seq_read,
7866         .llseek         = seq_lseek,
7867         .release        = tracing_single_release_tr,
7868         .write          = tracing_clock_write,
7869 };
7870
7871 static const struct file_operations trace_time_stamp_mode_fops = {
7872         .open           = tracing_time_stamp_mode_open,
7873         .read           = seq_read,
7874         .llseek         = seq_lseek,
7875         .release        = tracing_single_release_tr,
7876 };
7877
7878 #ifdef CONFIG_TRACER_SNAPSHOT
7879 static const struct file_operations snapshot_fops = {
7880         .open           = tracing_snapshot_open,
7881         .read           = seq_read,
7882         .write          = tracing_snapshot_write,
7883         .llseek         = tracing_lseek,
7884         .release        = tracing_snapshot_release,
7885 };
7886
7887 static const struct file_operations snapshot_raw_fops = {
7888         .open           = snapshot_raw_open,
7889         .read           = tracing_buffers_read,
7890         .release        = tracing_buffers_release,
7891         .splice_read    = tracing_buffers_splice_read,
7892         .llseek         = no_llseek,
7893 };
7894
7895 #endif /* CONFIG_TRACER_SNAPSHOT */
7896
7897 /*
7898  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7899  * @filp: The active open file structure
7900  * @ubuf: The userspace provided buffer holding the value to write
7901  * @cnt: The number of bytes to read from the userspace buffer
7902  * @ppos: The current "file" position
7903  *
7904  * This function implements the write interface for a struct trace_min_max_param.
7905  * The filp->private_data must point to a trace_min_max_param structure that
7906  * defines where to write the value, the min and the max acceptable values,
7907  * and a lock to protect the write.
7908  */
7909 static ssize_t
7910 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7911 {
7912         struct trace_min_max_param *param = filp->private_data;
7913         u64 val;
7914         int err;
7915
7916         if (!param)
7917                 return -EFAULT;
7918
7919         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7920         if (err)
7921                 return err;
7922
7923         if (param->lock)
7924                 mutex_lock(param->lock);
7925
7926         if (param->min && val < *param->min)
7927                 err = -EINVAL;
7928
7929         if (param->max && val > *param->max)
7930                 err = -EINVAL;
7931
7932         if (!err)
7933                 *param->val = val;
7934
7935         if (param->lock)
7936                 mutex_unlock(param->lock);
7937
7938         if (err)
7939                 return err;
7940
7941         return cnt;
7942 }
7943
7944 /*
7945  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7946  * @filp: The active open file structure
7947  * @ubuf: The userspace provided buffer to read value into
7948  * @cnt: The maximum number of bytes to read
7949  * @ppos: The current "file" position
7950  *
7951  * This function implements the read interface for a struct trace_min_max_param.
7952  * The filp->private_data must point to a trace_min_max_param struct with valid
7953  * data.
7954  */
7955 static ssize_t
7956 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7957 {
7958         struct trace_min_max_param *param = filp->private_data;
7959         char buf[U64_STR_SIZE];
7960         int len;
7961         u64 val;
7962
7963         if (!param)
7964                 return -EFAULT;
7965
7966         val = *param->val;
7967
7968         if (cnt > sizeof(buf))
7969                 cnt = sizeof(buf);
7970
7971         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7972
7973         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7974 }
7975
7976 const struct file_operations trace_min_max_fops = {
7977         .open           = tracing_open_generic,
7978         .read           = trace_min_max_read,
7979         .write          = trace_min_max_write,
7980 };
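
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * tracer could expose a range-checked u64 knob by pairing a
 * trace_min_max_param with trace_min_max_fops, roughly as below.  The
 * "my_*" names and the parent dentry are assumptions for illustration.
 *
 *	static u64 my_val = 50, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent_dentry,
 *			  &my_param, &trace_min_max_fops);
 *
 * "echo 42 > my_knob" would then store 42 in my_val, while values
 * outside [1, 100] would be rejected with -EINVAL.
 */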
7981
7982 #define TRACING_LOG_ERRS_MAX    8
7983 #define TRACING_LOG_LOC_MAX     128
7984
7985 #define CMD_PREFIX "  Command: "
7986
7987 struct err_info {
7988         const char      **errs; /* ptr to loc-specific array of err strings */
7989         u8              type;   /* index into errs -> specific err string */
7990         u16             pos;    /* caret position */
7991         u64             ts;
7992 };
7993
7994 struct tracing_log_err {
7995         struct list_head        list;
7996         struct err_info         info;
7997         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7998         char                    *cmd;                     /* what caused err */
7999 };
8000
8001 static DEFINE_MUTEX(tracing_err_log_lock);
8002
8003 static struct tracing_log_err *alloc_tracing_log_err(int len)
8004 {
8005         struct tracing_log_err *err;
8006
8007         err = kzalloc(sizeof(*err), GFP_KERNEL);
8008         if (!err)
8009                 return ERR_PTR(-ENOMEM);
8010
8011         err->cmd = kzalloc(len, GFP_KERNEL);
8012         if (!err->cmd) {
8013                 kfree(err);
8014                 return ERR_PTR(-ENOMEM);
8015         }
8016
8017         return err;
8018 }
8019
8020 static void free_tracing_log_err(struct tracing_log_err *err)
8021 {
8022         kfree(err->cmd);
8023         kfree(err);
8024 }
8025
8026 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8027                                                    int len)
8028 {
8029         struct tracing_log_err *err;
8030         char *cmd;
8031
8032         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8033                 err = alloc_tracing_log_err(len);
8034                 if (PTR_ERR(err) != -ENOMEM)
8035                         tr->n_err_log_entries++;
8036
8037                 return err;
8038         }
8039         cmd = kzalloc(len, GFP_KERNEL);
8040         if (!cmd)
8041                 return ERR_PTR(-ENOMEM);
8042         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8043         kfree(err->cmd);
8044         err->cmd = cmd;
8045         list_del(&err->list);
8046
8047         return err;
8048 }
8049
8050 /**
8051  * err_pos - find the position of a string within a command for error careting
8052  * @cmd: The tracing command that caused the error
8053  * @str: The string to position the caret at within @cmd
8054  *
8055  * Finds the position of the first occurrence of @str within @cmd.  The
8056  * return value can be passed to tracing_log_err() for caret placement
8057  * within @cmd.
8058  *
8059  * Returns the index within @cmd of the first occurrence of @str or 0
8060  * if @str was not found.
8061  */
8062 unsigned int err_pos(char *cmd, const char *str)
8063 {
8064         char *found;
8065
8066         if (WARN_ON(!strlen(cmd)))
8067                 return 0;
8068
8069         found = strstr(cmd, str);
8070         if (found)
8071                 return found - cmd;
8072
8073         return 0;
8074 }
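
/*
 * For example (illustrative strings), with a command of "hist:keys=pid"
 * a caller wanting the caret under "keys" would get:
 *
 *	err_pos("hist:keys=pid", "keys") == 5	("hist:" is 5 characters)
 *	err_pos("hist:keys=pid", "vals") == 0	(not found)
 */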
8075
8076 /**
8077  * tracing_log_err - write an error to the tracing error log
8078  * @tr: The associated trace array for the error (NULL for top level array)
8079  * @loc: A string describing where the error occurred
8080  * @cmd: The tracing command that caused the error
8081  * @errs: The array of loc-specific static error strings
8082  * @type: The index into errs[], which produces the specific static err string
8083  * @pos: The position the caret should be placed in the cmd
8084  *
8085  * Writes an error into tracing/error_log of the form:
8086  *
8087  * <loc>: error: <text>
8088  *   Command: <cmd>
8089  *              ^
8090  *
8091  * tracing/error_log is a small log file containing the last
8092  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8093  * unless there has been a tracing error; the error log can be cleared,
8094  * and its memory freed, by writing the empty string to it in truncation
8095  * mode, i.e. echo > tracing/error_log.
8096  *
8097  * NOTE: the @errs array along with the @type param are used to
8098  * produce a static error string - this string is not copied and saved
8099  * when the error is logged - only a pointer to it is saved.  See
8100  * existing callers for examples of how static strings are typically
8101  * defined for use with tracing_log_err().
8102  */
8103 void tracing_log_err(struct trace_array *tr,
8104                      const char *loc, const char *cmd,
8105                      const char **errs, u8 type, u16 pos)
8106 {
8107         struct tracing_log_err *err;
8108         int len = 0;
8109
8110         if (!tr)
8111                 tr = &global_trace;
8112
8113         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8114
8115         mutex_lock(&tracing_err_log_lock);
8116         err = get_tracing_log_err(tr, len);
8117         if (PTR_ERR(err) == -ENOMEM) {
8118                 mutex_unlock(&tracing_err_log_lock);
8119                 return;
8120         }
8121
8122         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8123         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8124
8125         err->info.errs = errs;
8126         err->info.type = type;
8127         err->info.pos = pos;
8128         err->info.ts = local_clock();
8129
8130         list_add_tail(&err->list, &tr->err_log);
8131         mutex_unlock(&tracing_err_log_lock);
8132 }
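
/*
 * Illustrative sketch (hypothetical caller and strings, not taken from
 * this file): a command parser rejecting an unknown keyword could log
 *
 *	static const char *my_errs[] = { "Unknown keyword" };
 *
 *	tracing_log_err(tr, "my_parser", "foo:bar", my_errs, 0,
 *			err_pos("foo:bar", "bar"));
 *
 * which would show up in tracing/error_log roughly as (timestamp
 * illustrative, caret placed under the "bar" substring):
 *
 *	[  123.456789] my_parser: error: Unknown keyword
 *	  Command: foo:bar
 *	               ^
 */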
8133
8134 static void clear_tracing_err_log(struct trace_array *tr)
8135 {
8136         struct tracing_log_err *err, *next;
8137
8138         mutex_lock(&tracing_err_log_lock);
8139         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8140                 list_del(&err->list);
8141                 free_tracing_log_err(err);
8142         }
8143
8144         tr->n_err_log_entries = 0;
8145         mutex_unlock(&tracing_err_log_lock);
8146 }
8147
8148 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8149 {
8150         struct trace_array *tr = m->private;
8151
8152         mutex_lock(&tracing_err_log_lock);
8153
8154         return seq_list_start(&tr->err_log, *pos);
8155 }
8156
8157 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8158 {
8159         struct trace_array *tr = m->private;
8160
8161         return seq_list_next(v, &tr->err_log, pos);
8162 }
8163
8164 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8165 {
8166         mutex_unlock(&tracing_err_log_lock);
8167 }
8168
8169 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8170 {
8171         u16 i;
8172
8173         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8174                 seq_putc(m, ' ');
8175         for (i = 0; i < pos; i++)
8176                 seq_putc(m, ' ');
8177         seq_puts(m, "^\n");
8178 }
8179
8180 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8181 {
8182         struct tracing_log_err *err = v;
8183
8184         if (err) {
8185                 const char *err_text = err->info.errs[err->info.type];
8186                 u64 sec = err->info.ts;
8187                 u32 nsec;
8188
8189                 nsec = do_div(sec, NSEC_PER_SEC);
8190                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8191                            err->loc, err_text);
8192                 seq_printf(m, "%s", err->cmd);
8193                 tracing_err_log_show_pos(m, err->info.pos);
8194         }
8195
8196         return 0;
8197 }
8198
8199 static const struct seq_operations tracing_err_log_seq_ops = {
8200         .start  = tracing_err_log_seq_start,
8201         .next   = tracing_err_log_seq_next,
8202         .stop   = tracing_err_log_seq_stop,
8203         .show   = tracing_err_log_seq_show
8204 };
8205
8206 static int tracing_err_log_open(struct inode *inode, struct file *file)
8207 {
8208         struct trace_array *tr = inode->i_private;
8209         int ret = 0;
8210
8211         ret = tracing_check_open_get_tr(tr);
8212         if (ret)
8213                 return ret;
8214
8215         /* If this file was opened for write, then erase contents */
8216         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8217                 clear_tracing_err_log(tr);
8218
8219         if (file->f_mode & FMODE_READ) {
8220                 ret = seq_open(file, &tracing_err_log_seq_ops);
8221                 if (!ret) {
8222                         struct seq_file *m = file->private_data;
8223                         m->private = tr;
8224                 } else {
8225                         trace_array_put(tr);
8226                 }
8227         }
8228         return ret;
8229 }
8230
8231 static ssize_t tracing_err_log_write(struct file *file,
8232                                      const char __user *buffer,
8233                                      size_t count, loff_t *ppos)
8234 {
8235         return count;
8236 }
8237
8238 static int tracing_err_log_release(struct inode *inode, struct file *file)
8239 {
8240         struct trace_array *tr = inode->i_private;
8241
8242         trace_array_put(tr);
8243
8244         if (file->f_mode & FMODE_READ)
8245                 seq_release(inode, file);
8246
8247         return 0;
8248 }
8249
8250 static const struct file_operations tracing_err_log_fops = {
8251         .open           = tracing_err_log_open,
8252         .write          = tracing_err_log_write,
8253         .read           = seq_read,
8254         .llseek         = tracing_lseek,
8255         .release        = tracing_err_log_release,
8256 };
8257
8258 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8259 {
8260         struct trace_array *tr = inode->i_private;
8261         struct ftrace_buffer_info *info;
8262         int ret;
8263
8264         ret = tracing_check_open_get_tr(tr);
8265         if (ret)
8266                 return ret;
8267
8268         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8269         if (!info) {
8270                 trace_array_put(tr);
8271                 return -ENOMEM;
8272         }
8273
8274         mutex_lock(&trace_types_lock);
8275
8276         info->iter.tr           = tr;
8277         info->iter.cpu_file     = tracing_get_cpu(inode);
8278         info->iter.trace        = tr->current_trace;
8279         info->iter.array_buffer = &tr->array_buffer;
8280         info->spare             = NULL;
8281         /* Force reading ring buffer for first read */
8282         info->read              = (unsigned int)-1;
8283
8284         filp->private_data = info;
8285
8286         tr->trace_ref++;
8287
8288         mutex_unlock(&trace_types_lock);
8289
8290         ret = nonseekable_open(inode, filp);
8291         if (ret < 0)
8292                 trace_array_put(tr);
8293
8294         return ret;
8295 }
8296
8297 static __poll_t
8298 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8299 {
8300         struct ftrace_buffer_info *info = filp->private_data;
8301         struct trace_iterator *iter = &info->iter;
8302
8303         return trace_poll(iter, filp, poll_table);
8304 }
8305
8306 static ssize_t
8307 tracing_buffers_read(struct file *filp, char __user *ubuf,
8308                      size_t count, loff_t *ppos)
8309 {
8310         struct ftrace_buffer_info *info = filp->private_data;
8311         struct trace_iterator *iter = &info->iter;
8312         void *trace_data;
8313         int page_size;
8314         ssize_t ret = 0;
8315         ssize_t size;
8316
8317         if (!count)
8318                 return 0;
8319
8320 #ifdef CONFIG_TRACER_MAX_TRACE
8321         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8322                 return -EBUSY;
8323 #endif
8324
8325         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8326
8327         /* Make sure the spare matches the current sub buffer size */
8328         if (info->spare) {
8329                 if (page_size != info->spare_size) {
8330                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8331                                                    info->spare_cpu, info->spare);
8332                         info->spare = NULL;
8333                 }
8334         }
8335
8336         if (!info->spare) {
8337                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8338                                                           iter->cpu_file);
8339                 if (IS_ERR(info->spare)) {
8340                         ret = PTR_ERR(info->spare);
8341                         info->spare = NULL;
8342                 } else {
8343                         info->spare_cpu = iter->cpu_file;
8344                         info->spare_size = page_size;
8345                 }
8346         }
8347         if (!info->spare)
8348                 return ret;
8349
8350         /* Do we have previous read data to read? */
8351         if (info->read < page_size)
8352                 goto read;
8353
8354  again:
8355         trace_access_lock(iter->cpu_file);
8356         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8357                                     info->spare,
8358                                     count,
8359                                     iter->cpu_file, 0);
8360         trace_access_unlock(iter->cpu_file);
8361
8362         if (ret < 0) {
8363                 if (trace_empty(iter)) {
8364                         if ((filp->f_flags & O_NONBLOCK))
8365                                 return -EAGAIN;
8366
8367                         ret = wait_on_pipe(iter, 0);
8368                         if (ret)
8369                                 return ret;
8370
8371                         goto again;
8372                 }
8373                 return 0;
8374         }
8375
8376         info->read = 0;
8377  read:
8378         size = page_size - info->read;
8379         if (size > count)
8380                 size = count;
8381         trace_data = ring_buffer_read_page_data(info->spare);
8382         ret = copy_to_user(ubuf, trace_data + info->read, size);
8383         if (ret == size)
8384                 return -EFAULT;
8385
8386         size -= ret;
8387
8388         *ppos += size;
8389         info->read += size;
8390
8391         return size;
8392 }
8393
8394 static int tracing_buffers_release(struct inode *inode, struct file *file)
8395 {
8396         struct ftrace_buffer_info *info = file->private_data;
8397         struct trace_iterator *iter = &info->iter;
8398
8399         mutex_lock(&trace_types_lock);
8400
8401         iter->tr->trace_ref--;
8402
8403         __trace_array_put(iter->tr);
8404
8405         iter->wait_index++;
8406         /* Make sure the waiters see the new wait_index */
8407         smp_wmb();
8408
8409         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8410
8411         if (info->spare)
8412                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8413                                            info->spare_cpu, info->spare);
8414         kvfree(info);
8415
8416         mutex_unlock(&trace_types_lock);
8417
8418         return 0;
8419 }
8420
8421 struct buffer_ref {
8422         struct trace_buffer     *buffer;
8423         void                    *page;
8424         int                     cpu;
8425         refcount_t              refcount;
8426 };
8427
8428 static void buffer_ref_release(struct buffer_ref *ref)
8429 {
8430         if (!refcount_dec_and_test(&ref->refcount))
8431                 return;
8432         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8433         kfree(ref);
8434 }
8435
8436 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8437                                     struct pipe_buffer *buf)
8438 {
8439         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8440
8441         buffer_ref_release(ref);
8442         buf->private = 0;
8443 }
8444
8445 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8446                                 struct pipe_buffer *buf)
8447 {
8448         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8449
8450         if (refcount_read(&ref->refcount) > INT_MAX/2)
8451                 return false;
8452
8453         refcount_inc(&ref->refcount);
8454         return true;
8455 }
8456
8457 /* Pipe buffer operations for a buffer. */
8458 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8459         .release                = buffer_pipe_buf_release,
8460         .get                    = buffer_pipe_buf_get,
8461 };
8462
8463 /*
8464  * Callback from splice_to_pipe(), if we need to release some pages
8465  * at the end of the spd in case we errored out while filling the pipe.
8466  */
8467 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8468 {
8469         struct buffer_ref *ref =
8470                 (struct buffer_ref *)spd->partial[i].private;
8471
8472         buffer_ref_release(ref);
8473         spd->partial[i].private = 0;
8474 }
8475
8476 static ssize_t
8477 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8478                             struct pipe_inode_info *pipe, size_t len,
8479                             unsigned int flags)
8480 {
8481         struct ftrace_buffer_info *info = file->private_data;
8482         struct trace_iterator *iter = &info->iter;
8483         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8484         struct page *pages_def[PIPE_DEF_BUFFERS];
8485         struct splice_pipe_desc spd = {
8486                 .pages          = pages_def,
8487                 .partial        = partial_def,
8488                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8489                 .ops            = &buffer_pipe_buf_ops,
8490                 .spd_release    = buffer_spd_release,
8491         };
8492         struct buffer_ref *ref;
8493         int page_size;
8494         int entries, i;
8495         ssize_t ret = 0;
8496
8497 #ifdef CONFIG_TRACER_MAX_TRACE
8498         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8499                 return -EBUSY;
8500 #endif
8501
8502         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8503         if (*ppos & (page_size - 1))
8504                 return -EINVAL;
8505
8506         if (len & (page_size - 1)) {
8507                 if (len < page_size)
8508                         return -EINVAL;
8509                 len &= (~(page_size - 1));
8510         }
8511
8512         if (splice_grow_spd(pipe, &spd))
8513                 return -ENOMEM;
8514
8515  again:
8516         trace_access_lock(iter->cpu_file);
8517         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8518
8519         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8520                 struct page *page;
8521                 int r;
8522
8523                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8524                 if (!ref) {
8525                         ret = -ENOMEM;
8526                         break;
8527                 }
8528
8529                 refcount_set(&ref->refcount, 1);
8530                 ref->buffer = iter->array_buffer->buffer;
8531                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8532                 if (IS_ERR(ref->page)) {
8533                         ret = PTR_ERR(ref->page);
8534                         ref->page = NULL;
8535                         kfree(ref);
8536                         break;
8537                 }
8538                 ref->cpu = iter->cpu_file;
8539
8540                 r = ring_buffer_read_page(ref->buffer, ref->page,
8541                                           len, iter->cpu_file, 1);
8542                 if (r < 0) {
8543                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8544                                                    ref->page);
8545                         kfree(ref);
8546                         break;
8547                 }
8548
8549                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8550
8551                 spd.pages[i] = page;
8552                 spd.partial[i].len = page_size;
8553                 spd.partial[i].offset = 0;
8554                 spd.partial[i].private = (unsigned long)ref;
8555                 spd.nr_pages++;
8556                 *ppos += page_size;
8557
8558                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8559         }
8560
8561         trace_access_unlock(iter->cpu_file);
8562         spd.nr_pages = i;
8563
8564         /* did we read anything? */
8565         if (!spd.nr_pages) {
8566                 long wait_index;
8567
8568                 if (ret)
8569                         goto out;
8570
8571                 ret = -EAGAIN;
8572                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8573                         goto out;
8574
8575                 wait_index = READ_ONCE(iter->wait_index);
8576
8577                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8578                 if (ret)
8579                         goto out;
8580
8581                 /* No need to wait after waking up when tracing is off */
8582                 if (!tracer_tracing_is_on(iter->tr))
8583                         goto out;
8584
8585                 /* Make sure we see the new wait_index */
8586                 smp_rmb();
8587                 if (wait_index != iter->wait_index)
8588                         goto out;
8589
8590                 goto again;
8591         }
8592
8593         ret = splice_to_pipe(pipe, &spd);
8594 out:
8595         splice_shrink_spd(&spd);
8596
8597         return ret;
8598 }
8599
8600 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8601 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8602 {
8603         struct ftrace_buffer_info *info = file->private_data;
8604         struct trace_iterator *iter = &info->iter;
8605
8606         if (cmd)
8607                 return -ENOIOCTLCMD;
8608
8609         mutex_lock(&trace_types_lock);
8610
8611         iter->wait_index++;
8612         /* Make sure the waiters see the new wait_index */
8613         smp_wmb();
8614
8615         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8616
8617         mutex_unlock(&trace_types_lock);
8618         return 0;
8619 }
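
/*
 * Illustrative user-space sketch (the tracefs mount point is assumed):
 * a reader blocked on per_cpu/cpu0/trace_pipe_raw can be woken from
 * another thread with an ioctl of cmd 0:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ioctl(fd, 0);		// cmd 0 wakes up all waiters on this buffer
 */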
8620
8621 static const struct file_operations tracing_buffers_fops = {
8622         .open           = tracing_buffers_open,
8623         .read           = tracing_buffers_read,
8624         .poll           = tracing_buffers_poll,
8625         .release        = tracing_buffers_release,
8626         .splice_read    = tracing_buffers_splice_read,
8627         .unlocked_ioctl = tracing_buffers_ioctl,
8628         .llseek         = no_llseek,
8629 };
8630
8631 static ssize_t
8632 tracing_stats_read(struct file *filp, char __user *ubuf,
8633                    size_t count, loff_t *ppos)
8634 {
8635         struct inode *inode = file_inode(filp);
8636         struct trace_array *tr = inode->i_private;
8637         struct array_buffer *trace_buf = &tr->array_buffer;
8638         int cpu = tracing_get_cpu(inode);
8639         struct trace_seq *s;
8640         unsigned long cnt;
8641         unsigned long long t;
8642         unsigned long usec_rem;
8643
8644         s = kmalloc(sizeof(*s), GFP_KERNEL);
8645         if (!s)
8646                 return -ENOMEM;
8647
8648         trace_seq_init(s);
8649
8650         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8651         trace_seq_printf(s, "entries: %ld\n", cnt);
8652
8653         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8654         trace_seq_printf(s, "overrun: %ld\n", cnt);
8655
8656         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8657         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8658
8659         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8660         trace_seq_printf(s, "bytes: %ld\n", cnt);
8661
8662         if (trace_clocks[tr->clock_id].in_ns) {
8663                 /* local or global for trace_clock */
8664                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8665                 usec_rem = do_div(t, USEC_PER_SEC);
8666                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8667                                                                 t, usec_rem);
8668
8669                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8670                 usec_rem = do_div(t, USEC_PER_SEC);
8671                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8672         } else {
8673                 /* counter or tsc mode for trace_clock */
8674                 trace_seq_printf(s, "oldest event ts: %llu\n",
8675                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8676
8677                 trace_seq_printf(s, "now ts: %llu\n",
8678                                 ring_buffer_time_stamp(trace_buf->buffer));
8679         }
8680
8681         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8682         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8683
8684         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8685         trace_seq_printf(s, "read events: %ld\n", cnt);
8686
8687         count = simple_read_from_buffer(ubuf, count, ppos,
8688                                         s->buffer, trace_seq_used(s));
8689
8690         kfree(s);
8691
8692         return count;
8693 }
8694
8695 static const struct file_operations tracing_stats_fops = {
8696         .open           = tracing_open_generic_tr,
8697         .read           = tracing_stats_read,
8698         .llseek         = generic_file_llseek,
8699         .release        = tracing_release_generic_tr,
8700 };
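
/*
 * Sample per_cpu/cpuN/stats output as produced by tracing_stats_read()
 * above (all values are illustrative):
 *
 *	entries: 14093
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 705992
 *	oldest event ts:  2819.740235
 *	now ts:  2822.125469
 *	dropped events: 0
 *	read events: 0
 */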
8701
8702 #ifdef CONFIG_DYNAMIC_FTRACE
8703
8704 static ssize_t
8705 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8706                   size_t cnt, loff_t *ppos)
8707 {
8708         ssize_t ret;
8709         char *buf;
8710         int r;
8711
8712         /* 256 should be plenty to hold the amount needed */
8713         buf = kmalloc(256, GFP_KERNEL);
8714         if (!buf)
8715                 return -ENOMEM;
8716
8717         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8718                       ftrace_update_tot_cnt,
8719                       ftrace_number_of_pages,
8720                       ftrace_number_of_groups);
8721
8722         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8723         kfree(buf);
8724         return ret;
8725 }
8726
8727 static const struct file_operations tracing_dyn_info_fops = {
8728         .open           = tracing_open_generic,
8729         .read           = tracing_read_dyn_info,
8730         .llseek         = generic_file_llseek,
8731 };
8732 #endif /* CONFIG_DYNAMIC_FTRACE */
8733
8734 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8735 static void
8736 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8737                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8738                 void *data)
8739 {
8740         tracing_snapshot_instance(tr);
8741 }
8742
8743 static void
8744 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8745                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8746                       void *data)
8747 {
8748         struct ftrace_func_mapper *mapper = data;
8749         long *count = NULL;
8750
8751         if (mapper)
8752                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8753
8754         if (count) {
8755
8756                 if (*count <= 0)
8757                         return;
8758
8759                 (*count)--;
8760         }
8761
8762         tracing_snapshot_instance(tr);
8763 }
8764
8765 static int
8766 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8767                       struct ftrace_probe_ops *ops, void *data)
8768 {
8769         struct ftrace_func_mapper *mapper = data;
8770         long *count = NULL;
8771
8772         seq_printf(m, "%ps:", (void *)ip);
8773
8774         seq_puts(m, "snapshot");
8775
8776         if (mapper)
8777                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8778
8779         if (count)
8780                 seq_printf(m, ":count=%ld\n", *count);
8781         else
8782                 seq_puts(m, ":unlimited\n");
8783
8784         return 0;
8785 }
8786
8787 static int
8788 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8789                      unsigned long ip, void *init_data, void **data)
8790 {
8791         struct ftrace_func_mapper *mapper = *data;
8792
8793         if (!mapper) {
8794                 mapper = allocate_ftrace_func_mapper();
8795                 if (!mapper)
8796                         return -ENOMEM;
8797                 *data = mapper;
8798         }
8799
8800         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8801 }
8802
8803 static void
8804 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8805                      unsigned long ip, void *data)
8806 {
8807         struct ftrace_func_mapper *mapper = data;
8808
8809         if (!ip) {
8810                 if (!mapper)
8811                         return;
8812                 free_ftrace_func_mapper(mapper, NULL);
8813                 return;
8814         }
8815
8816         ftrace_func_mapper_remove_ip(mapper, ip);
8817 }
8818
8819 static struct ftrace_probe_ops snapshot_probe_ops = {
8820         .func                   = ftrace_snapshot,
8821         .print                  = ftrace_snapshot_print,
8822 };
8823
8824 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8825         .func                   = ftrace_count_snapshot,
8826         .print                  = ftrace_snapshot_print,
8827         .init                   = ftrace_snapshot_init,
8828         .free                   = ftrace_snapshot_free,
8829 };
8830
8831 static int
8832 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8833                                char *glob, char *cmd, char *param, int enable)
8834 {
8835         struct ftrace_probe_ops *ops;
8836         void *count = (void *)-1;
8837         char *number;
8838         int ret;
8839
8840         if (!tr)
8841                 return -ENODEV;
8842
8843         /* hash funcs only work with set_ftrace_filter */
8844         if (!enable)
8845                 return -EINVAL;
8846
8847         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8848
8849         if (glob[0] == '!')
8850                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8851
8852         if (!param)
8853                 goto out_reg;
8854
8855         number = strsep(&param, ":");
8856
8857         if (!strlen(number))
8858                 goto out_reg;
8859
8860         /*
8861          * We use the callback data field (which is a pointer)
8862          * as our counter.
8863          */
8864         ret = kstrtoul(number, 0, (unsigned long *)&count);
8865         if (ret)
8866                 return ret;
8867
8868  out_reg:
8869         ret = tracing_alloc_snapshot_instance(tr);
8870         if (ret < 0)
8871                 goto out;
8872
8873         ret = register_ftrace_function_probe(glob, tr, ops, count);
8874
8875  out:
8876         return ret < 0 ? ret : 0;
8877 }
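
/*
 * Illustrative usage of the "snapshot" command registered below (the
 * tracefs mount point is assumed; see Documentation/trace/ftrace.rst):
 *
 *	# take a snapshot on every hit of a function:
 *	echo 'do_sys_open:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# only snapshot the first 5 hits; ":5" is the counter parsed from
 *	# "param" above and stored in the probe's callback data:
 *	echo 'do_sys_open:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# remove the probe again (the glob[0] == '!' path above):
 *	echo '!do_sys_open:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 */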
8878
8879 static struct ftrace_func_command ftrace_snapshot_cmd = {
8880         .name                   = "snapshot",
8881         .func                   = ftrace_trace_snapshot_callback,
8882 };
8883
8884 static __init int register_snapshot_cmd(void)
8885 {
8886         return register_ftrace_command(&ftrace_snapshot_cmd);
8887 }
8888 #else
8889 static inline __init int register_snapshot_cmd(void) { return 0; }
8890 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8891
8892 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8893 {
8894         if (WARN_ON(!tr->dir))
8895                 return ERR_PTR(-ENODEV);
8896
8897         /* Top directory uses NULL as the parent */
8898         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8899                 return NULL;
8900
8901         /* All sub buffers have a descriptor */
8902         return tr->dir;
8903 }
8904
8905 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8906 {
8907         struct dentry *d_tracer;
8908
8909         if (tr->percpu_dir)
8910                 return tr->percpu_dir;
8911
8912         d_tracer = tracing_get_dentry(tr);
8913         if (IS_ERR(d_tracer))
8914                 return NULL;
8915
8916         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8917
8918         MEM_FAIL(!tr->percpu_dir,
8919                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8920
8921         return tr->percpu_dir;
8922 }
8923
8924 static struct dentry *
8925 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8926                       void *data, long cpu, const struct file_operations *fops)
8927 {
8928         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8929
8930         if (ret) /* See tracing_get_cpu() */
8931                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8932         return ret;
8933 }
8934
8935 static void
8936 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8937 {
8938         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8939         struct dentry *d_cpu;
8940         char cpu_dir[30]; /* 30 characters should be more than enough */
8941
8942         if (!d_percpu)
8943                 return;
8944
8945         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8946         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8947         if (!d_cpu) {
8948                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8949                 return;
8950         }
8951
8952         /* per cpu trace_pipe */
8953         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8954                                 tr, cpu, &tracing_pipe_fops);
8955
8956         /* per cpu trace */
8957         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8958                                 tr, cpu, &tracing_fops);
8959
8960         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8961                                 tr, cpu, &tracing_buffers_fops);
8962
8963         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8964                                 tr, cpu, &tracing_stats_fops);
8965
8966         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8967                                 tr, cpu, &tracing_entries_fops);
8968
8969 #ifdef CONFIG_TRACER_SNAPSHOT
8970         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8971                                 tr, cpu, &snapshot_fops);
8972
8973         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8974                                 tr, cpu, &snapshot_raw_fops);
8975 #endif
8976 }
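
/*
 * For reference, the per-CPU files created above land under the tracefs
 * mount point (path assumed), e.g. for CPU 0:
 *
 *	/sys/kernel/tracing/per_cpu/cpu0/trace
 *	/sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 *	/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw
 *	/sys/kernel/tracing/per_cpu/cpu0/stats
 *	/sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *	/sys/kernel/tracing/per_cpu/cpu0/snapshot	(CONFIG_TRACER_SNAPSHOT)
 *	/sys/kernel/tracing/per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT)
 */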
8977
8978 #ifdef CONFIG_FTRACE_SELFTEST
8979 /* Let selftest have access to static functions in this file */
8980 #include "trace_selftest.c"
8981 #endif
8982
8983 static ssize_t
8984 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8985                         loff_t *ppos)
8986 {
8987         struct trace_option_dentry *topt = filp->private_data;
8988         char *buf;
8989
8990         if (topt->flags->val & topt->opt->bit)
8991                 buf = "1\n";
8992         else
8993                 buf = "0\n";
8994
8995         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8996 }
8997
8998 static ssize_t
8999 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9000                          loff_t *ppos)
9001 {
9002         struct trace_option_dentry *topt = filp->private_data;
9003         unsigned long val;
9004         int ret;
9005
9006         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9007         if (ret)
9008                 return ret;
9009
9010         if (val != 0 && val != 1)
9011                 return -EINVAL;
9012
9013         if (!!(topt->flags->val & topt->opt->bit) != val) {
9014                 mutex_lock(&trace_types_lock);
9015                 ret = __set_tracer_option(topt->tr, topt->flags,
9016                                           topt->opt, !val);
9017                 mutex_unlock(&trace_types_lock);
9018                 if (ret)
9019                         return ret;
9020         }
9021
9022         *ppos += cnt;
9023
9024         return cnt;
9025 }
9026
9027 static int tracing_open_options(struct inode *inode, struct file *filp)
9028 {
9029         struct trace_option_dentry *topt = inode->i_private;
9030         int ret;
9031
9032         ret = tracing_check_open_get_tr(topt->tr);
9033         if (ret)
9034                 return ret;
9035
9036         filp->private_data = inode->i_private;
9037         return 0;
9038 }
9039
9040 static int tracing_release_options(struct inode *inode, struct file *file)
9041 {
9042         struct trace_option_dentry *topt = file->private_data;
9043
9044         trace_array_put(topt->tr);
9045         return 0;
9046 }
9047
9048 static const struct file_operations trace_options_fops = {
9049         .open = tracing_open_options,
9050         .read = trace_options_read,
9051         .write = trace_options_write,
9052         .llseek = generic_file_llseek,
9053         .release = tracing_release_options,
9054 };
9055
9056 /*
9057  * In order to pass in both the trace_array descriptor and the index of
9058  * the flag that the trace option file represents, the trace_array
9059  * has a character array of trace_flags_index[], which holds the index
9060  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9061  * The address of this character array is passed to the flag option file
9062  * read/write callbacks.
9063  *
9064  * In order to extract both the index and the trace_array descriptor,
9065  * get_tr_index() uses the following algorithm.
9066  *
9067  *   idx = *ptr;
9068  *
9069  * The pointer itself points at one element of the index array, and the
9070  * value stored there is that element's own index (remember, index[1] == 1).
9071  *
9072  * To get the trace_array descriptor, we subtract that index from the
9073  * pointer, which lands us on the start of the index array itself:
9074  *
9075  *   ptr - idx == &index[0]
9076  *
9077  * Then a simple container_of() from that pointer gets us to the
9078  * trace_array descriptor.
9079  */
9080 static void get_tr_index(void *data, struct trace_array **ptr,
9081                          unsigned int *pindex)
9082 {
9083         *pindex = *(unsigned char *)data;
9084
9085         *ptr = container_of(data - *pindex, struct trace_array,
9086                             trace_flags_index);
9087 }
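
/*
 * Worked example (illustrative only): if data points at
 * tr->trace_flags_index[3], then *(unsigned char *)data == 3, so
 *
 *	*pindex = 3;
 *	data - 3 == &tr->trace_flags_index[0]
 *
 * and container_of() on that address recovers tr itself.
 */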
9088
9089 static ssize_t
9090 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9091                         loff_t *ppos)
9092 {
9093         void *tr_index = filp->private_data;
9094         struct trace_array *tr;
9095         unsigned int index;
9096         char *buf;
9097
9098         get_tr_index(tr_index, &tr, &index);
9099
9100         if (tr->trace_flags & (1 << index))
9101                 buf = "1\n";
9102         else
9103                 buf = "0\n";
9104
9105         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9106 }
9107
9108 static ssize_t
9109 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9110                          loff_t *ppos)
9111 {
9112         void *tr_index = filp->private_data;
9113         struct trace_array *tr;
9114         unsigned int index;
9115         unsigned long val;
9116         int ret;
9117
9118         get_tr_index(tr_index, &tr, &index);
9119
9120         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9121         if (ret)
9122                 return ret;
9123
9124         if (val != 0 && val != 1)
9125                 return -EINVAL;
9126
9127         mutex_lock(&event_mutex);
9128         mutex_lock(&trace_types_lock);
9129         ret = set_tracer_flag(tr, 1 << index, val);
9130         mutex_unlock(&trace_types_lock);
9131         mutex_unlock(&event_mutex);
9132
9133         if (ret < 0)
9134                 return ret;
9135
9136         *ppos += cnt;
9137
9138         return cnt;
9139 }
9140
9141 static const struct file_operations trace_options_core_fops = {
9142         .open = tracing_open_generic,
9143         .read = trace_options_core_read,
9144         .write = trace_options_core_write,
9145         .llseek = generic_file_llseek,
9146 };
9147
9148 struct dentry *trace_create_file(const char *name,
9149                                  umode_t mode,
9150                                  struct dentry *parent,
9151                                  void *data,
9152                                  const struct file_operations *fops)
9153 {
9154         struct dentry *ret;
9155
9156         ret = tracefs_create_file(name, mode, parent, data, fops);
9157         if (!ret)
9158                 pr_warn("Could not create tracefs '%s' entry\n", name);
9159
9160         return ret;
9161 }
9162
9163
9164 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9165 {
9166         struct dentry *d_tracer;
9167
9168         if (tr->options)
9169                 return tr->options;
9170
9171         d_tracer = tracing_get_dentry(tr);
9172         if (IS_ERR(d_tracer))
9173                 return NULL;
9174
9175         tr->options = tracefs_create_dir("options", d_tracer);
9176         if (!tr->options) {
9177                 pr_warn("Could not create tracefs directory 'options'\n");
9178                 return NULL;
9179         }
9180
9181         return tr->options;
9182 }
9183
9184 static void
9185 create_trace_option_file(struct trace_array *tr,
9186                          struct trace_option_dentry *topt,
9187                          struct tracer_flags *flags,
9188                          struct tracer_opt *opt)
9189 {
9190         struct dentry *t_options;
9191
9192         t_options = trace_options_init_dentry(tr);
9193         if (!t_options)
9194                 return;
9195
9196         topt->flags = flags;
9197         topt->opt = opt;
9198         topt->tr = tr;
9199
9200         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9201                                         t_options, topt, &trace_options_fops);
9202
9203 }
9204
9205 static void
9206 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9207 {
9208         struct trace_option_dentry *topts;
9209         struct trace_options *tr_topts;
9210         struct tracer_flags *flags;
9211         struct tracer_opt *opts;
9212         int cnt;
9213         int i;
9214
9215         if (!tracer)
9216                 return;
9217
9218         flags = tracer->flags;
9219
9220         if (!flags || !flags->opts)
9221                 return;
9222
9223         /*
9224          * If this is an instance, only create flags for tracers
9225          * the instance may have.
9226          */
9227         if (!trace_ok_for_array(tracer, tr))
9228                 return;
9229
9230         for (i = 0; i < tr->nr_topts; i++) {
9231                 /* Make sure there are no duplicate flags. */
9232                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9233                         return;
9234         }
9235
9236         opts = flags->opts;
9237
9238         for (cnt = 0; opts[cnt].name; cnt++)
9239                 ;
9240
9241         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9242         if (!topts)
9243                 return;
9244
9245         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9246                             GFP_KERNEL);
9247         if (!tr_topts) {
9248                 kfree(topts);
9249                 return;
9250         }
9251
9252         tr->topts = tr_topts;
9253         tr->topts[tr->nr_topts].tracer = tracer;
9254         tr->topts[tr->nr_topts].topts = topts;
9255         tr->nr_topts++;
9256
9257         for (cnt = 0; opts[cnt].name; cnt++) {
9258                 create_trace_option_file(tr, &topts[cnt], flags,
9259                                          &opts[cnt]);
9260                 MEM_FAIL(topts[cnt].entry == NULL,
9261                           "Failed to create trace option: %s",
9262                           opts[cnt].name);
9263         }
9264 }
9265
9266 static struct dentry *
9267 create_trace_option_core_file(struct trace_array *tr,
9268                               const char *option, long index)
9269 {
9270         struct dentry *t_options;
9271
9272         t_options = trace_options_init_dentry(tr);
9273         if (!t_options)
9274                 return NULL;
9275
9276         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9277                                  (void *)&tr->trace_flags_index[index],
9278                                  &trace_options_core_fops);
9279 }
9280
9281 static void create_trace_options_dir(struct trace_array *tr)
9282 {
9283         struct dentry *t_options;
9284         bool top_level = tr == &global_trace;
9285         int i;
9286
9287         t_options = trace_options_init_dentry(tr);
9288         if (!t_options)
9289                 return;
9290
9291         for (i = 0; trace_options[i]; i++) {
9292                 if (top_level ||
9293                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9294                         create_trace_option_core_file(tr, trace_options[i], i);
9295         }
9296 }
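
/*
 * Illustrative result (tracefs mount point assumed): the loop above
 * creates one boolean file per core trace option, e.g.
 *
 *	echo 1 > /sys/kernel/tracing/options/sym-offset
 *	echo 0 > /sys/kernel/tracing/options/overwrite
 *
 * Instances get the same files under instances/<name>/options/, except
 * for the flags in TOP_LEVEL_TRACE_FLAGS, which exist only at the top
 * level.
 */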
9297
9298 static ssize_t
9299 rb_simple_read(struct file *filp, char __user *ubuf,
9300                size_t cnt, loff_t *ppos)
9301 {
9302         struct trace_array *tr = filp->private_data;
9303         char buf[64];
9304         int r;
9305
9306         r = tracer_tracing_is_on(tr);
9307         r = sprintf(buf, "%d\n", r);
9308
9309         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9310 }
9311
9312 static ssize_t
9313 rb_simple_write(struct file *filp, const char __user *ubuf,
9314                 size_t cnt, loff_t *ppos)
9315 {
9316         struct trace_array *tr = filp->private_data;
9317         struct trace_buffer *buffer = tr->array_buffer.buffer;
9318         unsigned long val;
9319         int ret;
9320
9321         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9322         if (ret)
9323                 return ret;
9324
9325         if (buffer) {
9326                 mutex_lock(&trace_types_lock);
9327                 if (!!val == tracer_tracing_is_on(tr)) {
9328                         val = 0; /* do nothing */
9329                 } else if (val) {
9330                         tracer_tracing_on(tr);
9331                         if (tr->current_trace->start)
9332                                 tr->current_trace->start(tr);
9333                 } else {
9334                         tracer_tracing_off(tr);
9335                         if (tr->current_trace->stop)
9336                                 tr->current_trace->stop(tr);
9337                         /* Wake up any waiters */
9338                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9339                 }
9340                 mutex_unlock(&trace_types_lock);
9341         }
9342
9343         (*ppos)++;
9344
9345         return cnt;
9346 }
9347
9348 static const struct file_operations rb_simple_fops = {
9349         .open           = tracing_open_generic_tr,
9350         .read           = rb_simple_read,
9351         .write          = rb_simple_write,
9352         .release        = tracing_release_generic_tr,
9353         .llseek         = default_llseek,
9354 };
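
/*
 * rb_simple_fops backs the "tracing_on" file created elsewhere in this
 * file.  Illustrative usage (tracefs mount point assumed):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on	# stop recording; tracer->stop()
 *	echo 1 > /sys/kernel/tracing/tracing_on	# resume; tracer->start()
 *	cat /sys/kernel/tracing/tracing_on	# prints 0 or 1
 */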
9355
9356 static ssize_t
9357 buffer_percent_read(struct file *filp, char __user *ubuf,
9358                     size_t cnt, loff_t *ppos)
9359 {
9360         struct trace_array *tr = filp->private_data;
9361         char buf[64];
9362         int r;
9363
9364         r = tr->buffer_percent;
9365         r = sprintf(buf, "%d\n", r);
9366
9367         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9368 }
9369
9370 static ssize_t
9371 buffer_percent_write(struct file *filp, const char __user *ubuf,
9372                      size_t cnt, loff_t *ppos)
9373 {
9374         struct trace_array *tr = filp->private_data;
9375         unsigned long val;
9376         int ret;
9377
9378         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9379         if (ret)
9380                 return ret;
9381
9382         if (val > 100)
9383                 return -EINVAL;
9384
9385         tr->buffer_percent = val;
9386
9387         (*ppos)++;
9388
9389         return cnt;
9390 }
9391
9392 static const struct file_operations buffer_percent_fops = {
9393         .open           = tracing_open_generic_tr,
9394         .read           = buffer_percent_read,
9395         .write          = buffer_percent_write,
9396         .release        = tracing_release_generic_tr,
9397         .llseek         = default_llseek,
9398 };
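
/*
 * Illustrative usage (tracefs mount point assumed): buffer_percent sets
 * how full the ring buffer must be before blocked readers of
 * trace_pipe_raw are woken (see the wait_on_pipe() callers above):
 *
 *	echo 0   > /sys/kernel/tracing/buffer_percent	# wake on any data
 *	echo 50  > /sys/kernel/tracing/buffer_percent	# wake at half full
 *	echo 100 > /sys/kernel/tracing/buffer_percent	# wake only when full
 *	echo 101 > /sys/kernel/tracing/buffer_percent	# rejected (-EINVAL)
 */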
9399
9400 static ssize_t
9401 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9402 {
9403         struct trace_array *tr = filp->private_data;
9404         size_t size;
9405         char buf[64];
9406         int order;
9407         int r;
9408
9409         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9410         size = (PAGE_SIZE << order) / 1024;
9411
9412         r = sprintf(buf, "%zd\n", size);
9413
9414         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9415 }
9416
9417 static ssize_t
9418 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9419                          size_t cnt, loff_t *ppos)
9420 {
9421         struct trace_array *tr = filp->private_data;
9422         unsigned long val;
9423         int old_order;
9424         int order;
9425         int pages;
9426         int ret;
9427
9428         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9429         if (ret)
9430                 return ret;
9431
9432         val *= 1024; /* value passed in is in KB */
9433
9434         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9435         order = fls(pages - 1);
9436
9437         /* limit between 1 and 128 system pages */
9438         if (order < 0 || order > 7)
9439                 return -EINVAL;
9440
9441         /* Do not allow tracing while changing the order of the ring buffer */
9442         tracing_stop_tr(tr);
9443
9444         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9445         if (old_order == order)
9446                 goto out;
9447
9448         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9449         if (ret)
9450                 goto out;
9451
9452 #ifdef CONFIG_TRACER_MAX_TRACE
9453
9454         if (!tr->allocated_snapshot)
9455                 goto out_max;
9456
9457         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9458         if (ret) {
9459                 /* Put back the old order */
9460                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9461                 if (WARN_ON_ONCE(cnt)) {
9462                         /*
9463                          * AARGH! We are left with different orders!
9464                          * The max buffer is our "snapshot" buffer.
9465                          * When a tracer needs a snapshot (one of the
9466                          * latency tracers), it swaps the max buffer
9467                          * with the saved snapshot. We succeeded in updating
9468                          * the order of the main buffer, but failed to update
9469                          * the order of the max buffer. Then, when we tried to
9470                          * reset the main buffer to the original order, we
9471                          * failed there too. This is very unlikely to
9472                          * happen, but if it does, warn and kill all
9473                          * tracing.
9474                          */
9475                         tracing_disabled = 1;
9476                 }
9477                 goto out;
9478         }
9479  out_max:
9480 #endif
9481         (*ppos)++;
9482  out:
9483         if (ret)
9484                 cnt = ret;
9485         tracing_start_tr(tr);
9486         return cnt;
9487 }
9488
9489 static const struct file_operations buffer_subbuf_size_fops = {
9490         .open           = tracing_open_generic_tr,
9491         .read           = buffer_subbuf_size_read,
9492         .write          = buffer_subbuf_size_write,
9493         .release        = tracing_release_generic_tr,
9494         .llseek         = default_llseek,
9495 };
9496
9497 static struct dentry *trace_instance_dir;
9498
9499 static void
9500 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9501
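/*
 * Allocate one array_buffer for @tr: the ring buffer itself (honoring the
 * instance's overwrite flag) plus the per-CPU trace_array_cpu data.
 */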
9502 static int
9503 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9504 {
9505         enum ring_buffer_flags rb_flags;
9506
9507         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9508
9509         buf->tr = tr;
9510
9511         buf->buffer = ring_buffer_alloc(size, rb_flags);
9512         if (!buf->buffer)
9513                 return -ENOMEM;
9514
9515         buf->data = alloc_percpu(struct trace_array_cpu);
9516         if (!buf->data) {
9517                 ring_buffer_free(buf->buffer);
9518                 buf->buffer = NULL;
9519                 return -ENOMEM;
9520         }
9521
9522         /* Allocate the first page for all buffers */
9523         set_buffer_entries(&tr->array_buffer,
9524                            ring_buffer_size(tr->array_buffer.buffer, 0));
9525
9526         return 0;
9527 }
9528
9529 static void free_trace_buffer(struct array_buffer *buf)
9530 {
9531         if (buf->buffer) {
9532                 ring_buffer_free(buf->buffer);
9533                 buf->buffer = NULL;
9534                 free_percpu(buf->data);
9535                 buf->data = NULL;
9536         }
9537 }
9538
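/*
 * Allocate the buffers for a trace array: the main buffer and, when
 * CONFIG_TRACER_MAX_TRACE is enabled, the max (snapshot) buffer, which is
 * kept at a minimal size unless a snapshot was requested at boot.
 */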
9539 static int allocate_trace_buffers(struct trace_array *tr, int size)
9540 {
9541         int ret;
9542
9543         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9544         if (ret)
9545                 return ret;
9546
9547 #ifdef CONFIG_TRACER_MAX_TRACE
9548         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9549                                     allocate_snapshot ? size : 1);
9550         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9551                 free_trace_buffer(&tr->array_buffer);
9552                 return -ENOMEM;
9553         }
9554         tr->allocated_snapshot = allocate_snapshot;
9555
9556         allocate_snapshot = false;
9557 #endif
9558
9559         return 0;
9560 }
9561
9562 static void free_trace_buffers(struct trace_array *tr)
9563 {
9564         if (!tr)
9565                 return;
9566
9567         free_trace_buffer(&tr->array_buffer);
9568
9569 #ifdef CONFIG_TRACER_MAX_TRACE
9570         free_trace_buffer(&tr->max_buffer);
9571 #endif
9572 }
9573
9574 static void init_trace_flags_index(struct trace_array *tr)
9575 {
9576         int i;
9577
9578         /* Used by the trace options files */
9579         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9580                 tr->trace_flags_index[i] = i;
9581 }
9582
9583 static void __update_tracer_options(struct trace_array *tr)
9584 {
9585         struct tracer *t;
9586
9587         for (t = trace_types; t; t = t->next)
9588                 add_tracer_options(tr, t);
9589 }
9590
9591 static void update_tracer_options(struct trace_array *tr)
9592 {
9593         mutex_lock(&trace_types_lock);
9594         tracer_options_updated = true;
9595         __update_tracer_options(tr);
9596         mutex_unlock(&trace_types_lock);
9597 }
9598
9599 /* Must have trace_types_lock held */
9600 struct trace_array *trace_array_find(const char *instance)
9601 {
9602         struct trace_array *tr, *found = NULL;
9603
9604         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9605                 if (tr->name && strcmp(tr->name, instance) == 0) {
9606                         found = tr;
9607                         break;
9608                 }
9609         }
9610
9611         return found;
9612 }
9613
9614 struct trace_array *trace_array_find_get(const char *instance)
9615 {
9616         struct trace_array *tr;
9617
9618         mutex_lock(&trace_types_lock);
9619         tr = trace_array_find(instance);
9620         if (tr)
9621                 tr->ref++;
9622         mutex_unlock(&trace_types_lock);
9623
9624         return tr;
9625 }
9626
9627 static int trace_array_create_dir(struct trace_array *tr)
9628 {
9629         int ret;
9630
9631         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9632         if (!tr->dir)
9633                 return -EINVAL;
9634
9635         ret = event_trace_add_tracer(tr->dir, tr);
9636         if (ret) {
9637                 tracefs_remove(tr->dir);
9638                 return ret;
9639         }
9640
9641         init_tracer_tracefs(tr, tr->dir);
9642         __update_tracer_options(tr);
9643
9644         return ret;
9645 }
9646
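/*
 * Allocate and set up a new trace array (instance): name, cpumasks, trace
 * buffers, ftrace ops and tracefs directory, then add it to
 * ftrace_trace_arrays. @systems, if not NULL, limits which event systems
 * get directories created for the instance. Returns the new trace array
 * or an ERR_PTR() on failure.
 */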
9647 static struct trace_array *
9648 trace_array_create_systems(const char *name, const char *systems)
9649 {
9650         struct trace_array *tr;
9651         int ret;
9652
9653         ret = -ENOMEM;
9654         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9655         if (!tr)
9656                 return ERR_PTR(ret);
9657
9658         tr->name = kstrdup(name, GFP_KERNEL);
9659         if (!tr->name)
9660                 goto out_free_tr;
9661
9662         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9663                 goto out_free_tr;
9664
9665         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9666                 goto out_free_tr;
9667
9668         if (systems) {
9669                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9670                 if (!tr->system_names)
9671                         goto out_free_tr;
9672         }
9673
9674         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9675
9676         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9677
9678         raw_spin_lock_init(&tr->start_lock);
9679
9680         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9681
9682         tr->current_trace = &nop_trace;
9683
9684         INIT_LIST_HEAD(&tr->systems);
9685         INIT_LIST_HEAD(&tr->events);
9686         INIT_LIST_HEAD(&tr->hist_vars);
9687         INIT_LIST_HEAD(&tr->err_log);
9688
9689         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9690                 goto out_free_tr;
9691
9692         /* The ring buffer is expanded by default */
9693         trace_set_ring_buffer_expanded(tr);
9694
9695         if (ftrace_allocate_ftrace_ops(tr) < 0)
9696                 goto out_free_tr;
9697
9698         ftrace_init_trace_array(tr);
9699
9700         init_trace_flags_index(tr);
9701
9702         if (trace_instance_dir) {
9703                 ret = trace_array_create_dir(tr);
9704                 if (ret)
9705                         goto out_free_tr;
9706         } else
9707                 __trace_early_add_events(tr);
9708
9709         list_add(&tr->list, &ftrace_trace_arrays);
9710
9711         tr->ref++;
9712
9713         return tr;
9714
9715  out_free_tr:
9716         ftrace_free_ftrace_ops(tr);
9717         free_trace_buffers(tr);
9718         free_cpumask_var(tr->pipe_cpumask);
9719         free_cpumask_var(tr->tracing_cpumask);
9720         kfree_const(tr->system_names);
9721         kfree(tr->name);
9722         kfree(tr);
9723
9724         return ERR_PTR(ret);
9725 }
9726
9727 static struct trace_array *trace_array_create(const char *name)
9728 {
9729         return trace_array_create_systems(name, NULL);
9730 }
9731
9732 static int instance_mkdir(const char *name)
9733 {
9734         struct trace_array *tr;
9735         int ret;
9736
9737         mutex_lock(&event_mutex);
9738         mutex_lock(&trace_types_lock);
9739
9740         ret = -EEXIST;
9741         if (trace_array_find(name))
9742                 goto out_unlock;
9743
9744         tr = trace_array_create(name);
9745
9746         ret = PTR_ERR_OR_ZERO(tr);
9747
9748 out_unlock:
9749         mutex_unlock(&trace_types_lock);
9750         mutex_unlock(&event_mutex);
9751         return ret;
9752 }
9753
9754 /**
9755  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9756  * @name: The name of the trace array to be looked up/created.
9757  * @systems: A list of systems to create event directories for (NULL for all)
9758  *
9759  * Returns a pointer to the trace array with the given name, or NULL
9760  * if it cannot be created.
9761  *
9762  * NOTE: This function increments the reference counter associated with the
9763  * trace array returned. This makes sure it cannot be freed while in use.
9764  * Use trace_array_put() once the trace array is no longer needed.
9765  * If the trace_array is to be freed, trace_array_destroy() needs to
9766  * be called after the trace_array_put(), or simply let user space delete
9767  * it from the tracefs instances directory. But until the
9768  * trace_array_put() is called, user space cannot delete it.
9769  *
9770  */
9771 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9772 {
9773         struct trace_array *tr;
9774
9775         mutex_lock(&event_mutex);
9776         mutex_lock(&trace_types_lock);
9777
9778         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9779                 if (tr->name && strcmp(tr->name, name) == 0)
9780                         goto out_unlock;
9781         }
9782
9783         tr = trace_array_create_systems(name, systems);
9784
9785         if (IS_ERR(tr))
9786                 tr = NULL;
9787 out_unlock:
9788         if (tr)
9789                 tr->ref++;
9790
9791         mutex_unlock(&trace_types_lock);
9792         mutex_unlock(&event_mutex);
9793         return tr;
9794 }
9795 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9796
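/*
 * Tear down an instance. Called with event_mutex and trace_types_lock
 * held. Fails with -EBUSY if the instance is still referenced or has
 * active trace users.
 */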
9797 static int __remove_instance(struct trace_array *tr)
9798 {
9799         int i;
9800
9801         /* Reference counter for a newly created trace array = 1. */
9802         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9803                 return -EBUSY;
9804
9805         list_del(&tr->list);
9806
9807         /* Disable all the flags that were enabled coming in */
9808         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9809                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9810                         set_tracer_flag(tr, 1 << i, 0);
9811         }
9812
9813         tracing_set_nop(tr);
9814         clear_ftrace_function_probes(tr);
9815         event_trace_del_tracer(tr);
9816         ftrace_clear_pids(tr);
9817         ftrace_destroy_function_files(tr);
9818         tracefs_remove(tr->dir);
9819         free_percpu(tr->last_func_repeats);
9820         free_trace_buffers(tr);
9821         clear_tracing_err_log(tr);
9822
9823         for (i = 0; i < tr->nr_topts; i++) {
9824                 kfree(tr->topts[i].topts);
9825         }
9826         kfree(tr->topts);
9827
9828         free_cpumask_var(tr->pipe_cpumask);
9829         free_cpumask_var(tr->tracing_cpumask);
9830         kfree_const(tr->system_names);
9831         kfree(tr->name);
9832         kfree(tr);
9833
9834         return 0;
9835 }
9836
9837 int trace_array_destroy(struct trace_array *this_tr)
9838 {
9839         struct trace_array *tr;
9840         int ret;
9841
9842         if (!this_tr)
9843                 return -EINVAL;
9844
9845         mutex_lock(&event_mutex);
9846         mutex_lock(&trace_types_lock);
9847
9848         ret = -ENODEV;
9849
9850         /* Make sure the trace array exists before destroying it. */
9851         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9852                 if (tr == this_tr) {
9853                         ret = __remove_instance(tr);
9854                         break;
9855                 }
9856         }
9857
9858         mutex_unlock(&trace_types_lock);
9859         mutex_unlock(&event_mutex);
9860
9861         return ret;
9862 }
9863 EXPORT_SYMBOL_GPL(trace_array_destroy);
9864
9865 static int instance_rmdir(const char *name)
9866 {
9867         struct trace_array *tr;
9868         int ret;
9869
9870         mutex_lock(&event_mutex);
9871         mutex_lock(&trace_types_lock);
9872
9873         ret = -ENODEV;
9874         tr = trace_array_find(name);
9875         if (tr)
9876                 ret = __remove_instance(tr);
9877
9878         mutex_unlock(&trace_types_lock);
9879         mutex_unlock(&event_mutex);
9880
9881         return ret;
9882 }
9883
9884 static __init void create_trace_instances(struct dentry *d_tracer)
9885 {
9886         struct trace_array *tr;
9887
9888         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9889                                                          instance_mkdir,
9890                                                          instance_rmdir);
9891         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9892                 return;
9893
9894         mutex_lock(&event_mutex);
9895         mutex_lock(&trace_types_lock);
9896
9897         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9898                 if (!tr->name)
9899                         continue;
9900                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9901                              "Failed to create instance directory\n"))
9902                         break;
9903         }
9904
9905         mutex_unlock(&trace_types_lock);
9906         mutex_unlock(&event_mutex);
9907 }
9908
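/*
 * Create the standard set of per-instance control files ("trace",
 * "trace_pipe", "tracing_on", "buffer_size_kb", etc.) under @d_tracer,
 * along with the options directory and the per-CPU directories.
 */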
9909 static void
9910 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9911 {
9912         int cpu;
9913
9914         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9915                         tr, &show_traces_fops);
9916
9917         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9918                         tr, &set_tracer_fops);
9919
9920         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9921                           tr, &tracing_cpumask_fops);
9922
9923         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9924                           tr, &tracing_iter_fops);
9925
9926         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9927                           tr, &tracing_fops);
9928
9929         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9930                           tr, &tracing_pipe_fops);
9931
9932         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9933                           tr, &tracing_entries_fops);
9934
9935         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9936                           tr, &tracing_total_entries_fops);
9937
9938         trace_create_file("free_buffer", 0200, d_tracer,
9939                           tr, &tracing_free_buffer_fops);
9940
9941         trace_create_file("trace_marker", 0220, d_tracer,
9942                           tr, &tracing_mark_fops);
9943
9944         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9945
9946         trace_create_file("trace_marker_raw", 0220, d_tracer,
9947                           tr, &tracing_mark_raw_fops);
9948
9949         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9950                           &trace_clock_fops);
9951
9952         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9953                           tr, &rb_simple_fops);
9954
9955         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9956                           &trace_time_stamp_mode_fops);
9957
9958         tr->buffer_percent = 50;
9959
9960         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9961                         tr, &buffer_percent_fops);
9962
9963         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9964                           tr, &buffer_subbuf_size_fops);
9965
9966         create_trace_options_dir(tr);
9967
9968 #ifdef CONFIG_TRACER_MAX_TRACE
9969         trace_create_maxlat_file(tr, d_tracer);
9970 #endif
9971
9972         if (ftrace_create_function_files(tr, d_tracer))
9973                 MEM_FAIL(1, "Could not allocate function filter files");
9974
9975 #ifdef CONFIG_TRACER_SNAPSHOT
9976         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9977                           tr, &snapshot_fops);
9978 #endif
9979
9980         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9981                           tr, &tracing_err_log_fops);
9982
9983         for_each_tracing_cpu(cpu)
9984                 tracing_init_tracefs_percpu(tr, cpu);
9985
9986         ftrace_init_tracefs(tr, d_tracer);
9987 }
9988
9989 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9990 {
9991         struct vfsmount *mnt;
9992         struct file_system_type *type;
9993
9994         /*
9995          * To maintain backward compatibility for tools that mount
9996          * debugfs to get to the tracing facility, tracefs is automatically
9997          * mounted to the debugfs/tracing directory.
9998          */
9999         type = get_fs_type("tracefs");
10000         if (!type)
10001                 return NULL;
10002         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10003         put_filesystem(type);
10004         if (IS_ERR(mnt))
10005                 return NULL;
10006         mntget(mnt);
10007
10008         return mnt;
10009 }
10010
10011 /**
10012  * tracing_init_dentry - initialize top level trace array
10013  *
10014  * This is called when creating files or directories in the tracing
10015  * directory. It is called via fs_initcall() by any of the boot up code
10016  * and expects to return the dentry of the top level tracing directory.
10017  */
10018 int tracing_init_dentry(void)
10019 {
10020         struct trace_array *tr = &global_trace;
10021
10022         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10023                 pr_warn("Tracing disabled due to lockdown\n");
10024                 return -EPERM;
10025         }
10026
10027         /* The top level trace array uses NULL as parent */
10028         if (tr->dir)
10029                 return 0;
10030
10031         if (WARN_ON(!tracefs_initialized()))
10032                 return -ENODEV;
10033
10034         /*
10035          * As there may still be users that expect the tracing
10036          * files to exist in debugfs/tracing, we must automount
10037          * the tracefs file system there, so older tools still
10038          * work with the newer kernel.
10039          */
10040         tr->dir = debugfs_create_automount("tracing", NULL,
10041                                            trace_automount, NULL);
10042
10043         return 0;
10044 }
10045
10046 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10047 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10048
10049 static struct workqueue_struct *eval_map_wq __initdata;
10050 static struct work_struct eval_map_work __initdata;
10051 static struct work_struct tracerfs_init_work __initdata;
10052
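/*
 * Insert the eval (enum) maps built into the kernel image into the
 * trace_eval_maps list. Run from a workqueue when one could be allocated,
 * otherwise called directly.
 */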
10053 static void __init eval_map_work_func(struct work_struct *work)
10054 {
10055         int len;
10056
10057         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10058         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10059 }
10060
10061 static int __init trace_eval_init(void)
10062 {
10063         INIT_WORK(&eval_map_work, eval_map_work_func);
10064
10065         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10066         if (!eval_map_wq) {
10067                 pr_err("Unable to allocate eval_map_wq\n");
10068                 /* Do work here */
10069                 eval_map_work_func(&eval_map_work);
10070                 return -ENOMEM;
10071         }
10072
10073         queue_work(eval_map_wq, &eval_map_work);
10074         return 0;
10075 }
10076
10077 subsys_initcall(trace_eval_init);
10078
10079 static int __init trace_eval_sync(void)
10080 {
10081         /* Make sure the eval map updates are finished */
10082         if (eval_map_wq)
10083                 destroy_workqueue(eval_map_wq);
10084         return 0;
10085 }
10086
10087 late_initcall_sync(trace_eval_sync);
10088
10089
10090 #ifdef CONFIG_MODULES
10091 static void trace_module_add_evals(struct module *mod)
10092 {
10093         if (!mod->num_trace_evals)
10094                 return;
10095
10096         /*
10097          * Modules with bad taint do not have events created, so do
10098          * not bother with their enums either.
10099          */
10100         if (trace_module_has_bad_taint(mod))
10101                 return;
10102
10103         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10104 }
10105
10106 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10107 static void trace_module_remove_evals(struct module *mod)
10108 {
10109         union trace_eval_map_item *map;
10110         union trace_eval_map_item **last = &trace_eval_maps;
10111
10112         if (!mod->num_trace_evals)
10113                 return;
10114
10115         mutex_lock(&trace_eval_mutex);
10116
10117         map = trace_eval_maps;
10118
10119         while (map) {
10120                 if (map->head.mod == mod)
10121                         break;
10122                 map = trace_eval_jmp_to_tail(map);
10123                 last = &map->tail.next;
10124                 map = map->tail.next;
10125         }
10126         if (!map)
10127                 goto out;
10128
10129         *last = trace_eval_jmp_to_tail(map)->tail.next;
10130         kfree(map);
10131  out:
10132         mutex_unlock(&trace_eval_mutex);
10133 }
10134 #else
10135 static inline void trace_module_remove_evals(struct module *mod) { }
10136 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10137
10138 static int trace_module_notify(struct notifier_block *self,
10139                                unsigned long val, void *data)
10140 {
10141         struct module *mod = data;
10142
10143         switch (val) {
10144         case MODULE_STATE_COMING:
10145                 trace_module_add_evals(mod);
10146                 break;
10147         case MODULE_STATE_GOING:
10148                 trace_module_remove_evals(mod);
10149                 break;
10150         }
10151
10152         return NOTIFY_OK;
10153 }
10154
10155 static struct notifier_block trace_module_nb = {
10156         .notifier_call = trace_module_notify,
10157         .priority = 0,
10158 };
10159 #endif /* CONFIG_MODULES */
10160
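/*
 * Create the top-level tracefs files (README, saved_cmdlines, the
 * "instances" directory, etc.) for the global trace array. May be run
 * from a workqueue (see tracer_init_tracefs()) instead of directly from
 * the initcall.
 */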
10161 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10162 {
10163
10164         event_trace_init();
10165
10166         init_tracer_tracefs(&global_trace, NULL);
10167         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10168
10169         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10170                         &global_trace, &tracing_thresh_fops);
10171
10172         trace_create_file("README", TRACE_MODE_READ, NULL,
10173                         NULL, &tracing_readme_fops);
10174
10175         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10176                         NULL, &tracing_saved_cmdlines_fops);
10177
10178         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10179                           NULL, &tracing_saved_cmdlines_size_fops);
10180
10181         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10182                         NULL, &tracing_saved_tgids_fops);
10183
10184         trace_create_eval_file(NULL);
10185
10186 #ifdef CONFIG_MODULES
10187         register_module_notifier(&trace_module_nb);
10188 #endif
10189
10190 #ifdef CONFIG_DYNAMIC_FTRACE
10191         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10192                         NULL, &tracing_dyn_info_fops);
10193 #endif
10194
10195         create_trace_instances(NULL);
10196
10197         update_tracer_options(&global_trace);
10198 }
10199
10200 static __init int tracer_init_tracefs(void)
10201 {
10202         int ret;
10203
10204         trace_access_lock_init();
10205
10206         ret = tracing_init_dentry();
10207         if (ret)
10208                 return 0;
10209
10210         if (eval_map_wq) {
10211                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10212                 queue_work(eval_map_wq, &tracerfs_init_work);
10213         } else {
10214                 tracer_init_tracefs_work_func(NULL);
10215         }
10216
10217         rv_init_interface();
10218
10219         return 0;
10220 }
10221
10222 fs_initcall(tracer_init_tracefs);
10223
10224 static int trace_die_panic_handler(struct notifier_block *self,
10225                                 unsigned long ev, void *unused);
10226
10227 static struct notifier_block trace_panic_notifier = {
10228         .notifier_call = trace_die_panic_handler,
10229         .priority = INT_MAX - 1,
10230 };
10231
10232 static struct notifier_block trace_die_notifier = {
10233         .notifier_call = trace_die_panic_handler,
10234         .priority = INT_MAX - 1,
10235 };
10236
10237 /*
10238  * The idea is to execute the following die/panic callback early, in order
10239  * to avoid showing irrelevant information in the trace (like other panic
10240  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10241  * warnings get disabled (to prevent potential log flooding).
10242  */
10243 static int trace_die_panic_handler(struct notifier_block *self,
10244                                 unsigned long ev, void *unused)
10245 {
10246         if (!ftrace_dump_on_oops)
10247                 return NOTIFY_DONE;
10248
10249         /* The die notifier requires DIE_OOPS to trigger */
10250         if (self == &trace_die_notifier && ev != DIE_OOPS)
10251                 return NOTIFY_DONE;
10252
10253         ftrace_dump(ftrace_dump_on_oops);
10254
10255         return NOTIFY_DONE;
10256 }
10257
10258 /*
10259  * printk is set to a max of 1024, but we really don't need it that big.
10260  * Nothing should be printing 1000 characters anyway.
10261  */
10262 #define TRACE_MAX_PRINT         1000
10263
10264 /*
10265  * Define here KERN_TRACE so that we have one place to modify
10266  * it if we decide to change what log level the ftrace dump
10267  * should be at.
10268  */
10269 #define KERN_TRACE              KERN_EMERG
10270
10271 void
10272 trace_printk_seq(struct trace_seq *s)
10273 {
10274         /* Probably should print a warning here. */
10275         if (s->seq.len >= TRACE_MAX_PRINT)
10276                 s->seq.len = TRACE_MAX_PRINT;
10277
10278         /*
10279          * More paranoid code. Although the buffer size is set to
10280          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10281          * an extra layer of protection.
10282          */
10283         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10284                 s->seq.len = s->seq.size - 1;
10285
10286         /* Should be NUL terminated, but we are paranoid. */
10287         s->buffer[s->seq.len] = 0;
10288
10289         printk(KERN_TRACE "%s", s->buffer);
10290
10291         trace_seq_init(s);
10292 }
10293
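/*
 * Initialize @iter to iterate over the global trace buffer across all
 * CPUs, using the preallocated static buffers. Used by ftrace_dump() and
 * similar dump paths, where memory allocation cannot be trusted.
 */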
10294 void trace_init_global_iter(struct trace_iterator *iter)
10295 {
10296         iter->tr = &global_trace;
10297         iter->trace = iter->tr->current_trace;
10298         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10299         iter->array_buffer = &global_trace.array_buffer;
10300
10301         if (iter->trace && iter->trace->open)
10302                 iter->trace->open(iter);
10303
10304         /* Annotate start of buffers if we had overruns */
10305         if (ring_buffer_overruns(iter->array_buffer->buffer))
10306                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10307
10308         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10309         if (trace_clocks[iter->tr->clock_id].in_ns)
10310                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10311
10312         /* Cannot use kmalloc for iter.temp and iter.fmt */
10313         iter->temp = static_temp_buf;
10314         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10315         iter->fmt = static_fmt_buf;
10316         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10317 }
10318
10319 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10320 {
10321         /* use static because iter can be a bit big for the stack */
10322         static struct trace_iterator iter;
10323         static atomic_t dump_running;
10324         struct trace_array *tr = &global_trace;
10325         unsigned int old_userobj;
10326         unsigned long flags;
10327         int cnt = 0, cpu;
10328
10329         /* Only allow one dump user at a time. */
10330         if (atomic_inc_return(&dump_running) != 1) {
10331                 atomic_dec(&dump_running);
10332                 return;
10333         }
10334
10335         /*
10336          * Always turn off tracing when we dump.
10337          * We don't need to show trace output of what happens
10338          * between multiple crashes.
10339          *
10340          * If the user does a sysrq-z, then they can re-enable
10341          * tracing with echo 1 > tracing_on.
10342          */
10343         tracing_off();
10344
10345         local_irq_save(flags);
10346
10347         /* Simulate the iterator */
10348         trace_init_global_iter(&iter);
10349
10350         for_each_tracing_cpu(cpu) {
10351                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10352         }
10353
10354         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10355
10356         /* don't look at user memory in panic mode */
10357         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10358
10359         switch (oops_dump_mode) {
10360         case DUMP_ALL:
10361                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10362                 break;
10363         case DUMP_ORIG:
10364                 iter.cpu_file = raw_smp_processor_id();
10365                 break;
10366         case DUMP_NONE:
10367                 goto out_enable;
10368         default:
10369                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10370                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10371         }
10372
10373         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10374
10375         /* Did function tracer already get disabled? */
10376         if (ftrace_is_dead()) {
10377                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10378                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10379         }
10380
10381         /*
10382          * We need to stop all tracing on all CPUs to read
10383          * the next buffer. This is a bit expensive, but is
10384          * not done often. We print everything we can read,
10385          * and then release the locks again.
10386          */
10387
10388         while (!trace_empty(&iter)) {
10389
10390                 if (!cnt)
10391                         printk(KERN_TRACE "---------------------------------\n");
10392
10393                 cnt++;
10394
10395                 trace_iterator_reset(&iter);
10396                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10397
10398                 if (trace_find_next_entry_inc(&iter) != NULL) {
10399                         int ret;
10400
10401                         ret = print_trace_line(&iter);
10402                         if (ret != TRACE_TYPE_NO_CONSUME)
10403                                 trace_consume(&iter);
10404                 }
10405                 touch_nmi_watchdog();
10406
10407                 trace_printk_seq(&iter.seq);
10408         }
10409
10410         if (!cnt)
10411                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10412         else
10413                 printk(KERN_TRACE "---------------------------------\n");
10414
10415  out_enable:
10416         tr->trace_flags |= old_userobj;
10417
10418         for_each_tracing_cpu(cpu) {
10419                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10420         }
10421         atomic_dec(&dump_running);
10422         local_irq_restore(flags);
10423 }
10424 EXPORT_SYMBOL_GPL(ftrace_dump);
10425
10426 #define WRITE_BUFSIZE  4096
10427
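/*
 * Copy a user-space write into kernel memory in WRITE_BUFSIZE chunks,
 * split it into newline-terminated commands, strip '#' comments, and feed
 * each resulting line to @createfn. Used for command style files such as
 * the dynamic event definition files.
 */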
10428 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10429                                 size_t count, loff_t *ppos,
10430                                 int (*createfn)(const char *))
10431 {
10432         char *kbuf, *buf, *tmp;
10433         int ret = 0;
10434         size_t done = 0;
10435         size_t size;
10436
10437         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10438         if (!kbuf)
10439                 return -ENOMEM;
10440
10441         while (done < count) {
10442                 size = count - done;
10443
10444                 if (size >= WRITE_BUFSIZE)
10445                         size = WRITE_BUFSIZE - 1;
10446
10447                 if (copy_from_user(kbuf, buffer + done, size)) {
10448                         ret = -EFAULT;
10449                         goto out;
10450                 }
10451                 kbuf[size] = '\0';
10452                 buf = kbuf;
10453                 do {
10454                         tmp = strchr(buf, '\n');
10455                         if (tmp) {
10456                                 *tmp = '\0';
10457                                 size = tmp - buf + 1;
10458                         } else {
10459                                 size = strlen(buf);
10460                                 if (done + size < count) {
10461                                         if (buf != kbuf)
10462                                                 break;
10463                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10464                                         pr_warn("Line length is too long: Should be less than %d\n",
10465                                                 WRITE_BUFSIZE - 2);
10466                                         ret = -EINVAL;
10467                                         goto out;
10468                                 }
10469                         }
10470                         done += size;
10471
10472                         /* Remove comments */
10473                         tmp = strchr(buf, '#');
10474
10475                         if (tmp)
10476                                 *tmp = '\0';
10477
10478                         ret = createfn(buf);
10479                         if (ret)
10480                                 goto out;
10481                         buf += size;
10482
10483                 } while (done < count);
10484         }
10485         ret = done;
10486
10487 out:
10488         kfree(kbuf);
10489
10490         return ret;
10491 }
10492
10493 #ifdef CONFIG_TRACER_MAX_TRACE
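/*
 * Check @name against the boot-time snapshot option. boot_snapshot_info
 * holds a tab-separated list of instance names recorded while parsing the
 * kernel command line.
 */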
10494 __init static bool tr_needs_alloc_snapshot(const char *name)
10495 {
10496         char *test;
10497         int len = strlen(name);
10498         bool ret;
10499
10500         if (!boot_snapshot_index)
10501                 return false;
10502
10503         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10504             boot_snapshot_info[len] == '\t')
10505                 return true;
10506
10507         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10508         if (!test)
10509                 return false;
10510
10511         sprintf(test, "\t%s\t", name);
10512         ret = strstr(boot_snapshot_info, test) == NULL;
10513         kfree(test);
10514         return ret;
10515 }
10516
10517 __init static void do_allocate_snapshot(const char *name)
10518 {
10519         if (!tr_needs_alloc_snapshot(name))
10520                 return;
10521
10522         /*
10523          * When allocate_snapshot is set, the next call to
10524          * allocate_trace_buffers() (called by trace_array_get_by_name())
10525          * will allocate the snapshot buffer. That will also clear
10526          * this flag.
10527          */
10528         allocate_snapshot = true;
10529 }
10530 #else
10531 static inline void do_allocate_snapshot(const char *name) { }
10532 #endif
10533
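/*
 * Create the trace instances requested on the kernel command line.
 * boot_instance_info holds tab-separated entries, each an instance name
 * optionally followed by a comma-separated list of events to enable in
 * that instance.
 */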
10534 __init static void enable_instances(void)
10535 {
10536         struct trace_array *tr;
10537         char *curr_str;
10538         char *str;
10539         char *tok;
10540
10541         /* A tab is always appended */
10542         boot_instance_info[boot_instance_index - 1] = '\0';
10543         str = boot_instance_info;
10544
10545         while ((curr_str = strsep(&str, "\t"))) {
10546
10547                 tok = strsep(&curr_str, ",");
10548
10549                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10550                         do_allocate_snapshot(tok);
10551
10552                 tr = trace_array_get_by_name(tok, NULL);
10553                 if (!tr) {
10554                         pr_warn("Failed to create instance buffer %s\n", tok);
10555                         continue;
10556                 }
10557                 /* Allow user space to delete it */
10558                 trace_array_put(tr);
10559
10560                 while ((tok = strsep(&curr_str, ","))) {
10561                         early_enable_events(tr, tok, true);
10562                 }
10563         }
10564 }
10565
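/*
 * Early set up of the global trace array: allocate the cpumasks and ring
 * buffers, register the nop tracer as a placeholder, and hook up the
 * panic and die notifiers. Called from early_trace_init().
 */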
10566 __init static int tracer_alloc_buffers(void)
10567 {
10568         int ring_buf_size;
10569         int ret = -ENOMEM;
10570
10571
10572         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10573                 pr_warn("Tracing disabled due to lockdown\n");
10574                 return -EPERM;
10575         }
10576
10577         /*
10578          * Make sure we don't accidentally add more trace options
10579          * than we have bits for.
10580          */
10581         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10582
10583         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10584                 goto out;
10585
10586         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10587                 goto out_free_buffer_mask;
10588
10589         /* Only allocate trace_printk buffers if a trace_printk exists */
10590         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10591                 /* Must be called before global_trace.buffer is allocated */
10592                 trace_printk_init_buffers();
10593
10594         /* To save memory, keep the ring buffer size to its minimum */
10595         if (global_trace.ring_buffer_expanded)
10596                 ring_buf_size = trace_buf_size;
10597         else
10598                 ring_buf_size = 1;
10599
10600         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10601         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10602
10603         raw_spin_lock_init(&global_trace.start_lock);
10604
10605         /*
10606          * The prepare callback allocates some memory for the ring buffer. We
10607          * don't free the buffer if the CPU goes down. If we were to free
10608          * the buffer, the user would lose any trace that was in the
10609          * buffer. The memory will be removed once the "instance" is removed.
10610          */
10611         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10612                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10613                                       NULL);
10614         if (ret < 0)
10615                 goto out_free_cpumask;
10616         /* Used for event triggers */
10617         ret = -ENOMEM;
10618         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10619         if (!temp_buffer)
10620                 goto out_rm_hp_state;
10621
10622         if (trace_create_savedcmd() < 0)
10623                 goto out_free_temp_buffer;
10624
10625         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10626                 goto out_free_savedcmd;
10627
10628         /* TODO: make the number of buffers hot pluggable with CPUS */
10629         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10630                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10631                 goto out_free_pipe_cpumask;
10632         }
10633         if (global_trace.buffer_disabled)
10634                 tracing_off();
10635
10636         if (trace_boot_clock) {
10637                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10638                 if (ret < 0)
10639                         pr_warn("Trace clock %s not defined, going back to default\n",
10640                                 trace_boot_clock);
10641         }
10642
10643         /*
10644          * register_tracer() might reference current_trace, so it
10645          * needs to be set before we register anything. This is
10646          * just a bootstrap of current_trace anyway.
10647          */
10648         global_trace.current_trace = &nop_trace;
10649
10650         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10651
10652         ftrace_init_global_array_ops(&global_trace);
10653
10654         init_trace_flags_index(&global_trace);
10655
10656         register_tracer(&nop_trace);
10657
10658         /* Function tracing may start here (via kernel command line) */
10659         init_function_trace();
10660
10661         /* All seems OK, enable tracing */
10662         tracing_disabled = 0;
10663
10664         atomic_notifier_chain_register(&panic_notifier_list,
10665                                        &trace_panic_notifier);
10666
10667         register_die_notifier(&trace_die_notifier);
10668
10669         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10670
10671         INIT_LIST_HEAD(&global_trace.systems);
10672         INIT_LIST_HEAD(&global_trace.events);
10673         INIT_LIST_HEAD(&global_trace.hist_vars);
10674         INIT_LIST_HEAD(&global_trace.err_log);
10675         list_add(&global_trace.list, &ftrace_trace_arrays);
10676
10677         apply_trace_boot_options();
10678
10679         register_snapshot_cmd();
10680
10681         test_can_verify();
10682
10683         return 0;
10684
10685 out_free_pipe_cpumask:
10686         free_cpumask_var(global_trace.pipe_cpumask);
10687 out_free_savedcmd:
10688         free_saved_cmdlines_buffer(savedcmd);
10689 out_free_temp_buffer:
10690         ring_buffer_free(temp_buffer);
10691 out_rm_hp_state:
10692         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10693 out_free_cpumask:
10694         free_cpumask_var(global_trace.tracing_cpumask);
10695 out_free_buffer_mask:
10696         free_cpumask_var(tracing_buffer_mask);
10697 out:
10698         return ret;
10699 }
10700
10701 void __init ftrace_boot_snapshot(void)
10702 {
10703 #ifdef CONFIG_TRACER_MAX_TRACE
10704         struct trace_array *tr;
10705
10706         if (!snapshot_at_boot)
10707                 return;
10708
10709         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10710                 if (!tr->allocated_snapshot)
10711                         continue;
10712
10713                 tracing_snapshot_instance(tr);
10714                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10715         }
10716 #endif
10717 }
10718
10719 void __init early_trace_init(void)
10720 {
10721         if (tracepoint_printk) {
10722                 tracepoint_print_iter =
10723                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10724                 if (MEM_FAIL(!tracepoint_print_iter,
10725                              "Failed to allocate trace iterator\n"))
10726                         tracepoint_printk = 0;
10727                 else
10728                         static_key_enable(&tracepoint_printk_key.key);
10729         }
10730         tracer_alloc_buffers();
10731
10732         init_events();
10733 }
10734
10735 void __init trace_init(void)
10736 {
10737         trace_event_init();
10738
10739         if (boot_instance_index)
10740                 enable_instances();
10741 }
10742
10743 __init static void clear_boot_tracer(void)
10744 {
10745         /*
10746          * The default bootup tracer name points into an init section
10747          * that is about to be freed. This function is called at late
10748          * init; if the boot tracer was not found by now, clear the
10749          * pointer out to prevent a later registration from accessing
10750          * the buffer that is about to be freed.
10751          */
10752         if (!default_bootup_tracer)
10753                 return;
10754
10755         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10756                default_bootup_tracer);
10757         default_bootup_tracer = NULL;
10758 }
10759
10760 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10761 __init static void tracing_set_default_clock(void)
10762 {
10763         /* sched_clock_stable() is determined in late_initcall */
10764         if (!trace_boot_clock && !sched_clock_stable()) {
10765                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10766                         pr_warn("Can not set tracing clock due to lockdown\n");
10767                         return;
10768                 }
10769
10770                 printk(KERN_WARNING
10771                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10772                        "If you want to keep using the local clock, then add:\n"
10773                        "  \"trace_clock=local\"\n"
10774                        "on the kernel command line\n");
10775                 tracing_set_clock(&global_trace, "global");
10776         }
10777 }
10778 #else
10779 static inline void tracing_set_default_clock(void) { }
10780 #endif
10781
10782 __init static int late_trace_init(void)
10783 {
10784         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10785                 static_key_disable(&tracepoint_printk_key.key);
10786                 tracepoint_printk = 0;
10787         }
10788
10789         tracing_set_default_clock();
10790         clear_boot_tracer();
10791         return 0;
10792 }
10793
10794 late_initcall_sync(late_trace_init);