kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest peeks into the ring buffer to count the
62  * entries inserted during the selftest, although concurrent
63  * insertions into the ring buffer, such as trace_printk(), could occur
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67
68 /*
69  * If boot-time tracing (including tracers/events enabled via the kernel
70  * cmdline) is running, we do not want to run the selftest.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73
74 void __init disable_tracing_selftest(const char *reason)
75 {
76         if (!tracing_selftest_disabled) {
77                 tracing_selftest_disabled = true;
78                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
79         }
80 }
81 #else
82 #define tracing_selftest_running        0
83 #define tracing_selftest_disabled       0
84 #endif
85
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it by either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
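
/*
 * Illustrative sketch (not built) of the array layout described above: for
 * a module that registers three eval maps, the saved array looks like
 *
 *	item[0].head   -> { .mod = <module>, .length = 3 }
 *	item[1..3].map -> the three trace_eval_map entries
 *	item[4].tail   -> { .next = <next saved array or NULL>, .end = NULL }
 *
 * trace_eval_maps points at the first such array; walkers step through the
 * map entries and follow tail.next to reach the next saved array.
 */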
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187 static bool snapshot_at_boot;
188
189 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
190 static int boot_instance_index;
191
192 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_snapshot_index;
194
195 static int __init set_cmdline_ftrace(char *str)
196 {
197         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
198         default_bootup_tracer = bootup_tracer_buf;
199         /* We are using ftrace early, expand it */
200         trace_set_ring_buffer_expanded(NULL);
201         return 1;
202 }
203 __setup("ftrace=", set_cmdline_ftrace);
204
205 static int __init set_ftrace_dump_on_oops(char *str)
206 {
207         if (*str++ != '=' || !*str || !strcmp("1", str)) {
208                 ftrace_dump_on_oops = DUMP_ALL;
209                 return 1;
210         }
211
212         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
213                 ftrace_dump_on_oops = DUMP_ORIG;
214                 return 1;
215         }
216
217         return 0;
218 }
219 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
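
/*
 * Example command-line forms accepted by the parser above (a sketch of the
 * behavior, derived from the code itself):
 *
 *	ftrace_dump_on_oops		-> dump all CPU buffers (DUMP_ALL)
 *	ftrace_dump_on_oops=1		-> same as above
 *	ftrace_dump_on_oops=orig_cpu	-> dump only the buffer of the CPU
 *	ftrace_dump_on_oops=2		   that triggered the oops (DUMP_ORIG)
 */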
220
221 static int __init stop_trace_on_warning(char *str)
222 {
223         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
224                 __disable_trace_on_warning = 1;
225         return 1;
226 }
227 __setup("traceoff_on_warning", stop_trace_on_warning);
228
229 static int __init boot_alloc_snapshot(char *str)
230 {
231         char *slot = boot_snapshot_info + boot_snapshot_index;
232         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
233         int ret;
234
235         if (str[0] == '=') {
236                 str++;
237                 if (strlen(str) >= left)
238                         return -1;
239
240                 ret = snprintf(slot, left, "%s\t", str);
241                 boot_snapshot_index += ret;
242         } else {
243                 allocate_snapshot = true;
244                 /* We also need the main ring buffer expanded */
245                 trace_set_ring_buffer_expanded(NULL);
246         }
247         return 1;
248 }
249 __setup("alloc_snapshot", boot_alloc_snapshot);
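
/*
 * Usage sketch for the "alloc_snapshot" parameter above (derived from the
 * parsing code; the instance name "foo" is purely illustrative):
 *
 *	alloc_snapshot		-> allocate the snapshot buffer for the
 *				   top-level trace instance at boot
 *	alloc_snapshot=foo	-> record "foo" in boot_snapshot_info (tab
 *				   separated) so a snapshot buffer can be
 *				   set up for that instance later in boot
 */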
250
251
252 static int __init boot_snapshot(char *str)
253 {
254         snapshot_at_boot = true;
255         boot_alloc_snapshot(str);
256         return 1;
257 }
258 __setup("ftrace_boot_snapshot", boot_snapshot);
259
260
261 static int __init boot_instance(char *str)
262 {
263         char *slot = boot_instance_info + boot_instance_index;
264         int left = sizeof(boot_instance_info) - boot_instance_index;
265         int ret;
266
267         if (strlen(str) >= left)
268                 return -1;
269
270         ret = snprintf(slot, left, "%s\t", str);
271         boot_instance_index += ret;
272
273         return 1;
274 }
275 __setup("trace_instance=", boot_instance);
276
277
278 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
279
280 static int __init set_trace_boot_options(char *str)
281 {
282         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
283         return 1;
284 }
285 __setup("trace_options=", set_trace_boot_options);
286
287 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
288 static char *trace_boot_clock __initdata;
289
290 static int __init set_trace_boot_clock(char *str)
291 {
292         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
293         trace_boot_clock = trace_boot_clock_buf;
294         return 1;
295 }
296 __setup("trace_clock=", set_trace_boot_clock);
297
298 static int __init set_tracepoint_printk(char *str)
299 {
300         /* Ignore the "tp_printk_stop_on_boot" param */
301         if (*str == '_')
302                 return 0;
303
304         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
305                 tracepoint_printk = 1;
306         return 1;
307 }
308 __setup("tp_printk", set_tracepoint_printk);
309
310 static int __init set_tracepoint_printk_stop(char *str)
311 {
312         tracepoint_printk_stop_on_boot = true;
313         return 1;
314 }
315 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
316
317 unsigned long long ns2usecs(u64 nsec)
318 {
319         nsec += 500;
320         do_div(nsec, 1000);
321         return nsec;
322 }
323
324 static void
325 trace_process_export(struct trace_export *export,
326                struct ring_buffer_event *event, int flag)
327 {
328         struct trace_entry *entry;
329         unsigned int size = 0;
330
331         if (export->flags & flag) {
332                 entry = ring_buffer_event_data(event);
333                 size = ring_buffer_event_length(event);
334                 export->write(export, entry, size);
335         }
336 }
337
338 static DEFINE_MUTEX(ftrace_export_lock);
339
340 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
341
342 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
344 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
345
346 static inline void ftrace_exports_enable(struct trace_export *export)
347 {
348         if (export->flags & TRACE_EXPORT_FUNCTION)
349                 static_branch_inc(&trace_function_exports_enabled);
350
351         if (export->flags & TRACE_EXPORT_EVENT)
352                 static_branch_inc(&trace_event_exports_enabled);
353
354         if (export->flags & TRACE_EXPORT_MARKER)
355                 static_branch_inc(&trace_marker_exports_enabled);
356 }
357
358 static inline void ftrace_exports_disable(struct trace_export *export)
359 {
360         if (export->flags & TRACE_EXPORT_FUNCTION)
361                 static_branch_dec(&trace_function_exports_enabled);
362
363         if (export->flags & TRACE_EXPORT_EVENT)
364                 static_branch_dec(&trace_event_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_MARKER)
367                 static_branch_dec(&trace_marker_exports_enabled);
368 }
369
370 static void ftrace_exports(struct ring_buffer_event *event, int flag)
371 {
372         struct trace_export *export;
373
374         preempt_disable_notrace();
375
376         export = rcu_dereference_raw_check(ftrace_exports_list);
377         while (export) {
378                 trace_process_export(export, event, flag);
379                 export = rcu_dereference_raw_check(export->next);
380         }
381
382         preempt_enable_notrace();
383 }
384
385 static inline void
386 add_trace_export(struct trace_export **list, struct trace_export *export)
387 {
388         rcu_assign_pointer(export->next, *list);
389         /*
390          * We are adding export to the list, but another
391          * CPU might be walking that list. We need to make sure
392          * the export->next pointer is valid before another CPU sees
393          * the export pointer inserted into the list.
394          */
395         rcu_assign_pointer(*list, export);
396 }
397
398 static inline int
399 rm_trace_export(struct trace_export **list, struct trace_export *export)
400 {
401         struct trace_export **p;
402
403         for (p = list; *p != NULL; p = &(*p)->next)
404                 if (*p == export)
405                         break;
406
407         if (*p != export)
408                 return -1;
409
410         rcu_assign_pointer(*p, (*p)->next);
411
412         return 0;
413 }
414
415 static inline void
416 add_ftrace_export(struct trace_export **list, struct trace_export *export)
417 {
418         ftrace_exports_enable(export);
419
420         add_trace_export(list, export);
421 }
422
423 static inline int
424 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
425 {
426         int ret;
427
428         ret = rm_trace_export(list, export);
429         ftrace_exports_disable(export);
430
431         return ret;
432 }
433
434 int register_ftrace_export(struct trace_export *export)
435 {
436         if (WARN_ON_ONCE(!export->write))
437                 return -1;
438
439         mutex_lock(&ftrace_export_lock);
440
441         add_ftrace_export(&ftrace_exports_list, export);
442
443         mutex_unlock(&ftrace_export_lock);
444
445         return 0;
446 }
447 EXPORT_SYMBOL_GPL(register_ftrace_export);
448
449 int unregister_ftrace_export(struct trace_export *export)
450 {
451         int ret;
452
453         mutex_lock(&ftrace_export_lock);
454
455         ret = rm_ftrace_export(&ftrace_exports_list, export);
456
457         mutex_unlock(&ftrace_export_lock);
458
459         return ret;
460 }
461 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
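
/*
 * Minimal usage sketch for the export API above. This is hypothetical
 * caller code (not part of this file); the callback signature is assumed
 * to match struct trace_export in <linux/trace.h>:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// ship the raw trace entry to some external consumer
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */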
462
463 /* trace_flags holds trace_options default values */
464 #define TRACE_DEFAULT_FLAGS                                             \
465         (FUNCTION_DEFAULT_FLAGS |                                       \
466          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
467          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
468          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
469          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
470          TRACE_ITER_HASH_PTR)
471
472 /* trace_options that are only supported by global_trace */
473 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
474                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
475
476 /* trace_flags that are default zero for instances */
477 #define ZEROED_TRACE_FLAGS \
478         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
479
480 /*
481  * The global_trace is the descriptor that holds the top-level tracing
482  * buffers for the live tracing.
483  */
484 static struct trace_array global_trace = {
485         .trace_flags = TRACE_DEFAULT_FLAGS,
486 };
487
488 void trace_set_ring_buffer_expanded(struct trace_array *tr)
489 {
490         if (!tr)
491                 tr = &global_trace;
492         tr->ring_buffer_expanded = true;
493 }
494
495 LIST_HEAD(ftrace_trace_arrays);
496
497 int trace_array_get(struct trace_array *this_tr)
498 {
499         struct trace_array *tr;
500         int ret = -ENODEV;
501
502         mutex_lock(&trace_types_lock);
503         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
504                 if (tr == this_tr) {
505                         tr->ref++;
506                         ret = 0;
507                         break;
508                 }
509         }
510         mutex_unlock(&trace_types_lock);
511
512         return ret;
513 }
514
515 static void __trace_array_put(struct trace_array *this_tr)
516 {
517         WARN_ON(!this_tr->ref);
518         this_tr->ref--;
519 }
520
521 /**
522  * trace_array_put - Decrement the reference counter for this trace array.
523  * @this_tr : pointer to the trace array
524  *
525  * NOTE: Use this when we no longer need the trace array returned by
526  * trace_array_get_by_name(). This ensures the trace array can be later
527  * destroyed.
528  *
529  */
530 void trace_array_put(struct trace_array *this_tr)
531 {
532         if (!this_tr)
533                 return;
534
535         mutex_lock(&trace_types_lock);
536         __trace_array_put(this_tr);
537         mutex_unlock(&trace_types_lock);
538 }
539 EXPORT_SYMBOL_GPL(trace_array_put);
540
541 int tracing_check_open_get_tr(struct trace_array *tr)
542 {
543         int ret;
544
545         ret = security_locked_down(LOCKDOWN_TRACEFS);
546         if (ret)
547                 return ret;
548
549         if (tracing_disabled)
550                 return -ENODEV;
551
552         if (tr && trace_array_get(tr) < 0)
553                 return -ENODEV;
554
555         return 0;
556 }
557
558 int call_filter_check_discard(struct trace_event_call *call, void *rec,
559                               struct trace_buffer *buffer,
560                               struct ring_buffer_event *event)
561 {
562         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
563             !filter_match_preds(call->filter, rec)) {
564                 __trace_event_discard_commit(buffer, event);
565                 return 1;
566         }
567
568         return 0;
569 }
570
571 /**
572  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
573  * @filtered_pids: The list of pids to check
574  * @search_pid: The PID to find in @filtered_pids
575  *
576  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577  */
578 bool
579 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
580 {
581         return trace_pid_list_is_set(filtered_pids, search_pid);
582 }
583
584 /**
585  * trace_ignore_this_task - should a task be ignored for tracing
586  * @filtered_pids: The list of pids to check
587  * @filtered_no_pids: The list of pids not to be traced
588  * @task: The task that should be ignored if not filtered
589  *
590  * Checks if @task should be traced or not from @filtered_pids.
591  * Returns true if @task should *NOT* be traced.
592  * Returns false if @task should be traced.
593  */
594 bool
595 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
596                        struct trace_pid_list *filtered_no_pids,
597                        struct task_struct *task)
598 {
599         /*
600          * If filtered_no_pids is not empty, and the task's pid is listed
601          * in filtered_no_pids, then return true.
602          * Otherwise, if filtered_pids is empty, that means we can
603          * trace all tasks. If it has content, then only trace pids
604          * within filtered_pids.
605          */
606
607         return (filtered_pids &&
608                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
609                 (filtered_no_pids &&
610                  trace_find_filtered_pid(filtered_no_pids, task->pid));
611 }
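
/*
 * Summary of the result above (a sketch; rows are checked top-down and the
 * first matching one applies; "listed" means the task's pid is set in that
 * pid list):
 *
 *	filtered_no_pids set and task listed	-> true  (ignore the task)
 *	filtered_pids set and task not listed	-> true  (ignore the task)
 *	otherwise				-> false (trace the task)
 */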
612
613 /**
614  * trace_filter_add_remove_task - Add or remove a task from a pid_list
615  * @pid_list: The list to modify
616  * @self: The current task for fork or NULL for exit
617  * @task: The task to add or remove
618  *
619  * If adding a task, if @self is defined, the task is only added if @self
620  * is also included in @pid_list. This happens on fork and tasks should
621  * only be added when the parent is listed. If @self is NULL, then the
622  * @task pid will be removed from the list, which would happen on exit
623  * of a task.
624  */
625 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
626                                   struct task_struct *self,
627                                   struct task_struct *task)
628 {
629         if (!pid_list)
630                 return;
631
632         /* For forks, we only add if the forking task is listed */
633         if (self) {
634                 if (!trace_find_filtered_pid(pid_list, self->pid))
635                         return;
636         }
637
638         /* "self" is set for forks, and NULL for exits */
639         if (self)
640                 trace_pid_list_set(pid_list, task->pid);
641         else
642                 trace_pid_list_clear(pid_list, task->pid);
643 }
644
645 /**
646  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
647  * @pid_list: The pid list to show
648  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
649  * @pos: The position of the file
650  *
651  * This is used by the seq_file "next" operation to iterate the pids
652  * listed in a trace_pid_list structure.
653  *
654  * Returns the pid+1 as we want to display pid of zero, but NULL would
655  * stop the iteration.
656  */
657 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
658 {
659         long pid = (unsigned long)v;
660         unsigned int next;
661
662         (*pos)++;
663
664         /* pid already is +1 of the actual previous bit */
665         if (trace_pid_list_next(pid_list, pid, &next) < 0)
666                 return NULL;
667
668         pid = next;
669
670         /* Return pid + 1 to allow zero to be represented */
671         return (void *)(pid + 1);
672 }
673
674 /**
675  * trace_pid_start - Used for seq_file to start reading pid lists
676  * @pid_list: The pid list to show
677  * @pos: The position of the file
678  *
679  * This is used by seq_file "start" operation to start the iteration
680  * of listing pids.
681  *
682  * Returns the pid+1 as we want to display pid of zero, but NULL would
683  * stop the iteration.
684  */
685 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
686 {
687         unsigned long pid;
688         unsigned int first;
689         loff_t l = 0;
690
691         if (trace_pid_list_first(pid_list, &first) < 0)
692                 return NULL;
693
694         pid = first;
695
696         /* Return pid + 1 so that zero can be the exit value */
697         for (pid++; pid && l < *pos;
698              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
699                 ;
700         return (void *)pid;
701 }
702
703 /**
704  * trace_pid_show - show the current pid in seq_file processing
705  * @m: The seq_file structure to write into
706  * @v: A void pointer of the pid (+1) value to display
707  *
708  * Can be directly used by seq_file operations to display the current
709  * pid value.
710  */
711 int trace_pid_show(struct seq_file *m, void *v)
712 {
713         unsigned long pid = (unsigned long)v - 1;
714
715         seq_printf(m, "%lu\n", pid);
716         return 0;
717 }
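
/*
 * Sketch of how the three helpers above are typically wired into a
 * seq_file; the wrapper names are illustrative only:
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,	// calls trace_pid_start()
 *		.next	= my_pid_seq_next,	// calls trace_pid_next()
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * The pid+1 encoding lets pid 0 be iterated, since a NULL return from
 * ->start() or ->next() would otherwise terminate the iteration.
 */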
718
719 /* 128 should be much more than enough */
720 #define PID_BUF_SIZE            127
721
722 int trace_pid_write(struct trace_pid_list *filtered_pids,
723                     struct trace_pid_list **new_pid_list,
724                     const char __user *ubuf, size_t cnt)
725 {
726         struct trace_pid_list *pid_list;
727         struct trace_parser parser;
728         unsigned long val;
729         int nr_pids = 0;
730         ssize_t read = 0;
731         ssize_t ret;
732         loff_t pos;
733         pid_t pid;
734
735         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
736                 return -ENOMEM;
737
738         /*
739          * Always create a new array: the write is an all-or-nothing
740          * operation, so new pids supplied by the user go into a fresh
741          * list. If the operation fails, the current list is
742          * not modified.
743          */
744         pid_list = trace_pid_list_alloc();
745         if (!pid_list) {
746                 trace_parser_put(&parser);
747                 return -ENOMEM;
748         }
749
750         if (filtered_pids) {
751                 /* copy the current bits to the new max */
752                 ret = trace_pid_list_first(filtered_pids, &pid);
753                 while (!ret) {
754                         trace_pid_list_set(pid_list, pid);
755                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
756                         nr_pids++;
757                 }
758         }
759
760         ret = 0;
761         while (cnt > 0) {
762
763                 pos = 0;
764
765                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
766                 if (ret < 0)
767                         break;
768
769                 read += ret;
770                 ubuf += ret;
771                 cnt -= ret;
772
773                 if (!trace_parser_loaded(&parser))
774                         break;
775
776                 ret = -EINVAL;
777                 if (kstrtoul(parser.buffer, 0, &val))
778                         break;
779
780                 pid = (pid_t)val;
781
782                 if (trace_pid_list_set(pid_list, pid) < 0) {
783                         ret = -1;
784                         break;
785                 }
786                 nr_pids++;
787
788                 trace_parser_clear(&parser);
789                 ret = 0;
790         }
791         trace_parser_put(&parser);
792
793         if (ret < 0) {
794                 trace_pid_list_free(pid_list);
795                 return ret;
796         }
797
798         if (!nr_pids) {
799                 /* Cleared the list of pids */
800                 trace_pid_list_free(pid_list);
801                 pid_list = NULL;
802         }
803
804         *new_pid_list = pid_list;
805
806         return read;
807 }
808
809 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
810 {
811         u64 ts;
812
813         /* Early boot up does not have a buffer yet */
814         if (!buf->buffer)
815                 return trace_clock_local();
816
817         ts = ring_buffer_time_stamp(buf->buffer);
818         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
819
820         return ts;
821 }
822
823 u64 ftrace_now(int cpu)
824 {
825         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
826 }
827
828 /**
829  * tracing_is_enabled - Show if global_trace has been enabled
830  *
831  * Shows if the global trace has been enabled or not. It uses the
832  * mirror flag "buffer_disabled", which is meant for fast paths such as
833  * the irqsoff tracer, but it may be inaccurate due to races. If you
834  * need to know the accurate state, use tracing_is_on() which is a little
835  * slower, but accurate.
836  */
837 int tracing_is_enabled(void)
838 {
839         /*
840          * For quick access (irqsoff uses this in fast path), just
841          * return the mirror variable of the state of the ring buffer.
842          * It's a little racy, but we don't really care.
843          */
844         smp_rmb();
845         return !global_trace.buffer_disabled;
846 }
847
848 /*
849  * trace_buf_size is the size in bytes that is allocated
850  * for a buffer. Note, the number of bytes is always rounded
851  * to page size.
852  *
853  * This number is purposely set to the low value of 16384 entries.
854  * If a dump on oops happens, it is much appreciated not to have
855  * to wait for all that output. In any case, this is configurable
856  * at both boot time and run time.
857  */
858 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
859
860 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
861
862 /* trace_types holds a link list of available tracers. */
863 static struct tracer            *trace_types __read_mostly;
864
865 /*
866  * trace_types_lock is used to protect the trace_types list.
867  */
868 DEFINE_MUTEX(trace_types_lock);
869
870 /*
871  * serialize the access of the ring buffer
872  *
873  * The ring buffer serializes readers, but that is only low-level protection.
874  * The validity of the events (returned by ring_buffer_peek() etc.)
875  * is not protected by the ring buffer.
876  *
877  * The content of events may become garbage if we allow another process to
878  * consume these events concurrently:
879  *   A) the page of the consumed events may become a normal page
880  *      (not a reader page) in the ring buffer, and this page will be
881  *      rewritten by the events producer.
882  *   B) The page of the consumed events may become a page for splice_read,
883  *      and this page will be returned to the system.
884  *
885  * These primitives allow multiple processes to access different per-cpu
886  * ring buffers concurrently.
887  *
888  * These primitives don't distinguish read-only and read-consume access.
889  * Multiple read-only accesses are also serialized.
890  */
891
892 #ifdef CONFIG_SMP
893 static DECLARE_RWSEM(all_cpu_access_lock);
894 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
895
896 static inline void trace_access_lock(int cpu)
897 {
898         if (cpu == RING_BUFFER_ALL_CPUS) {
899                 /* gain it for accessing the whole ring buffer. */
900                 down_write(&all_cpu_access_lock);
901         } else {
902                 /* gain it for accessing a cpu ring buffer. */
903
904                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
905                 down_read(&all_cpu_access_lock);
906
907                 /* Secondly block other access to this @cpu ring buffer. */
908                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
909         }
910 }
911
912 static inline void trace_access_unlock(int cpu)
913 {
914         if (cpu == RING_BUFFER_ALL_CPUS) {
915                 up_write(&all_cpu_access_lock);
916         } else {
917                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
918                 up_read(&all_cpu_access_lock);
919         }
920 }
921
922 static inline void trace_access_lock_init(void)
923 {
924         int cpu;
925
926         for_each_possible_cpu(cpu)
927                 mutex_init(&per_cpu(cpu_access_lock, cpu));
928 }
929
930 #else
931
932 static DEFINE_MUTEX(access_lock);
933
934 static inline void trace_access_lock(int cpu)
935 {
936         (void)cpu;
937         mutex_lock(&access_lock);
938 }
939
940 static inline void trace_access_unlock(int cpu)
941 {
942         (void)cpu;
943         mutex_unlock(&access_lock);
944 }
945
946 static inline void trace_access_lock_init(void)
947 {
948 }
949
950 #endif
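
/*
 * Typical usage pattern for the primitives above (a sketch): a reader of a
 * single per-cpu buffer brackets its accesses with that cpu's lock, while
 * code touching every CPU's buffer passes RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(cpu_file);
 *	// ring_buffer_peek()/ring_buffer_consume() on that cpu
 *	trace_access_unlock(cpu_file);
 */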
951
952 #ifdef CONFIG_STACKTRACE
953 static void __ftrace_trace_stack(struct trace_buffer *buffer,
954                                  unsigned int trace_ctx,
955                                  int skip, struct pt_regs *regs);
956 static inline void ftrace_trace_stack(struct trace_array *tr,
957                                       struct trace_buffer *buffer,
958                                       unsigned int trace_ctx,
959                                       int skip, struct pt_regs *regs);
960
961 #else
962 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
963                                         unsigned int trace_ctx,
964                                         int skip, struct pt_regs *regs)
965 {
966 }
967 static inline void ftrace_trace_stack(struct trace_array *tr,
968                                       struct trace_buffer *buffer,
969                                       unsigned long trace_ctx,
970                                       int skip, struct pt_regs *regs)
971 {
972 }
973
974 #endif
975
976 static __always_inline void
977 trace_event_setup(struct ring_buffer_event *event,
978                   int type, unsigned int trace_ctx)
979 {
980         struct trace_entry *ent = ring_buffer_event_data(event);
981
982         tracing_generic_entry_update(ent, type, trace_ctx);
983 }
984
985 static __always_inline struct ring_buffer_event *
986 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987                           int type,
988                           unsigned long len,
989                           unsigned int trace_ctx)
990 {
991         struct ring_buffer_event *event;
992
993         event = ring_buffer_lock_reserve(buffer, len);
994         if (event != NULL)
995                 trace_event_setup(event, type, trace_ctx);
996
997         return event;
998 }
999
1000 void tracer_tracing_on(struct trace_array *tr)
1001 {
1002         if (tr->array_buffer.buffer)
1003                 ring_buffer_record_on(tr->array_buffer.buffer);
1004         /*
1005          * This flag is looked at when buffers haven't been allocated
1006          * yet, or by some tracers (like irqsoff), that just want to
1007          * know if the ring buffer has been disabled, but it can handle
1008          * races where it gets disabled while we still do a record.
1009          * As the check is in the fast path of the tracers, it is more
1010          * important to be fast than accurate.
1011          */
1012         tr->buffer_disabled = 0;
1013         /* Make the flag seen by readers */
1014         smp_wmb();
1015 }
1016
1017 /**
1018  * tracing_on - enable tracing buffers
1019  *
1020  * This function enables tracing buffers that may have been
1021  * disabled with tracing_off.
1022  */
1023 void tracing_on(void)
1024 {
1025         tracer_tracing_on(&global_trace);
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_on);
1028
1029
1030 static __always_inline void
1031 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1032 {
1033         __this_cpu_write(trace_taskinfo_save, true);
1034
1035         /* If this is the temp buffer, we need to commit fully */
1036         if (this_cpu_read(trace_buffered_event) == event) {
1037                 /* Length is in event->array[0] */
1038                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1039                 /* Release the temp buffer */
1040                 this_cpu_dec(trace_buffered_event_cnt);
1041                 /* ring_buffer_unlock_commit() enables preemption */
1042                 preempt_enable_notrace();
1043         } else
1044                 ring_buffer_unlock_commit(buffer);
1045 }
1046
1047 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1048                        const char *str, int size)
1049 {
1050         struct ring_buffer_event *event;
1051         struct trace_buffer *buffer;
1052         struct print_entry *entry;
1053         unsigned int trace_ctx;
1054         int alloc;
1055
1056         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057                 return 0;
1058
1059         if (unlikely(tracing_selftest_running && tr == &global_trace))
1060                 return 0;
1061
1062         if (unlikely(tracing_disabled))
1063                 return 0;
1064
1065         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1066
1067         trace_ctx = tracing_gen_ctx();
1068         buffer = tr->array_buffer.buffer;
1069         ring_buffer_nest_start(buffer);
1070         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1071                                             trace_ctx);
1072         if (!event) {
1073                 size = 0;
1074                 goto out;
1075         }
1076
1077         entry = ring_buffer_event_data(event);
1078         entry->ip = ip;
1079
1080         memcpy(&entry->buf, str, size);
1081
1082         /* Add a newline if necessary */
1083         if (entry->buf[size - 1] != '\n') {
1084                 entry->buf[size] = '\n';
1085                 entry->buf[size + 1] = '\0';
1086         } else
1087                 entry->buf[size] = '\0';
1088
1089         __buffer_unlock_commit(buffer, event);
1090         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return size;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_array_puts);
1096
1097 /**
1098  * __trace_puts - write a constant string into the trace buffer.
1099  * @ip:    The address of the caller
1100  * @str:   The constant string to write
1101  * @size:  The size of the string.
1102  */
1103 int __trace_puts(unsigned long ip, const char *str, int size)
1104 {
1105         return __trace_array_puts(&global_trace, ip, str, size);
1106 }
1107 EXPORT_SYMBOL_GPL(__trace_puts);
1108
1109 /**
1110  * __trace_bputs - write the pointer to a constant string into trace buffer
1111  * @ip:    The address of the caller
1112  * @str:   The constant string to write to the buffer to
1113  */
1114 int __trace_bputs(unsigned long ip, const char *str)
1115 {
1116         struct ring_buffer_event *event;
1117         struct trace_buffer *buffer;
1118         struct bputs_entry *entry;
1119         unsigned int trace_ctx;
1120         int size = sizeof(struct bputs_entry);
1121         int ret = 0;
1122
1123         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1124                 return 0;
1125
1126         if (unlikely(tracing_selftest_running || tracing_disabled))
1127                 return 0;
1128
1129         trace_ctx = tracing_gen_ctx();
1130         buffer = global_trace.array_buffer.buffer;
1131
1132         ring_buffer_nest_start(buffer);
1133         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1134                                             trace_ctx);
1135         if (!event)
1136                 goto out;
1137
1138         entry = ring_buffer_event_data(event);
1139         entry->ip                       = ip;
1140         entry->str                      = str;
1141
1142         __buffer_unlock_commit(buffer, event);
1143         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1144
1145         ret = 1;
1146  out:
1147         ring_buffer_nest_end(buffer);
1148         return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(__trace_bputs);
1151
1152 #ifdef CONFIG_TRACER_SNAPSHOT
1153 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1154                                            void *cond_data)
1155 {
1156         struct tracer *tracer = tr->current_trace;
1157         unsigned long flags;
1158
1159         if (in_nmi()) {
1160                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1161                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1162                 return;
1163         }
1164
1165         if (!tr->allocated_snapshot) {
1166                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1167                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1168                 tracer_tracing_off(tr);
1169                 return;
1170         }
1171
1172         /* Note, snapshot can not be used when the tracer uses it */
1173         if (tracer->use_max_tr) {
1174                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1175                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1176                 return;
1177         }
1178
1179         local_irq_save(flags);
1180         update_max_tr(tr, current, smp_processor_id(), cond_data);
1181         local_irq_restore(flags);
1182 }
1183
1184 void tracing_snapshot_instance(struct trace_array *tr)
1185 {
1186         tracing_snapshot_instance_cond(tr, NULL);
1187 }
1188
1189 /**
1190  * tracing_snapshot - take a snapshot of the current buffer.
1191  *
1192  * This causes a swap between the snapshot buffer and the current live
1193  * tracing buffer. You can use this to take snapshots of the live
1194  * trace when some condition is triggered, but continue to trace.
1195  *
1196  * Note, make sure to allocate the snapshot with either
1197  * a tracing_snapshot_alloc(), or by doing it manually
1198  * with: echo 1 > /sys/kernel/tracing/snapshot
1199  *
1200  * If the snapshot buffer is not allocated, it will stop tracing.
1201  * Basically making a permanent snapshot.
1202  */
1203 void tracing_snapshot(void)
1204 {
1205         struct trace_array *tr = &global_trace;
1206
1207         tracing_snapshot_instance(tr);
1208 }
1209 EXPORT_SYMBOL_GPL(tracing_snapshot);
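
/*
 * Usage sketch (hypothetical caller, not part of this file): allocate the
 * snapshot buffer once from a context that may sleep, then snapshots can
 * be taken from most contexts (NMI being the notable exception):
 *
 *	if (tracing_alloc_snapshot() == 0)
 *		tracing_snapshot();
 *
 * Or simply call tracing_snapshot_alloc() where sleeping is allowed.
 */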
1210
1211 /**
1212  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1213  * @tr:         The tracing instance to snapshot
1214  * @cond_data:  The data to be tested conditionally, and possibly saved
1215  *
1216  * This is the same as tracing_snapshot() except that the snapshot is
1217  * conditional - the snapshot will only happen if the
1218  * cond_snapshot.update() implementation receiving the cond_data
1219  * returns true, which means that the trace array's cond_snapshot
1220  * update() operation used the cond_data to determine whether the
1221  * snapshot should be taken, and if it was, presumably saved it along
1222  * with the snapshot.
1223  */
1224 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1225 {
1226         tracing_snapshot_instance_cond(tr, cond_data);
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1229
1230 /**
1231  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1232  * @tr:         The tracing instance
1233  *
1234  * When the user enables a conditional snapshot using
1235  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1236  * with the snapshot.  This accessor is used to retrieve it.
1237  *
1238  * Should not be called from cond_snapshot.update(), since it takes
1239  * the tr->max_lock lock, which the code calling
1240  * cond_snapshot.update() has already done.
1241  *
1242  * Returns the cond_data associated with the trace array's snapshot.
1243  */
1244 void *tracing_cond_snapshot_data(struct trace_array *tr)
1245 {
1246         void *cond_data = NULL;
1247
1248         local_irq_disable();
1249         arch_spin_lock(&tr->max_lock);
1250
1251         if (tr->cond_snapshot)
1252                 cond_data = tr->cond_snapshot->cond_data;
1253
1254         arch_spin_unlock(&tr->max_lock);
1255         local_irq_enable();
1256
1257         return cond_data;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1260
1261 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1262                                         struct array_buffer *size_buf, int cpu_id);
1263 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1264
1265 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1266 {
1267         int order;
1268         int ret;
1269
1270         if (!tr->allocated_snapshot) {
1271
1272                 /* Make the snapshot buffer have the same order as main buffer */
1273                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1274                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1275                 if (ret < 0)
1276                         return ret;
1277
1278                 /* allocate spare buffer */
1279                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1280                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1281                 if (ret < 0)
1282                         return ret;
1283
1284                 tr->allocated_snapshot = true;
1285         }
1286
1287         return 0;
1288 }
1289
1290 static void free_snapshot(struct trace_array *tr)
1291 {
1292         /*
1293          * We don't free the ring buffer; instead, we resize it because
1294          * the max_tr ring buffer has some state (e.g. ring->clock) and
1295          * we want to preserve it.
1296          */
1297         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1298         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1299         set_buffer_entries(&tr->max_buffer, 1);
1300         tracing_reset_online_cpus(&tr->max_buffer);
1301         tr->allocated_snapshot = false;
1302 }
1303
1304 /**
1305  * tracing_alloc_snapshot - allocate snapshot buffer.
1306  *
1307  * This only allocates the snapshot buffer if it isn't already
1308  * allocated - it doesn't also take a snapshot.
1309  *
1310  * This is meant to be used in cases where the snapshot buffer needs
1311  * to be set up for events that can't sleep but need to be able to
1312  * trigger a snapshot.
1313  */
1314 int tracing_alloc_snapshot(void)
1315 {
1316         struct trace_array *tr = &global_trace;
1317         int ret;
1318
1319         ret = tracing_alloc_snapshot_instance(tr);
1320         WARN_ON(ret < 0);
1321
1322         return ret;
1323 }
1324 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1325
1326 /**
1327  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1328  *
1329  * This is similar to tracing_snapshot(), but it will allocate the
1330  * snapshot buffer if it isn't already allocated. Use this only
1331  * where it is safe to sleep, as the allocation may sleep.
1332  *
1333  * This causes a swap between the snapshot buffer and the current live
1334  * tracing buffer. You can use this to take snapshots of the live
1335  * trace when some condition is triggered, but continue to trace.
1336  */
1337 void tracing_snapshot_alloc(void)
1338 {
1339         int ret;
1340
1341         ret = tracing_alloc_snapshot();
1342         if (ret < 0)
1343                 return;
1344
1345         tracing_snapshot();
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1348
1349 /**
1350  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  * @cond_data:  User data to associate with the snapshot
1353  * @update:     Implementation of the cond_snapshot update function
1354  *
1355  * Check whether the conditional snapshot for the given instance has
1356  * already been enabled, or if the current tracer is already using a
1357  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1358  * save the cond_data and update function inside.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1363                                  cond_update_fn_t update)
1364 {
1365         struct cond_snapshot *cond_snapshot;
1366         int ret = 0;
1367
1368         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1369         if (!cond_snapshot)
1370                 return -ENOMEM;
1371
1372         cond_snapshot->cond_data = cond_data;
1373         cond_snapshot->update = update;
1374
1375         mutex_lock(&trace_types_lock);
1376
1377         ret = tracing_alloc_snapshot_instance(tr);
1378         if (ret)
1379                 goto fail_unlock;
1380
1381         if (tr->current_trace->use_max_tr) {
1382                 ret = -EBUSY;
1383                 goto fail_unlock;
1384         }
1385
1386         /*
1387          * The cond_snapshot can only change to NULL without the
1388          * trace_types_lock. We don't care if we race with it going
1389          * to NULL, but we want to make sure that it's not set to
1390          * something other than NULL when we get here, which we can
1391          * do safely with only holding the trace_types_lock and not
1392          * having to take the max_lock.
1393          */
1394         if (tr->cond_snapshot) {
1395                 ret = -EBUSY;
1396                 goto fail_unlock;
1397         }
1398
1399         local_irq_disable();
1400         arch_spin_lock(&tr->max_lock);
1401         tr->cond_snapshot = cond_snapshot;
1402         arch_spin_unlock(&tr->max_lock);
1403         local_irq_enable();
1404
1405         mutex_unlock(&trace_types_lock);
1406
1407         return ret;
1408
1409  fail_unlock:
1410         mutex_unlock(&trace_types_lock);
1411         kfree(cond_snapshot);
1412         return ret;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1415
1416 /**
1417  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1418  * @tr:         The tracing instance
1419  *
1420  * Check whether the conditional snapshot for the given instance is
1421  * enabled; if so, free the cond_snapshot associated with it,
1422  * otherwise return -EINVAL.
1423  *
1424  * Returns 0 if successful, error otherwise.
1425  */
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         int ret = 0;
1429
1430         local_irq_disable();
1431         arch_spin_lock(&tr->max_lock);
1432
1433         if (!tr->cond_snapshot)
1434                 ret = -EINVAL;
1435         else {
1436                 kfree(tr->cond_snapshot);
1437                 tr->cond_snapshot = NULL;
1438         }
1439
1440         arch_spin_unlock(&tr->max_lock);
1441         local_irq_enable();
1442
1443         return ret;
1444 }
1445 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1446 #else
1447 void tracing_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_snapshot);
1452 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1453 {
1454         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1455 }
1456 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1457 int tracing_alloc_snapshot(void)
1458 {
1459         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1460         return -ENODEV;
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1463 void tracing_snapshot_alloc(void)
1464 {
1465         /* Give warning */
1466         tracing_snapshot();
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1469 void *tracing_cond_snapshot_data(struct trace_array *tr)
1470 {
1471         return NULL;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1474 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1475 {
1476         return -ENODEV;
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1479 int tracing_snapshot_cond_disable(struct trace_array *tr)
1480 {
1481         return false;
1482 }
1483 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1484 #define free_snapshot(tr)       do { } while (0)
1485 #endif /* CONFIG_TRACER_SNAPSHOT */
1486
1487 void tracer_tracing_off(struct trace_array *tr)
1488 {
1489         if (tr->array_buffer.buffer)
1490                 ring_buffer_record_off(tr->array_buffer.buffer);
1491         /*
1492          * This flag is looked at when buffers haven't been allocated
1493          * yet, or by some tracers (like irqsoff), that just want to
1494          * know if the ring buffer has been disabled, but it can handle
1495          * races where it gets disabled while we still do a record.
1496          * As the check is in the fast path of the tracers, it is more
1497          * important to be fast than accurate.
1498          */
1499         tr->buffer_disabled = 1;
1500         /* Make the flag seen by readers */
1501         smp_wmb();
1502 }
1503
1504 /**
1505  * tracing_off - turn off tracing buffers
1506  *
1507  * This function stops the tracing buffers from recording data.
1508  * It does not disable any overhead the tracers themselves may
1509  * be causing. This function simply causes all recording to
1510  * the ring buffers to fail.
1511  */
1512 void tracing_off(void)
1513 {
1514         tracer_tracing_off(&global_trace);
1515 }
1516 EXPORT_SYMBOL_GPL(tracing_off);
1517
1518 void disable_trace_on_warning(void)
1519 {
1520         if (__disable_trace_on_warning) {
1521                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1522                         "Disabling tracing due to warning\n");
1523                 tracing_off();
1524         }
1525 }
1526
1527 /**
1528  * tracer_tracing_is_on - show real state of ring buffer enabled
1529  * @tr : the trace array to know if ring buffer is enabled
1530  *
1531  * Shows real state of the ring buffer if it is enabled or not.
1532  */
1533 bool tracer_tracing_is_on(struct trace_array *tr)
1534 {
1535         if (tr->array_buffer.buffer)
1536                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1537         return !tr->buffer_disabled;
1538 }
1539
1540 /**
1541  * tracing_is_on - show state of ring buffers enabled
1542  */
1543 int tracing_is_on(void)
1544 {
1545         return tracer_tracing_is_on(&global_trace);
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_is_on);
1548
1549 static int __init set_buf_size(char *str)
1550 {
1551         unsigned long buf_size;
1552
1553         if (!str)
1554                 return 0;
1555         buf_size = memparse(str, &str);
1556         /*
1557          * nr_entries can not be zero and the startup
1558          * tests require some buffer space. Therefore
1559          * ensure we have at least 4096 bytes of buffer.
1560          */
1561         trace_buf_size = max(4096UL, buf_size);
1562         return 1;
1563 }
1564 __setup("trace_buf_size=", set_buf_size);
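
/*
 * Example (a sketch): memparse() accepts K/M/G suffixes, so booting with
 * "trace_buf_size=1M" requests roughly one megabyte of trace buffer per
 * CPU, rounded by the ring buffer code to its sub-buffer granularity.
 */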
1565
1566 static int __init set_tracing_thresh(char *str)
1567 {
1568         unsigned long threshold;
1569         int ret;
1570
1571         if (!str)
1572                 return 0;
1573         ret = kstrtoul(str, 0, &threshold);
1574         if (ret < 0)
1575                 return 0;
1576         tracing_thresh = threshold * 1000;
1577         return 1;
1578 }
1579 __setup("tracing_thresh=", set_tracing_thresh);
1580
1581 unsigned long nsecs_to_usecs(unsigned long nsecs)
1582 {
1583         return nsecs / 1000;
1584 }
1585
1586 /*
1587  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1588  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1589  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1590  * of strings in the order that the evals (enum) were defined.
1591  */
1592 #undef C
1593 #define C(a, b) b
1594
1595 /* These must match the bit positions in trace_iterator_flags */
1596 static const char *trace_options[] = {
1597         TRACE_FLAGS
1598         NULL
1599 };
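
/*
 * Illustration of the C() trick above, using a hypothetical two-entry
 * TRACE_FLAGS (the real list lives in trace.h):
 *
 *	#define TRACE_FLAGS C(PRINTK, "printk"), C(ANNOTATE, "annotate"),
 *
 * With "#define C(a, b) b" the array initializer above expands to
 *
 *	"printk", "annotate", NULL
 *
 * while trace.h redefines C() (e.g. to TRACE_ITER_##a##_BIT) so the same
 * list also generates the matching enum of bit positions.
 */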
1600
1601 static struct {
1602         u64 (*func)(void);
1603         const char *name;
1604         int in_ns;              /* is this clock in nanoseconds? */
1605 } trace_clocks[] = {
1606         { trace_clock_local,            "local",        1 },
1607         { trace_clock_global,           "global",       1 },
1608         { trace_clock_counter,          "counter",      0 },
1609         { trace_clock_jiffies,          "uptime",       0 },
1610         { trace_clock,                  "perf",         1 },
1611         { ktime_get_mono_fast_ns,       "mono",         1 },
1612         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1613         { ktime_get_boot_fast_ns,       "boot",         1 },
1614         { ktime_get_tai_fast_ns,        "tai",          1 },
1615         ARCH_TRACE_CLOCKS
1616 };
1617
1618 bool trace_clock_in_ns(struct trace_array *tr)
1619 {
1620         if (trace_clocks[tr->clock_id].in_ns)
1621                 return true;
1622
1623         return false;
1624 }
1625
1626 /*
1627  * trace_parser_get_init - gets the buffer for trace parser
1628  */
1629 int trace_parser_get_init(struct trace_parser *parser, int size)
1630 {
1631         memset(parser, 0, sizeof(*parser));
1632
1633         parser->buffer = kmalloc(size, GFP_KERNEL);
1634         if (!parser->buffer)
1635                 return 1;
1636
1637         parser->size = size;
1638         return 0;
1639 }
1640
1641 /*
1642  * trace_parser_put - frees the buffer for trace parser
1643  */
1644 void trace_parser_put(struct trace_parser *parser)
1645 {
1646         kfree(parser->buffer);
1647         parser->buffer = NULL;
1648 }
1649
1650 /*
1651  * trace_get_user - reads the user input string separated by  space
1652  * (matched by isspace(ch))
1653  *
1654  * For each string found the 'struct trace_parser' is updated,
1655  * and the function returns.
1656  *
1657  * Returns number of bytes read.
1658  *
1659  * See kernel/trace/trace.h for 'struct trace_parser' details.
1660  */
1661 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1662         size_t cnt, loff_t *ppos)
1663 {
1664         char ch;
1665         size_t read = 0;
1666         ssize_t ret;
1667
1668         if (!*ppos)
1669                 trace_parser_clear(parser);
1670
1671         ret = get_user(ch, ubuf++);
1672         if (ret)
1673                 goto out;
1674
1675         read++;
1676         cnt--;
1677
1678         /*
1679          * If the parser is not finished with the last write, continue
1680          * reading the user input without skipping leading spaces.
1681          */
1682         if (!parser->cont) {
1683                 /* skip white space */
1684                 while (cnt && isspace(ch)) {
1685                         ret = get_user(ch, ubuf++);
1686                         if (ret)
1687                                 goto out;
1688                         read++;
1689                         cnt--;
1690                 }
1691
1692                 parser->idx = 0;
1693
1694                 /* only spaces were written */
1695                 if (isspace(ch) || !ch) {
1696                         *ppos += read;
1697                         ret = read;
1698                         goto out;
1699                 }
1700         }
1701
1702         /* read the non-space input */
1703         while (cnt && !isspace(ch) && ch) {
1704                 if (parser->idx < parser->size - 1)
1705                         parser->buffer[parser->idx++] = ch;
1706                 else {
1707                         ret = -EINVAL;
1708                         goto out;
1709                 }
1710                 ret = get_user(ch, ubuf++);
1711                 if (ret)
1712                         goto out;
1713                 read++;
1714                 cnt--;
1715         }
1716
1717         /* We either got finished input or we have to wait for another call. */
1718         if (isspace(ch) || !ch) {
1719                 parser->buffer[parser->idx] = 0;
1720                 parser->cont = false;
1721         } else if (parser->idx < parser->size - 1) {
1722                 parser->cont = true;
1723                 parser->buffer[parser->idx++] = ch;
1724                 /* Make sure the parsed string always terminates with '\0'. */
1725                 parser->buffer[parser->idx] = 0;
1726         } else {
1727                 ret = -EINVAL;
1728                 goto out;
1729         }
1730
1731         *ppos += read;
1732         ret = read;
1733
1734 out:
1735         return ret;
1736 }
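/*
 * Usage sketch (an assumption, not code from this file; handle_token()
 * is a hypothetical consumer): a tracefs write handler typically
 * drives the parser helpers above like so:
 *
 *      struct trace_parser parser;
 *      ssize_t read;
 *
 *      if (trace_parser_get_init(&parser, PAGE_SIZE))
 *              return -ENOMEM;
 *      read = trace_get_user(&parser, ubuf, cnt, ppos);
 *      if (read >= 0 && trace_parser_loaded(&parser))
 *              handle_token(parser.buffer);
 *      trace_parser_put(&parser);
 *      return read;
 *
 * trace_parser_loaded() is declared in kernel/trace/trace.h.
 */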
1737
1738 /* TODO add a seq_buf_to_buffer() */
1739 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1740 {
1741         int len;
1742
1743         if (trace_seq_used(s) <= s->readpos)
1744                 return -EBUSY;
1745
1746         len = trace_seq_used(s) - s->readpos;
1747         if (cnt > len)
1748                 cnt = len;
1749         memcpy(buf, s->buffer + s->readpos, cnt);
1750
1751         s->readpos += cnt;
1752         return cnt;
1753 }
1754
1755 unsigned long __read_mostly     tracing_thresh;
1756
1757 #ifdef CONFIG_TRACER_MAX_TRACE
1758 static const struct file_operations tracing_max_lat_fops;
1759
1760 #ifdef LATENCY_FS_NOTIFY
1761
1762 static struct workqueue_struct *fsnotify_wq;
1763
1764 static void latency_fsnotify_workfn(struct work_struct *work)
1765 {
1766         struct trace_array *tr = container_of(work, struct trace_array,
1767                                               fsnotify_work);
1768         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1769 }
1770
1771 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1772 {
1773         struct trace_array *tr = container_of(iwork, struct trace_array,
1774                                               fsnotify_irqwork);
1775         queue_work(fsnotify_wq, &tr->fsnotify_work);
1776 }
1777
1778 static void trace_create_maxlat_file(struct trace_array *tr,
1779                                      struct dentry *d_tracer)
1780 {
1781         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1782         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1783         tr->d_max_latency = trace_create_file("tracing_max_latency",
1784                                               TRACE_MODE_WRITE,
1785                                               d_tracer, tr,
1786                                               &tracing_max_lat_fops);
1787 }
1788
1789 __init static int latency_fsnotify_init(void)
1790 {
1791         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1792                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1793         if (!fsnotify_wq) {
1794                 pr_err("Unable to allocate tr_max_lat_wq\n");
1795                 return -ENOMEM;
1796         }
1797         return 0;
1798 }
1799
1800 late_initcall_sync(latency_fsnotify_init);
1801
1802 void latency_fsnotify(struct trace_array *tr)
1803 {
1804         if (!fsnotify_wq)
1805                 return;
1806         /*
1807          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1808          * possible that we are called from __schedule() or do_idle(), which
1809          * could cause a deadlock.
1810          */
1811         irq_work_queue(&tr->fsnotify_irqwork);
1812 }
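/*
 * Usage sketch (an assumption about userspace tooling): the FS_MODIFY
 * notification queued above lets a latency monitor block on the file
 * instead of polling it, e.g.
 *
 *      inotifywait -e modify /sys/kernel/tracing/tracing_max_latency
 *
 * returns whenever a new maximum latency has been recorded.
 */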
1813
1814 #else /* !LATENCY_FS_NOTIFY */
1815
1816 #define trace_create_maxlat_file(tr, d_tracer)                          \
1817         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1818                           d_tracer, tr, &tracing_max_lat_fops)
1819
1820 #endif
1821
1822 /*
1823  * Copy the new maximum trace into the separate maximum-trace
1824  * structure. (This way the maximum trace is permanently saved
1825  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1826  */
1827 static void
1828 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1829 {
1830         struct array_buffer *trace_buf = &tr->array_buffer;
1831         struct array_buffer *max_buf = &tr->max_buffer;
1832         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1833         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1834
1835         max_buf->cpu = cpu;
1836         max_buf->time_start = data->preempt_timestamp;
1837
1838         max_data->saved_latency = tr->max_latency;
1839         max_data->critical_start = data->critical_start;
1840         max_data->critical_end = data->critical_end;
1841
1842         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1843         max_data->pid = tsk->pid;
1844         /*
1845          * If tsk == current, then use current_uid(), as that does not use
1846          * RCU. The irq tracer can be called out of RCU scope.
1847          */
1848         if (tsk == current)
1849                 max_data->uid = current_uid();
1850         else
1851                 max_data->uid = task_uid(tsk);
1852
1853         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1854         max_data->policy = tsk->policy;
1855         max_data->rt_priority = tsk->rt_priority;
1856
1857         /* record this task's comm */
1858         tracing_record_cmdline(tsk);
1859         latency_fsnotify(tr);
1860 }
1861
1862 /**
1863  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1864  * @tr: tracer
1865  * @tsk: the task with the latency
1866  * @cpu: The cpu that initiated the trace.
1867  * @cond_data: User data associated with a conditional snapshot
1868  *
1869  * Flip the buffers between the @tr and the max_tr and record information
1870  * about which task was the cause of this latency.
1871  */
1872 void
1873 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1874               void *cond_data)
1875 {
1876         if (tr->stop_count)
1877                 return;
1878
1879         WARN_ON_ONCE(!irqs_disabled());
1880
1881         if (!tr->allocated_snapshot) {
1882                 /* Only the nop tracer should hit this when disabling */
1883                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1884                 return;
1885         }
1886
1887         arch_spin_lock(&tr->max_lock);
1888
1889         /* Inherit the recordable setting from array_buffer */
1890         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1891                 ring_buffer_record_on(tr->max_buffer.buffer);
1892         else
1893                 ring_buffer_record_off(tr->max_buffer.buffer);
1894
1895 #ifdef CONFIG_TRACER_SNAPSHOT
1896         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1897                 arch_spin_unlock(&tr->max_lock);
1898                 return;
1899         }
1900 #endif
1901         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1902
1903         __update_max_tr(tr, tsk, cpu);
1904
1905         arch_spin_unlock(&tr->max_lock);
1906
1907         /* Any waiters on the old snapshot buffer need to wake up */
1908         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1909 }
1910
1911 /**
1912  * update_max_tr_single - only copy one trace over, and reset the rest
1913  * @tr: tracer
1914  * @tsk: task with the latency
1915  * @cpu: the cpu of the buffer to copy.
1916  *
1917  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1918  */
1919 void
1920 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1921 {
1922         int ret;
1923
1924         if (tr->stop_count)
1925                 return;
1926
1927         WARN_ON_ONCE(!irqs_disabled());
1928         if (!tr->allocated_snapshot) {
1929                 /* Only the nop tracer should hit this when disabling */
1930                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1931                 return;
1932         }
1933
1934         arch_spin_lock(&tr->max_lock);
1935
1936         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1937
1938         if (ret == -EBUSY) {
1939                 /*
1940                  * We failed to swap the buffer due to a commit taking
1941                  * place on this CPU. We fail to record, but we reset
1942                  * the max trace buffer (no one writes directly to it)
1943                  * and flag that it failed.
1944                  * The swap can also fail when a resize is in progress.
1945                  */
1946                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1947                         "Failed to swap buffers due to commit or resize in progress\n");
1948         }
1949
1950         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1951
1952         __update_max_tr(tr, tsk, cpu);
1953         arch_spin_unlock(&tr->max_lock);
1954 }
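/*
 * Note (context, not taken from this file): these two helpers are
 * driven by the latency tracers, e.g. the irqsoff and wakeup tracers
 * call them when they observe a latency larger than tr->max_latency,
 * so the max_buffer snapshot always describes the worst case seen so
 * far.
 */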
1955
1956 #endif /* CONFIG_TRACER_MAX_TRACE */
1957
1958 struct pipe_wait {
1959         struct trace_iterator           *iter;
1960         int                             wait_index;
1961 };
1962
1963 static bool wait_pipe_cond(void *data)
1964 {
1965         struct pipe_wait *pwait = data;
1966         struct trace_iterator *iter = pwait->iter;
1967
1968         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1969                 return true;
1970
1971         return iter->closed;
1972 }
1973
1974 static int wait_on_pipe(struct trace_iterator *iter, int full)
1975 {
1976         struct pipe_wait pwait;
1977         int ret;
1978
1979         /* Iterators are static, they should be filled or empty */
1980         if (trace_buffer_iter(iter, iter->cpu_file))
1981                 return 0;
1982
1983         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
1984         pwait.iter = iter;
1985
1986         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
1987                                wait_pipe_cond, &pwait);
1988
1989 #ifdef CONFIG_TRACER_MAX_TRACE
1990         /*
1991          * Make sure this is still the snapshot buffer, as if a snapshot were
1992          * to happen, this would now be the main buffer.
1993          */
1994         if (iter->snapshot)
1995                 iter->array_buffer = &iter->tr->max_buffer;
1996 #endif
1997         return ret;
1998 }
1999
2000 #ifdef CONFIG_FTRACE_STARTUP_TEST
2001 static bool selftests_can_run;
2002
2003 struct trace_selftests {
2004         struct list_head                list;
2005         struct tracer                   *type;
2006 };
2007
2008 static LIST_HEAD(postponed_selftests);
2009
2010 static int save_selftest(struct tracer *type)
2011 {
2012         struct trace_selftests *selftest;
2013
2014         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2015         if (!selftest)
2016                 return -ENOMEM;
2017
2018         selftest->type = type;
2019         list_add(&selftest->list, &postponed_selftests);
2020         return 0;
2021 }
2022
2023 static int run_tracer_selftest(struct tracer *type)
2024 {
2025         struct trace_array *tr = &global_trace;
2026         struct tracer *saved_tracer = tr->current_trace;
2027         int ret;
2028
2029         if (!type->selftest || tracing_selftest_disabled)
2030                 return 0;
2031
2032         /*
2033          * If a tracer registers early in boot up (before scheduling is
2034          * initialized and such), then do not run its selftests yet.
2035          * Instead, run it a little later in the boot process.
2036          */
2037         if (!selftests_can_run)
2038                 return save_selftest(type);
2039
2040         if (!tracing_is_on()) {
2041                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2042                         type->name);
2043                 return 0;
2044         }
2045
2046         /*
2047          * Run a selftest on this tracer.
2048          * Here we reset the trace buffer, and set the current
2049          * tracer to be this tracer. The tracer can then run some
2050          * internal tracing to verify that everything is in order.
2051          * If we fail, we do not register this tracer.
2052          */
2053         tracing_reset_online_cpus(&tr->array_buffer);
2054
2055         tr->current_trace = type;
2056
2057 #ifdef CONFIG_TRACER_MAX_TRACE
2058         if (type->use_max_tr) {
2059                 /* If we expanded the buffers, make sure the max is expanded too */
2060                 if (tr->ring_buffer_expanded)
2061                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2062                                            RING_BUFFER_ALL_CPUS);
2063                 tr->allocated_snapshot = true;
2064         }
2065 #endif
2066
2067         /* the test is responsible for initializing and enabling */
2068         pr_info("Testing tracer %s: ", type->name);
2069         ret = type->selftest(type, tr);
2070         /* the test is responsible for resetting too */
2071         tr->current_trace = saved_tracer;
2072         if (ret) {
2073                 printk(KERN_CONT "FAILED!\n");
2074                 /* Add the warning after printing 'FAILED' */
2075                 WARN_ON(1);
2076                 return -1;
2077         }
2078         /* Only reset on passing, to avoid touching corrupted buffers */
2079         tracing_reset_online_cpus(&tr->array_buffer);
2080
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082         if (type->use_max_tr) {
2083                 tr->allocated_snapshot = false;
2084
2085                 /* Shrink the max buffer again */
2086                 if (tr->ring_buffer_expanded)
2087                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2088                                            RING_BUFFER_ALL_CPUS);
2089         }
2090 #endif
2091
2092         printk(KERN_CONT "PASSED\n");
2093         return 0;
2094 }
2095
2096 static int do_run_tracer_selftest(struct tracer *type)
2097 {
2098         int ret;
2099
2100         /*
2101          * Tests can take a long time, especially if they are run one after the
2102          * other, as does happen during bootup when all the tracers are
2103          * registered. This could cause the soft lockup watchdog to trigger.
2104          */
2105         cond_resched();
2106
2107         tracing_selftest_running = true;
2108         ret = run_tracer_selftest(type);
2109         tracing_selftest_running = false;
2110
2111         return ret;
2112 }
2113
2114 static __init int init_trace_selftests(void)
2115 {
2116         struct trace_selftests *p, *n;
2117         struct tracer *t, **last;
2118         int ret;
2119
2120         selftests_can_run = true;
2121
2122         mutex_lock(&trace_types_lock);
2123
2124         if (list_empty(&postponed_selftests))
2125                 goto out;
2126
2127         pr_info("Running postponed tracer tests:\n");
2128
2129         tracing_selftest_running = true;
2130         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2131                 /* This loop can take minutes when sanitizers are enabled, so
2132                  * let's make sure we allow RCU processing.
2133                  */
2134                 cond_resched();
2135                 ret = run_tracer_selftest(p->type);
2136                 /* If the test fails, then warn and remove from available_tracers */
2137                 if (ret < 0) {
2138                         WARN(1, "tracer: %s failed selftest, disabling\n",
2139                              p->type->name);
2140                         last = &trace_types;
2141                         for (t = trace_types; t; t = t->next) {
2142                                 if (t == p->type) {
2143                                         *last = t->next;
2144                                         break;
2145                                 }
2146                                 last = &t->next;
2147                         }
2148                 }
2149                 list_del(&p->list);
2150                 kfree(p);
2151         }
2152         tracing_selftest_running = false;
2153
2154  out:
2155         mutex_unlock(&trace_types_lock);
2156
2157         return 0;
2158 }
2159 core_initcall(init_trace_selftests);
2160 #else
2161 static inline int run_tracer_selftest(struct tracer *type)
2162 {
2163         return 0;
2164 }
2165 static inline int do_run_tracer_selftest(struct tracer *type)
2166 {
2167         return 0;
2168 }
2169 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2170
2171 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2172
2173 static void __init apply_trace_boot_options(void);
2174
2175 /**
2176  * register_tracer - register a tracer with the ftrace system.
2177  * @type: the plugin for the tracer
2178  *
2179  * Register a new plugin tracer.
2180  */
2181 int __init register_tracer(struct tracer *type)
2182 {
2183         struct tracer *t;
2184         int ret = 0;
2185
2186         if (!type->name) {
2187                 pr_info("Tracer must have a name\n");
2188                 return -1;
2189         }
2190
2191         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2192                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2193                 return -1;
2194         }
2195
2196         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2197                 pr_warn("Can not register tracer %s due to lockdown\n",
2198                            type->name);
2199                 return -EPERM;
2200         }
2201
2202         mutex_lock(&trace_types_lock);
2203
2204         for (t = trace_types; t; t = t->next) {
2205                 if (strcmp(type->name, t->name) == 0) {
2206                         /* already found */
2207                         pr_info("Tracer %s already registered\n",
2208                                 type->name);
2209                         ret = -1;
2210                         goto out;
2211                 }
2212         }
2213
2214         if (!type->set_flag)
2215                 type->set_flag = &dummy_set_flag;
2216         if (!type->flags) {
2217                 /* allocate a dummy tracer_flags */
2218                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2219                 if (!type->flags) {
2220                         ret = -ENOMEM;
2221                         goto out;
2222                 }
2223                 type->flags->val = 0;
2224                 type->flags->opts = dummy_tracer_opt;
2225         } else
2226                 if (!type->flags->opts)
2227                         type->flags->opts = dummy_tracer_opt;
2228
2229         /* store the tracer for __set_tracer_option */
2230         type->flags->trace = type;
2231
2232         ret = do_run_tracer_selftest(type);
2233         if (ret < 0)
2234                 goto out;
2235
2236         type->next = trace_types;
2237         trace_types = type;
2238         add_tracer_options(&global_trace, type);
2239
2240  out:
2241         mutex_unlock(&trace_types_lock);
2242
2243         if (ret || !default_bootup_tracer)
2244                 goto out_unlock;
2245
2246         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2247                 goto out_unlock;
2248
2249         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2250         /* Do we want this tracer to start on bootup? */
2251         tracing_set_tracer(&global_trace, type->name);
2252         default_bootup_tracer = NULL;
2253
2254         apply_trace_boot_options();
2255
2256         /* Disable other selftests, since this tracer will interfere with them. */
2257         disable_tracing_selftest("running a tracer");
2258
2259  out_unlock:
2260         return ret;
2261 }
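/*
 * Usage sketch (hypothetical "foo" tracer, not part of this file):
 *
 *      static int foo_tracer_init(struct trace_array *tr)
 *      {
 *              return 0;
 *      }
 *
 *      static void foo_tracer_reset(struct trace_array *tr)
 *      {
 *      }
 *
 *      static struct tracer foo_tracer __read_mostly = {
 *              .name   = "foo",
 *              .init   = foo_tracer_init,
 *              .reset  = foo_tracer_reset,
 *      };
 *
 *      static __init int init_foo_tracer(void)
 *      {
 *              return register_tracer(&foo_tracer);
 *      }
 *      core_initcall(init_foo_tracer);
 *
 * register_tracer() is __init, so registration has to happen from an
 * __init context; the new tracer then shows up in available_tracers.
 */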
2262
2263 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2264 {
2265         struct trace_buffer *buffer = buf->buffer;
2266
2267         if (!buffer)
2268                 return;
2269
2270         ring_buffer_record_disable(buffer);
2271
2272         /* Make sure all commits have finished */
2273         synchronize_rcu();
2274         ring_buffer_reset_cpu(buffer, cpu);
2275
2276         ring_buffer_record_enable(buffer);
2277 }
2278
2279 void tracing_reset_online_cpus(struct array_buffer *buf)
2280 {
2281         struct trace_buffer *buffer = buf->buffer;
2282
2283         if (!buffer)
2284                 return;
2285
2286         ring_buffer_record_disable(buffer);
2287
2288         /* Make sure all commits have finished */
2289         synchronize_rcu();
2290
2291         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2292
2293         ring_buffer_reset_online_cpus(buffer);
2294
2295         ring_buffer_record_enable(buffer);
2296 }
2297
2298 /* Must have trace_types_lock held */
2299 void tracing_reset_all_online_cpus_unlocked(void)
2300 {
2301         struct trace_array *tr;
2302
2303         lockdep_assert_held(&trace_types_lock);
2304
2305         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2306                 if (!tr->clear_trace)
2307                         continue;
2308                 tr->clear_trace = false;
2309                 tracing_reset_online_cpus(&tr->array_buffer);
2310 #ifdef CONFIG_TRACER_MAX_TRACE
2311                 tracing_reset_online_cpus(&tr->max_buffer);
2312 #endif
2313         }
2314 }
2315
2316 void tracing_reset_all_online_cpus(void)
2317 {
2318         mutex_lock(&trace_types_lock);
2319         tracing_reset_all_online_cpus_unlocked();
2320         mutex_unlock(&trace_types_lock);
2321 }
2322
2323 /*
2324  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2325  * is the tgid last observed corresponding to pid=i.
2326  */
2327 static int *tgid_map;
2328
2329 /* The maximum valid index into tgid_map. */
2330 static size_t tgid_map_max;
2331
2332 #define SAVED_CMDLINES_DEFAULT 128
2333 #define NO_CMDLINE_MAP UINT_MAX
2334 /*
2335  * Preemption must be disabled before acquiring trace_cmdline_lock.
2336  * The various trace_arrays' max_lock must be acquired in a context
2337  * where interrupt is disabled.
2338  * where interrupts are disabled.
2339 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2340 struct saved_cmdlines_buffer {
2341         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2342         unsigned *map_cmdline_to_pid;
2343         unsigned cmdline_num;
2344         int cmdline_idx;
2345         char saved_cmdlines[];
2346 };
2347 static struct saved_cmdlines_buffer *savedcmd;
2348
2349 static inline char *get_saved_cmdlines(int idx)
2350 {
2351         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2352 }
2353
2354 static inline void set_cmdline(int idx, const char *cmdline)
2355 {
2356         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2357 }
2358
2359 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2360 {
2361         int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2362
2363         kfree(s->map_cmdline_to_pid);
2364         kmemleak_free(s);
2365         free_pages((unsigned long)s, order);
2366 }
2367
2368 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2369 {
2370         struct saved_cmdlines_buffer *s;
2371         struct page *page;
2372         int orig_size, size;
2373         int order;
2374
2375         /* Figure out how much is needed to hold the given number of cmdlines */
2376         orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2377         order = get_order(orig_size);
2378         size = 1 << (order + PAGE_SHIFT);
2379         page = alloc_pages(GFP_KERNEL, order);
2380         if (!page)
2381                 return NULL;
2382
2383         s = page_address(page);
2384         kmemleak_alloc(s, size, 1, GFP_KERNEL);
2385         memset(s, 0, sizeof(*s));
2386
2387         /* Round up to actual allocation */
2388         val = (size - sizeof(*s)) / TASK_COMM_LEN;
2389         s->cmdline_num = val;
2390
2391         s->map_cmdline_to_pid = kmalloc_array(val,
2392                                               sizeof(*s->map_cmdline_to_pid),
2393                                               GFP_KERNEL);
2394         if (!s->map_cmdline_to_pid) {
2395                 free_saved_cmdlines_buffer(s);
2396                 return NULL;
2397         }
2398
2399         s->cmdline_idx = 0;
2400         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2401                sizeof(s->map_pid_to_cmdline));
2402         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2403                val * sizeof(*s->map_cmdline_to_pid));
2404
2405         return s;
2406 }
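/*
 * Sizing note (approximate, assuming a 64-bit kernel with 4K pages):
 * sizeof(*s) is dominated by map_pid_to_cmdline[PID_MAX_DEFAULT + 1],
 * roughly 128K, so even the default request of 128 cmdlines rounds up
 * to an order-6 (256K) allocation, and the recomputation of 'val'
 * above turns the slack into several thousand saved_cmdlines slots.
 */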
2407
2408 static int trace_create_savedcmd(void)
2409 {
2410         savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2411
2412         return savedcmd ? 0 : -ENOMEM;
2413 }
2414
2415 int is_tracing_stopped(void)
2416 {
2417         return global_trace.stop_count;
2418 }
2419
2420 static void tracing_start_tr(struct trace_array *tr)
2421 {
2422         struct trace_buffer *buffer;
2423         unsigned long flags;
2424
2425         if (tracing_disabled)
2426                 return;
2427
2428         raw_spin_lock_irqsave(&tr->start_lock, flags);
2429         if (--tr->stop_count) {
2430                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2431                         /* Someone screwed up their debugging */
2432                         tr->stop_count = 0;
2433                 }
2434                 goto out;
2435         }
2436
2437         /* Prevent the buffers from switching */
2438         arch_spin_lock(&tr->max_lock);
2439
2440         buffer = tr->array_buffer.buffer;
2441         if (buffer)
2442                 ring_buffer_record_enable(buffer);
2443
2444 #ifdef CONFIG_TRACER_MAX_TRACE
2445         buffer = tr->max_buffer.buffer;
2446         if (buffer)
2447                 ring_buffer_record_enable(buffer);
2448 #endif
2449
2450         arch_spin_unlock(&tr->max_lock);
2451
2452  out:
2453         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2454 }
2455
2456 /**
2457  * tracing_start - quick start of the tracer
2458  *
2459  * If tracing is enabled but was stopped by tracing_stop,
2460  * this will start the tracer back up.
2461  */
2462 void tracing_start(void)
2464 {
2465         return tracing_start_tr(&global_trace);
2466 }
2467
2468 static void tracing_stop_tr(struct trace_array *tr)
2469 {
2470         struct trace_buffer *buffer;
2471         unsigned long flags;
2472
2473         raw_spin_lock_irqsave(&tr->start_lock, flags);
2474         if (tr->stop_count++)
2475                 goto out;
2476
2477         /* Prevent the buffers from switching */
2478         arch_spin_lock(&tr->max_lock);
2479
2480         buffer = tr->array_buffer.buffer;
2481         if (buffer)
2482                 ring_buffer_record_disable(buffer);
2483
2484 #ifdef CONFIG_TRACER_MAX_TRACE
2485         buffer = tr->max_buffer.buffer;
2486         if (buffer)
2487                 ring_buffer_record_disable(buffer);
2488 #endif
2489
2490         arch_spin_unlock(&tr->max_lock);
2491
2492  out:
2493         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2494 }
2495
2496 /**
2497  * tracing_stop - quick stop of the tracer
2498  *
2499  * Light weight way to stop tracing. Use in conjunction with
2500  * tracing_start.
2501  */
2502 void tracing_stop(void)
2503 {
2504         return tracing_stop_tr(&global_trace);
2505 }
2506
2507 static int trace_save_cmdline(struct task_struct *tsk)
2508 {
2509         unsigned tpid, idx;
2510
2511         /* treat recording of idle task as a success */
2512         if (!tsk->pid)
2513                 return 1;
2514
2515         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2516
2517         /*
2518          * It's not the end of the world if we don't get
2519          * the lock, but we also don't want to spin
2520          * nor do we want to disable interrupts,
2521          * so if we miss here, then better luck next time.
2522          *
2523          * This is called from within the scheduler and wakeup paths, so
2524          * interrupts had better be disabled and the run queue lock held.
2525          */
2526         lockdep_assert_preemption_disabled();
2527         if (!arch_spin_trylock(&trace_cmdline_lock))
2528                 return 0;
2529
2530         idx = savedcmd->map_pid_to_cmdline[tpid];
2531         if (idx == NO_CMDLINE_MAP) {
2532                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2533
2534                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2535                 savedcmd->cmdline_idx = idx;
2536         }
2537
2538         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2539         set_cmdline(idx, tsk->comm);
2540
2541         arch_spin_unlock(&trace_cmdline_lock);
2542
2543         return 1;
2544 }
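/*
 * Worked example (illustrative): PID_MAX_DEFAULT is 32768, so the
 * masking above makes e.g. pid 1000 and pid 33768 share slot 1000 of
 * map_pid_to_cmdline. That is why map_cmdline_to_pid records the real
 * pid, and __trace_find_cmdline() below falls back to "<...>" when the
 * stored pid no longer matches the pid being looked up.
 */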
2545
2546 static void __trace_find_cmdline(int pid, char comm[])
2547 {
2548         unsigned map;
2549         int tpid;
2550
2551         if (!pid) {
2552                 strcpy(comm, "<idle>");
2553                 return;
2554         }
2555
2556         if (WARN_ON_ONCE(pid < 0)) {
2557                 strcpy(comm, "<XXX>");
2558                 return;
2559         }
2560
2561         tpid = pid & (PID_MAX_DEFAULT - 1);
2562         map = savedcmd->map_pid_to_cmdline[tpid];
2563         if (map != NO_CMDLINE_MAP) {
2564                 tpid = savedcmd->map_cmdline_to_pid[map];
2565                 if (tpid == pid) {
2566                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2567                         return;
2568                 }
2569         }
2570         strcpy(comm, "<...>");
2571 }
2572
2573 void trace_find_cmdline(int pid, char comm[])
2574 {
2575         preempt_disable();
2576         arch_spin_lock(&trace_cmdline_lock);
2577
2578         __trace_find_cmdline(pid, comm);
2579
2580         arch_spin_unlock(&trace_cmdline_lock);
2581         preempt_enable();
2582 }
2583
2584 static int *trace_find_tgid_ptr(int pid)
2585 {
2586         /*
2587          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2588          * if we observe a non-NULL tgid_map then we also observe the correct
2589          * tgid_map_max.
2590          */
2591         int *map = smp_load_acquire(&tgid_map);
2592
2593         if (unlikely(!map || pid > tgid_map_max))
2594                 return NULL;
2595
2596         return &map[pid];
2597 }
2598
2599 int trace_find_tgid(int pid)
2600 {
2601         int *ptr = trace_find_tgid_ptr(pid);
2602
2603         return ptr ? *ptr : 0;
2604 }
2605
2606 static int trace_save_tgid(struct task_struct *tsk)
2607 {
2608         int *ptr;
2609
2610         /* treat recording of idle task as a success */
2611         if (!tsk->pid)
2612                 return 1;
2613
2614         ptr = trace_find_tgid_ptr(tsk->pid);
2615         if (!ptr)
2616                 return 0;
2617
2618         *ptr = tsk->tgid;
2619         return 1;
2620 }
2621
2622 static bool tracing_record_taskinfo_skip(int flags)
2623 {
2624         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2625                 return true;
2626         if (!__this_cpu_read(trace_taskinfo_save))
2627                 return true;
2628         return false;
2629 }
2630
2631 /**
2632  * tracing_record_taskinfo - record the task info of a task
2633  *
2634  * @task:  task to record
2635  * @flags: TRACE_RECORD_CMDLINE for recording comm
2636  *         TRACE_RECORD_TGID for recording tgid
2637  */
2638 void tracing_record_taskinfo(struct task_struct *task, int flags)
2639 {
2640         bool done;
2641
2642         if (tracing_record_taskinfo_skip(flags))
2643                 return;
2644
2645         /*
2646          * Record as much task information as possible. If some fail, continue
2647          * to try to record the others.
2648          */
2649         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2650         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2651
2652         /* If recording any information failed, retry again soon. */
2653         if (!done)
2654                 return;
2655
2656         __this_cpu_write(trace_taskinfo_save, false);
2657 }
2658
2659 /**
2660  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2661  *
2662  * @prev: previous task during sched_switch
2663  * @next: next task during sched_switch
2664  * @flags: TRACE_RECORD_CMDLINE for recording comm
2665  *         TRACE_RECORD_TGID for recording tgid
2666  */
2667 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2668                                           struct task_struct *next, int flags)
2669 {
2670         bool done;
2671
2672         if (tracing_record_taskinfo_skip(flags))
2673                 return;
2674
2675         /*
2676          * Record as much task information as possible. If some fail, continue
2677          * to try to record the others.
2678          */
2679         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2680         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2681         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2682         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2683
2684         /* If recording any information failed, retry again soon. */
2685         if (!done)
2686                 return;
2687
2688         __this_cpu_write(trace_taskinfo_save, false);
2689 }
2690
2691 /* Helpers to record a specific task information */
2692 void tracing_record_cmdline(struct task_struct *task)
2693 {
2694         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2695 }
2696
2697 void tracing_record_tgid(struct task_struct *task)
2698 {
2699         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2700 }
2701
2702 /*
2703  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2704  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2705  * simplifies those functions and keeps them in sync.
2706  */
2707 enum print_line_t trace_handle_return(struct trace_seq *s)
2708 {
2709         return trace_seq_has_overflowed(s) ?
2710                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2711 }
2712 EXPORT_SYMBOL_GPL(trace_handle_return);
2713
2714 static unsigned short migration_disable_value(void)
2715 {
2716 #if defined(CONFIG_SMP)
2717         return current->migration_disabled;
2718 #else
2719         return 0;
2720 #endif
2721 }
2722
2723 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2724 {
2725         unsigned int trace_flags = irqs_status;
2726         unsigned int pc;
2727
2728         pc = preempt_count();
2729
2730         if (pc & NMI_MASK)
2731                 trace_flags |= TRACE_FLAG_NMI;
2732         if (pc & HARDIRQ_MASK)
2733                 trace_flags |= TRACE_FLAG_HARDIRQ;
2734         if (in_serving_softirq())
2735                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2736         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2737                 trace_flags |= TRACE_FLAG_BH_OFF;
2738
2739         if (tif_need_resched())
2740                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2741         if (test_preempt_need_resched())
2742                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2743         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2744                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2745 }
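/*
 * Layout sketch (derived from the return statement above): the packed
 * trace_ctx word is roughly
 *
 *      bits  0-3 : preemption-disable depth (clamped to 15)
 *      bits  4-7 : migration-disable depth (clamped to 15)
 *      bits 16+  : TRACE_FLAG_* bits (hardirq, NMI, softirq, resched)
 *
 * which is what the per-entry irqs-off/need-resched/preempt-depth
 * columns of the trace output are later decoded from.
 */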
2746
2747 struct ring_buffer_event *
2748 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2749                           int type,
2750                           unsigned long len,
2751                           unsigned int trace_ctx)
2752 {
2753         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2754 }
2755
2756 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2757 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2758 static int trace_buffered_event_ref;
2759
2760 /**
2761  * trace_buffered_event_enable - enable buffering events
2762  *
2763  * When events are being filtered, it is quicker to use a temporary
2764  * buffer to write the event data into if there's a likely chance
2765  * that it will not be committed. The discard of the ring buffer
2766  * is not as fast as committing, and is much slower than copying
2767  * a commit.
2768  *
2769  * When an event is to be filtered, allocate per-cpu buffers to
2770  * write the event data into. If the event is filtered and discarded,
2771  * it is simply dropped; otherwise, the entire data is committed
2772  * in one shot.
2773  */
2774 void trace_buffered_event_enable(void)
2775 {
2776         struct ring_buffer_event *event;
2777         struct page *page;
2778         int cpu;
2779
2780         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2781
2782         if (trace_buffered_event_ref++)
2783                 return;
2784
2785         for_each_tracing_cpu(cpu) {
2786                 page = alloc_pages_node(cpu_to_node(cpu),
2787                                         GFP_KERNEL | __GFP_NORETRY, 0);
2788                 /* This is just an optimization and can handle failures */
2789                 if (!page) {
2790                         pr_err("Failed to allocate event buffer\n");
2791                         break;
2792                 }
2793
2794                 event = page_address(page);
2795                 memset(event, 0, sizeof(*event));
2796
2797                 per_cpu(trace_buffered_event, cpu) = event;
2798
2799                 preempt_disable();
2800                 if (cpu == smp_processor_id() &&
2801                     __this_cpu_read(trace_buffered_event) !=
2802                     per_cpu(trace_buffered_event, cpu))
2803                         WARN_ON_ONCE(1);
2804                 preempt_enable();
2805         }
2806 }
2807
2808 static void enable_trace_buffered_event(void *data)
2809 {
2810         /* Probably not needed, but do it anyway */
2811         smp_rmb();
2812         this_cpu_dec(trace_buffered_event_cnt);
2813 }
2814
2815 static void disable_trace_buffered_event(void *data)
2816 {
2817         this_cpu_inc(trace_buffered_event_cnt);
2818 }
2819
2820 /**
2821  * trace_buffered_event_disable - disable buffering events
2822  *
2823  * When a filter is removed, it is faster to not use the buffered
2824  * events, and to commit directly into the ring buffer. Free up
2825  * the temp buffers when there are no more users. This requires
2826  * special synchronization with current events.
2827  */
2828 void trace_buffered_event_disable(void)
2829 {
2830         int cpu;
2831
2832         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2833
2834         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2835                 return;
2836
2837         if (--trace_buffered_event_ref)
2838                 return;
2839
2840         /* For each CPU, set the buffer as used. */
2841         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2842                          NULL, true);
2843
2844         /* Wait for all current users to finish */
2845         synchronize_rcu();
2846
2847         for_each_tracing_cpu(cpu) {
2848                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2849                 per_cpu(trace_buffered_event, cpu) = NULL;
2850         }
2851
2852         /*
2853          * Wait for all CPUs that potentially started checking if they can use
2854          * their event buffer only after the previous synchronize_rcu() call and
2855          * that still read a valid pointer from trace_buffered_event. Make sure
2856          * they do not see a cleared trace_buffered_event_cnt, else they could
2857          * wrongly decide to use the pointed-to buffer, which is now freed.
2858          */
2859         synchronize_rcu();
2860
2861         /* For each CPU, relinquish the buffer */
2862         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2863                          true);
2864 }
2865
2866 static struct trace_buffer *temp_buffer;
2867
2868 struct ring_buffer_event *
2869 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2870                           struct trace_event_file *trace_file,
2871                           int type, unsigned long len,
2872                           unsigned int trace_ctx)
2873 {
2874         struct ring_buffer_event *entry;
2875         struct trace_array *tr = trace_file->tr;
2876         int val;
2877
2878         *current_rb = tr->array_buffer.buffer;
2879
2880         if (!tr->no_filter_buffering_ref &&
2881             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2882                 preempt_disable_notrace();
2883                 /*
2884                  * Filtering is on, so try to use the per cpu buffer first.
2885                  * This buffer will simulate a ring_buffer_event,
2886                  * where the type_len is zero and the array[0] will
2887                  * hold the full length.
2888                  * (see include/linux/ring_buffer.h for details on
2889                  *  how the ring_buffer_event is structured).
2890                  *
2891                  * Using a temp buffer during filtering and copying it
2892                  * on a matched filter is quicker than writing directly
2893                  * into the ring buffer and then discarding it when
2894                  * it doesn't match. That is because the discard
2895                  * requires several atomic operations to get right.
2896                  * Copying on match and doing nothing on a failed match
2897                  * is still quicker than writing directly with no copy but
2898                  * having to discard out of the ring buffer on a failed match.
2899                  */
2900                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2901                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2902
2903                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2904
2905                         /*
2906                          * Preemption is disabled, but interrupts and NMIs
2907                          * can still come in now. If that happens after
2908                          * the above increment, then it will have to go
2909                          * back to the old method of allocating the event
2910                          * on the ring buffer, and if the filter fails, it
2911                          * will have to call ring_buffer_discard_commit()
2912                          * to remove it.
2913                          *
2914                          * Need to also check the unlikely case that the
2915                          * length is bigger than the temp buffer size.
2916                          * If that happens, then the reserve is pretty much
2917                          * guaranteed to fail, as the ring buffer currently
2918                          * only allows events less than a page. But that may
2919                          * change in the future, so let the ring buffer reserve
2920                          * handle the failure in that case.
2921                          */
2922                         if (val == 1 && likely(len <= max_len)) {
2923                                 trace_event_setup(entry, type, trace_ctx);
2924                                 entry->array[0] = len;
2925                                 /* Return with preemption disabled */
2926                                 return entry;
2927                         }
2928                         this_cpu_dec(trace_buffered_event_cnt);
2929                 }
2930                 /* __trace_buffer_lock_reserve() disables preemption */
2931                 preempt_enable_notrace();
2932         }
2933
2934         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2935                                             trace_ctx);
2936         /*
2937          * If tracing is off, but we have triggers enabled,
2938          * we still need to look at the event data. Use the temp_buffer
2939          * to store the trace event for the trigger to use. It's recursion
2940          * safe and will not be recorded anywhere.
2941          */
2942         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2943                 *current_rb = temp_buffer;
2944                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2945                                                     trace_ctx);
2946         }
2947         return entry;
2948 }
2949 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2950
2951 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2952 static DEFINE_MUTEX(tracepoint_printk_mutex);
2953
2954 static void output_printk(struct trace_event_buffer *fbuffer)
2955 {
2956         struct trace_event_call *event_call;
2957         struct trace_event_file *file;
2958         struct trace_event *event;
2959         unsigned long flags;
2960         struct trace_iterator *iter = tracepoint_print_iter;
2961
2962         /* We should never get here if iter is NULL */
2963         if (WARN_ON_ONCE(!iter))
2964                 return;
2965
2966         event_call = fbuffer->trace_file->event_call;
2967         if (!event_call || !event_call->event.funcs ||
2968             !event_call->event.funcs->trace)
2969                 return;
2970
2971         file = fbuffer->trace_file;
2972         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2973             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2974              !filter_match_preds(file->filter, fbuffer->entry)))
2975                 return;
2976
2977         event = &fbuffer->trace_file->event_call->event;
2978
2979         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2980         trace_seq_init(&iter->seq);
2981         iter->ent = fbuffer->entry;
2982         event_call->event.funcs->trace(iter, 0, event);
2983         trace_seq_putc(&iter->seq, 0);
2984         printk("%s", iter->seq.buffer);
2985
2986         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2987 }
2988
2989 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2990                              void *buffer, size_t *lenp,
2991                              loff_t *ppos)
2992 {
2993         int save_tracepoint_printk;
2994         int ret;
2995
2996         mutex_lock(&tracepoint_printk_mutex);
2997         save_tracepoint_printk = tracepoint_printk;
2998
2999         ret = proc_dointvec(table, write, buffer, lenp, ppos);
3000
3001         /*
3002          * This will force exiting early, as tracepoint_printk
3003          * is always zero when tracepoint_print_iter is not allocated.
3004          */
3005         if (!tracepoint_print_iter)
3006                 tracepoint_printk = 0;
3007
3008         if (save_tracepoint_printk == tracepoint_printk)
3009                 goto out;
3010
3011         if (tracepoint_printk)
3012                 static_key_enable(&tracepoint_printk_key.key);
3013         else
3014                 static_key_disable(&tracepoint_printk_key.key);
3015
3016  out:
3017         mutex_unlock(&tracepoint_printk_mutex);
3018
3019         return ret;
3020 }
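/*
 * Usage sketch (not authoritative): tracepoint_print_iter is only
 * allocated when the "tp_printk" boot option is used, after which the
 * behaviour can be toggled at run time with
 *
 *      sysctl kernel.tracepoint_printk=1
 *
 * Without that boot option, the sysctl handler above forces the value
 * back to zero.
 */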
3021
3022 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3023 {
3024         enum event_trigger_type tt = ETT_NONE;
3025         struct trace_event_file *file = fbuffer->trace_file;
3026
3027         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3028                         fbuffer->entry, &tt))
3029                 goto discard;
3030
3031         if (static_key_false(&tracepoint_printk_key.key))
3032                 output_printk(fbuffer);
3033
3034         if (static_branch_unlikely(&trace_event_exports_enabled))
3035                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3036
3037         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3038                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3039
3040 discard:
3041         if (tt)
3042                 event_triggers_post_call(file, tt);
3043
3044 }
3045 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3046
3047 /*
3048  * Skip 3:
3049  *
3050  *   trace_buffer_unlock_commit_regs()
3051  *   trace_event_buffer_commit()
3052  *   trace_event_raw_event_xxx()
3053  */
3054 # define STACK_SKIP 3
3055
3056 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3057                                      struct trace_buffer *buffer,
3058                                      struct ring_buffer_event *event,
3059                                      unsigned int trace_ctx,
3060                                      struct pt_regs *regs)
3061 {
3062         __buffer_unlock_commit(buffer, event);
3063
3064         /*
3065          * If regs is not set, then skip the necessary helper functions.
3066          * Note, we can still get here via blktrace, the wakeup tracer
3067          * and mmiotrace, but it is OK if they lose a function or
3068          * two. They are not that meaningful.
3069          */
3070         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3071         ftrace_trace_userstack(tr, buffer, trace_ctx);
3072 }
3073
3074 /*
3075  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3076  */
3077 void
3078 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3079                                    struct ring_buffer_event *event)
3080 {
3081         __buffer_unlock_commit(buffer, event);
3082 }
3083
3084 void
3085 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3086                parent_ip, unsigned int trace_ctx)
3087 {
3088         struct trace_event_call *call = &event_function;
3089         struct trace_buffer *buffer = tr->array_buffer.buffer;
3090         struct ring_buffer_event *event;
3091         struct ftrace_entry *entry;
3092
3093         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3094                                             trace_ctx);
3095         if (!event)
3096                 return;
3097         entry   = ring_buffer_event_data(event);
3098         entry->ip                       = ip;
3099         entry->parent_ip                = parent_ip;
3100
3101         if (!call_filter_check_discard(call, entry, buffer, event)) {
3102                 if (static_branch_unlikely(&trace_function_exports_enabled))
3103                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3104                 __buffer_unlock_commit(buffer, event);
3105         }
3106 }
3107
3108 #ifdef CONFIG_STACKTRACE
3109
3110 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3111 #define FTRACE_KSTACK_NESTING   4
3112
3113 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3114
3115 struct ftrace_stack {
3116         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3117 };
3118
3119
3120 struct ftrace_stacks {
3121         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3122 };
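/*
 * Size note (assuming 4K pages and 64-bit longs): FTRACE_KSTACK_ENTRIES
 * is 1024, so each ftrace_stack is 8K and the per-CPU ftrace_stacks
 * below costs about 32K, one stack per nesting level (normal, softirq,
 * irq, NMI).
 */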
3123
3124 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3125 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3126
3127 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3128                                  unsigned int trace_ctx,
3129                                  int skip, struct pt_regs *regs)
3130 {
3131         struct trace_event_call *call = &event_kernel_stack;
3132         struct ring_buffer_event *event;
3133         unsigned int size, nr_entries;
3134         struct ftrace_stack *fstack;
3135         struct stack_entry *entry;
3136         int stackidx;
3137
3138         /*
3139          * Add one for this function and the call to save_stack_trace().
3140          * If regs is set, then these functions will not be in the way.
3141          */
3142 #ifndef CONFIG_UNWINDER_ORC
3143         if (!regs)
3144                 skip++;
3145 #endif
3146
3147         preempt_disable_notrace();
3148
3149         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3150
3151         /* This should never happen. If it does, yell once and skip */
3152         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3153                 goto out;
3154
3155         /*
3156          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3157          * interrupt will either see the value pre-increment or
3158          * post-increment. If the interrupt happens pre-increment, it will
3159          * have restored the counter when it returns. We just need a barrier to
3160          * keep gcc from moving things around.
3161          */
3162         barrier();
3163
3164         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3165         size = ARRAY_SIZE(fstack->calls);
3166
3167         if (regs) {
3168                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3169                                                    size, skip);
3170         } else {
3171                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3172         }
3173
3174         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3175                                     struct_size(entry, caller, nr_entries),
3176                                     trace_ctx);
3177         if (!event)
3178                 goto out;
3179         entry = ring_buffer_event_data(event);
3180
3181         entry->size = nr_entries;
3182         memcpy(&entry->caller, fstack->calls,
3183                flex_array_size(entry, caller, nr_entries));
3184
3185         if (!call_filter_check_discard(call, entry, buffer, event))
3186                 __buffer_unlock_commit(buffer, event);
3187
3188  out:
3189         /* Again, don't let gcc optimize things here */
3190         barrier();
3191         __this_cpu_dec(ftrace_stack_reserve);
3192         preempt_enable_notrace();
3193
3194 }
3195
3196 static inline void ftrace_trace_stack(struct trace_array *tr,
3197                                       struct trace_buffer *buffer,
3198                                       unsigned int trace_ctx,
3199                                       int skip, struct pt_regs *regs)
3200 {
3201         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3202                 return;
3203
3204         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3205 }
3206
3207 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3208                    int skip)
3209 {
3210         struct trace_buffer *buffer = tr->array_buffer.buffer;
3211
3212         if (rcu_is_watching()) {
3213                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3214                 return;
3215         }
3216
3217         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3218                 return;
3219
3220         /*
3221          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3222          * but if the above rcu_is_watching() failed, then the NMI
3223          * triggered someplace critical, and ct_irq_enter() should
3224          * not be called from NMI.
3225          */
3226         if (unlikely(in_nmi()))
3227                 return;
3228
3229         ct_irq_enter_irqson();
3230         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3231         ct_irq_exit_irqson();
3232 }
3233
3234 /**
3235  * trace_dump_stack - record a stack back trace in the trace buffer
3236  * @skip: Number of functions to skip (helper handlers)
3237  */
3238 void trace_dump_stack(int skip)
3239 {
3240         if (tracing_disabled || tracing_selftest_running)
3241                 return;
3242
3243 #ifndef CONFIG_UNWINDER_ORC
3244         /* Skip 1 to skip this function. */
3245         skip++;
3246 #endif
3247         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3248                              tracing_gen_ctx(), skip, NULL);
3249 }
3250 EXPORT_SYMBOL_GPL(trace_dump_stack);
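/*
 * Example (editor's sketch, not part of the original source): a driver
 * chasing an unexpected code path can record the current kernel stack
 * into the trace buffer instead of spamming the console. The condition
 * "bad_state" and the skip count of 0 are hypothetical:
 *
 *	if (unlikely(bad_state))
 *		trace_dump_stack(0);
 *
 * The resulting stack event is printed inline with the other events
 * when the "trace" file is read.
 */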
3251
3252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3253 static DEFINE_PER_CPU(int, user_stack_count);
3254
3255 static void
3256 ftrace_trace_userstack(struct trace_array *tr,
3257                        struct trace_buffer *buffer, unsigned int trace_ctx)
3258 {
3259         struct trace_event_call *call = &event_user_stack;
3260         struct ring_buffer_event *event;
3261         struct userstack_entry *entry;
3262
3263         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3264                 return;
3265
3266         /*
3267          * NMIs cannot handle page faults, even with fixups.
3268          * Saving the user stack can (and often does) fault.
3269          */
3270         if (unlikely(in_nmi()))
3271                 return;
3272
3273         /*
3274          * prevent recursion, since the user stack tracing may
3275          * trigger other kernel events.
3276          */
3277         preempt_disable();
3278         if (__this_cpu_read(user_stack_count))
3279                 goto out;
3280
3281         __this_cpu_inc(user_stack_count);
3282
3283         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3284                                             sizeof(*entry), trace_ctx);
3285         if (!event)
3286                 goto out_drop_count;
3287         entry   = ring_buffer_event_data(event);
3288
3289         entry->tgid             = current->tgid;
3290         memset(&entry->caller, 0, sizeof(entry->caller));
3291
3292         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3293         if (!call_filter_check_discard(call, entry, buffer, event))
3294                 __buffer_unlock_commit(buffer, event);
3295
3296  out_drop_count:
3297         __this_cpu_dec(user_stack_count);
3298  out:
3299         preempt_enable();
3300 }
3301 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3302 static void ftrace_trace_userstack(struct trace_array *tr,
3303                                    struct trace_buffer *buffer,
3304                                    unsigned int trace_ctx)
3305 {
3306 }
3307 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3308
3309 #endif /* CONFIG_STACKTRACE */
3310
3311 static inline void
3312 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3313                           unsigned long long delta)
3314 {
3315         entry->bottom_delta_ts = delta & U32_MAX;
3316         entry->top_delta_ts = (delta >> 32);
3317 }
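/*
 * Editor's note: the 64-bit delta is split across two 32-bit fields, so
 * a consumer of a func_repeats_entry would reconstruct it as:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */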
3318
3319 void trace_last_func_repeats(struct trace_array *tr,
3320                              struct trace_func_repeats *last_info,
3321                              unsigned int trace_ctx)
3322 {
3323         struct trace_buffer *buffer = tr->array_buffer.buffer;
3324         struct func_repeats_entry *entry;
3325         struct ring_buffer_event *event;
3326         u64 delta;
3327
3328         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3329                                             sizeof(*entry), trace_ctx);
3330         if (!event)
3331                 return;
3332
3333         delta = ring_buffer_event_time_stamp(buffer, event) -
3334                 last_info->ts_last_call;
3335
3336         entry = ring_buffer_event_data(event);
3337         entry->ip = last_info->ip;
3338         entry->parent_ip = last_info->parent_ip;
3339         entry->count = last_info->count;
3340         func_repeats_set_delta_ts(entry, delta);
3341
3342         __buffer_unlock_commit(buffer, event);
3343 }
3344
3345 /* created for use with alloc_percpu */
3346 struct trace_buffer_struct {
3347         int nesting;
3348         char buffer[4][TRACE_BUF_SIZE];
3349 };
3350
3351 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3352
3353 /*
3354  * This allows for lockless recording.  If we're nested too deeply, then
3355  * this returns NULL.
3356  */
3357 static char *get_trace_buf(void)
3358 {
3359         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3360
3361         if (!trace_percpu_buffer || buffer->nesting >= 4)
3362                 return NULL;
3363
3364         buffer->nesting++;
3365
3366         /* Interrupts must see nesting incremented before we use the buffer */
3367         barrier();
3368         return &buffer->buffer[buffer->nesting - 1][0];
3369 }
3370
3371 static void put_trace_buf(void)
3372 {
3373         /* Don't let the decrement of nesting leak before this */
3374         barrier();
3375         this_cpu_dec(trace_percpu_buffer->nesting);
3376 }
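/*
 * Editor's note: get_trace_buf()/put_trace_buf() form a lockless,
 * per-CPU push/pop of scratch buffers; the four slots allow for the
 * usual nesting of contexts (task, softirq, hardirq, NMI). Both callers
 * in this file disable preemption around the pair, as trace_vbprintk()
 * below does. A minimal usage sketch ("val" and do_something() are
 * hypothetical):
 *
 *	char *buf = get_trace_buf();
 *	if (buf) {
 *		int len = vscnprintf(buf, TRACE_BUF_SIZE, "val=%d\n", val);
 *
 *		do_something(buf, len);
 *		put_trace_buf();
 *	}
 */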
3377
3378 static int alloc_percpu_trace_buffer(void)
3379 {
3380         struct trace_buffer_struct __percpu *buffers;
3381
3382         if (trace_percpu_buffer)
3383                 return 0;
3384
3385         buffers = alloc_percpu(struct trace_buffer_struct);
3386         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3387                 return -ENOMEM;
3388
3389         trace_percpu_buffer = buffers;
3390         return 0;
3391 }
3392
3393 static int buffers_allocated;
3394
3395 void trace_printk_init_buffers(void)
3396 {
3397         if (buffers_allocated)
3398                 return;
3399
3400         if (alloc_percpu_trace_buffer())
3401                 return;
3402
3403         /* trace_printk() is for debug use only. Don't use it in production. */
3404
3405         pr_warn("\n");
3406         pr_warn("**********************************************************\n");
3407         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3408         pr_warn("**                                                      **\n");
3409         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3410         pr_warn("**                                                      **\n");
3411         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3412         pr_warn("** unsafe for production use.                           **\n");
3413         pr_warn("**                                                      **\n");
3414         pr_warn("** If you see this message and you are not debugging    **\n");
3415         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3416         pr_warn("**                                                      **\n");
3417         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3418         pr_warn("**********************************************************\n");
3419
3420         /* Expand the buffers to set size */
3421         tracing_update_buffers(&global_trace);
3422
3423         buffers_allocated = 1;
3424
3425         /*
3426          * trace_printk_init_buffers() can be called by modules.
3427          * If that happens, then we need to start cmdline recording
3428          * directly here. If the global_trace.buffer is already
3429          * allocated here, then this was called by module code.
3430          */
3431         if (global_trace.array_buffer.buffer)
3432                 tracing_start_cmdline_record();
3433 }
3434 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3435
3436 void trace_printk_start_comm(void)
3437 {
3438         /* Start tracing comms if trace printk is set */
3439         if (!buffers_allocated)
3440                 return;
3441         tracing_start_cmdline_record();
3442 }
3443
3444 static void trace_printk_start_stop_comm(int enabled)
3445 {
3446         if (!buffers_allocated)
3447                 return;
3448
3449         if (enabled)
3450                 tracing_start_cmdline_record();
3451         else
3452                 tracing_stop_cmdline_record();
3453 }
3454
3455 /**
3456  * trace_vbprintk - write binary msg to tracing buffer
3457  * @ip:    The address of the caller
3458  * @fmt:   The string format to write to the buffer
3459  * @args:  Arguments for @fmt
3460  */
3461 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3462 {
3463         struct trace_event_call *call = &event_bprint;
3464         struct ring_buffer_event *event;
3465         struct trace_buffer *buffer;
3466         struct trace_array *tr = &global_trace;
3467         struct bprint_entry *entry;
3468         unsigned int trace_ctx;
3469         char *tbuffer;
3470         int len = 0, size;
3471
3472         if (unlikely(tracing_selftest_running || tracing_disabled))
3473                 return 0;
3474
3475         /* Don't pollute graph traces with trace_vprintk internals */
3476         pause_graph_tracing();
3477
3478         trace_ctx = tracing_gen_ctx();
3479         preempt_disable_notrace();
3480
3481         tbuffer = get_trace_buf();
3482         if (!tbuffer) {
3483                 len = 0;
3484                 goto out_nobuffer;
3485         }
3486
3487         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3488
3489         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3490                 goto out_put;
3491
3492         size = sizeof(*entry) + sizeof(u32) * len;
3493         buffer = tr->array_buffer.buffer;
3494         ring_buffer_nest_start(buffer);
3495         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3496                                             trace_ctx);
3497         if (!event)
3498                 goto out;
3499         entry = ring_buffer_event_data(event);
3500         entry->ip                       = ip;
3501         entry->fmt                      = fmt;
3502
3503         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3504         if (!call_filter_check_discard(call, entry, buffer, event)) {
3505                 __buffer_unlock_commit(buffer, event);
3506                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3507         }
3508
3509 out:
3510         ring_buffer_nest_end(buffer);
3511 out_put:
3512         put_trace_buf();
3513
3514 out_nobuffer:
3515         preempt_enable_notrace();
3516         unpause_graph_tracing();
3517
3518         return len;
3519 }
3520 EXPORT_SYMBOL_GPL(trace_vbprintk);
3521
3522 __printf(3, 0)
3523 static int
3524 __trace_array_vprintk(struct trace_buffer *buffer,
3525                       unsigned long ip, const char *fmt, va_list args)
3526 {
3527         struct trace_event_call *call = &event_print;
3528         struct ring_buffer_event *event;
3529         int len = 0, size;
3530         struct print_entry *entry;
3531         unsigned int trace_ctx;
3532         char *tbuffer;
3533
3534         if (tracing_disabled)
3535                 return 0;
3536
3537         /* Don't pollute graph traces with trace_vprintk internals */
3538         pause_graph_tracing();
3539
3540         trace_ctx = tracing_gen_ctx();
3541         preempt_disable_notrace();
3542
3543
3544         tbuffer = get_trace_buf();
3545         if (!tbuffer) {
3546                 len = 0;
3547                 goto out_nobuffer;
3548         }
3549
3550         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3551
3552         size = sizeof(*entry) + len + 1;
3553         ring_buffer_nest_start(buffer);
3554         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3555                                             trace_ctx);
3556         if (!event)
3557                 goto out;
3558         entry = ring_buffer_event_data(event);
3559         entry->ip = ip;
3560
3561         memcpy(&entry->buf, tbuffer, len + 1);
3562         if (!call_filter_check_discard(call, entry, buffer, event)) {
3563                 __buffer_unlock_commit(buffer, event);
3564                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3565         }
3566
3567 out:
3568         ring_buffer_nest_end(buffer);
3569         put_trace_buf();
3570
3571 out_nobuffer:
3572         preempt_enable_notrace();
3573         unpause_graph_tracing();
3574
3575         return len;
3576 }
3577
3578 __printf(3, 0)
3579 int trace_array_vprintk(struct trace_array *tr,
3580                         unsigned long ip, const char *fmt, va_list args)
3581 {
3582         if (tracing_selftest_running && tr == &global_trace)
3583                 return 0;
3584
3585         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3586 }
3587
3588 /**
3589  * trace_array_printk - Print a message to a specific instance
3590  * @tr: The instance trace_array descriptor
3591  * @ip: The instruction pointer that this is called from.
3592  * @fmt: The format to print (printf format)
3593  *
3594  * If a subsystem sets up its own instance, it may printk strings
3595  * into its tracing instance buffer using this function. Note, this
3596  * function will not write into the top level buffer (use
3597  * trace_printk() for that), as the top level buffer should only
3598  * contain events that can be individually disabled.
3599  * trace_printk() is only for debugging a kernel, and should never
3600  * be used in production code.
3601  *
3602  * trace_array_printk() can be used, as it will not add noise to the
3603  * top level tracing buffer.
3604  *
3605  * Note, trace_array_init_printk() must be called on @tr before this
3606  * can be used.
3607  */
3608 __printf(3, 0)
3609 int trace_array_printk(struct trace_array *tr,
3610                        unsigned long ip, const char *fmt, ...)
3611 {
3612         int ret;
3613         va_list ap;
3614
3615         if (!tr)
3616                 return -ENOENT;
3617
3618         /* This is only allowed for created instances */
3619         if (tr == &global_trace)
3620                 return 0;
3621
3622         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3623                 return 0;
3624
3625         va_start(ap, fmt);
3626         ret = trace_array_vprintk(tr, ip, fmt, ap);
3627         va_end(ap);
3628         return ret;
3629 }
3630 EXPORT_SYMBOL_GPL(trace_array_printk);
3631
3632 /**
3633  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3634  * @tr: The trace array to initialize the buffers for
3635  *
3636  * As trace_array_printk() only writes into instances, they are OK to
3637  * have in the kernel (unlike trace_printk()). This needs to be called
3638  * before trace_array_printk() can be used on a trace_array.
3639  */
3640 int trace_array_init_printk(struct trace_array *tr)
3641 {
3642         if (!tr)
3643                 return -ENOENT;
3644
3645         /* This is only allowed for created instances */
3646         if (tr == &global_trace)
3647                 return -EINVAL;
3648
3649         return alloc_percpu_trace_buffer();
3650 }
3651 EXPORT_SYMBOL_GPL(trace_array_init_printk);
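/*
 * Example usage (editor's sketch; "my_tr", "id" and the message are
 * hypothetical): a subsystem that created its own instance writes into
 * that instance rather than into the top level buffer:
 *
 *	if (!trace_array_init_printk(my_tr))
 *		trace_array_printk(my_tr, _THIS_IP_,
 *				   "widget %d reset\n", id);
 */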
3652
3653 __printf(3, 4)
3654 int trace_array_printk_buf(struct trace_buffer *buffer,
3655                            unsigned long ip, const char *fmt, ...)
3656 {
3657         int ret;
3658         va_list ap;
3659
3660         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3661                 return 0;
3662
3663         va_start(ap, fmt);
3664         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3665         va_end(ap);
3666         return ret;
3667 }
3668
3669 __printf(2, 0)
3670 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3671 {
3672         return trace_array_vprintk(&global_trace, ip, fmt, args);
3673 }
3674 EXPORT_SYMBOL_GPL(trace_vprintk);
3675
3676 static void trace_iterator_increment(struct trace_iterator *iter)
3677 {
3678         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3679
3680         iter->idx++;
3681         if (buf_iter)
3682                 ring_buffer_iter_advance(buf_iter);
3683 }
3684
3685 static struct trace_entry *
3686 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3687                 unsigned long *lost_events)
3688 {
3689         struct ring_buffer_event *event;
3690         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3691
3692         if (buf_iter) {
3693                 event = ring_buffer_iter_peek(buf_iter, ts);
3694                 if (lost_events)
3695                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3696                                 (unsigned long)-1 : 0;
3697         } else {
3698                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3699                                          lost_events);
3700         }
3701
3702         if (event) {
3703                 iter->ent_size = ring_buffer_event_length(event);
3704                 return ring_buffer_event_data(event);
3705         }
3706         iter->ent_size = 0;
3707         return NULL;
3708 }
3709
3710 static struct trace_entry *
3711 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3712                   unsigned long *missing_events, u64 *ent_ts)
3713 {
3714         struct trace_buffer *buffer = iter->array_buffer->buffer;
3715         struct trace_entry *ent, *next = NULL;
3716         unsigned long lost_events = 0, next_lost = 0;
3717         int cpu_file = iter->cpu_file;
3718         u64 next_ts = 0, ts;
3719         int next_cpu = -1;
3720         int next_size = 0;
3721         int cpu;
3722
3723         /*
3724          * If we are in a per_cpu trace file, don't bother iterating over
3725          * all CPUs; just peek at that one directly.
3726          */
3727         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3728                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3729                         return NULL;
3730                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3731                 if (ent_cpu)
3732                         *ent_cpu = cpu_file;
3733
3734                 return ent;
3735         }
3736
3737         for_each_tracing_cpu(cpu) {
3738
3739                 if (ring_buffer_empty_cpu(buffer, cpu))
3740                         continue;
3741
3742                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3743
3744                 /*
3745                  * Pick the entry with the smallest timestamp:
3746                  */
3747                 if (ent && (!next || ts < next_ts)) {
3748                         next = ent;
3749                         next_cpu = cpu;
3750                         next_ts = ts;
3751                         next_lost = lost_events;
3752                         next_size = iter->ent_size;
3753                 }
3754         }
3755
3756         iter->ent_size = next_size;
3757
3758         if (ent_cpu)
3759                 *ent_cpu = next_cpu;
3760
3761         if (ent_ts)
3762                 *ent_ts = next_ts;
3763
3764         if (missing_events)
3765                 *missing_events = next_lost;
3766
3767         return next;
3768 }
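/*
 * Editor's note: the loop above is effectively an N-way merge. Each
 * per-CPU buffer is already ordered by timestamp, so repeatedly taking
 * the peeked entry with the smallest timestamp yields a single,
 * globally time-ordered stream without any additional sorting.
 */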
3769
3770 #define STATIC_FMT_BUF_SIZE     128
3771 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3772
3773 char *trace_iter_expand_format(struct trace_iterator *iter)
3774 {
3775         char *tmp;
3776
3777         /*
3778          * iter->tr is NULL when used with tp_printk, which makes
3779          * this get called where it is not safe to call krealloc().
3780          */
3781         if (!iter->tr || iter->fmt == static_fmt_buf)
3782                 return NULL;
3783
3784         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3785                        GFP_KERNEL);
3786         if (tmp) {
3787                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3788                 iter->fmt = tmp;
3789         }
3790
3791         return tmp;
3792 }
3793
3794 /* Returns true if the string is safe to dereference from an event */
3795 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3796                            bool star, int len)
3797 {
3798         unsigned long addr = (unsigned long)str;
3799         struct trace_event *trace_event;
3800         struct trace_event_call *event;
3801
3802         /* Ignore strings with no length */
3803         if (star && !len)
3804                 return true;
3805
3806         /* OK if part of the event data */
3807         if ((addr >= (unsigned long)iter->ent) &&
3808             (addr < (unsigned long)iter->ent + iter->ent_size))
3809                 return true;
3810
3811         /* OK if part of the temp seq buffer */
3812         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3813             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3814                 return true;
3815
3816         /* Core rodata can not be freed */
3817         if (is_kernel_rodata(addr))
3818                 return true;
3819
3820         if (trace_is_tracepoint_string(str))
3821                 return true;
3822
3823         /*
3824          * Now this could be a module event, referencing core module
3825          * data, which is OK.
3826          */
3827         if (!iter->ent)
3828                 return false;
3829
3830         trace_event = ftrace_find_event(iter->ent->type);
3831         if (!trace_event)
3832                 return false;
3833
3834         event = container_of(trace_event, struct trace_event_call, event);
3835         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3836                 return false;
3837
3838         /* Would rather have rodata, but this will suffice */
3839         if (within_module_core(addr, event->module))
3840                 return true;
3841
3842         return false;
3843 }
3844
3845 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3846
3847 static int test_can_verify_check(const char *fmt, ...)
3848 {
3849         char buf[16];
3850         va_list ap;
3851         int ret;
3852
3853         /*
3854          * The verifier depends on vsnprintf() modifying the va_list
3855          * passed to it, i.e. on the va_list being passed by reference.
3856          * Some architectures (like x86_32) pass it by value, which means
3857          * that vsnprintf() does not modify the caller's va_list, and the
3858          * verifier would then need to understand every value that
3859          * vsnprintf consumed. If the va_list is passed by value, the
3860          * verifier is disabled.
3861          */
3862         va_start(ap, fmt);
3863         vsnprintf(buf, 16, "%d", ap);
3864         ret = va_arg(ap, int);
3865         va_end(ap);
3866
3867         return ret;
3868 }
3869
3870 static void test_can_verify(void)
3871 {
3872         if (!test_can_verify_check("%d %d", 0, 1)) {
3873                 pr_info("trace event string verifier disabled\n");
3874                 static_branch_inc(&trace_no_verify);
3875         }
3876 }
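/*
 * Editor's note: test_can_verify_check("%d %d", 0, 1) has vsnprintf()
 * consume the first argument (0). If the architecture passes the
 * va_list by reference, the following va_arg() returns the second
 * argument (1) and the verifier stays enabled; if it is passed by
 * value, va_arg() sees the untouched list, returns 0, and the verifier
 * is disabled.
 */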
3877
3878 /**
3879  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3880  * @iter: The iterator that holds the seq buffer and the event being printed
3881  * @fmt: The format used to print the event
3882  * @ap: The va_list holding the data to print from @fmt.
3883  *
3884  * This writes the data into the @iter->seq buffer using the data from
3885  * @fmt and @ap. If the format has a %s, then the source of the string
3886  * is examined to make sure it is safe to print, otherwise it will
3887  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3888  * pointer.
3889  */
3890 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3891                          va_list ap)
3892 {
3893         const char *p = fmt;
3894         const char *str;
3895         int i, j;
3896
3897         if (WARN_ON_ONCE(!fmt))
3898                 return;
3899
3900         if (static_branch_unlikely(&trace_no_verify))
3901                 goto print;
3902
3903         /* Don't bother checking when doing a ftrace_dump() */
3904         if (iter->fmt == static_fmt_buf)
3905                 goto print;
3906
3907         while (*p) {
3908                 bool star = false;
3909                 int len = 0;
3910
3911                 j = 0;
3912
3913                 /* We only care about %s and variants */
3914                 for (i = 0; p[i]; i++) {
3915                         if (i + 1 >= iter->fmt_size) {
3916                                 /*
3917                                  * If we can't expand the copy buffer,
3918                                  * just print it.
3919                                  */
3920                                 if (!trace_iter_expand_format(iter))
3921                                         goto print;
3922                         }
3923
3924                         if (p[i] == '\\' && p[i+1]) {
3925                                 i++;
3926                                 continue;
3927                         }
3928                         if (p[i] == '%') {
3929                                 /* Need to test cases like %08.*s */
3930                                 for (j = 1; p[i+j]; j++) {
3931                                         if (isdigit(p[i+j]) ||
3932                                             p[i+j] == '.')
3933                                                 continue;
3934                                         if (p[i+j] == '*') {
3935                                                 star = true;
3936                                                 continue;
3937                                         }
3938                                         break;
3939                                 }
3940                                 if (p[i+j] == 's')
3941                                         break;
3942                                 star = false;
3943                         }
3944                         j = 0;
3945                 }
3946                 /* If no %s found then just print normally */
3947                 if (!p[i])
3948                         break;
3949
3950                 /* Copy up to the %s, and print that */
3951                 strncpy(iter->fmt, p, i);
3952                 iter->fmt[i] = '\0';
3953                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3954
3955                 /*
3956                  * If iter->seq is full, the above call no longer guarantees
3957                  * that ap is in sync with fmt processing, and further calls
3958                  * to va_arg() can return wrong positional arguments.
3959                  *
3960                  * Ensure that ap is no longer used in this case.
3961                  */
3962                 if (iter->seq.full) {
3963                         p = "";
3964                         break;
3965                 }
3966
3967                 if (star)
3968                         len = va_arg(ap, int);
3969
3970                 /* The ap now points to the string data of the %s */
3971                 str = va_arg(ap, const char *);
3972
3973                 /*
3974                  * If you hit this warning, it is likely that the
3975                  * trace event in question used %s on a string that
3976                  * was saved at the time of the event, but may not be
3977                  * around when the trace is read. Use __string(),
3978                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3979                  * instead. See samples/trace_events/trace-events-sample.h
3980                  * for reference.
3981                  */
3982                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3983                               "fmt: '%s' current_buffer: '%s'",
3984                               fmt, seq_buf_str(&iter->seq.seq))) {
3985                         int ret;
3986
3987                         /* Try to safely read the string */
3988                         if (star) {
3989                                 if (len + 1 > iter->fmt_size)
3990                                         len = iter->fmt_size - 1;
3991                                 if (len < 0)
3992                                         len = 0;
3993                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3994                                 iter->fmt[len] = 0;
3995                                 star = false;
3996                         } else {
3997                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3998                                                                   iter->fmt_size);
3999                         }
4000                         if (ret < 0)
4001                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
4002                         else
4003                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
4004                                                  str, iter->fmt);
4005                         str = "[UNSAFE-MEMORY]";
4006                         strcpy(iter->fmt, "%s");
4007                 } else {
4008                         strncpy(iter->fmt, p + i, j + 1);
4009                         iter->fmt[j+1] = '\0';
4010                 }
4011                 if (star)
4012                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4013                 else
4014                         trace_seq_printf(&iter->seq, iter->fmt, str);
4015
4016                 p += i + j + 1;
4017         }
4018  print:
4019         if (*p)
4020                 trace_seq_vprintf(&iter->seq, p, ap);
4021 }
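/*
 * Editor's sketch of the pattern the warning above recommends: copy the
 * string into the event at record time with __string()/__assign_str(),
 * so that the %s argument always points into the event data. Such a
 * definition lives in a trace header, not here; the event name and
 * argument are hypothetical (see
 * samples/trace_events/trace-events-sample.h for the canonical example):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */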
4022
4023 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4024 {
4025         const char *p, *new_fmt;
4026         char *q;
4027
4028         if (WARN_ON_ONCE(!fmt))
4029                 return fmt;
4030
4031         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4032                 return fmt;
4033
4034         p = fmt;
4035         new_fmt = q = iter->fmt;
4036         while (*p) {
4037                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4038                         if (!trace_iter_expand_format(iter))
4039                                 return fmt;
4040
4041                         q += iter->fmt - new_fmt;
4042                         new_fmt = iter->fmt;
4043                 }
4044
4045                 *q++ = *p++;
4046
4047                 /* Replace %p with %px */
4048                 if (p[-1] == '%') {
4049                         if (p[0] == '%') {
4050                                 *q++ = *p++;
4051                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4052                                 *q++ = *p++;
4053                                 *q++ = 'x';
4054                         }
4055                 }
4056         }
4057         *q = '\0';
4058
4059         return new_fmt;
4060 }
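/*
 * Editor's illustration: with TRACE_ITER_HASH_PTR cleared, a format
 * such as
 *
 *	"comm=%s ptr=%p pct=100%%"
 *
 * is rewritten in iter->fmt as
 *
 *	"comm=%s ptr=%px pct=100%%"
 *
 * Plain pointers are printed unhashed, while "%%" and extended
 * specifiers like "%pS" (where 'p' is followed by an alphanumeric) are
 * left untouched.
 */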
4061
4062 #define STATIC_TEMP_BUF_SIZE    128
4063 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4064
4065 /* Find the next real entry, without updating the iterator itself */
4066 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4067                                           int *ent_cpu, u64 *ent_ts)
4068 {
4069         /* __find_next_entry will reset ent_size */
4070         int ent_size = iter->ent_size;
4071         struct trace_entry *entry;
4072
4073         /*
4074          * If called from ftrace_dump(), then the iter->temp buffer
4075          * will be the static_temp_buf and not allocated with kmalloc().
4076          * If the entry size is greater than the buffer, we cannot
4077          * save it. Just return NULL in that case. This is only
4078          * used to add markers when two consecutive events' time
4079          * stamps have a large delta. See trace_print_lat_context().
4080          */
4081         if (iter->temp == static_temp_buf &&
4082             STATIC_TEMP_BUF_SIZE < ent_size)
4083                 return NULL;
4084
4085         /*
4086          * __find_next_entry() may call peek_next_entry(), which may
4087          * call ring_buffer_peek(), which can make the contents of
4088          * iter->ent undefined. Copy iter->ent now.
4089          */
4090         if (iter->ent && iter->ent != iter->temp) {
4091                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4092                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4093                         void *temp;
4094                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4095                         if (!temp)
4096                                 return NULL;
4097                         kfree(iter->temp);
4098                         iter->temp = temp;
4099                         iter->temp_size = iter->ent_size;
4100                 }
4101                 memcpy(iter->temp, iter->ent, iter->ent_size);
4102                 iter->ent = iter->temp;
4103         }
4104         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4105         /* Put back the original ent_size */
4106         iter->ent_size = ent_size;
4107
4108         return entry;
4109 }
4110
4111 /* Find the next real entry, and increment the iterator to the next entry */
4112 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4113 {
4114         iter->ent = __find_next_entry(iter, &iter->cpu,
4115                                       &iter->lost_events, &iter->ts);
4116
4117         if (iter->ent)
4118                 trace_iterator_increment(iter);
4119
4120         return iter->ent ? iter : NULL;
4121 }
4122
4123 static void trace_consume(struct trace_iterator *iter)
4124 {
4125         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4126                             &iter->lost_events);
4127 }
4128
4129 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4130 {
4131         struct trace_iterator *iter = m->private;
4132         int i = (int)*pos;
4133         void *ent;
4134
4135         WARN_ON_ONCE(iter->leftover);
4136
4137         (*pos)++;
4138
4139         /* can't go backwards */
4140         if (iter->idx > i)
4141                 return NULL;
4142
4143         if (iter->idx < 0)
4144                 ent = trace_find_next_entry_inc(iter);
4145         else
4146                 ent = iter;
4147
4148         while (ent && iter->idx < i)
4149                 ent = trace_find_next_entry_inc(iter);
4150
4151         iter->pos = *pos;
4152
4153         return ent;
4154 }
4155
4156 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4157 {
4158         struct ring_buffer_iter *buf_iter;
4159         unsigned long entries = 0;
4160         u64 ts;
4161
4162         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4163
4164         buf_iter = trace_buffer_iter(iter, cpu);
4165         if (!buf_iter)
4166                 return;
4167
4168         ring_buffer_iter_reset(buf_iter);
4169
4170         /*
4171          * With the max latency tracers, a reset may never have taken
4172          * place on a CPU. This is evident from the timestamp being
4173          * before the start of the buffer.
4174          */
4175         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4176                 if (ts >= iter->array_buffer->time_start)
4177                         break;
4178                 entries++;
4179                 ring_buffer_iter_advance(buf_iter);
4180         }
4181
4182         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4183 }
4184
4185 /*
4186  * The current tracer is copied to avoid taking a global lock
4187  * all around.
4188  */
4189 static void *s_start(struct seq_file *m, loff_t *pos)
4190 {
4191         struct trace_iterator *iter = m->private;
4192         struct trace_array *tr = iter->tr;
4193         int cpu_file = iter->cpu_file;
4194         void *p = NULL;
4195         loff_t l = 0;
4196         int cpu;
4197
4198         mutex_lock(&trace_types_lock);
4199         if (unlikely(tr->current_trace != iter->trace)) {
4200                 /* Close iter->trace before switching to the new current tracer */
4201                 if (iter->trace->close)
4202                         iter->trace->close(iter);
4203                 iter->trace = tr->current_trace;
4204                 /* Reopen the new current tracer */
4205                 if (iter->trace->open)
4206                         iter->trace->open(iter);
4207         }
4208         mutex_unlock(&trace_types_lock);
4209
4210 #ifdef CONFIG_TRACER_MAX_TRACE
4211         if (iter->snapshot && iter->trace->use_max_tr)
4212                 return ERR_PTR(-EBUSY);
4213 #endif
4214
4215         if (*pos != iter->pos) {
4216                 iter->ent = NULL;
4217                 iter->cpu = 0;
4218                 iter->idx = -1;
4219
4220                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4221                         for_each_tracing_cpu(cpu)
4222                                 tracing_iter_reset(iter, cpu);
4223                 } else
4224                         tracing_iter_reset(iter, cpu_file);
4225
4226                 iter->leftover = 0;
4227                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4228                         ;
4229
4230         } else {
4231                 /*
4232                  * If we overflowed the seq_file before, then we want
4233                  * to just reuse the trace_seq buffer again.
4234                  */
4235                 if (iter->leftover)
4236                         p = iter;
4237                 else {
4238                         l = *pos - 1;
4239                         p = s_next(m, p, &l);
4240                 }
4241         }
4242
4243         trace_event_read_lock();
4244         trace_access_lock(cpu_file);
4245         return p;
4246 }
4247
4248 static void s_stop(struct seq_file *m, void *p)
4249 {
4250         struct trace_iterator *iter = m->private;
4251
4252 #ifdef CONFIG_TRACER_MAX_TRACE
4253         if (iter->snapshot && iter->trace->use_max_tr)
4254                 return;
4255 #endif
4256
4257         trace_access_unlock(iter->cpu_file);
4258         trace_event_read_unlock();
4259 }
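/*
 * Editor's note: s_start()/s_next()/s_stop(), together with s_show()
 * further below, follow the standard seq_file iterator contract and are
 * presumably wired up through a struct seq_operations elsewhere in this
 * file, roughly (the structure name here is illustrative):
 *
 *	static const struct seq_operations tracer_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 */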
4260
4261 static void
4262 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4263                       unsigned long *entries, int cpu)
4264 {
4265         unsigned long count;
4266
4267         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4268         /*
4269          * If this buffer has skipped entries, then we hold all
4270          * entries for the trace and we need to ignore the
4271          * ones before the time stamp.
4272          */
4273         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4274                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4275                 /* total is the same as the entries */
4276                 *total = count;
4277         } else
4278                 *total = count +
4279                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4280         *entries = count;
4281 }
4282
4283 static void
4284 get_total_entries(struct array_buffer *buf,
4285                   unsigned long *total, unsigned long *entries)
4286 {
4287         unsigned long t, e;
4288         int cpu;
4289
4290         *total = 0;
4291         *entries = 0;
4292
4293         for_each_tracing_cpu(cpu) {
4294                 get_total_entries_cpu(buf, &t, &e, cpu);
4295                 *total += t;
4296                 *entries += e;
4297         }
4298 }
4299
4300 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4301 {
4302         unsigned long total, entries;
4303
4304         if (!tr)
4305                 tr = &global_trace;
4306
4307         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4308
4309         return entries;
4310 }
4311
4312 unsigned long trace_total_entries(struct trace_array *tr)
4313 {
4314         unsigned long total, entries;
4315
4316         if (!tr)
4317                 tr = &global_trace;
4318
4319         get_total_entries(&tr->array_buffer, &total, &entries);
4320
4321         return entries;
4322 }
4323
4324 static void print_lat_help_header(struct seq_file *m)
4325 {
4326         seq_puts(m, "#                    _------=> CPU#            \n"
4327                     "#                   / _-----=> irqs-off/BH-disabled\n"
4328                     "#                  | / _----=> need-resched    \n"
4329                     "#                  || / _---=> hardirq/softirq \n"
4330                     "#                  ||| / _--=> preempt-depth   \n"
4331                     "#                  |||| / _-=> migrate-disable \n"
4332                     "#                  ||||| /     delay           \n"
4333                     "#  cmd     pid     |||||| time  |   caller     \n"
4334                     "#     \\   /        ||||||  \\    |    /       \n");
4335 }
4336
4337 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4338 {
4339         unsigned long total;
4340         unsigned long entries;
4341
4342         get_total_entries(buf, &total, &entries);
4343         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4344                    entries, total, num_online_cpus());
4345         seq_puts(m, "#\n");
4346 }
4347
4348 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4349                                    unsigned int flags)
4350 {
4351         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4352
4353         print_event_info(buf, m);
4354
4355         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4356         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4357 }
4358
4359 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4360                                        unsigned int flags)
4361 {
4362         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4363         static const char space[] = "            ";
4364         int prec = tgid ? 12 : 2;
4365
4366         print_event_info(buf, m);
4367
4368         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4369         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4370         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4371         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4372         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4373         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4374         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4375         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4376 }
4377
4378 void
4379 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4380 {
4381         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4382         struct array_buffer *buf = iter->array_buffer;
4383         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4384         struct tracer *type = iter->trace;
4385         unsigned long entries;
4386         unsigned long total;
4387         const char *name = type->name;
4388
4389         get_total_entries(buf, &total, &entries);
4390
4391         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4392                    name, UTS_RELEASE);
4393         seq_puts(m, "# -----------------------------------"
4394                  "---------------------------------\n");
4395         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4396                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4397                    nsecs_to_usecs(data->saved_latency),
4398                    entries,
4399                    total,
4400                    buf->cpu,
4401                    preempt_model_none()      ? "server" :
4402                    preempt_model_voluntary() ? "desktop" :
4403                    preempt_model_full()      ? "preempt" :
4404                    preempt_model_rt()        ? "preempt_rt" :
4405                    "unknown",
4406                    /* These are reserved for later use */
4407                    0, 0, 0, 0);
4408 #ifdef CONFIG_SMP
4409         seq_printf(m, " #P:%d)\n", num_online_cpus());
4410 #else
4411         seq_puts(m, ")\n");
4412 #endif
4413         seq_puts(m, "#    -----------------\n");
4414         seq_printf(m, "#    | task: %.16s-%d "
4415                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4416                    data->comm, data->pid,
4417                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4418                    data->policy, data->rt_priority);
4419         seq_puts(m, "#    -----------------\n");
4420
4421         if (data->critical_start) {
4422                 seq_puts(m, "#  => started at: ");
4423                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4424                 trace_print_seq(m, &iter->seq);
4425                 seq_puts(m, "\n#  => ended at:   ");
4426                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4427                 trace_print_seq(m, &iter->seq);
4428                 seq_puts(m, "\n#\n");
4429         }
4430
4431         seq_puts(m, "#\n");
4432 }
4433
4434 static void test_cpu_buff_start(struct trace_iterator *iter)
4435 {
4436         struct trace_seq *s = &iter->seq;
4437         struct trace_array *tr = iter->tr;
4438
4439         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4440                 return;
4441
4442         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4443                 return;
4444
4445         if (cpumask_available(iter->started) &&
4446             cpumask_test_cpu(iter->cpu, iter->started))
4447                 return;
4448
4449         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4450                 return;
4451
4452         if (cpumask_available(iter->started))
4453                 cpumask_set_cpu(iter->cpu, iter->started);
4454
4455         /* Don't print started cpu buffer for the first entry of the trace */
4456         if (iter->idx > 1)
4457                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4458                                 iter->cpu);
4459 }
4460
4461 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4462 {
4463         struct trace_array *tr = iter->tr;
4464         struct trace_seq *s = &iter->seq;
4465         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4466         struct trace_entry *entry;
4467         struct trace_event *event;
4468
4469         entry = iter->ent;
4470
4471         test_cpu_buff_start(iter);
4472
4473         event = ftrace_find_event(entry->type);
4474
4475         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4476                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4477                         trace_print_lat_context(iter);
4478                 else
4479                         trace_print_context(iter);
4480         }
4481
4482         if (trace_seq_has_overflowed(s))
4483                 return TRACE_TYPE_PARTIAL_LINE;
4484
4485         if (event) {
4486                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4487                         return print_event_fields(iter, event);
4488                 return event->funcs->trace(iter, sym_flags, event);
4489         }
4490
4491         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4492
4493         return trace_handle_return(s);
4494 }
4495
4496 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4497 {
4498         struct trace_array *tr = iter->tr;
4499         struct trace_seq *s = &iter->seq;
4500         struct trace_entry *entry;
4501         struct trace_event *event;
4502
4503         entry = iter->ent;
4504
4505         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4506                 trace_seq_printf(s, "%d %d %llu ",
4507                                  entry->pid, iter->cpu, iter->ts);
4508
4509         if (trace_seq_has_overflowed(s))
4510                 return TRACE_TYPE_PARTIAL_LINE;
4511
4512         event = ftrace_find_event(entry->type);
4513         if (event)
4514                 return event->funcs->raw(iter, 0, event);
4515
4516         trace_seq_printf(s, "%d ?\n", entry->type);
4517
4518         return trace_handle_return(s);
4519 }
4520
4521 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4522 {
4523         struct trace_array *tr = iter->tr;
4524         struct trace_seq *s = &iter->seq;
4525         unsigned char newline = '\n';
4526         struct trace_entry *entry;
4527         struct trace_event *event;
4528
4529         entry = iter->ent;
4530
4531         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4532                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4533                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4534                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4535                 if (trace_seq_has_overflowed(s))
4536                         return TRACE_TYPE_PARTIAL_LINE;
4537         }
4538
4539         event = ftrace_find_event(entry->type);
4540         if (event) {
4541                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4542                 if (ret != TRACE_TYPE_HANDLED)
4543                         return ret;
4544         }
4545
4546         SEQ_PUT_FIELD(s, newline);
4547
4548         return trace_handle_return(s);
4549 }
4550
4551 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4552 {
4553         struct trace_array *tr = iter->tr;
4554         struct trace_seq *s = &iter->seq;
4555         struct trace_entry *entry;
4556         struct trace_event *event;
4557
4558         entry = iter->ent;
4559
4560         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4561                 SEQ_PUT_FIELD(s, entry->pid);
4562                 SEQ_PUT_FIELD(s, iter->cpu);
4563                 SEQ_PUT_FIELD(s, iter->ts);
4564                 if (trace_seq_has_overflowed(s))
4565                         return TRACE_TYPE_PARTIAL_LINE;
4566         }
4567
4568         event = ftrace_find_event(entry->type);
4569         return event ? event->funcs->binary(iter, 0, event) :
4570                 TRACE_TYPE_HANDLED;
4571 }
4572
4573 int trace_empty(struct trace_iterator *iter)
4574 {
4575         struct ring_buffer_iter *buf_iter;
4576         int cpu;
4577
4578         /* If we are looking at one CPU buffer, only check that one */
4579         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4580                 cpu = iter->cpu_file;
4581                 buf_iter = trace_buffer_iter(iter, cpu);
4582                 if (buf_iter) {
4583                         if (!ring_buffer_iter_empty(buf_iter))
4584                                 return 0;
4585                 } else {
4586                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4587                                 return 0;
4588                 }
4589                 return 1;
4590         }
4591
4592         for_each_tracing_cpu(cpu) {
4593                 buf_iter = trace_buffer_iter(iter, cpu);
4594                 if (buf_iter) {
4595                         if (!ring_buffer_iter_empty(buf_iter))
4596                                 return 0;
4597                 } else {
4598                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4599                                 return 0;
4600                 }
4601         }
4602
4603         return 1;
4604 }
4605
4606 /*  Called with trace_event_read_lock() held. */
4607 enum print_line_t print_trace_line(struct trace_iterator *iter)
4608 {
4609         struct trace_array *tr = iter->tr;
4610         unsigned long trace_flags = tr->trace_flags;
4611         enum print_line_t ret;
4612
4613         if (iter->lost_events) {
4614                 if (iter->lost_events == (unsigned long)-1)
4615                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4616                                          iter->cpu);
4617                 else
4618                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4619                                          iter->cpu, iter->lost_events);
4620                 if (trace_seq_has_overflowed(&iter->seq))
4621                         return TRACE_TYPE_PARTIAL_LINE;
4622         }
4623
4624         if (iter->trace && iter->trace->print_line) {
4625                 ret = iter->trace->print_line(iter);
4626                 if (ret != TRACE_TYPE_UNHANDLED)
4627                         return ret;
4628         }
4629
4630         if (iter->ent->type == TRACE_BPUTS &&
4631                         trace_flags & TRACE_ITER_PRINTK &&
4632                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4633                 return trace_print_bputs_msg_only(iter);
4634
4635         if (iter->ent->type == TRACE_BPRINT &&
4636                         trace_flags & TRACE_ITER_PRINTK &&
4637                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4638                 return trace_print_bprintk_msg_only(iter);
4639
4640         if (iter->ent->type == TRACE_PRINT &&
4641                         trace_flags & TRACE_ITER_PRINTK &&
4642                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4643                 return trace_print_printk_msg_only(iter);
4644
4645         if (trace_flags & TRACE_ITER_BIN)
4646                 return print_bin_fmt(iter);
4647
4648         if (trace_flags & TRACE_ITER_HEX)
4649                 return print_hex_fmt(iter);
4650
4651         if (trace_flags & TRACE_ITER_RAW)
4652                 return print_raw_fmt(iter);
4653
4654         return print_trace_fmt(iter);
4655 }
4656
4657 void trace_latency_header(struct seq_file *m)
4658 {
4659         struct trace_iterator *iter = m->private;
4660         struct trace_array *tr = iter->tr;
4661
4662         /* print nothing if the buffers are empty */
4663         if (trace_empty(iter))
4664                 return;
4665
4666         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4667                 print_trace_header(m, iter);
4668
4669         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4670                 print_lat_help_header(m);
4671 }
4672
4673 void trace_default_header(struct seq_file *m)
4674 {
4675         struct trace_iterator *iter = m->private;
4676         struct trace_array *tr = iter->tr;
4677         unsigned long trace_flags = tr->trace_flags;
4678
4679         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4680                 return;
4681
4682         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4683                 /* print nothing if the buffers are empty */
4684                 if (trace_empty(iter))
4685                         return;
4686                 print_trace_header(m, iter);
4687                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4688                         print_lat_help_header(m);
4689         } else {
4690                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4691                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4692                                 print_func_help_header_irq(iter->array_buffer,
4693                                                            m, trace_flags);
4694                         else
4695                                 print_func_help_header(iter->array_buffer, m,
4696                                                        trace_flags);
4697                 }
4698         }
4699 }
4700
4701 static void test_ftrace_alive(struct seq_file *m)
4702 {
4703         if (!ftrace_is_dead())
4704                 return;
4705         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4706                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4707 }
4708
4709 #ifdef CONFIG_TRACER_MAX_TRACE
4710 static void show_snapshot_main_help(struct seq_file *m)
4711 {
4712         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4713                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4714                     "#                      Takes a snapshot of the main buffer.\n"
4715                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4716                     "#                      (Doesn't have to be '2'; works with any number that\n"
4717                     "#                       is not a '0' or '1')\n");
4718 }
4719
4720 static void show_snapshot_percpu_help(struct seq_file *m)
4721 {
4722         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4723 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4724         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4725                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4726 #else
4727         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4728                     "#                     Must use main snapshot file to allocate.\n");
4729 #endif
4730         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4731                     "#                      (Doesn't have to be '2'; works with any number that\n"
4732                     "#                       is not a '0' or '1')\n");
4733 }
4734
4735 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4736 {
4737         if (iter->tr->allocated_snapshot)
4738                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4739         else
4740                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4741
4742         seq_puts(m, "# Snapshot commands:\n");
4743         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4744                 show_snapshot_main_help(m);
4745         else
4746                 show_snapshot_percpu_help(m);
4747 }
4748 #else
4749 /* Should never be called */
4750 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4751 #endif
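
/*
 * Illustrative usage of the snapshot file described by the help text
 * above (paths assume the usual tracefs mount point):
 *
 *     echo 1 > /sys/kernel/tracing/snapshot   # allocate and take a snapshot
 *     cat /sys/kernel/tracing/snapshot        # read the snapshotted trace
 *     echo 2 > /sys/kernel/tracing/snapshot   # clear it, keep the buffer
 *     echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */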
4752
4753 static int s_show(struct seq_file *m, void *v)
4754 {
4755         struct trace_iterator *iter = v;
4756         int ret;
4757
4758         if (iter->ent == NULL) {
4759                 if (iter->tr) {
4760                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4761                         seq_puts(m, "#\n");
4762                         test_ftrace_alive(m);
4763                 }
4764                 if (iter->snapshot && trace_empty(iter))
4765                         print_snapshot_help(m, iter);
4766                 else if (iter->trace && iter->trace->print_header)
4767                         iter->trace->print_header(m);
4768                 else
4769                         trace_default_header(m);
4770
4771         } else if (iter->leftover) {
4772                 /*
4773                  * If we filled the seq_file buffer earlier, we
4774                  * want to just show it now.
4775                  */
4776                 ret = trace_print_seq(m, &iter->seq);
4777
4778                 /* ret should this time be zero, but you never know */
4779                 iter->leftover = ret;
4780
4781         } else {
4782                 ret = print_trace_line(iter);
4783                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4784                         iter->seq.full = 0;
4785                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4786                 }
4787                 ret = trace_print_seq(m, &iter->seq);
4788                 /*
4789                  * If we overflow the seq_file buffer, then it will
4790                  * ask us for this data again at start up.
4791                  * Use that instead.
4792                  *  ret is 0 if seq_file write succeeded.
4793                  *        -1 otherwise.
4794                  */
4795                 iter->leftover = ret;
4796         }
4797
4798         return 0;
4799 }
4800
4801 /*
4802  * Should be used after trace_array_get(), trace_types_lock
4803  * ensures that i_cdev was already initialized.
4804  */
4805 static inline int tracing_get_cpu(struct inode *inode)
4806 {
4807         if (inode->i_cdev) /* See trace_create_cpu_file() */
4808                 return (long)inode->i_cdev - 1;
4809         return RING_BUFFER_ALL_CPUS;
4810 }
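
/*
 * A sketch of the encoding assumed above: trace_create_cpu_file()
 * presumably stores (cpu + 1) in i_cdev, so a NULL i_cdev (the top-level
 * files) maps to RING_BUFFER_ALL_CPUS, while e.g. the per_cpu/cpu2/ files
 * would carry (void *)3 and this helper would return 2.
 */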
4811
4812 static const struct seq_operations tracer_seq_ops = {
4813         .start          = s_start,
4814         .next           = s_next,
4815         .stop           = s_stop,
4816         .show           = s_show,
4817 };
4818
4819 /*
4820  * Note, as iter itself can be allocated and freed in different
4821  * ways, this function is only used to free its content, and not
4822  * the iterator itself. The only requirement for all the allocations
4823  * is that they must zero all fields (kzalloc), as freeing works with
4824  * either allocated content or NULL.
4825  */
4826 static void free_trace_iter_content(struct trace_iterator *iter)
4827 {
4828         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4829         if (iter->fmt != static_fmt_buf)
4830                 kfree(iter->fmt);
4831
4832         kfree(iter->temp);
4833         kfree(iter->buffer_iter);
4834         mutex_destroy(&iter->mutex);
4835         free_cpumask_var(iter->started);
4836 }
4837
4838 static struct trace_iterator *
4839 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4840 {
4841         struct trace_array *tr = inode->i_private;
4842         struct trace_iterator *iter;
4843         int cpu;
4844
4845         if (tracing_disabled)
4846                 return ERR_PTR(-ENODEV);
4847
4848         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4849         if (!iter)
4850                 return ERR_PTR(-ENOMEM);
4851
4852         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4853                                     GFP_KERNEL);
4854         if (!iter->buffer_iter)
4855                 goto release;
4856
4857         /*
4858          * trace_find_next_entry() may need to save off iter->ent.
4859          * It will place it into the iter->temp buffer. As most
4860          * events are less than 128 bytes, allocate a buffer of that size.
4861          * If one is greater, then trace_find_next_entry() will
4862          * allocate a new buffer to adjust for the bigger iter->ent.
4863          * It's not critical if it fails to get allocated here.
4864          */
4865         iter->temp = kmalloc(128, GFP_KERNEL);
4866         if (iter->temp)
4867                 iter->temp_size = 128;
4868
4869         /*
4870          * trace_event_printf() may need to modify the given format
4871          * string to replace %p with %px so that it shows the real address
4872          * instead of a hash value. However, that is only needed for event
4873          * tracing; other tracers may not need it. Defer the allocation
4874          * until it is needed.
4875          */
4876         iter->fmt = NULL;
4877         iter->fmt_size = 0;
4878
4879         mutex_lock(&trace_types_lock);
4880         iter->trace = tr->current_trace;
4881
4882         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4883                 goto fail;
4884
4885         iter->tr = tr;
4886
4887 #ifdef CONFIG_TRACER_MAX_TRACE
4888         /* Currently only the top directory has a snapshot */
4889         if (tr->current_trace->print_max || snapshot)
4890                 iter->array_buffer = &tr->max_buffer;
4891         else
4892 #endif
4893                 iter->array_buffer = &tr->array_buffer;
4894         iter->snapshot = snapshot;
4895         iter->pos = -1;
4896         iter->cpu_file = tracing_get_cpu(inode);
4897         mutex_init(&iter->mutex);
4898
4899         /* Notify the tracer early; before we stop tracing. */
4900         if (iter->trace->open)
4901                 iter->trace->open(iter);
4902
4903         /* Annotate start of buffers if we had overruns */
4904         if (ring_buffer_overruns(iter->array_buffer->buffer))
4905                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4906
4907         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4908         if (trace_clocks[tr->clock_id].in_ns)
4909                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4910
4911         /*
4912          * If pause-on-trace is enabled, then stop the trace while
4913          * dumping, unless this is the "snapshot" file
4914          */
4915         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4916                 tracing_stop_tr(tr);
4917
4918         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4919                 for_each_tracing_cpu(cpu) {
4920                         iter->buffer_iter[cpu] =
4921                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4922                                                          cpu, GFP_KERNEL);
4923                 }
4924                 ring_buffer_read_prepare_sync();
4925                 for_each_tracing_cpu(cpu) {
4926                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4927                         tracing_iter_reset(iter, cpu);
4928                 }
4929         } else {
4930                 cpu = iter->cpu_file;
4931                 iter->buffer_iter[cpu] =
4932                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4933                                                  cpu, GFP_KERNEL);
4934                 ring_buffer_read_prepare_sync();
4935                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4936                 tracing_iter_reset(iter, cpu);
4937         }
4938
4939         mutex_unlock(&trace_types_lock);
4940
4941         return iter;
4942
4943  fail:
4944         mutex_unlock(&trace_types_lock);
4945         free_trace_iter_content(iter);
4946 release:
4947         seq_release_private(inode, file);
4948         return ERR_PTR(-ENOMEM);
4949 }
4950
4951 int tracing_open_generic(struct inode *inode, struct file *filp)
4952 {
4953         int ret;
4954
4955         ret = tracing_check_open_get_tr(NULL);
4956         if (ret)
4957                 return ret;
4958
4959         filp->private_data = inode->i_private;
4960         return 0;
4961 }
4962
4963 bool tracing_is_disabled(void)
4964 {
4965         return tracing_disabled;
4966 }
4967
4968 /*
4969  * Open and update trace_array ref count.
4970  * Must have the current trace_array passed to it.
4971  */
4972 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4973 {
4974         struct trace_array *tr = inode->i_private;
4975         int ret;
4976
4977         ret = tracing_check_open_get_tr(tr);
4978         if (ret)
4979                 return ret;
4980
4981         filp->private_data = inode->i_private;
4982
4983         return 0;
4984 }
4985
4986 /*
4987  * The private pointer of the inode is the trace_event_file.
4988  * Update the tr ref count associated to it.
4989  */
4990 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4991 {
4992         struct trace_event_file *file = inode->i_private;
4993         int ret;
4994
4995         ret = tracing_check_open_get_tr(file->tr);
4996         if (ret)
4997                 return ret;
4998
4999         mutex_lock(&event_mutex);
5000
5001         /* Fail if the file is marked for removal */
5002         if (file->flags & EVENT_FILE_FL_FREED) {
5003                 trace_array_put(file->tr);
5004                 ret = -ENODEV;
5005         } else {
5006                 event_file_get(file);
5007         }
5008
5009         mutex_unlock(&event_mutex);
5010         if (ret)
5011                 return ret;
5012
5013         filp->private_data = inode->i_private;
5014
5015         return 0;
5016 }
5017
5018 int tracing_release_file_tr(struct inode *inode, struct file *filp)
5019 {
5020         struct trace_event_file *file = inode->i_private;
5021
5022         trace_array_put(file->tr);
5023         event_file_put(file);
5024
5025         return 0;
5026 }
5027
5028 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5029 {
5030         tracing_release_file_tr(inode, filp);
5031         return single_release(inode, filp);
5032 }
5033
5034 static int tracing_mark_open(struct inode *inode, struct file *filp)
5035 {
5036         stream_open(inode, filp);
5037         return tracing_open_generic_tr(inode, filp);
5038 }
5039
5040 static int tracing_release(struct inode *inode, struct file *file)
5041 {
5042         struct trace_array *tr = inode->i_private;
5043         struct seq_file *m = file->private_data;
5044         struct trace_iterator *iter;
5045         int cpu;
5046
5047         if (!(file->f_mode & FMODE_READ)) {
5048                 trace_array_put(tr);
5049                 return 0;
5050         }
5051
5052         /* Writes do not use seq_file */
5053         iter = m->private;
5054         mutex_lock(&trace_types_lock);
5055
5056         for_each_tracing_cpu(cpu) {
5057                 if (iter->buffer_iter[cpu])
5058                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5059         }
5060
5061         if (iter->trace && iter->trace->close)
5062                 iter->trace->close(iter);
5063
5064         if (!iter->snapshot && tr->stop_count)
5065                 /* reenable tracing if it was previously enabled */
5066                 tracing_start_tr(tr);
5067
5068         __trace_array_put(tr);
5069
5070         mutex_unlock(&trace_types_lock);
5071
5072         free_trace_iter_content(iter);
5073         seq_release_private(inode, file);
5074
5075         return 0;
5076 }
5077
5078 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5079 {
5080         struct trace_array *tr = inode->i_private;
5081
5082         trace_array_put(tr);
5083         return 0;
5084 }
5085
5086 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5087 {
5088         struct trace_array *tr = inode->i_private;
5089
5090         trace_array_put(tr);
5091
5092         return single_release(inode, file);
5093 }
5094
5095 static int tracing_open(struct inode *inode, struct file *file)
5096 {
5097         struct trace_array *tr = inode->i_private;
5098         struct trace_iterator *iter;
5099         int ret;
5100
5101         ret = tracing_check_open_get_tr(tr);
5102         if (ret)
5103                 return ret;
5104
5105         /* If this file was opened for write, then erase its contents */
5106         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5107                 int cpu = tracing_get_cpu(inode);
5108                 struct array_buffer *trace_buf = &tr->array_buffer;
5109
5110 #ifdef CONFIG_TRACER_MAX_TRACE
5111                 if (tr->current_trace->print_max)
5112                         trace_buf = &tr->max_buffer;
5113 #endif
5114
5115                 if (cpu == RING_BUFFER_ALL_CPUS)
5116                         tracing_reset_online_cpus(trace_buf);
5117                 else
5118                         tracing_reset_cpu(trace_buf, cpu);
5119         }
5120
5121         if (file->f_mode & FMODE_READ) {
5122                 iter = __tracing_open(inode, file, false);
5123                 if (IS_ERR(iter))
5124                         ret = PTR_ERR(iter);
5125                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5126                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5127         }
5128
5129         if (ret < 0)
5130                 trace_array_put(tr);
5131
5132         return ret;
5133 }
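
/*
 * The O_TRUNC handling above is what makes "echo > trace" (an open with
 * O_WRONLY|O_TRUNC) clear the buffer, as described in the mini-HOWTO
 * below; the write itself is discarded by tracing_write_stub().
 */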
5134
5135 /*
5136  * Some tracers are not suitable for instance buffers.
5137  * A tracer is always available for the global array (toplevel)
5138  * or if it explicitly states that it is.
5139  */
5140 static bool
5141 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5142 {
5143         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5144 }
5145
5146 /* Find the next tracer that this trace array may use */
5147 static struct tracer *
5148 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5149 {
5150         while (t && !trace_ok_for_array(t, tr))
5151                 t = t->next;
5152
5153         return t;
5154 }
5155
5156 static void *
5157 t_next(struct seq_file *m, void *v, loff_t *pos)
5158 {
5159         struct trace_array *tr = m->private;
5160         struct tracer *t = v;
5161
5162         (*pos)++;
5163
5164         if (t)
5165                 t = get_tracer_for_array(tr, t->next);
5166
5167         return t;
5168 }
5169
5170 static void *t_start(struct seq_file *m, loff_t *pos)
5171 {
5172         struct trace_array *tr = m->private;
5173         struct tracer *t;
5174         loff_t l = 0;
5175
5176         mutex_lock(&trace_types_lock);
5177
5178         t = get_tracer_for_array(tr, trace_types);
5179         for (; t && l < *pos; t = t_next(m, t, &l))
5180                         ;
5181
5182         return t;
5183 }
5184
5185 static void t_stop(struct seq_file *m, void *p)
5186 {
5187         mutex_unlock(&trace_types_lock);
5188 }
5189
5190 static int t_show(struct seq_file *m, void *v)
5191 {
5192         struct tracer *t = v;
5193
5194         if (!t)
5195                 return 0;
5196
5197         seq_puts(m, t->name);
5198         if (t->next)
5199                 seq_putc(m, ' ');
5200         else
5201                 seq_putc(m, '\n');
5202
5203         return 0;
5204 }
5205
5206 static const struct seq_operations show_traces_seq_ops = {
5207         .start          = t_start,
5208         .next           = t_next,
5209         .stop           = t_stop,
5210         .show           = t_show,
5211 };
5212
5213 static int show_traces_open(struct inode *inode, struct file *file)
5214 {
5215         struct trace_array *tr = inode->i_private;
5216         struct seq_file *m;
5217         int ret;
5218
5219         ret = tracing_check_open_get_tr(tr);
5220         if (ret)
5221                 return ret;
5222
5223         ret = seq_open(file, &show_traces_seq_ops);
5224         if (ret) {
5225                 trace_array_put(tr);
5226                 return ret;
5227         }
5228
5229         m = file->private_data;
5230         m->private = tr;
5231
5232         return 0;
5233 }
5234
5235 static int show_traces_release(struct inode *inode, struct file *file)
5236 {
5237         struct trace_array *tr = inode->i_private;
5238
5239         trace_array_put(tr);
5240         return seq_release(inode, file);
5241 }
5242
5243 static ssize_t
5244 tracing_write_stub(struct file *filp, const char __user *ubuf,
5245                    size_t count, loff_t *ppos)
5246 {
5247         return count;
5248 }
5249
5250 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5251 {
5252         int ret;
5253
5254         if (file->f_mode & FMODE_READ)
5255                 ret = seq_lseek(file, offset, whence);
5256         else
5257                 file->f_pos = ret = 0;
5258
5259         return ret;
5260 }
5261
5262 static const struct file_operations tracing_fops = {
5263         .open           = tracing_open,
5264         .read           = seq_read,
5265         .read_iter      = seq_read_iter,
5266         .splice_read    = copy_splice_read,
5267         .write          = tracing_write_stub,
5268         .llseek         = tracing_lseek,
5269         .release        = tracing_release,
5270 };
5271
5272 static const struct file_operations show_traces_fops = {
5273         .open           = show_traces_open,
5274         .read           = seq_read,
5275         .llseek         = seq_lseek,
5276         .release        = show_traces_release,
5277 };
5278
5279 static ssize_t
5280 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5281                      size_t count, loff_t *ppos)
5282 {
5283         struct trace_array *tr = file_inode(filp)->i_private;
5284         char *mask_str;
5285         int len;
5286
5287         len = snprintf(NULL, 0, "%*pb\n",
5288                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5289         mask_str = kmalloc(len, GFP_KERNEL);
5290         if (!mask_str)
5291                 return -ENOMEM;
5292
5293         len = snprintf(mask_str, len, "%*pb\n",
5294                        cpumask_pr_args(tr->tracing_cpumask));
5295         if (len >= count) {
5296                 count = -EINVAL;
5297                 goto out_err;
5298         }
5299         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5300
5301 out_err:
5302         kfree(mask_str);
5303
5304         return count;
5305 }
5306
5307 int tracing_set_cpumask(struct trace_array *tr,
5308                         cpumask_var_t tracing_cpumask_new)
5309 {
5310         int cpu;
5311
5312         if (!tr)
5313                 return -EINVAL;
5314
5315         local_irq_disable();
5316         arch_spin_lock(&tr->max_lock);
5317         for_each_tracing_cpu(cpu) {
5318                 /*
5319                  * Increase/decrease the disabled counter if we are
5320                  * about to flip a bit in the cpumask:
5321                  */
5322                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5323                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5324                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5325                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5328 #endif
5329                 }
5330                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5331                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5332                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5333                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5334 #ifdef CONFIG_TRACER_MAX_TRACE
5335                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5336 #endif
5337                 }
5338         }
5339         arch_spin_unlock(&tr->max_lock);
5340         local_irq_enable();
5341
5342         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5343
5344         return 0;
5345 }
5346
5347 static ssize_t
5348 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5349                       size_t count, loff_t *ppos)
5350 {
5351         struct trace_array *tr = file_inode(filp)->i_private;
5352         cpumask_var_t tracing_cpumask_new;
5353         int err;
5354
5355         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5356                 return -ENOMEM;
5357
5358         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5359         if (err)
5360                 goto err_free;
5361
5362         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5363         if (err)
5364                 goto err_free;
5365
5366         free_cpumask_var(tracing_cpumask_new);
5367
5368         return count;
5369
5370 err_free:
5371         free_cpumask_var(tracing_cpumask_new);
5372
5373         return err;
5374 }
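
/*
 * Illustrative usage (the mask is a hex cpumask as parsed by
 * cpumask_parse_user(); paths assume the usual tracefs mount point):
 *
 *     echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1 only
 *     cat /sys/kernel/tracing/tracing_cpumask        # read back the mask
 */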
5375
5376 static const struct file_operations tracing_cpumask_fops = {
5377         .open           = tracing_open_generic_tr,
5378         .read           = tracing_cpumask_read,
5379         .write          = tracing_cpumask_write,
5380         .release        = tracing_release_generic_tr,
5381         .llseek         = generic_file_llseek,
5382 };
5383
5384 static int tracing_trace_options_show(struct seq_file *m, void *v)
5385 {
5386         struct tracer_opt *trace_opts;
5387         struct trace_array *tr = m->private;
5388         u32 tracer_flags;
5389         int i;
5390
5391         mutex_lock(&trace_types_lock);
5392         tracer_flags = tr->current_trace->flags->val;
5393         trace_opts = tr->current_trace->flags->opts;
5394
5395         for (i = 0; trace_options[i]; i++) {
5396                 if (tr->trace_flags & (1 << i))
5397                         seq_printf(m, "%s\n", trace_options[i]);
5398                 else
5399                         seq_printf(m, "no%s\n", trace_options[i]);
5400         }
5401
5402         for (i = 0; trace_opts[i].name; i++) {
5403                 if (tracer_flags & trace_opts[i].bit)
5404                         seq_printf(m, "%s\n", trace_opts[i].name);
5405                 else
5406                         seq_printf(m, "no%s\n", trace_opts[i].name);
5407         }
5408         mutex_unlock(&trace_types_lock);
5409
5410         return 0;
5411 }
5412
5413 static int __set_tracer_option(struct trace_array *tr,
5414                                struct tracer_flags *tracer_flags,
5415                                struct tracer_opt *opts, int neg)
5416 {
5417         struct tracer *trace = tracer_flags->trace;
5418         int ret;
5419
5420         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5421         if (ret)
5422                 return ret;
5423
5424         if (neg)
5425                 tracer_flags->val &= ~opts->bit;
5426         else
5427                 tracer_flags->val |= opts->bit;
5428         return 0;
5429 }
5430
5431 /* Try to assign a tracer-specific option */
5432 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5433 {
5434         struct tracer *trace = tr->current_trace;
5435         struct tracer_flags *tracer_flags = trace->flags;
5436         struct tracer_opt *opts = NULL;
5437         int i;
5438
5439         for (i = 0; tracer_flags->opts[i].name; i++) {
5440                 opts = &tracer_flags->opts[i];
5441
5442                 if (strcmp(cmp, opts->name) == 0)
5443                         return __set_tracer_option(tr, trace->flags, opts, neg);
5444         }
5445
5446         return -EINVAL;
5447 }
5448
5449 /* Some tracers require overwrite to stay enabled */
5450 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5451 {
5452         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5453                 return -1;
5454
5455         return 0;
5456 }
5457
5458 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5459 {
5460         int *map;
5461
5462         if ((mask == TRACE_ITER_RECORD_TGID) ||
5463             (mask == TRACE_ITER_RECORD_CMD))
5464                 lockdep_assert_held(&event_mutex);
5465
5466         /* do nothing if flag is already set */
5467         if (!!(tr->trace_flags & mask) == !!enabled)
5468                 return 0;
5469
5470         /* Give the tracer a chance to approve the change */
5471         if (tr->current_trace->flag_changed)
5472                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5473                         return -EINVAL;
5474
5475         if (enabled)
5476                 tr->trace_flags |= mask;
5477         else
5478                 tr->trace_flags &= ~mask;
5479
5480         if (mask == TRACE_ITER_RECORD_CMD)
5481                 trace_event_enable_cmd_record(enabled);
5482
5483         if (mask == TRACE_ITER_RECORD_TGID) {
5484                 if (!tgid_map) {
5485                         tgid_map_max = pid_max;
5486                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5487                                        GFP_KERNEL);
5488
5489                         /*
5490                          * Pairs with smp_load_acquire() in
5491                          * trace_find_tgid_ptr() to ensure that if it observes
5492                          * the tgid_map we just allocated then it also observes
5493                          * the corresponding tgid_map_max value.
5494                          */
5495                         smp_store_release(&tgid_map, map);
5496                 }
5497                 if (!tgid_map) {
5498                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5499                         return -ENOMEM;
5500                 }
5501
5502                 trace_event_enable_tgid_record(enabled);
5503         }
5504
5505         if (mask == TRACE_ITER_EVENT_FORK)
5506                 trace_event_follow_fork(tr, enabled);
5507
5508         if (mask == TRACE_ITER_FUNC_FORK)
5509                 ftrace_pid_follow_fork(tr, enabled);
5510
5511         if (mask == TRACE_ITER_OVERWRITE) {
5512                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5513 #ifdef CONFIG_TRACER_MAX_TRACE
5514                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5515 #endif
5516         }
5517
5518         if (mask == TRACE_ITER_PRINTK) {
5519                 trace_printk_start_stop_comm(enabled);
5520                 trace_printk_control(enabled);
5521         }
5522
5523         return 0;
5524 }
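
/*
 * For reference, the acquire side that the smp_store_release() above pairs
 * with looks roughly like this (a sketch, not a copy of the real
 * trace_find_tgid_ptr()):
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (unlikely(!map || pid > tgid_map_max))
 *		return NULL;
 *	return &map[pid];
 *
 * Observing a non-NULL tgid_map therefore guarantees that the matching
 * tgid_map_max store is visible as well.
 */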
5525
5526 int trace_set_options(struct trace_array *tr, char *option)
5527 {
5528         char *cmp;
5529         int neg = 0;
5530         int ret;
5531         size_t orig_len = strlen(option);
5532         int len;
5533
5534         cmp = strstrip(option);
5535
5536         len = str_has_prefix(cmp, "no");
5537         if (len)
5538                 neg = 1;
5539
5540         cmp += len;
5541
5542         mutex_lock(&event_mutex);
5543         mutex_lock(&trace_types_lock);
5544
5545         ret = match_string(trace_options, -1, cmp);
5546         /* If it is not one of the generic options, test the tracer-specific options */
5547         if (ret < 0)
5548                 ret = set_tracer_option(tr, cmp, neg);
5549         else
5550                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5551
5552         mutex_unlock(&trace_types_lock);
5553         mutex_unlock(&event_mutex);
5554
5555         /*
5556          * If the first trailing whitespace is replaced with '\0' by strstrip,
5557          * turn it back into a space.
5558          */
5559         if (orig_len > strlen(option))
5560                 option[strlen(option)] = ' ';
5561
5562         return ret;
5563 }
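
/*
 * Illustrative use of the "no" prefix handled above, through the
 * trace_options file (the option name is just an example from the
 * generic flag list):
 *
 *     echo sym-offset > trace_options     # set a flag
 *     echo nosym-offset > trace_options   # clear the same flag
 */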
5564
5565 static void __init apply_trace_boot_options(void)
5566 {
5567         char *buf = trace_boot_options_buf;
5568         char *option;
5569
5570         while (true) {
5571                 option = strsep(&buf, ",");
5572
5573                 if (!option)
5574                         break;
5575
5576                 if (*option)
5577                         trace_set_options(&global_trace, option);
5578
5579                 /* Put back the comma to allow this to be called again */
5580                 if (buf)
5581                         *(buf - 1) = ',';
5582         }
5583 }
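
/*
 * trace_boot_options_buf is typically populated from the "trace_options="
 * kernel command line parameter, so something like
 *
 *     trace_options=sym-offset,nooverwrite
 *
 * is split on the commas here and each piece is handed to
 * trace_set_options(). The comma is put back afterwards so the buffer can
 * be parsed again on a later call.
 */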
5584
5585 static ssize_t
5586 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5587                         size_t cnt, loff_t *ppos)
5588 {
5589         struct seq_file *m = filp->private_data;
5590         struct trace_array *tr = m->private;
5591         char buf[64];
5592         int ret;
5593
5594         if (cnt >= sizeof(buf))
5595                 return -EINVAL;
5596
5597         if (copy_from_user(buf, ubuf, cnt))
5598                 return -EFAULT;
5599
5600         buf[cnt] = 0;
5601
5602         ret = trace_set_options(tr, buf);
5603         if (ret < 0)
5604                 return ret;
5605
5606         *ppos += cnt;
5607
5608         return cnt;
5609 }
5610
5611 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5612 {
5613         struct trace_array *tr = inode->i_private;
5614         int ret;
5615
5616         ret = tracing_check_open_get_tr(tr);
5617         if (ret)
5618                 return ret;
5619
5620         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5621         if (ret < 0)
5622                 trace_array_put(tr);
5623
5624         return ret;
5625 }
5626
5627 static const struct file_operations tracing_iter_fops = {
5628         .open           = tracing_trace_options_open,
5629         .read           = seq_read,
5630         .llseek         = seq_lseek,
5631         .release        = tracing_single_release_tr,
5632         .write          = tracing_trace_options_write,
5633 };
5634
5635 static const char readme_msg[] =
5636         "tracing mini-HOWTO:\n\n"
5637         "# echo 0 > tracing_on : quick way to disable tracing\n"
5638         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5639         " Important files:\n"
5640         "  trace\t\t\t- The static contents of the buffer\n"
5641         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5642         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5643         "  current_tracer\t- function and latency tracers\n"
5644         "  available_tracers\t- list of configured tracers for current_tracer\n"
5645         "  error_log\t- error log for failed commands (that support it)\n"
5646         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5647         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5648         "  trace_clock\t\t- change the clock used to order events\n"
5649         "       local:   Per cpu clock but may not be synced across CPUs\n"
5650         "      global:   Synced across CPUs but slows tracing down.\n"
5651         "     counter:   Not a clock, but just an increment\n"
5652         "      uptime:   Jiffy counter from time of boot\n"
5653         "        perf:   Same clock that perf events use\n"
5654 #ifdef CONFIG_X86_64
5655         "     x86-tsc:   TSC cycle counter\n"
5656 #endif
5657         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5658         "       delta:   Delta difference against a buffer-wide timestamp\n"
5659         "    absolute:   Absolute (standalone) timestamp\n"
5660         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5661         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5662         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5663         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5664         "\t\t\t  Remove sub-buffer with rmdir\n"
5665         "  trace_options\t\t- Set format or modify how tracing happens\n"
5666         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5667         "\t\t\t  option name\n"
5668         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5669 #ifdef CONFIG_DYNAMIC_FTRACE
5670         "\n  available_filter_functions - list of functions that can be filtered on\n"
5671         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5672         "\t\t\t  functions\n"
5673         "\t     accepts: func_full_name or glob-matching-pattern\n"
5674         "\t     modules: Can select a group via module\n"
5675         "\t      Format: :mod:<module-name>\n"
5676         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5677         "\t    triggers: a command to perform when function is hit\n"
5678         "\t      Format: <function>:<trigger>[:count]\n"
5679         "\t     trigger: traceon, traceoff\n"
5680         "\t\t      enable_event:<system>:<event>\n"
5681         "\t\t      disable_event:<system>:<event>\n"
5682 #ifdef CONFIG_STACKTRACE
5683         "\t\t      stacktrace\n"
5684 #endif
5685 #ifdef CONFIG_TRACER_SNAPSHOT
5686         "\t\t      snapshot\n"
5687 #endif
5688         "\t\t      dump\n"
5689         "\t\t      cpudump\n"
5690         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5691         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5692         "\t     The first one will disable tracing every time do_fault is hit\n"
5693         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5694         "\t       The first time do trap is hit and it disables tracing, the\n"
5695         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5696         "\t       the counter will not decrement. It only decrements when the\n"
5697         "\t       trigger did work\n"
5698         "\t     To remove trigger without count:\n"
5699         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5700         "\t     To remove trigger with a count:\n"
5701         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5702         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5703         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5704         "\t    modules: Can select a group via module command :mod:\n"
5705         "\t    Does not accept triggers\n"
5706 #endif /* CONFIG_DYNAMIC_FTRACE */
5707 #ifdef CONFIG_FUNCTION_TRACER
5708         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5709         "\t\t    (function)\n"
5710         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5711         "\t\t    (function)\n"
5712 #endif
5713 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5714         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5715         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5716         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5717 #endif
5718 #ifdef CONFIG_TRACER_SNAPSHOT
5719         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5720         "\t\t\t  snapshot buffer. Read the contents for more\n"
5721         "\t\t\t  information\n"
5722 #endif
5723 #ifdef CONFIG_STACK_TRACER
5724         "  stack_trace\t\t- Shows the max stack trace when active\n"
5725         "  stack_max_size\t- Shows current max stack size that was traced\n"
5726         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5727         "\t\t\t  new trace)\n"
5728 #ifdef CONFIG_DYNAMIC_FTRACE
5729         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5730         "\t\t\t  traces\n"
5731 #endif
5732 #endif /* CONFIG_STACK_TRACER */
5733 #ifdef CONFIG_DYNAMIC_EVENTS
5734         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5735         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5736 #endif
5737 #ifdef CONFIG_KPROBE_EVENTS
5738         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5739         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5740 #endif
5741 #ifdef CONFIG_UPROBE_EVENTS
5742         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5743         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5744 #endif
5745 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5746     defined(CONFIG_FPROBE_EVENTS)
5747         "\t  accepts: event-definitions (one definition per line)\n"
5748 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5749         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5750         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5751 #endif
5752 #ifdef CONFIG_FPROBE_EVENTS
5753         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5754         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5755 #endif
5756 #ifdef CONFIG_HIST_TRIGGERS
5757         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5758 #endif
5759         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5760         "\t           -:[<group>/][<event>]\n"
5761 #ifdef CONFIG_KPROBE_EVENTS
5762         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5763   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5764 #endif
5765 #ifdef CONFIG_UPROBE_EVENTS
5766   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5767 #endif
5768         "\t     args: <name>=fetcharg[:type]\n"
5769         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5770 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5771         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5772 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5773         "\t           <argname>[->field[->field|.field...]],\n"
5774 #endif
5775 #else
5776         "\t           $stack<index>, $stack, $retval, $comm,\n"
5777 #endif
5778         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5779         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5780         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5781         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5782         "\t           symstr, <type>\\[<array-size>\\]\n"
5783 #ifdef CONFIG_HIST_TRIGGERS
5784         "\t    field: <stype> <name>;\n"
5785         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5786         "\t           [unsigned] char/int/long\n"
5787 #endif
5788         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5789         "\t            of the <attached-group>/<attached-event>.\n"
5790 #endif
5791         "  events/\t\t- Directory containing all trace event subsystems:\n"
5792         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5793         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5794         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5795         "\t\t\t  events\n"
5796         "      filter\t\t- If set, only events passing filter are traced\n"
5797         "  events/<system>/<event>/\t- Directory containing control files for\n"
5798         "\t\t\t  <event>:\n"
5799         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5800         "      filter\t\t- If set, only events passing filter are traced\n"
5801         "      trigger\t\t- If set, a command to perform when event is hit\n"
5802         "\t    Format: <trigger>[:count][if <filter>]\n"
5803         "\t   trigger: traceon, traceoff\n"
5804         "\t            enable_event:<system>:<event>\n"
5805         "\t            disable_event:<system>:<event>\n"
5806 #ifdef CONFIG_HIST_TRIGGERS
5807         "\t            enable_hist:<system>:<event>\n"
5808         "\t            disable_hist:<system>:<event>\n"
5809 #endif
5810 #ifdef CONFIG_STACKTRACE
5811         "\t\t    stacktrace\n"
5812 #endif
5813 #ifdef CONFIG_TRACER_SNAPSHOT
5814         "\t\t    snapshot\n"
5815 #endif
5816 #ifdef CONFIG_HIST_TRIGGERS
5817         "\t\t    hist (see below)\n"
5818 #endif
5819         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5820         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5821         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5822         "\t                  events/block/block_unplug/trigger\n"
5823         "\t   The first disables tracing every time block_unplug is hit.\n"
5824         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5825         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5826         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5827         "\t   Like function triggers, the counter is only decremented if it\n"
5828         "\t    enabled or disabled tracing.\n"
5829         "\t   To remove a trigger without a count:\n"
5830         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5831         "\t   To remove a trigger with a count:\n"
5832         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5833         "\t   Filters can be ignored when removing a trigger.\n"
5834 #ifdef CONFIG_HIST_TRIGGERS
5835         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5836         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5837         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5838         "\t            [:values=<field1[,field2,...]>]\n"
5839         "\t            [:sort=<field1[,field2,...]>]\n"
5840         "\t            [:size=#entries]\n"
5841         "\t            [:pause][:continue][:clear]\n"
5842         "\t            [:name=histname1]\n"
5843         "\t            [:nohitcount]\n"
5844         "\t            [:<handler>.<action>]\n"
5845         "\t            [if <filter>]\n\n"
5846         "\t    Note, special fields can be used as well:\n"
5847         "\t            common_timestamp - to record current timestamp\n"
5848         "\t            common_cpu - to record the CPU the event happened on\n"
5849         "\n"
5850         "\t    A hist trigger variable can be:\n"
5851         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5852         "\t        - a reference to another variable e.g. y=$x,\n"
5853         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5854         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5855         "\n"
5856         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5857         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5858         "\t    variable reference, field or numeric literal.\n"
5859         "\n"
5860         "\t    When a matching event is hit, an entry is added to a hash\n"
5861         "\t    table using the key(s) and value(s) named, and the value of a\n"
5862         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5863         "\t    correspond to fields in the event's format description.  Keys\n"
5864         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5865         "\t    Compound keys consisting of up to two fields can be specified\n"
5866         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5867         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5868         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5869         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5870         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5871         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5872         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5873         "\t    its histogram data will be shared with other triggers of the\n"
5874         "\t    same name, and trigger hits will update this common data.\n\n"
5875         "\t    Reading the 'hist' file for the event will dump the hash\n"
5876         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5877         "\t    triggers attached to an event, there will be a table for each\n"
5878         "\t    trigger in the output.  The table displayed for a named\n"
5879         "\t    trigger will be the same as any other instance having the\n"
5880         "\t    same name.  The default format used to display a given field\n"
5881         "\t    can be modified by appending any of the following modifiers\n"
5882         "\t    to the field name, as applicable:\n\n"
5883         "\t            .hex        display a number as a hex value\n"
5884         "\t            .sym        display an address as a symbol\n"
5885         "\t            .sym-offset display an address as a symbol and offset\n"
5886         "\t            .execname   display a common_pid as a program name\n"
5887         "\t            .syscall    display a syscall id as a syscall name\n"
5888         "\t            .log2       display log2 value rather than raw number\n"
5889         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5890         "\t            .usecs      display a common_timestamp in microseconds\n"
5891         "\t            .percent    display a number of percentage value\n"
5892         "\t            .graph      display a bar-graph of a value\n\n"
5893         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5894         "\t    trigger or to start a hist trigger but not log any events\n"
5895         "\t    until told to do so.  'continue' can be used to start or\n"
5896         "\t    restart a paused hist trigger.\n\n"
5897         "\t    The 'clear' parameter will clear the contents of a running\n"
5898         "\t    hist trigger and leave its current paused/active state\n"
5899         "\t    unchanged.\n\n"
5900         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5901         "\t    raw hitcount in the histogram.\n\n"
5902         "\t    The enable_hist and disable_hist triggers can be used to\n"
5903         "\t    have one event conditionally start and stop another event's\n"
5904         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5905         "\t    the enable_event and disable_event triggers.\n\n"
5906         "\t    Hist trigger handlers and actions are executed whenever a\n"
5907         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5908         "\t        <handler>.<action>\n\n"
5909         "\t    The available handlers are:\n\n"
5910         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5911         "\t        onmax(var)               - invoke if var exceeds current max\n"
5912         "\t        onchange(var)            - invoke action if var changes\n\n"
5913         "\t    The available actions are:\n\n"
5914         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5915         "\t        save(field,...)                      - save current event fields\n"
5916 #ifdef CONFIG_TRACER_SNAPSHOT
5917         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5918 #endif
5919 #ifdef CONFIG_SYNTH_EVENTS
5920         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5921         "\t  Write into this file to define/undefine new synthetic events.\n"
5922         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5923 #endif
5924 #endif
5925 ;
5926
5927 static ssize_t
5928 tracing_readme_read(struct file *filp, char __user *ubuf,
5929                        size_t cnt, loff_t *ppos)
5930 {
5931         return simple_read_from_buffer(ubuf, cnt, ppos,
5932                                         readme_msg, strlen(readme_msg));
5933 }
5934
5935 static const struct file_operations tracing_readme_fops = {
5936         .open           = tracing_open_generic,
5937         .read           = tracing_readme_read,
5938         .llseek         = generic_file_llseek,
5939 };
5940
5941 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5942 {
5943         int pid = ++(*pos);
5944
5945         return trace_find_tgid_ptr(pid);
5946 }
5947
5948 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5949 {
5950         int pid = *pos;
5951
5952         return trace_find_tgid_ptr(pid);
5953 }
5954
5955 static void saved_tgids_stop(struct seq_file *m, void *v)
5956 {
5957 }
5958
5959 static int saved_tgids_show(struct seq_file *m, void *v)
5960 {
5961         int *entry = (int *)v;
5962         int pid = entry - tgid_map;
5963         int tgid = *entry;
5964
5965         if (tgid == 0)
5966                 return SEQ_SKIP;
5967
5968         seq_printf(m, "%d %d\n", pid, tgid);
5969         return 0;
5970 }
5971
5972 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5973         .start          = saved_tgids_start,
5974         .stop           = saved_tgids_stop,
5975         .next           = saved_tgids_next,
5976         .show           = saved_tgids_show,
5977 };
5978
5979 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5980 {
5981         int ret;
5982
5983         ret = tracing_check_open_get_tr(NULL);
5984         if (ret)
5985                 return ret;
5986
5987         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5988 }
5989
5990
5991 static const struct file_operations tracing_saved_tgids_fops = {
5992         .open           = tracing_saved_tgids_open,
5993         .read           = seq_read,
5994         .llseek         = seq_lseek,
5995         .release        = seq_release,
5996 };
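
/*
 * Example of what reading this file produces once the record-tgid option
 * has populated tgid_map (one "<pid> <tgid>" pair per line from
 * saved_tgids_show(); the values shown are illustrative):
 *
 *     cat /sys/kernel/tracing/saved_tgids
 *     1023 1023
 *     1045 1023
 */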
5997
5998 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5999 {
6000         unsigned int *ptr = v;
6001
6002         if (*pos || m->count)
6003                 ptr++;
6004
6005         (*pos)++;
6006
6007         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
6008              ptr++) {
6009                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
6010                         continue;
6011
6012                 return ptr;
6013         }
6014
6015         return NULL;
6016 }
6017
6018 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
6019 {
6020         void *v;
6021         loff_t l = 0;
6022
6023         preempt_disable();
6024         arch_spin_lock(&trace_cmdline_lock);
6025
6026         v = &savedcmd->map_cmdline_to_pid[0];
6027         while (l <= *pos) {
6028                 v = saved_cmdlines_next(m, v, &l);
6029                 if (!v)
6030                         return NULL;
6031         }
6032
6033         return v;
6034 }
6035
6036 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6037 {
6038         arch_spin_unlock(&trace_cmdline_lock);
6039         preempt_enable();
6040 }
6041
6042 static int saved_cmdlines_show(struct seq_file *m, void *v)
6043 {
6044         char buf[TASK_COMM_LEN];
6045         unsigned int *pid = v;
6046
6047         __trace_find_cmdline(*pid, buf);
6048         seq_printf(m, "%d %s\n", *pid, buf);
6049         return 0;
6050 }
6051
6052 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6053         .start          = saved_cmdlines_start,
6054         .next           = saved_cmdlines_next,
6055         .stop           = saved_cmdlines_stop,
6056         .show           = saved_cmdlines_show,
6057 };
6058
6059 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6060 {
6061         int ret;
6062
6063         ret = tracing_check_open_get_tr(NULL);
6064         if (ret)
6065                 return ret;
6066
6067         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6068 }
6069
6070 static const struct file_operations tracing_saved_cmdlines_fops = {
6071         .open           = tracing_saved_cmdlines_open,
6072         .read           = seq_read,
6073         .llseek         = seq_lseek,
6074         .release        = seq_release,
6075 };
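
/*
 * Example output of this file (one "<pid> <comm>" pair per line from
 * saved_cmdlines_show(); the values shown are illustrative):
 *
 *     cat /sys/kernel/tracing/saved_cmdlines
 *     217 kworker/2:1
 *     1042 bash
 */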
6076
6077 static ssize_t
6078 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6079                                  size_t cnt, loff_t *ppos)
6080 {
6081         char buf[64];
6082         int r;
6083
6084         preempt_disable();
6085         arch_spin_lock(&trace_cmdline_lock);
6086         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6087         arch_spin_unlock(&trace_cmdline_lock);
6088         preempt_enable();
6089
6090         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6091 }
6092
6093 static int tracing_resize_saved_cmdlines(unsigned int val)
6094 {
6095         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6096
6097         s = allocate_cmdlines_buffer(val);
6098         if (!s)
6099                 return -ENOMEM;
6100
6101         preempt_disable();
6102         arch_spin_lock(&trace_cmdline_lock);
6103         savedcmd_temp = savedcmd;
6104         savedcmd = s;
6105         arch_spin_unlock(&trace_cmdline_lock);
6106         preempt_enable();
6107         free_saved_cmdlines_buffer(savedcmd_temp);
6108
6109         return 0;
6110 }
6111
6112 static ssize_t
6113 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6114                                   size_t cnt, loff_t *ppos)
6115 {
6116         unsigned long val;
6117         int ret;
6118
6119         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6120         if (ret)
6121                 return ret;
6122
6123         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
6124         if (!val || val > PID_MAX_DEFAULT)
6125                 return -EINVAL;
6126
6127         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6128         if (ret < 0)
6129                 return ret;
6130
6131         *ppos += cnt;
6132
6133         return cnt;
6134 }
6135
6136 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6137         .open           = tracing_open_generic,
6138         .read           = tracing_saved_cmdlines_size_read,
6139         .write          = tracing_saved_cmdlines_size_write,
6140 };
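
/*
 * Illustrative usage of the size knob implemented above (the write path
 * rejects 0 and anything above PID_MAX_DEFAULT):
 *
 *     cat /sys/kernel/tracing/saved_cmdlines_size    # print the current size
 *     echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */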
6141
6142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6143 static union trace_eval_map_item *
6144 update_eval_map(union trace_eval_map_item *ptr)
6145 {
6146         if (!ptr->map.eval_string) {
6147                 if (ptr->tail.next) {
6148                         ptr = ptr->tail.next;
6149                         /* Set ptr to the next real item (skip head) */
6150                         ptr++;
6151                 } else
6152                         return NULL;
6153         }
6154         return ptr;
6155 }
6156
6157 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6158 {
6159         union trace_eval_map_item *ptr = v;
6160
6161         /*
6162          * Paranoid! If ptr points to end, we don't want to increment past it.
6163          * This really should never happen.
6164          */
6165         (*pos)++;
6166         ptr = update_eval_map(ptr);
6167         if (WARN_ON_ONCE(!ptr))
6168                 return NULL;
6169
6170         ptr++;
6171         ptr = update_eval_map(ptr);
6172
6173         return ptr;
6174 }
6175
6176 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6177 {
6178         union trace_eval_map_item *v;
6179         loff_t l = 0;
6180
6181         mutex_lock(&trace_eval_mutex);
6182
6183         v = trace_eval_maps;
6184         if (v)
6185                 v++;
6186
6187         while (v && l < *pos) {
6188                 v = eval_map_next(m, v, &l);
6189         }
6190
6191         return v;
6192 }
6193
6194 static void eval_map_stop(struct seq_file *m, void *v)
6195 {
6196         mutex_unlock(&trace_eval_mutex);
6197 }
6198
6199 static int eval_map_show(struct seq_file *m, void *v)
6200 {
6201         union trace_eval_map_item *ptr = v;
6202
6203         seq_printf(m, "%s %ld (%s)\n",
6204                    ptr->map.eval_string, ptr->map.eval_value,
6205                    ptr->map.system);
6206
6207         return 0;
6208 }
6209
6210 static const struct seq_operations tracing_eval_map_seq_ops = {
6211         .start          = eval_map_start,
6212         .next           = eval_map_next,
6213         .stop           = eval_map_stop,
6214         .show           = eval_map_show,
6215 };
6216
6217 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6218 {
6219         int ret;
6220
6221         ret = tracing_check_open_get_tr(NULL);
6222         if (ret)
6223                 return ret;
6224
6225         return seq_open(filp, &tracing_eval_map_seq_ops);
6226 }
6227
6228 static const struct file_operations tracing_eval_map_fops = {
6229         .open           = tracing_eval_map_open,
6230         .read           = seq_read,
6231         .llseek         = seq_lseek,
6232         .release        = seq_release,
6233 };
6234
6235 static inline union trace_eval_map_item *
6236 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6237 {
6238         /* Return tail of array given the head */
6239         return ptr + ptr->head.length + 1;
6240 }
6241
6242 static void
6243 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6244                            int len)
6245 {
6246         struct trace_eval_map **stop;
6247         struct trace_eval_map **map;
6248         union trace_eval_map_item *map_array;
6249         union trace_eval_map_item *ptr;
6250
6251         stop = start + len;
6252
6253         /*
6254          * The trace_eval_maps list contains the maps plus a head and a tail item:
6255          * the head holds the module and the length of the array, and the tail
6256          * holds a pointer to the next list (see the layout sketch after this function).
6257          */
6258         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6259         if (!map_array) {
6260                 pr_warn("Unable to allocate trace eval mapping\n");
6261                 return;
6262         }
6263
6264         mutex_lock(&trace_eval_mutex);
6265
6266         if (!trace_eval_maps)
6267                 trace_eval_maps = map_array;
6268         else {
6269                 ptr = trace_eval_maps;
6270                 for (;;) {
6271                         ptr = trace_eval_jmp_to_tail(ptr);
6272                         if (!ptr->tail.next)
6273                                 break;
6274                         ptr = ptr->tail.next;
6275
6276                 }
6277                 ptr->tail.next = map_array;
6278         }
6279         map_array->head.mod = mod;
6280         map_array->head.length = len;
6281         map_array++;
6282
6283         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6284                 map_array->map = **map;
6285                 map_array++;
6286         }
6287         memset(map_array, 0, sizeof(*map_array));
6288
6289         mutex_unlock(&trace_eval_mutex);
6290 }
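
/*
 * Layout sketch of the array built by trace_insert_eval_map_file() above:
 *
 *   map_array[0]          head  (.mod, .length = len)
 *   map_array[1 .. len]   map   (one trace_eval_map copy per entry)
 *   map_array[len + 1]    tail  (.next pointer to the next module's array;
 *                                zeroed by the memset() until linked)
 *
 * trace_eval_jmp_to_tail() relies on this layout: head + length + 1 lands
 * exactly on the tail item.
 */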
6291
6292 static void trace_create_eval_file(struct dentry *d_tracer)
6293 {
6294         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6295                           NULL, &tracing_eval_map_fops);
6296 }
6297
6298 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6299 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6300 static inline void trace_insert_eval_map_file(struct module *mod,
6301                               struct trace_eval_map **start, int len) { }
6302 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6303
6304 static void trace_insert_eval_map(struct module *mod,
6305                                   struct trace_eval_map **start, int len)
6306 {
6307         struct trace_eval_map **map;
6308
6309         if (len <= 0)
6310                 return;
6311
6312         map = start;
6313
6314         trace_event_eval_update(map, len);
6315
6316         trace_insert_eval_map_file(mod, start, len);
6317 }
6318
6319 static ssize_t
6320 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6321                        size_t cnt, loff_t *ppos)
6322 {
6323         struct trace_array *tr = filp->private_data;
6324         char buf[MAX_TRACER_SIZE+2];
6325         int r;
6326
6327         mutex_lock(&trace_types_lock);
6328         r = sprintf(buf, "%s\n", tr->current_trace->name);
6329         mutex_unlock(&trace_types_lock);
6330
6331         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6332 }
6333
6334 int tracer_init(struct tracer *t, struct trace_array *tr)
6335 {
6336         tracing_reset_online_cpus(&tr->array_buffer);
6337         return t->init(tr);
6338 }
6339
6340 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6341 {
6342         int cpu;
6343
6344         for_each_tracing_cpu(cpu)
6345                 per_cpu_ptr(buf->data, cpu)->entries = val;
6346 }
6347
6348 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6349 {
6350         if (cpu == RING_BUFFER_ALL_CPUS) {
6351                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6352         } else {
6353                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6354         }
6355 }
6356
6357 #ifdef CONFIG_TRACER_MAX_TRACE
6358 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6359 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6360                                         struct array_buffer *size_buf, int cpu_id)
6361 {
6362         int cpu, ret = 0;
6363
6364         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6365                 for_each_tracing_cpu(cpu) {
6366                         ret = ring_buffer_resize(trace_buf->buffer,
6367                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6368                         if (ret < 0)
6369                                 break;
6370                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6371                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6372                 }
6373         } else {
6374                 ret = ring_buffer_resize(trace_buf->buffer,
6375                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6376                 if (ret == 0)
6377                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6378                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6379         }
6380
6381         return ret;
6382 }
6383 #endif /* CONFIG_TRACER_MAX_TRACE */
6384
6385 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6386                                         unsigned long size, int cpu)
6387 {
6388         int ret;
6389
6390         /*
6391          * If kernel or user changes the size of the ring buffer
6392          * we use the size that was given, and we can forget about
6393          * expanding it later.
6394          */
6395         trace_set_ring_buffer_expanded(tr);
6396
6397         /* May be called before buffers are initialized */
6398         if (!tr->array_buffer.buffer)
6399                 return 0;
6400
6401         /* Do not allow tracing while resizing ring buffer */
6402         tracing_stop_tr(tr);
6403
6404         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6405         if (ret < 0)
6406                 goto out_start;
6407
6408 #ifdef CONFIG_TRACER_MAX_TRACE
6409         if (!tr->allocated_snapshot)
6410                 goto out;
6411
6412         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6413         if (ret < 0) {
6414                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6415                                                      &tr->array_buffer, cpu);
6416                 if (r < 0) {
6417                         /*
6418                          * AARGH! We are left with a differently
6419                          * sized max buffer!!!!
6420                          * The max buffer is our "snapshot" buffer.
6421                          * When a tracer needs a snapshot (one of the
6422                          * latency tracers), it swaps the max buffer
6423                          * with the saved snapshot. We succeeded in
6424                          * updating the size of the main buffer, but failed
6425                          * to update the size of the max buffer. And when we
6426                          * tried to reset the main buffer to the original
6427                          * size, we failed there too. This is very unlikely
6428                          * to happen, but if it does, warn and kill all
6429                          * tracing.
6430                          */
6431                         WARN_ON(1);
6432                         tracing_disabled = 1;
6433                 }
6434                 goto out_start;
6435         }
6436
6437         update_buffer_entries(&tr->max_buffer, cpu);
6438
6439  out:
6440 #endif /* CONFIG_TRACER_MAX_TRACE */
6441
6442         update_buffer_entries(&tr->array_buffer, cpu);
6443  out_start:
6444         tracing_start_tr(tr);
6445         return ret;
6446 }
6447
6448 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6449                                   unsigned long size, int cpu_id)
6450 {
6451         int ret;
6452
6453         mutex_lock(&trace_types_lock);
6454
6455         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6456                 /* make sure this CPU is enabled in the mask */
6457                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6458                         ret = -EINVAL;
6459                         goto out;
6460                 }
6461         }
6462
6463         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6464         if (ret < 0)
6465                 ret = -ENOMEM;
6466
6467 out:
6468         mutex_unlock(&trace_types_lock);
6469
6470         return ret;
6471 }
6472
6473
6474 /**
6475  * tracing_update_buffers - used by tracing facility to expand ring buffers
6476  * @tr: The tracing instance
6477  *
6478  * To save memory when tracing is never used on a system that has it
6479  * configured in, the ring buffers are set to a minimum size. But once
6480  * a user starts to use the tracing facility, they need to grow
6481  * to their default size.
6482  *
6483  * This function is to be called when a tracer is about to be used.
6484  */
6485 int tracing_update_buffers(struct trace_array *tr)
6486 {
6487         int ret = 0;
6488
6489         mutex_lock(&trace_types_lock);
6490         if (!tr->ring_buffer_expanded)
6491                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6492                                                 RING_BUFFER_ALL_CPUS);
6493         mutex_unlock(&trace_types_lock);
6494
6495         return ret;
6496 }
6497
6498 struct trace_option_dentry;
6499
6500 static void
6501 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6502
6503 /*
6504  * Used to clear out the tracer before deletion of an instance.
6505  * Must have trace_types_lock held.
6506  */
6507 static void tracing_set_nop(struct trace_array *tr)
6508 {
6509         if (tr->current_trace == &nop_trace)
6510                 return;
6511
6512         tr->current_trace->enabled--;
6513
6514         if (tr->current_trace->reset)
6515                 tr->current_trace->reset(tr);
6516
6517         tr->current_trace = &nop_trace;
6518 }
6519
6520 static bool tracer_options_updated;
6521
6522 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6523 {
6524         /* Only enable if the directory has been created already. */
6525         if (!tr->dir)
6526                 return;
6527
6528         /* Only create trace option files after update_tracer_options finishes */
6529         if (!tracer_options_updated)
6530                 return;
6531
6532         create_trace_option_files(tr, t);
6533 }
6534
6535 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6536 {
6537         struct tracer *t;
6538 #ifdef CONFIG_TRACER_MAX_TRACE
6539         bool had_max_tr;
6540 #endif
6541         int ret = 0;
6542
6543         mutex_lock(&trace_types_lock);
6544
6545         if (!tr->ring_buffer_expanded) {
6546                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6547                                                 RING_BUFFER_ALL_CPUS);
6548                 if (ret < 0)
6549                         goto out;
6550                 ret = 0;
6551         }
6552
6553         for (t = trace_types; t; t = t->next) {
6554                 if (strcmp(t->name, buf) == 0)
6555                         break;
6556         }
6557         if (!t) {
6558                 ret = -EINVAL;
6559                 goto out;
6560         }
6561         if (t == tr->current_trace)
6562                 goto out;
6563
6564 #ifdef CONFIG_TRACER_SNAPSHOT
6565         if (t->use_max_tr) {
6566                 local_irq_disable();
6567                 arch_spin_lock(&tr->max_lock);
6568                 if (tr->cond_snapshot)
6569                         ret = -EBUSY;
6570                 arch_spin_unlock(&tr->max_lock);
6571                 local_irq_enable();
6572                 if (ret)
6573                         goto out;
6574         }
6575 #endif
6576         /* Some tracers won't work on kernel command line */
6577         if (system_state < SYSTEM_RUNNING && t->noboot) {
6578                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6579                         t->name);
6580                 goto out;
6581         }
6582
6583         /* Some tracers are only allowed for the top level buffer */
6584         if (!trace_ok_for_array(t, tr)) {
6585                 ret = -EINVAL;
6586                 goto out;
6587         }
6588
6589         /* If trace pipe files are being read, we can't change the tracer */
6590         if (tr->trace_ref) {
6591                 ret = -EBUSY;
6592                 goto out;
6593         }
6594
6595         trace_branch_disable();
6596
6597         tr->current_trace->enabled--;
6598
6599         if (tr->current_trace->reset)
6600                 tr->current_trace->reset(tr);
6601
6602 #ifdef CONFIG_TRACER_MAX_TRACE
6603         had_max_tr = tr->current_trace->use_max_tr;
6604
6605         /* Current trace needs to be nop_trace before synchronize_rcu */
6606         tr->current_trace = &nop_trace;
6607
6608         if (had_max_tr && !t->use_max_tr) {
6609                 /*
6610                  * We need to make sure that the update_max_tr sees that
6611                  * current_trace changed to nop_trace to keep it from
6612                  * swapping the buffers after we resize it.
6613                  * The update_max_tr is called with interrupts disabled,
6614                  * so a synchronize_rcu() is sufficient.
6615                  */
6616                 synchronize_rcu();
6617                 free_snapshot(tr);
6618         }
6619
6620         if (t->use_max_tr && !tr->allocated_snapshot) {
6621                 ret = tracing_alloc_snapshot_instance(tr);
6622                 if (ret < 0)
6623                         goto out;
6624         }
6625 #else
6626         tr->current_trace = &nop_trace;
6627 #endif
6628
6629         if (t->init) {
6630                 ret = tracer_init(t, tr);
6631                 if (ret)
6632                         goto out;
6633         }
6634
6635         tr->current_trace = t;
6636         tr->current_trace->enabled++;
6637         trace_branch_enable(tr);
6638  out:
6639         mutex_unlock(&trace_types_lock);
6640
6641         return ret;
6642 }
6643
6644 static ssize_t
6645 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6646                         size_t cnt, loff_t *ppos)
6647 {
6648         struct trace_array *tr = filp->private_data;
6649         char buf[MAX_TRACER_SIZE+1];
6650         char *name;
6651         size_t ret;
6652         int err;
6653
6654         ret = cnt;
6655
6656         if (cnt > MAX_TRACER_SIZE)
6657                 cnt = MAX_TRACER_SIZE;
6658
6659         if (copy_from_user(buf, ubuf, cnt))
6660                 return -EFAULT;
6661
6662         buf[cnt] = 0;
6663
6664         name = strim(buf);
6665
6666         err = tracing_set_tracer(tr, name);
6667         if (err)
6668                 return err;
6669
6670         *ppos += ret;
6671
6672         return ret;
6673 }
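
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing): the
 * write handler above passes the stripped buffer to tracing_set_tracer(),
 * so switching tracers from user space is, for example:
 *
 *   echo nop > current_tracer
 *   cat current_tracer
 */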
6674
6675 static ssize_t
6676 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6677                    size_t cnt, loff_t *ppos)
6678 {
6679         char buf[64];
6680         int r;
6681
6682         r = snprintf(buf, sizeof(buf), "%ld\n",
6683                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6684         if (r > sizeof(buf))
6685                 r = sizeof(buf);
6686         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6687 }
6688
6689 static ssize_t
6690 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6691                     size_t cnt, loff_t *ppos)
6692 {
6693         unsigned long val;
6694         int ret;
6695
6696         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6697         if (ret)
6698                 return ret;
6699
6700         *ptr = val * 1000;
6701
6702         return cnt;
6703 }
6704
6705 static ssize_t
6706 tracing_thresh_read(struct file *filp, char __user *ubuf,
6707                     size_t cnt, loff_t *ppos)
6708 {
6709         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6710 }
6711
6712 static ssize_t
6713 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6714                      size_t cnt, loff_t *ppos)
6715 {
6716         struct trace_array *tr = filp->private_data;
6717         int ret;
6718
6719         mutex_lock(&trace_types_lock);
6720         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6721         if (ret < 0)
6722                 goto out;
6723
6724         if (tr->current_trace->update_thresh) {
6725                 ret = tr->current_trace->update_thresh(tr);
6726                 if (ret < 0)
6727                         goto out;
6728         }
6729
6730         ret = cnt;
6731 out:
6732         mutex_unlock(&trace_types_lock);
6733
6734         return ret;
6735 }
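
/*
 * Note on units, following tracing_nsecs_read()/tracing_nsecs_write()
 * above: the value is exchanged with user space in microseconds and
 * stored internally in nanoseconds, e.g. (assuming tracefs is mounted
 * at /sys/kernel/tracing):
 *
 *   echo 100 > tracing_thresh        # 100 usecs, stored as 100000 ns
 */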
6736
6737 #ifdef CONFIG_TRACER_MAX_TRACE
6738
6739 static ssize_t
6740 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6741                      size_t cnt, loff_t *ppos)
6742 {
6743         struct trace_array *tr = filp->private_data;
6744
6745         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6746 }
6747
6748 static ssize_t
6749 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6750                       size_t cnt, loff_t *ppos)
6751 {
6752         struct trace_array *tr = filp->private_data;
6753
6754         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6755 }
6756
6757 #endif
6758
6759 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6760 {
6761         if (cpu == RING_BUFFER_ALL_CPUS) {
6762                 if (cpumask_empty(tr->pipe_cpumask)) {
6763                         cpumask_setall(tr->pipe_cpumask);
6764                         return 0;
6765                 }
6766         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6767                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6768                 return 0;
6769         }
6770         return -EBUSY;
6771 }
6772
6773 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6774 {
6775         if (cpu == RING_BUFFER_ALL_CPUS) {
6776                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6777                 cpumask_clear(tr->pipe_cpumask);
6778         } else {
6779                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6780                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6781         }
6782 }
6783
6784 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6785 {
6786         struct trace_array *tr = inode->i_private;
6787         struct trace_iterator *iter;
6788         int cpu;
6789         int ret;
6790
6791         ret = tracing_check_open_get_tr(tr);
6792         if (ret)
6793                 return ret;
6794
6795         mutex_lock(&trace_types_lock);
6796         cpu = tracing_get_cpu(inode);
6797         ret = open_pipe_on_cpu(tr, cpu);
6798         if (ret)
6799                 goto fail_pipe_on_cpu;
6800
6801         /* create a buffer to store the information to pass to userspace */
6802         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6803         if (!iter) {
6804                 ret = -ENOMEM;
6805                 goto fail_alloc_iter;
6806         }
6807
6808         trace_seq_init(&iter->seq);
6809         iter->trace = tr->current_trace;
6810
6811         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6812                 ret = -ENOMEM;
6813                 goto fail;
6814         }
6815
6816         /* trace pipe does not show start of buffer */
6817         cpumask_setall(iter->started);
6818
6819         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6820                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6821
6822         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6823         if (trace_clocks[tr->clock_id].in_ns)
6824                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6825
6826         iter->tr = tr;
6827         iter->array_buffer = &tr->array_buffer;
6828         iter->cpu_file = cpu;
6829         mutex_init(&iter->mutex);
6830         filp->private_data = iter;
6831
6832         if (iter->trace->pipe_open)
6833                 iter->trace->pipe_open(iter);
6834
6835         nonseekable_open(inode, filp);
6836
6837         tr->trace_ref++;
6838
6839         mutex_unlock(&trace_types_lock);
6840         return ret;
6841
6842 fail:
6843         kfree(iter);
6844 fail_alloc_iter:
6845         close_pipe_on_cpu(tr, cpu);
6846 fail_pipe_on_cpu:
6847         __trace_array_put(tr);
6848         mutex_unlock(&trace_types_lock);
6849         return ret;
6850 }
6851
6852 static int tracing_release_pipe(struct inode *inode, struct file *file)
6853 {
6854         struct trace_iterator *iter = file->private_data;
6855         struct trace_array *tr = inode->i_private;
6856
6857         mutex_lock(&trace_types_lock);
6858
6859         tr->trace_ref--;
6860
6861         if (iter->trace->pipe_close)
6862                 iter->trace->pipe_close(iter);
6863         close_pipe_on_cpu(tr, iter->cpu_file);
6864         mutex_unlock(&trace_types_lock);
6865
6866         free_trace_iter_content(iter);
6867         kfree(iter);
6868
6869         trace_array_put(tr);
6870
6871         return 0;
6872 }
6873
6874 static __poll_t
6875 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6876 {
6877         struct trace_array *tr = iter->tr;
6878
6879         /* Iterators are static; they should be either filled or empty */
6880         if (trace_buffer_iter(iter, iter->cpu_file))
6881                 return EPOLLIN | EPOLLRDNORM;
6882
6883         if (tr->trace_flags & TRACE_ITER_BLOCK)
6884                 /*
6885                  * Always select as readable when in blocking mode
6886                  */
6887                 return EPOLLIN | EPOLLRDNORM;
6888         else
6889                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6890                                              filp, poll_table, iter->tr->buffer_percent);
6891 }
6892
6893 static __poll_t
6894 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6895 {
6896         struct trace_iterator *iter = filp->private_data;
6897
6898         return trace_poll(iter, filp, poll_table);
6899 }
6900
6901 /* Must be called with iter->mutex held. */
6902 static int tracing_wait_pipe(struct file *filp)
6903 {
6904         struct trace_iterator *iter = filp->private_data;
6905         int ret;
6906
6907         while (trace_empty(iter)) {
6908
6909                 if ((filp->f_flags & O_NONBLOCK)) {
6910                         return -EAGAIN;
6911                 }
6912
6913                 /*
6914                  * We only return an EOF once we have read something and
6915                  * tracing has been disabled. We still block if tracing is
6916                  * disabled but we have never read anything. This allows a
6917                  * user to cat this file, and then enable tracing. But after
6918                  * we have read something, we give an EOF when tracing is again disabled.
6919                  *
6920                  * iter->pos will be 0 if we haven't read anything.
6921                  */
6922                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6923                         break;
6924
6925                 mutex_unlock(&iter->mutex);
6926
6927                 ret = wait_on_pipe(iter, 0);
6928
6929                 mutex_lock(&iter->mutex);
6930
6931                 if (ret)
6932                         return ret;
6933         }
6934
6935         return 1;
6936 }
6937
6938 /*
6939  * Consumer reader.
6940  */
6941 static ssize_t
6942 tracing_read_pipe(struct file *filp, char __user *ubuf,
6943                   size_t cnt, loff_t *ppos)
6944 {
6945         struct trace_iterator *iter = filp->private_data;
6946         ssize_t sret;
6947
6948         /*
6949          * Avoid more than one consumer on a single file descriptor.
6950          * This is just a matter of trace coherency; the ring buffer itself
6951          * is protected.
6952          */
6953         mutex_lock(&iter->mutex);
6954
6955         /* return any leftover data */
6956         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6957         if (sret != -EBUSY)
6958                 goto out;
6959
6960         trace_seq_init(&iter->seq);
6961
6962         if (iter->trace->read) {
6963                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6964                 if (sret)
6965                         goto out;
6966         }
6967
6968 waitagain:
6969         sret = tracing_wait_pipe(filp);
6970         if (sret <= 0)
6971                 goto out;
6972
6973         /* stop when tracing is finished */
6974         if (trace_empty(iter)) {
6975                 sret = 0;
6976                 goto out;
6977         }
6978
6979         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6980                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6981
6982         /* reset all but tr, trace, and overruns */
6983         trace_iterator_reset(iter);
6984         cpumask_clear(iter->started);
6985         trace_seq_init(&iter->seq);
6986
6987         trace_event_read_lock();
6988         trace_access_lock(iter->cpu_file);
6989         while (trace_find_next_entry_inc(iter) != NULL) {
6990                 enum print_line_t ret;
6991                 int save_len = iter->seq.seq.len;
6992
6993                 ret = print_trace_line(iter);
6994                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6995                         /*
6996                          * If one print_trace_line() fills the entire trace_seq in one shot,
6997                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6998                          * In this case, we need to consume it, otherwise the loop will peek
6999                          * this event again next time, resulting in an infinite loop.
7000                          */
7001                         if (save_len == 0) {
7002                                 iter->seq.full = 0;
7003                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
7004                                 trace_consume(iter);
7005                                 break;
7006                         }
7007
7008                         /* In other cases, don't print partial lines */
7009                         iter->seq.seq.len = save_len;
7010                         break;
7011                 }
7012                 if (ret != TRACE_TYPE_NO_CONSUME)
7013                         trace_consume(iter);
7014
7015                 if (trace_seq_used(&iter->seq) >= cnt)
7016                         break;
7017
7018                 /*
7019                  * Setting the full flag means we reached the trace_seq buffer
7020                  * size and we should have left via the partial output condition
7021                  * above. One of the trace_seq_* functions is not used properly.
7022                  */
7023                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7024                           iter->ent->type);
7025         }
7026         trace_access_unlock(iter->cpu_file);
7027         trace_event_read_unlock();
7028
7029         /* Now copy what we have to the user */
7030         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7031         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7032                 trace_seq_init(&iter->seq);
7033
7034         /*
7035          * If there was nothing to send to user, in spite of consuming trace
7036          * entries, go back to wait for more entries.
7037          */
7038         if (sret == -EBUSY)
7039                 goto waitagain;
7040
7041 out:
7042         mutex_unlock(&iter->mutex);
7043
7044         return sret;
7045 }
7046
7047 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7048                                      unsigned int idx)
7049 {
7050         __free_page(spd->pages[idx]);
7051 }
7052
7053 static size_t
7054 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7055 {
7056         size_t count;
7057         int save_len;
7058         int ret;
7059
7060         /* Seq buffer is page-sized, exactly what we need. */
7061         for (;;) {
7062                 save_len = iter->seq.seq.len;
7063                 ret = print_trace_line(iter);
7064
7065                 if (trace_seq_has_overflowed(&iter->seq)) {
7066                         iter->seq.seq.len = save_len;
7067                         break;
7068                 }
7069
7070                 /*
7071                  * This should not be hit, because a partial line should only
7072                  * be returned if the iter->seq overflowed. But check it
7073                  * anyway to be safe.
7074                  */
7075                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7076                         iter->seq.seq.len = save_len;
7077                         break;
7078                 }
7079
7080                 count = trace_seq_used(&iter->seq) - save_len;
7081                 if (rem < count) {
7082                         rem = 0;
7083                         iter->seq.seq.len = save_len;
7084                         break;
7085                 }
7086
7087                 if (ret != TRACE_TYPE_NO_CONSUME)
7088                         trace_consume(iter);
7089                 rem -= count;
7090                 if (!trace_find_next_entry_inc(iter))   {
7091                         rem = 0;
7092                         iter->ent = NULL;
7093                         break;
7094                 }
7095         }
7096
7097         return rem;
7098 }
7099
7100 static ssize_t tracing_splice_read_pipe(struct file *filp,
7101                                         loff_t *ppos,
7102                                         struct pipe_inode_info *pipe,
7103                                         size_t len,
7104                                         unsigned int flags)
7105 {
7106         struct page *pages_def[PIPE_DEF_BUFFERS];
7107         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7108         struct trace_iterator *iter = filp->private_data;
7109         struct splice_pipe_desc spd = {
7110                 .pages          = pages_def,
7111                 .partial        = partial_def,
7112                 .nr_pages       = 0, /* This gets updated below. */
7113                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7114                 .ops            = &default_pipe_buf_ops,
7115                 .spd_release    = tracing_spd_release_pipe,
7116         };
7117         ssize_t ret;
7118         size_t rem;
7119         unsigned int i;
7120
7121         if (splice_grow_spd(pipe, &spd))
7122                 return -ENOMEM;
7123
7124         mutex_lock(&iter->mutex);
7125
7126         if (iter->trace->splice_read) {
7127                 ret = iter->trace->splice_read(iter, filp,
7128                                                ppos, pipe, len, flags);
7129                 if (ret)
7130                         goto out_err;
7131         }
7132
7133         ret = tracing_wait_pipe(filp);
7134         if (ret <= 0)
7135                 goto out_err;
7136
7137         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7138                 ret = -EFAULT;
7139                 goto out_err;
7140         }
7141
7142         trace_event_read_lock();
7143         trace_access_lock(iter->cpu_file);
7144
7145         /* Fill as many pages as possible. */
7146         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7147                 spd.pages[i] = alloc_page(GFP_KERNEL);
7148                 if (!spd.pages[i])
7149                         break;
7150
7151                 rem = tracing_fill_pipe_page(rem, iter);
7152
7153                 /* Copy the data into the page, so we can start over. */
7154                 ret = trace_seq_to_buffer(&iter->seq,
7155                                           page_address(spd.pages[i]),
7156                                           trace_seq_used(&iter->seq));
7157                 if (ret < 0) {
7158                         __free_page(spd.pages[i]);
7159                         break;
7160                 }
7161                 spd.partial[i].offset = 0;
7162                 spd.partial[i].len = trace_seq_used(&iter->seq);
7163
7164                 trace_seq_init(&iter->seq);
7165         }
7166
7167         trace_access_unlock(iter->cpu_file);
7168         trace_event_read_unlock();
7169         mutex_unlock(&iter->mutex);
7170
7171         spd.nr_pages = i;
7172
7173         if (i)
7174                 ret = splice_to_pipe(pipe, &spd);
7175         else
7176                 ret = 0;
7177 out:
7178         splice_shrink_spd(&spd);
7179         return ret;
7180
7181 out_err:
7182         mutex_unlock(&iter->mutex);
7183         goto out;
7184 }
7185
7186 static ssize_t
7187 tracing_entries_read(struct file *filp, char __user *ubuf,
7188                      size_t cnt, loff_t *ppos)
7189 {
7190         struct inode *inode = file_inode(filp);
7191         struct trace_array *tr = inode->i_private;
7192         int cpu = tracing_get_cpu(inode);
7193         char buf[64];
7194         int r = 0;
7195         ssize_t ret;
7196
7197         mutex_lock(&trace_types_lock);
7198
7199         if (cpu == RING_BUFFER_ALL_CPUS) {
7200                 int cpu, buf_size_same;
7201                 unsigned long size;
7202
7203                 size = 0;
7204                 buf_size_same = 1;
7205                 /* check if all cpu sizes are same */
7206                 for_each_tracing_cpu(cpu) {
7207                         /* fill in the size from first enabled cpu */
7208                         if (size == 0)
7209                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7210                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7211                                 buf_size_same = 0;
7212                                 break;
7213                         }
7214                 }
7215
7216                 if (buf_size_same) {
7217                         if (!tr->ring_buffer_expanded)
7218                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7219                                             size >> 10,
7220                                             trace_buf_size >> 10);
7221                         else
7222                                 r = sprintf(buf, "%lu\n", size >> 10);
7223                 } else
7224                         r = sprintf(buf, "X\n");
7225         } else
7226                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7227
7228         mutex_unlock(&trace_types_lock);
7229
7230         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7231         return ret;
7232 }
7233
7234 static ssize_t
7235 tracing_entries_write(struct file *filp, const char __user *ubuf,
7236                       size_t cnt, loff_t *ppos)
7237 {
7238         struct inode *inode = file_inode(filp);
7239         struct trace_array *tr = inode->i_private;
7240         unsigned long val;
7241         int ret;
7242
7243         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7244         if (ret)
7245                 return ret;
7246
7247         /* must have at least 1 entry */
7248         if (!val)
7249                 return -EINVAL;
7250
7251         /* value is in KB */
7252         val <<= 10;
7253         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7254         if (ret < 0)
7255                 return ret;
7256
7257         *ppos += cnt;
7258
7259         return cnt;
7260 }
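
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing): the
 * value written above is taken in kilobytes per CPU (val <<= 10 before
 * the resize), e.g.:
 *
 *   echo 8192 > buffer_size_kb
 *   cat buffer_size_kb
 */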
7261
7262 static ssize_t
7263 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7264                                 size_t cnt, loff_t *ppos)
7265 {
7266         struct trace_array *tr = filp->private_data;
7267         char buf[64];
7268         int r, cpu;
7269         unsigned long size = 0, expanded_size = 0;
7270
7271         mutex_lock(&trace_types_lock);
7272         for_each_tracing_cpu(cpu) {
7273                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7274                 if (!tr->ring_buffer_expanded)
7275                         expanded_size += trace_buf_size >> 10;
7276         }
7277         if (tr->ring_buffer_expanded)
7278                 r = sprintf(buf, "%lu\n", size);
7279         else
7280                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7281         mutex_unlock(&trace_types_lock);
7282
7283         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7284 }
7285
7286 static ssize_t
7287 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7288                           size_t cnt, loff_t *ppos)
7289 {
7290         /*
7291          * There is no need to read what the user has written; this function
7292          * only exists so that using "echo" on this file does not return an error
7293          */
7294
7295         *ppos += cnt;
7296
7297         return cnt;
7298 }
7299
7300 static int
7301 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7302 {
7303         struct trace_array *tr = inode->i_private;
7304
7305         /* disable tracing ? */
7306         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7307                 tracer_tracing_off(tr);
7308         /* resize the ring buffer to 0 */
7309         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7310
7311         trace_array_put(tr);
7312
7313         return 0;
7314 }
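
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 * writes to "free_buffer" are accepted as-is; it is the release of the
 * file descriptor that shrinks the ring buffer to zero (and stops tracing
 * if TRACE_ITER_STOP_ON_FREE is set):
 *
 *   echo > free_buffer
 */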
7315
7316 #define TRACE_MARKER_MAX_SIZE           4096
7317
7318 static ssize_t
7319 tracing_mark_write(struct file *filp, const char __user *ubuf,
7320                                         size_t cnt, loff_t *fpos)
7321 {
7322         struct trace_array *tr = filp->private_data;
7323         struct ring_buffer_event *event;
7324         enum event_trigger_type tt = ETT_NONE;
7325         struct trace_buffer *buffer;
7326         struct print_entry *entry;
7327         int meta_size;
7328         ssize_t written;
7329         size_t size;
7330         int len;
7331
7332 /* Used in tracing_mark_raw_write() as well */
7333 #define FAULTED_STR "<faulted>"
7334 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7335
7336         if (tracing_disabled)
7337                 return -EINVAL;
7338
7339         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7340                 return -EINVAL;
7341
7342         if ((ssize_t)cnt < 0)
7343                 return -EINVAL;
7344
7345         if (cnt > TRACE_MARKER_MAX_SIZE)
7346                 cnt = TRACE_MARKER_MAX_SIZE;
7347
7348         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7349  again:
7350         size = cnt + meta_size;
7351
7352         /* If less than "<faulted>", then make sure we can still add that */
7353         if (cnt < FAULTED_SIZE)
7354                 size += FAULTED_SIZE - cnt;
7355
7356         buffer = tr->array_buffer.buffer;
7357         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7358                                             tracing_gen_ctx());
7359         if (unlikely(!event)) {
7360                 /*
7361                  * If the size was greater than what was allowed, then
7362                  * make it smaller and try again.
7363                  */
7364                 if (size > ring_buffer_max_event_size(buffer)) {
7365                         /* with cnt < FAULTED_SIZE, size should never be bigger than max */
7366                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7367                                 return -EBADF;
7368                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7369                         /* The above should only happen once */
7370                         if (WARN_ON_ONCE(cnt + meta_size == size))
7371                                 return -EBADF;
7372                         goto again;
7373                 }
7374
7375                 /* Ring buffer disabled, return as if not open for write */
7376                 return -EBADF;
7377         }
7378
7379         entry = ring_buffer_event_data(event);
7380         entry->ip = _THIS_IP_;
7381
7382         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7383         if (len) {
7384                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7385                 cnt = FAULTED_SIZE;
7386                 written = -EFAULT;
7387         } else
7388                 written = cnt;
7389
7390         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7391                 /* do not add \n before testing triggers, but add \0 */
7392                 entry->buf[cnt] = '\0';
7393                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7394         }
7395
7396         if (entry->buf[cnt - 1] != '\n') {
7397                 entry->buf[cnt] = '\n';
7398                 entry->buf[cnt + 1] = '\0';
7399         } else
7400                 entry->buf[cnt] = '\0';
7401
7402         if (static_branch_unlikely(&trace_marker_exports_enabled))
7403                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7404         __buffer_unlock_commit(buffer, event);
7405
7406         if (tt)
7407                 event_triggers_post_call(tr->trace_marker_file, tt);
7408
7409         return written;
7410 }
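
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing): user
 * space injects annotations into the trace with plain writes; the handler
 * above appends a '\n' if one is missing and caps writes at
 * TRACE_MARKER_MAX_SIZE:
 *
 *   echo "hit checkpoint A" > trace_marker
 */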
7411
7412 static ssize_t
7413 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7414                                         size_t cnt, loff_t *fpos)
7415 {
7416         struct trace_array *tr = filp->private_data;
7417         struct ring_buffer_event *event;
7418         struct trace_buffer *buffer;
7419         struct raw_data_entry *entry;
7420         ssize_t written;
7421         int size;
7422         int len;
7423
7424 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7425
7426         if (tracing_disabled)
7427                 return -EINVAL;
7428
7429         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7430                 return -EINVAL;
7431
7432         /* The marker must at least have a tag id */
7433         if (cnt < sizeof(unsigned int))
7434                 return -EINVAL;
7435
7436         size = sizeof(*entry) + cnt;
7437         if (cnt < FAULT_SIZE_ID)
7438                 size += FAULT_SIZE_ID - cnt;
7439
7440         buffer = tr->array_buffer.buffer;
7441
7442         if (size > ring_buffer_max_event_size(buffer))
7443                 return -EINVAL;
7444
7445         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7446                                             tracing_gen_ctx());
7447         if (!event)
7448                 /* Ring buffer disabled, return as if not open for write */
7449                 return -EBADF;
7450
7451         entry = ring_buffer_event_data(event);
7452
7453         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7454         if (len) {
7455                 entry->id = -1;
7456                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7457                 written = -EFAULT;
7458         } else
7459                 written = cnt;
7460
7461         __buffer_unlock_commit(buffer, event);
7462
7463         return written;
7464 }
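
/*
 * Hypothetical user-space sketch (error handling and #includes omitted):
 * trace_marker_raw expects a binary blob that starts with an unsigned int
 * tag id, matching the raw_data_entry layout filled in above:
 *
 *   struct { unsigned int id; char payload[8]; } raw = { 42, "example" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &raw, sizeof(raw));
 *   close(fd);
 */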
7465
7466 static int tracing_clock_show(struct seq_file *m, void *v)
7467 {
7468         struct trace_array *tr = m->private;
7469         int i;
7470
7471         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7472                 seq_printf(m,
7473                         "%s%s%s%s", i ? " " : "",
7474                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7475                         i == tr->clock_id ? "]" : "");
7476         seq_putc(m, '\n');
7477
7478         return 0;
7479 }
7480
7481 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7482 {
7483         int i;
7484
7485         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7486                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7487                         break;
7488         }
7489         if (i == ARRAY_SIZE(trace_clocks))
7490                 return -EINVAL;
7491
7492         mutex_lock(&trace_types_lock);
7493
7494         tr->clock_id = i;
7495
7496         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7497
7498         /*
7499          * New clock may not be consistent with the previous clock.
7500          * Reset the buffer so that it doesn't have incomparable timestamps.
7501          */
7502         tracing_reset_online_cpus(&tr->array_buffer);
7503
7504 #ifdef CONFIG_TRACER_MAX_TRACE
7505         if (tr->max_buffer.buffer)
7506                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7507         tracing_reset_online_cpus(&tr->max_buffer);
7508 #endif
7509
7510         mutex_unlock(&trace_types_lock);
7511
7512         return 0;
7513 }
7514
7515 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7516                                    size_t cnt, loff_t *fpos)
7517 {
7518         struct seq_file *m = filp->private_data;
7519         struct trace_array *tr = m->private;
7520         char buf[64];
7521         const char *clockstr;
7522         int ret;
7523
7524         if (cnt >= sizeof(buf))
7525                 return -EINVAL;
7526
7527         if (copy_from_user(buf, ubuf, cnt))
7528                 return -EFAULT;
7529
7530         buf[cnt] = 0;
7531
7532         clockstr = strstrip(buf);
7533
7534         ret = tracing_set_clock(tr, clockstr);
7535         if (ret)
7536                 return ret;
7537
7538         *fpos += cnt;
7539
7540         return cnt;
7541 }
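
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing): the
 * currently selected clock is the bracketed entry in the list printed by
 * tracing_clock_show(), and writing one of the listed names switches to it
 * (resetting the buffers as noted in tracing_set_clock()):
 *
 *   cat trace_clock
 *   echo global > trace_clock
 */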
7542
7543 static int tracing_clock_open(struct inode *inode, struct file *file)
7544 {
7545         struct trace_array *tr = inode->i_private;
7546         int ret;
7547
7548         ret = tracing_check_open_get_tr(tr);
7549         if (ret)
7550                 return ret;
7551
7552         ret = single_open(file, tracing_clock_show, inode->i_private);
7553         if (ret < 0)
7554                 trace_array_put(tr);
7555
7556         return ret;
7557 }
7558
7559 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7560 {
7561         struct trace_array *tr = m->private;
7562
7563         mutex_lock(&trace_types_lock);
7564
7565         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7566                 seq_puts(m, "delta [absolute]\n");
7567         else
7568                 seq_puts(m, "[delta] absolute\n");
7569
7570         mutex_unlock(&trace_types_lock);
7571
7572         return 0;
7573 }
7574
7575 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7576 {
7577         struct trace_array *tr = inode->i_private;
7578         int ret;
7579
7580         ret = tracing_check_open_get_tr(tr);
7581         if (ret)
7582                 return ret;
7583
7584         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7585         if (ret < 0)
7586                 trace_array_put(tr);
7587
7588         return ret;
7589 }
7590
7591 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7592 {
7593         if (rbe == this_cpu_read(trace_buffered_event))
7594                 return ring_buffer_time_stamp(buffer);
7595
7596         return ring_buffer_event_time_stamp(buffer, rbe);
7597 }
7598
7599 /*
7600  * Set or disable using the per CPU trace_buffer_event when possible.
7601  */
7602 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7603 {
7604         int ret = 0;
7605
7606         mutex_lock(&trace_types_lock);
7607
7608         if (set && tr->no_filter_buffering_ref++)
7609                 goto out;
7610
7611         if (!set) {
7612                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7613                         ret = -EINVAL;
7614                         goto out;
7615                 }
7616
7617                 --tr->no_filter_buffering_ref;
7618         }
7619  out:
7620         mutex_unlock(&trace_types_lock);
7621
7622         return ret;
7623 }
7624
7625 struct ftrace_buffer_info {
7626         struct trace_iterator   iter;
7627         void                    *spare;
7628         unsigned int            spare_cpu;
7629         unsigned int            spare_size;
7630         unsigned int            read;
7631 };
7632
7633 #ifdef CONFIG_TRACER_SNAPSHOT
7634 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7635 {
7636         struct trace_array *tr = inode->i_private;
7637         struct trace_iterator *iter;
7638         struct seq_file *m;
7639         int ret;
7640
7641         ret = tracing_check_open_get_tr(tr);
7642         if (ret)
7643                 return ret;
7644
7645         if (file->f_mode & FMODE_READ) {
7646                 iter = __tracing_open(inode, file, true);
7647                 if (IS_ERR(iter))
7648                         ret = PTR_ERR(iter);
7649         } else {
7650                 /* Writes still need the seq_file to hold the private data */
7651                 ret = -ENOMEM;
7652                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7653                 if (!m)
7654                         goto out;
7655                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7656                 if (!iter) {
7657                         kfree(m);
7658                         goto out;
7659                 }
7660                 ret = 0;
7661
7662                 iter->tr = tr;
7663                 iter->array_buffer = &tr->max_buffer;
7664                 iter->cpu_file = tracing_get_cpu(inode);
7665                 m->private = iter;
7666                 file->private_data = m;
7667         }
7668 out:
7669         if (ret < 0)
7670                 trace_array_put(tr);
7671
7672         return ret;
7673 }
7674
7675 static void tracing_swap_cpu_buffer(void *tr)
7676 {
7677         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7678 }
7679
7680 static ssize_t
7681 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7682                        loff_t *ppos)
7683 {
7684         struct seq_file *m = filp->private_data;
7685         struct trace_iterator *iter = m->private;
7686         struct trace_array *tr = iter->tr;
7687         unsigned long val;
7688         int ret;
7689
7690         ret = tracing_update_buffers(tr);
7691         if (ret < 0)
7692                 return ret;
7693
7694         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7695         if (ret)
7696                 return ret;
7697
7698         mutex_lock(&trace_types_lock);
7699
7700         if (tr->current_trace->use_max_tr) {
7701                 ret = -EBUSY;
7702                 goto out;
7703         }
7704
7705         local_irq_disable();
7706         arch_spin_lock(&tr->max_lock);
7707         if (tr->cond_snapshot)
7708                 ret = -EBUSY;
7709         arch_spin_unlock(&tr->max_lock);
7710         local_irq_enable();
7711         if (ret)
7712                 goto out;
7713
7714         switch (val) {
7715         case 0:
7716                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7717                         ret = -EINVAL;
7718                         break;
7719                 }
7720                 if (tr->allocated_snapshot)
7721                         free_snapshot(tr);
7722                 break;
7723         case 1:
7724 /* Only allow per-cpu swap if the ring buffer supports it */
7725 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7726                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7727                         ret = -EINVAL;
7728                         break;
7729                 }
7730 #endif
7731                 if (tr->allocated_snapshot)
7732                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7733                                         &tr->array_buffer, iter->cpu_file);
7734                 else
7735                         ret = tracing_alloc_snapshot_instance(tr);
7736                 if (ret < 0)
7737                         break;
7738                 /* Now, we're going to swap */
7739                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7740                         local_irq_disable();
7741                         update_max_tr(tr, current, smp_processor_id(), NULL);
7742                         local_irq_enable();
7743                 } else {
7744                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7745                                                  (void *)tr, 1);
7746                 }
7747                 break;
7748         default:
7749                 if (tr->allocated_snapshot) {
7750                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7751                                 tracing_reset_online_cpus(&tr->max_buffer);
7752                         else
7753                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7754                 }
7755                 break;
7756         }
7757
7758         if (ret >= 0) {
7759                 *ppos += cnt;
7760                 ret = cnt;
7761         }
7762 out:
7763         mutex_unlock(&trace_types_lock);
7764         return ret;
7765 }
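
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing),
 * matching the switch statement above: writing 0 frees an allocated
 * snapshot, 1 allocates one (if needed) and takes a snapshot by swapping
 * buffers, and any other value clears the snapshot buffer:
 *
 *   echo 1 > snapshot
 *   cat snapshot
 *   echo 0 > snapshot
 */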
7766
7767 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7768 {
7769         struct seq_file *m = file->private_data;
7770         int ret;
7771
7772         ret = tracing_release(inode, file);
7773
7774         if (file->f_mode & FMODE_READ)
7775                 return ret;
7776
7777         /* If write only, the seq_file is just a stub */
7778         if (m)
7779                 kfree(m->private);
7780         kfree(m);
7781
7782         return 0;
7783 }
7784
7785 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7786 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7787                                     size_t count, loff_t *ppos);
7788 static int tracing_buffers_release(struct inode *inode, struct file *file);
7789 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7790                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7791
7792 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7793 {
7794         struct ftrace_buffer_info *info;
7795         int ret;
7796
7797         /* The following checks for tracefs lockdown */
7798         ret = tracing_buffers_open(inode, filp);
7799         if (ret < 0)
7800                 return ret;
7801
7802         info = filp->private_data;
7803
7804         if (info->iter.trace->use_max_tr) {
7805                 tracing_buffers_release(inode, filp);
7806                 return -EBUSY;
7807         }
7808
7809         info->iter.snapshot = true;
7810         info->iter.array_buffer = &info->iter.tr->max_buffer;
7811
7812         return ret;
7813 }
7814
7815 #endif /* CONFIG_TRACER_SNAPSHOT */
7816
7817
7818 static const struct file_operations tracing_thresh_fops = {
7819         .open           = tracing_open_generic,
7820         .read           = tracing_thresh_read,
7821         .write          = tracing_thresh_write,
7822         .llseek         = generic_file_llseek,
7823 };
7824
7825 #ifdef CONFIG_TRACER_MAX_TRACE
7826 static const struct file_operations tracing_max_lat_fops = {
7827         .open           = tracing_open_generic_tr,
7828         .read           = tracing_max_lat_read,
7829         .write          = tracing_max_lat_write,
7830         .llseek         = generic_file_llseek,
7831         .release        = tracing_release_generic_tr,
7832 };
7833 #endif
7834
7835 static const struct file_operations set_tracer_fops = {
7836         .open           = tracing_open_generic_tr,
7837         .read           = tracing_set_trace_read,
7838         .write          = tracing_set_trace_write,
7839         .llseek         = generic_file_llseek,
7840         .release        = tracing_release_generic_tr,
7841 };
7842
7843 static const struct file_operations tracing_pipe_fops = {
7844         .open           = tracing_open_pipe,
7845         .poll           = tracing_poll_pipe,
7846         .read           = tracing_read_pipe,
7847         .splice_read    = tracing_splice_read_pipe,
7848         .release        = tracing_release_pipe,
7849         .llseek         = no_llseek,
7850 };
7851
7852 static const struct file_operations tracing_entries_fops = {
7853         .open           = tracing_open_generic_tr,
7854         .read           = tracing_entries_read,
7855         .write          = tracing_entries_write,
7856         .llseek         = generic_file_llseek,
7857         .release        = tracing_release_generic_tr,
7858 };
7859
7860 static const struct file_operations tracing_total_entries_fops = {
7861         .open           = tracing_open_generic_tr,
7862         .read           = tracing_total_entries_read,
7863         .llseek         = generic_file_llseek,
7864         .release        = tracing_release_generic_tr,
7865 };
7866
7867 static const struct file_operations tracing_free_buffer_fops = {
7868         .open           = tracing_open_generic_tr,
7869         .write          = tracing_free_buffer_write,
7870         .release        = tracing_free_buffer_release,
7871 };
7872
7873 static const struct file_operations tracing_mark_fops = {
7874         .open           = tracing_mark_open,
7875         .write          = tracing_mark_write,
7876         .release        = tracing_release_generic_tr,
7877 };
7878
7879 static const struct file_operations tracing_mark_raw_fops = {
7880         .open           = tracing_mark_open,
7881         .write          = tracing_mark_raw_write,
7882         .release        = tracing_release_generic_tr,
7883 };
7884
7885 static const struct file_operations trace_clock_fops = {
7886         .open           = tracing_clock_open,
7887         .read           = seq_read,
7888         .llseek         = seq_lseek,
7889         .release        = tracing_single_release_tr,
7890         .write          = tracing_clock_write,
7891 };
7892
7893 static const struct file_operations trace_time_stamp_mode_fops = {
7894         .open           = tracing_time_stamp_mode_open,
7895         .read           = seq_read,
7896         .llseek         = seq_lseek,
7897         .release        = tracing_single_release_tr,
7898 };
7899
7900 #ifdef CONFIG_TRACER_SNAPSHOT
7901 static const struct file_operations snapshot_fops = {
7902         .open           = tracing_snapshot_open,
7903         .read           = seq_read,
7904         .write          = tracing_snapshot_write,
7905         .llseek         = tracing_lseek,
7906         .release        = tracing_snapshot_release,
7907 };
7908
7909 static const struct file_operations snapshot_raw_fops = {
7910         .open           = snapshot_raw_open,
7911         .read           = tracing_buffers_read,
7912         .release        = tracing_buffers_release,
7913         .splice_read    = tracing_buffers_splice_read,
7914         .llseek         = no_llseek,
7915 };
7916
7917 #endif /* CONFIG_TRACER_SNAPSHOT */
7918
7919 /*
7920  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7921  * @filp: The active open file structure
7922  * @ubuf: The userspace provided buffer to read the value from
7923  * @cnt: The maximum number of bytes to read
7924  * @ppos: The current "file" position
7925  *
7926  * This function implements the write interface for a struct trace_min_max_param.
7927  * The filp->private_data must point to a trace_min_max_param structure that
7928  * defines where to write the value, the min and the max acceptable values,
7929  * and a lock to protect the write.
7930  */
7931 static ssize_t
7932 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7933 {
7934         struct trace_min_max_param *param = filp->private_data;
7935         u64 val;
7936         int err;
7937
7938         if (!param)
7939                 return -EFAULT;
7940
7941         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7942         if (err)
7943                 return err;
7944
7945         if (param->lock)
7946                 mutex_lock(param->lock);
7947
7948         if (param->min && val < *param->min)
7949                 err = -EINVAL;
7950
7951         if (param->max && val > *param->max)
7952                 err = -EINVAL;
7953
7954         if (!err)
7955                 *param->val = val;
7956
7957         if (param->lock)
7958                 mutex_unlock(param->lock);
7959
7960         if (err)
7961                 return err;
7962
7963         return cnt;
7964 }
7965
7966 /*
7967  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7968  * @filp: The active open file structure
7969  * @ubuf: The userspace provided buffer to read value into
7970  * @cnt: The maximum number of bytes to read
7971  * @ppos: The current "file" position
7972  *
7973  * This function implements the read interface for a struct trace_min_max_param.
7974  * The filp->private_data must point to a trace_min_max_param struct with valid
7975  * data.
7976  */
7977 static ssize_t
7978 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7979 {
7980         struct trace_min_max_param *param = filp->private_data;
7981         char buf[U64_STR_SIZE];
7982         int len;
7983         u64 val;
7984
7985         if (!param)
7986                 return -EFAULT;
7987
7988         val = *param->val;
7989
7990         if (cnt > sizeof(buf))
7991                 cnt = sizeof(buf);
7992
7993         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7994
7995         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7996 }
7997
7998 const struct file_operations trace_min_max_fops = {
7999         .open           = tracing_open_generic,
8000         .read           = trace_min_max_read,
8001         .write          = trace_min_max_write,
8002 };
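
/*
 * Illustrative sketch (not part of this file): how a tracer might expose a
 * clamped u64 knob through trace_min_max_fops, assuming the lock/val/min/max
 * members of struct trace_min_max_param declared in trace.h. The names
 * my_period_us, my_period_min, my_period_max, my_period_lock, my_param and
 * my_dir are hypothetical.
 *
 *	static u64 my_period_us = 1000, my_period_min = 100, my_period_max = 1000000;
 *	static DEFINE_MUTEX(my_period_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_period_lock,
 *		.val	= &my_period_us,
 *		.min	= &my_period_min,
 *		.max	= &my_period_max,
 *	};
 *
 *	trace_create_file("period_us", TRACE_MODE_WRITE, my_dir,
 *			  &my_param, &trace_min_max_fops);
 *
 * Reads then return the current value, and writes outside [min, max] fail
 * with -EINVAL, as implemented above.
 */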
8003
8004 #define TRACING_LOG_ERRS_MAX    8
8005 #define TRACING_LOG_LOC_MAX     128
8006
8007 #define CMD_PREFIX "  Command: "
8008
8009 struct err_info {
8010         const char      **errs; /* ptr to loc-specific array of err strings */
8011         u8              type;   /* index into errs -> specific err string */
8012         u16             pos;    /* caret position */
8013         u64             ts;
8014 };
8015
8016 struct tracing_log_err {
8017         struct list_head        list;
8018         struct err_info         info;
8019         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
8020         char                    *cmd;                     /* what caused err */
8021 };
8022
8023 static DEFINE_MUTEX(tracing_err_log_lock);
8024
8025 static struct tracing_log_err *alloc_tracing_log_err(int len)
8026 {
8027         struct tracing_log_err *err;
8028
8029         err = kzalloc(sizeof(*err), GFP_KERNEL);
8030         if (!err)
8031                 return ERR_PTR(-ENOMEM);
8032
8033         err->cmd = kzalloc(len, GFP_KERNEL);
8034         if (!err->cmd) {
8035                 kfree(err);
8036                 return ERR_PTR(-ENOMEM);
8037         }
8038
8039         return err;
8040 }
8041
8042 static void free_tracing_log_err(struct tracing_log_err *err)
8043 {
8044         kfree(err->cmd);
8045         kfree(err);
8046 }
8047
8048 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
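/*
 * Get a tracing_log_err to fill in: allocate a new one while fewer than
 * TRACING_LOG_ERRS_MAX entries exist for this trace array, otherwise
 * recycle the oldest entry on tr->err_log and give it a freshly sized
 * cmd buffer.
 */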
8049                                                    int len)
8050 {
8051         struct tracing_log_err *err;
8052         char *cmd;
8053
8054         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8055                 err = alloc_tracing_log_err(len);
8056                 if (PTR_ERR(err) != -ENOMEM)
8057                         tr->n_err_log_entries++;
8058
8059                 return err;
8060         }
8061         cmd = kzalloc(len, GFP_KERNEL);
8062         if (!cmd)
8063                 return ERR_PTR(-ENOMEM);
8064         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8065         kfree(err->cmd);
8066         err->cmd = cmd;
8067         list_del(&err->list);
8068
8069         return err;
8070 }
8071
8072 /**
8073  * err_pos - find the position of a string within a command for error careting
8074  * @cmd: The tracing command that caused the error
8075  * @str: The string to position the caret at within @cmd
8076  *
8077  * Finds the position of the first occurrence of @str within @cmd.  The
8078  * return value can be passed to tracing_log_err() for caret placement
8079  * within @cmd.
8080  *
8081  * Returns the index within @cmd of the first occurrence of @str or 0
8082  * if @str was not found.
8083  */
8084 unsigned int err_pos(char *cmd, const char *str)
8085 {
8086         char *found;
8087
8088         if (WARN_ON(!strlen(cmd)))
8089                 return 0;
8090
8091         found = strstr(cmd, str);
8092         if (found)
8093                 return found - cmd;
8094
8095         return 0;
8096 }
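
/*
 * For example, err_pos("keys=common_pid:bad", "bad") returns 16, which a
 * caller can then pass as @pos to tracing_log_err() below (illustrative
 * strings, not a real command).
 */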
8097
8098 /**
8099  * tracing_log_err - write an error to the tracing error log
8100  * @tr: The associated trace array for the error (NULL for top level array)
8101  * @loc: A string describing where the error occurred
8102  * @cmd: The tracing command that caused the error
8103  * @errs: The array of loc-specific static error strings
8104  * @type: The index into errs[], which produces the specific static err string
8105  * @pos: The position the caret should be placed in the cmd
8106  *
8107  * Writes an error into tracing/error_log of the form:
8108  *
8109  * <loc>: error: <text>
8110  *   Command: <cmd>
8111  *              ^
8112  *
8113  * tracing/error_log is a small log file containing the last
8114  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8115  * unless there has been a tracing error, and the error log can be
8116  * cleared and its memory freed by writing the empty string to it in
8117  * truncation mode, i.e. echo > tracing/error_log.
8118  *
8119  * NOTE: the @errs array, along with the @type param, is used to
8120  * produce a static error string - this string is not copied and saved
8121  * when the error is logged - only a pointer to it is saved.  See
8122  * existing callers for examples of how static strings are typically
8123  * defined for use with tracing_log_err().
8124  */
8125 void tracing_log_err(struct trace_array *tr,
8126                      const char *loc, const char *cmd,
8127                      const char **errs, u8 type, u16 pos)
8128 {
8129         struct tracing_log_err *err;
8130         int len = 0;
8131
8132         if (!tr)
8133                 tr = &global_trace;
8134
8135         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8136
8137         mutex_lock(&tracing_err_log_lock);
8138         err = get_tracing_log_err(tr, len);
8139         if (PTR_ERR(err) == -ENOMEM) {
8140                 mutex_unlock(&tracing_err_log_lock);
8141                 return;
8142         }
8143
8144         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8145         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8146
8147         err->info.errs = errs;
8148         err->info.type = type;
8149         err->info.pos = pos;
8150         err->info.ts = local_clock();
8151
8152         list_add_tail(&err->list, &tr->err_log);
8153         mutex_unlock(&tracing_err_log_lock);
8154 }
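
/*
 * Illustrative sketch of a caller (hypothetical names): a command parser
 * with a static table of error strings could report a failure like this:
 *
 *	static const char *my_errs[] = {
 *		"Field not found",
 *		"Duplicate field",
 *	};
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_errs,
 *			MY_ERR_FIELD_NOT_FOUND, err_pos(cmd, field_str));
 *
 * where MY_ERR_FIELD_NOT_FOUND indexes my_errs[]. The hist trigger and
 * synthetic event code contain real callers.
 */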
8155
8156 static void clear_tracing_err_log(struct trace_array *tr)
8157 {
8158         struct tracing_log_err *err, *next;
8159
8160         mutex_lock(&tracing_err_log_lock);
8161         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8162                 list_del(&err->list);
8163                 free_tracing_log_err(err);
8164         }
8165
8166         tr->n_err_log_entries = 0;
8167         mutex_unlock(&tracing_err_log_lock);
8168 }
8169
8170 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8171 {
8172         struct trace_array *tr = m->private;
8173
8174         mutex_lock(&tracing_err_log_lock);
8175
8176         return seq_list_start(&tr->err_log, *pos);
8177 }
8178
8179 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8180 {
8181         struct trace_array *tr = m->private;
8182
8183         return seq_list_next(v, &tr->err_log, pos);
8184 }
8185
8186 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8187 {
8188         mutex_unlock(&tracing_err_log_lock);
8189 }
8190
8191 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8192 {
8193         u16 i;
8194
8195         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8196                 seq_putc(m, ' ');
8197         for (i = 0; i < pos; i++)
8198                 seq_putc(m, ' ');
8199         seq_puts(m, "^\n");
8200 }
8201
8202 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8203 {
8204         struct tracing_log_err *err = v;
8205
8206         if (err) {
8207                 const char *err_text = err->info.errs[err->info.type];
8208                 u64 sec = err->info.ts;
8209                 u32 nsec;
8210
8211                 nsec = do_div(sec, NSEC_PER_SEC);
8212                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8213                            err->loc, err_text);
8214                 seq_printf(m, "%s", err->cmd);
8215                 tracing_err_log_show_pos(m, err->info.pos);
8216         }
8217
8218         return 0;
8219 }
8220
8221 static const struct seq_operations tracing_err_log_seq_ops = {
8222         .start  = tracing_err_log_seq_start,
8223         .next   = tracing_err_log_seq_next,
8224         .stop   = tracing_err_log_seq_stop,
8225         .show   = tracing_err_log_seq_show
8226 };
8227
8228 static int tracing_err_log_open(struct inode *inode, struct file *file)
8229 {
8230         struct trace_array *tr = inode->i_private;
8231         int ret = 0;
8232
8233         ret = tracing_check_open_get_tr(tr);
8234         if (ret)
8235                 return ret;
8236
8237         /* If this file was opened for write, then erase contents */
8238         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8239                 clear_tracing_err_log(tr);
8240
8241         if (file->f_mode & FMODE_READ) {
8242                 ret = seq_open(file, &tracing_err_log_seq_ops);
8243                 if (!ret) {
8244                         struct seq_file *m = file->private_data;
8245                         m->private = tr;
8246                 } else {
8247                         trace_array_put(tr);
8248                 }
8249         }
8250         return ret;
8251 }
8252
8253 static ssize_t tracing_err_log_write(struct file *file,
8254                                      const char __user *buffer,
8255                                      size_t count, loff_t *ppos)
8256 {
8257         return count;
8258 }
8259
8260 static int tracing_err_log_release(struct inode *inode, struct file *file)
8261 {
8262         struct trace_array *tr = inode->i_private;
8263
8264         trace_array_put(tr);
8265
8266         if (file->f_mode & FMODE_READ)
8267                 seq_release(inode, file);
8268
8269         return 0;
8270 }
8271
8272 static const struct file_operations tracing_err_log_fops = {
8273         .open           = tracing_err_log_open,
8274         .write          = tracing_err_log_write,
8275         .read           = seq_read,
8276         .llseek         = tracing_lseek,
8277         .release        = tracing_err_log_release,
8278 };
8279
8280 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8281 {
8282         struct trace_array *tr = inode->i_private;
8283         struct ftrace_buffer_info *info;
8284         int ret;
8285
8286         ret = tracing_check_open_get_tr(tr);
8287         if (ret)
8288                 return ret;
8289
8290         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8291         if (!info) {
8292                 trace_array_put(tr);
8293                 return -ENOMEM;
8294         }
8295
8296         mutex_lock(&trace_types_lock);
8297
8298         info->iter.tr           = tr;
8299         info->iter.cpu_file     = tracing_get_cpu(inode);
8300         info->iter.trace        = tr->current_trace;
8301         info->iter.array_buffer = &tr->array_buffer;
8302         info->spare             = NULL;
8303         /* Force reading ring buffer for first read */
8304         info->read              = (unsigned int)-1;
8305
8306         filp->private_data = info;
8307
8308         tr->trace_ref++;
8309
8310         mutex_unlock(&trace_types_lock);
8311
8312         ret = nonseekable_open(inode, filp);
8313         if (ret < 0)
8314                 trace_array_put(tr);
8315
8316         return ret;
8317 }
8318
8319 static __poll_t
8320 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8321 {
8322         struct ftrace_buffer_info *info = filp->private_data;
8323         struct trace_iterator *iter = &info->iter;
8324
8325         return trace_poll(iter, filp, poll_table);
8326 }
8327
8328 static ssize_t
8329 tracing_buffers_read(struct file *filp, char __user *ubuf,
8330                      size_t count, loff_t *ppos)
8331 {
8332         struct ftrace_buffer_info *info = filp->private_data;
8333         struct trace_iterator *iter = &info->iter;
8334         void *trace_data;
8335         int page_size;
8336         ssize_t ret = 0;
8337         ssize_t size;
8338
8339         if (!count)
8340                 return 0;
8341
8342 #ifdef CONFIG_TRACER_MAX_TRACE
8343         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8344                 return -EBUSY;
8345 #endif
8346
8347         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8348
8349         /* Make sure the spare matches the current sub buffer size */
8350         if (info->spare) {
8351                 if (page_size != info->spare_size) {
8352                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8353                                                    info->spare_cpu, info->spare);
8354                         info->spare = NULL;
8355                 }
8356         }
8357
8358         if (!info->spare) {
8359                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8360                                                           iter->cpu_file);
8361                 if (IS_ERR(info->spare)) {
8362                         ret = PTR_ERR(info->spare);
8363                         info->spare = NULL;
8364                 } else {
8365                         info->spare_cpu = iter->cpu_file;
8366                         info->spare_size = page_size;
8367                 }
8368         }
8369         if (!info->spare)
8370                 return ret;
8371
8372         /* Do we have previous read data to read? */
8373         if (info->read < page_size)
8374                 goto read;
8375
8376  again:
8377         trace_access_lock(iter->cpu_file);
8378         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8379                                     info->spare,
8380                                     count,
8381                                     iter->cpu_file, 0);
8382         trace_access_unlock(iter->cpu_file);
8383
8384         if (ret < 0) {
8385                 if (trace_empty(iter)) {
8386                         if ((filp->f_flags & O_NONBLOCK))
8387                                 return -EAGAIN;
8388
8389                         ret = wait_on_pipe(iter, 0);
8390                         if (ret)
8391                                 return ret;
8392
8393                         goto again;
8394                 }
8395                 return 0;
8396         }
8397
8398         info->read = 0;
8399  read:
8400         size = page_size - info->read;
8401         if (size > count)
8402                 size = count;
8403         trace_data = ring_buffer_read_page_data(info->spare);
8404         ret = copy_to_user(ubuf, trace_data + info->read, size);
8405         if (ret == size)
8406                 return -EFAULT;
8407
8408         size -= ret;
8409
8410         *ppos += size;
8411         info->read += size;
8412
8413         return size;
8414 }
8415
8416 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8417 {
8418         struct ftrace_buffer_info *info = file->private_data;
8419         struct trace_iterator *iter = &info->iter;
8420
8421         iter->closed = true;
8422         /* Make sure the waiters see the new wait_index */
8423         (void)atomic_fetch_inc_release(&iter->wait_index);
8424
8425         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8426
8427         return 0;
8428 }
8429
8430 static int tracing_buffers_release(struct inode *inode, struct file *file)
8431 {
8432         struct ftrace_buffer_info *info = file->private_data;
8433         struct trace_iterator *iter = &info->iter;
8434
8435         mutex_lock(&trace_types_lock);
8436
8437         iter->tr->trace_ref--;
8438
8439         __trace_array_put(iter->tr);
8440
8441         if (info->spare)
8442                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8443                                            info->spare_cpu, info->spare);
8444         kvfree(info);
8445
8446         mutex_unlock(&trace_types_lock);
8447
8448         return 0;
8449 }
8450
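/*
 * A buffer_ref pins one ring-buffer read page while it is referenced from
 * a pipe. When the last reference (pipe buffer or splice descriptor) is
 * dropped, the page is handed back to the ring buffer and the ref is freed.
 */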
8451 struct buffer_ref {
8452         struct trace_buffer     *buffer;
8453         void                    *page;
8454         int                     cpu;
8455         refcount_t              refcount;
8456 };
8457
8458 static void buffer_ref_release(struct buffer_ref *ref)
8459 {
8460         if (!refcount_dec_and_test(&ref->refcount))
8461                 return;
8462         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8463         kfree(ref);
8464 }
8465
8466 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8467                                     struct pipe_buffer *buf)
8468 {
8469         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8470
8471         buffer_ref_release(ref);
8472         buf->private = 0;
8473 }
8474
8475 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8476                                 struct pipe_buffer *buf)
8477 {
8478         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8479
8480         if (refcount_read(&ref->refcount) > INT_MAX/2)
8481                 return false;
8482
8483         refcount_inc(&ref->refcount);
8484         return true;
8485 }
8486
8487 /* Pipe buffer operations for a buffer. */
8488 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8489         .release                = buffer_pipe_buf_release,
8490         .get                    = buffer_pipe_buf_get,
8491 };
8492
8493 /*
8494  * Callback from splice_to_pipe(), if we need to release some pages
8495  * at the end of the spd in case we errored out while filling the pipe.
8496  */
8497 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8498 {
8499         struct buffer_ref *ref =
8500                 (struct buffer_ref *)spd->partial[i].private;
8501
8502         buffer_ref_release(ref);
8503         spd->partial[i].private = 0;
8504 }
8505
8506 static ssize_t
8507 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8508                             struct pipe_inode_info *pipe, size_t len,
8509                             unsigned int flags)
8510 {
8511         struct ftrace_buffer_info *info = file->private_data;
8512         struct trace_iterator *iter = &info->iter;
8513         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8514         struct page *pages_def[PIPE_DEF_BUFFERS];
8515         struct splice_pipe_desc spd = {
8516                 .pages          = pages_def,
8517                 .partial        = partial_def,
8518                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8519                 .ops            = &buffer_pipe_buf_ops,
8520                 .spd_release    = buffer_spd_release,
8521         };
8522         struct buffer_ref *ref;
8523         bool woken = false;
8524         int page_size;
8525         int entries, i;
8526         ssize_t ret = 0;
8527
8528 #ifdef CONFIG_TRACER_MAX_TRACE
8529         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8530                 return -EBUSY;
8531 #endif
8532
8533         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8534         if (*ppos & (page_size - 1))
8535                 return -EINVAL;
8536
8537         if (len & (page_size - 1)) {
8538                 if (len < page_size)
8539                         return -EINVAL;
8540                 len &= (~(page_size - 1));
8541         }
8542
8543         if (splice_grow_spd(pipe, &spd))
8544                 return -ENOMEM;
8545
8546  again:
8547         trace_access_lock(iter->cpu_file);
8548         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8549
8550         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8551                 struct page *page;
8552                 int r;
8553
8554                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8555                 if (!ref) {
8556                         ret = -ENOMEM;
8557                         break;
8558                 }
8559
8560                 refcount_set(&ref->refcount, 1);
8561                 ref->buffer = iter->array_buffer->buffer;
8562                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8563                 if (IS_ERR(ref->page)) {
8564                         ret = PTR_ERR(ref->page);
8565                         ref->page = NULL;
8566                         kfree(ref);
8567                         break;
8568                 }
8569                 ref->cpu = iter->cpu_file;
8570
8571                 r = ring_buffer_read_page(ref->buffer, ref->page,
8572                                           len, iter->cpu_file, 1);
8573                 if (r < 0) {
8574                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8575                                                    ref->page);
8576                         kfree(ref);
8577                         break;
8578                 }
8579
8580                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8581
8582                 spd.pages[i] = page;
8583                 spd.partial[i].len = page_size;
8584                 spd.partial[i].offset = 0;
8585                 spd.partial[i].private = (unsigned long)ref;
8586                 spd.nr_pages++;
8587                 *ppos += page_size;
8588
8589                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8590         }
8591
8592         trace_access_unlock(iter->cpu_file);
8593         spd.nr_pages = i;
8594
8595         /* did we read anything? */
8596         if (!spd.nr_pages) {
8597
8598                 if (ret)
8599                         goto out;
8600
8601                 if (woken)
8602                         goto out;
8603
8604                 ret = -EAGAIN;
8605                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8606                         goto out;
8607
8608                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8609                 if (ret)
8610                         goto out;
8611
8612                 /* No need to wait after waking up when tracing is off */
8613                 if (!tracer_tracing_is_on(iter->tr))
8614                         goto out;
8615
8616                 /* Iterate one more time to collect any new data then exit */
8617                 woken = true;
8618
8619                 goto again;
8620         }
8621
8622         ret = splice_to_pipe(pipe, &spd);
8623 out:
8624         splice_shrink_spd(&spd);
8625
8626         return ret;
8627 }
8628
8629 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8630 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8631 {
8632         struct ftrace_buffer_info *info = file->private_data;
8633         struct trace_iterator *iter = &info->iter;
8634
8635         if (cmd)
8636                 return -ENOIOCTLCMD;
8637
8638         mutex_lock(&trace_types_lock);
8639
8640         /* Make sure the waiters see the new wait_index */
8641         (void)atomic_fetch_inc_release(&iter->wait_index);
8642
8643         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8644
8645         mutex_unlock(&trace_types_lock);
8646         return 0;
8647 }
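
/*
 * Illustrative user-space sketch (hypothetical; the path assumes tracefs is
 * mounted at /sys/kernel/tracing): another thread, or another open of the
 * same per-cpu file, can unblock a reader waiting in read()/splice() on
 * trace_pipe_raw with:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ioctl(fd, 0);	// cmd must be 0; anything else returns -ENOIOCTLCMD
 */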
8648
8649 static const struct file_operations tracing_buffers_fops = {
8650         .open           = tracing_buffers_open,
8651         .read           = tracing_buffers_read,
8652         .poll           = tracing_buffers_poll,
8653         .release        = tracing_buffers_release,
8654         .flush          = tracing_buffers_flush,
8655         .splice_read    = tracing_buffers_splice_read,
8656         .unlocked_ioctl = tracing_buffers_ioctl,
8657         .llseek         = no_llseek,
8658 };
8659
8660 static ssize_t
8661 tracing_stats_read(struct file *filp, char __user *ubuf,
8662                    size_t count, loff_t *ppos)
8663 {
8664         struct inode *inode = file_inode(filp);
8665         struct trace_array *tr = inode->i_private;
8666         struct array_buffer *trace_buf = &tr->array_buffer;
8667         int cpu = tracing_get_cpu(inode);
8668         struct trace_seq *s;
8669         unsigned long cnt;
8670         unsigned long long t;
8671         unsigned long usec_rem;
8672
8673         s = kmalloc(sizeof(*s), GFP_KERNEL);
8674         if (!s)
8675                 return -ENOMEM;
8676
8677         trace_seq_init(s);
8678
8679         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8680         trace_seq_printf(s, "entries: %ld\n", cnt);
8681
8682         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8683         trace_seq_printf(s, "overrun: %ld\n", cnt);
8684
8685         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8686         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8687
8688         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8689         trace_seq_printf(s, "bytes: %ld\n", cnt);
8690
8691         if (trace_clocks[tr->clock_id].in_ns) {
8692                 /* local or global for trace_clock */
8693                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8694                 usec_rem = do_div(t, USEC_PER_SEC);
8695                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8696                                                                 t, usec_rem);
8697
8698                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8699                 usec_rem = do_div(t, USEC_PER_SEC);
8700                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8701         } else {
8702                 /* counter or tsc mode for trace_clock */
8703                 trace_seq_printf(s, "oldest event ts: %llu\n",
8704                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8705
8706                 trace_seq_printf(s, "now ts: %llu\n",
8707                                 ring_buffer_time_stamp(trace_buf->buffer));
8708         }
8709
8710         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8711         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8712
8713         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8714         trace_seq_printf(s, "read events: %ld\n", cnt);
8715
8716         count = simple_read_from_buffer(ubuf, count, ppos,
8717                                         s->buffer, trace_seq_used(s));
8718
8719         kfree(s);
8720
8721         return count;
8722 }
8723
8724 static const struct file_operations tracing_stats_fops = {
8725         .open           = tracing_open_generic_tr,
8726         .read           = tracing_stats_read,
8727         .llseek         = generic_file_llseek,
8728         .release        = tracing_release_generic_tr,
8729 };
8730
8731 #ifdef CONFIG_DYNAMIC_FTRACE
8732
8733 static ssize_t
8734 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8735                   size_t cnt, loff_t *ppos)
8736 {
8737         ssize_t ret;
8738         char *buf;
8739         int r;
8740
8741         /* 256 should be plenty to hold the amount needed */
8742         buf = kmalloc(256, GFP_KERNEL);
8743         if (!buf)
8744                 return -ENOMEM;
8745
8746         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8747                       ftrace_update_tot_cnt,
8748                       ftrace_number_of_pages,
8749                       ftrace_number_of_groups);
8750
8751         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8752         kfree(buf);
8753         return ret;
8754 }
8755
8756 static const struct file_operations tracing_dyn_info_fops = {
8757         .open           = tracing_open_generic,
8758         .read           = tracing_read_dyn_info,
8759         .llseek         = generic_file_llseek,
8760 };
8761 #endif /* CONFIG_DYNAMIC_FTRACE */
8762
8763 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8764 static void
8765 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8766                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8767                 void *data)
8768 {
8769         tracing_snapshot_instance(tr);
8770 }
8771
8772 static void
8773 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8774                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8775                       void *data)
8776 {
8777         struct ftrace_func_mapper *mapper = data;
8778         long *count = NULL;
8779
8780         if (mapper)
8781                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8782
8783         if (count) {
8784
8785                 if (*count <= 0)
8786                         return;
8787
8788                 (*count)--;
8789         }
8790
8791         tracing_snapshot_instance(tr);
8792 }
8793
8794 static int
8795 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8796                       struct ftrace_probe_ops *ops, void *data)
8797 {
8798         struct ftrace_func_mapper *mapper = data;
8799         long *count = NULL;
8800
8801         seq_printf(m, "%ps:", (void *)ip);
8802
8803         seq_puts(m, "snapshot");
8804
8805         if (mapper)
8806                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8807
8808         if (count)
8809                 seq_printf(m, ":count=%ld\n", *count);
8810         else
8811                 seq_puts(m, ":unlimited\n");
8812
8813         return 0;
8814 }
8815
8816 static int
8817 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8818                      unsigned long ip, void *init_data, void **data)
8819 {
8820         struct ftrace_func_mapper *mapper = *data;
8821
8822         if (!mapper) {
8823                 mapper = allocate_ftrace_func_mapper();
8824                 if (!mapper)
8825                         return -ENOMEM;
8826                 *data = mapper;
8827         }
8828
8829         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8830 }
8831
8832 static void
8833 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8834                      unsigned long ip, void *data)
8835 {
8836         struct ftrace_func_mapper *mapper = data;
8837
8838         if (!ip) {
8839                 if (!mapper)
8840                         return;
8841                 free_ftrace_func_mapper(mapper, NULL);
8842                 return;
8843         }
8844
8845         ftrace_func_mapper_remove_ip(mapper, ip);
8846 }
8847
8848 static struct ftrace_probe_ops snapshot_probe_ops = {
8849         .func                   = ftrace_snapshot,
8850         .print                  = ftrace_snapshot_print,
8851 };
8852
8853 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8854         .func                   = ftrace_count_snapshot,
8855         .print                  = ftrace_snapshot_print,
8856         .init                   = ftrace_snapshot_init,
8857         .free                   = ftrace_snapshot_free,
8858 };
8859
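/*
 * Handles the "snapshot" function command of set_ftrace_filter (see
 * Documentation/trace/ftrace.rst), e.g.:
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *
 * The optional count limits how many snapshots the probe will take, and
 * prefixing the glob with '!' removes the probe again.
 */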
8860 static int
8861 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8862                                char *glob, char *cmd, char *param, int enable)
8863 {
8864         struct ftrace_probe_ops *ops;
8865         void *count = (void *)-1;
8866         char *number;
8867         int ret;
8868
8869         if (!tr)
8870                 return -ENODEV;
8871
8872         /* hash funcs only work with set_ftrace_filter */
8873         if (!enable)
8874                 return -EINVAL;
8875
8876         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8877
8878         if (glob[0] == '!')
8879                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8880
8881         if (!param)
8882                 goto out_reg;
8883
8884         number = strsep(&param, ":");
8885
8886         if (!strlen(number))
8887                 goto out_reg;
8888
8889         /*
8890          * We use the callback data field (which is a pointer)
8891          * as our counter.
8892          */
8893         ret = kstrtoul(number, 0, (unsigned long *)&count);
8894         if (ret)
8895                 return ret;
8896
8897  out_reg:
8898         ret = tracing_alloc_snapshot_instance(tr);
8899         if (ret < 0)
8900                 goto out;
8901
8902         ret = register_ftrace_function_probe(glob, tr, ops, count);
8903
8904  out:
8905         return ret < 0 ? ret : 0;
8906 }
8907
8908 static struct ftrace_func_command ftrace_snapshot_cmd = {
8909         .name                   = "snapshot",
8910         .func                   = ftrace_trace_snapshot_callback,
8911 };
8912
8913 static __init int register_snapshot_cmd(void)
8914 {
8915         return register_ftrace_command(&ftrace_snapshot_cmd);
8916 }
8917 #else
8918 static inline __init int register_snapshot_cmd(void) { return 0; }
8919 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8920
8921 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8922 {
8923         if (WARN_ON(!tr->dir))
8924                 return ERR_PTR(-ENODEV);
8925
8926         /* Top directory uses NULL as the parent */
8927         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8928                 return NULL;
8929
8930         /* All other trace arrays (instances) have their own dentry */
8931         return tr->dir;
8932 }
8933
8934 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8935 {
8936         struct dentry *d_tracer;
8937
8938         if (tr->percpu_dir)
8939                 return tr->percpu_dir;
8940
8941         d_tracer = tracing_get_dentry(tr);
8942         if (IS_ERR(d_tracer))
8943                 return NULL;
8944
8945         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8946
8947         MEM_FAIL(!tr->percpu_dir,
8948                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8949
8950         return tr->percpu_dir;
8951 }
8952
8953 static struct dentry *
8954 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8955                       void *data, long cpu, const struct file_operations *fops)
8956 {
8957         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8958
8959         if (ret) /* See tracing_get_cpu() */
8960                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8961         return ret;
8962 }
8963
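/*
 * Create the per_cpu/cpu<N> directory for @cpu and populate it with the
 * per-CPU files: trace, trace_pipe, trace_pipe_raw, stats, buffer_size_kb
 * and, when CONFIG_TRACER_SNAPSHOT is enabled, snapshot and snapshot_raw.
 */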
8964 static void
8965 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8966 {
8967         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8968         struct dentry *d_cpu;
8969         char cpu_dir[30]; /* 30 characters should be more than enough */
8970
8971         if (!d_percpu)
8972                 return;
8973
8974         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8975         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8976         if (!d_cpu) {
8977                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8978                 return;
8979         }
8980
8981         /* per cpu trace_pipe */
8982         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8983                                 tr, cpu, &tracing_pipe_fops);
8984
8985         /* per cpu trace */
8986         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8987                                 tr, cpu, &tracing_fops);
8988
8989         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8990                                 tr, cpu, &tracing_buffers_fops);
8991
8992         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8993                                 tr, cpu, &tracing_stats_fops);
8994
8995         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8996                                 tr, cpu, &tracing_entries_fops);
8997
8998 #ifdef CONFIG_TRACER_SNAPSHOT
8999         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9000                                 tr, cpu, &snapshot_fops);
9001
9002         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9003                                 tr, cpu, &snapshot_raw_fops);
9004 #endif
9005 }
9006
9007 #ifdef CONFIG_FTRACE_SELFTEST
9008 /* Let selftest have access to static functions in this file */
9009 #include "trace_selftest.c"
9010 #endif
9011
9012 static ssize_t
9013 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9014                         loff_t *ppos)
9015 {
9016         struct trace_option_dentry *topt = filp->private_data;
9017         char *buf;
9018
9019         if (topt->flags->val & topt->opt->bit)
9020                 buf = "1\n";
9021         else
9022                 buf = "0\n";
9023
9024         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9025 }
9026
9027 static ssize_t
9028 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9029                          loff_t *ppos)
9030 {
9031         struct trace_option_dentry *topt = filp->private_data;
9032         unsigned long val;
9033         int ret;
9034
9035         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9036         if (ret)
9037                 return ret;
9038
9039         if (val != 0 && val != 1)
9040                 return -EINVAL;
9041
9042         if (!!(topt->flags->val & topt->opt->bit) != val) {
9043                 mutex_lock(&trace_types_lock);
9044                 ret = __set_tracer_option(topt->tr, topt->flags,
9045                                           topt->opt, !val);
9046                 mutex_unlock(&trace_types_lock);
9047                 if (ret)
9048                         return ret;
9049         }
9050
9051         *ppos += cnt;
9052
9053         return cnt;
9054 }
9055
9056 static int tracing_open_options(struct inode *inode, struct file *filp)
9057 {
9058         struct trace_option_dentry *topt = inode->i_private;
9059         int ret;
9060
9061         ret = tracing_check_open_get_tr(topt->tr);
9062         if (ret)
9063                 return ret;
9064
9065         filp->private_data = inode->i_private;
9066         return 0;
9067 }
9068
9069 static int tracing_release_options(struct inode *inode, struct file *file)
9070 {
9071         struct trace_option_dentry *topt = file->private_data;
9072
9073         trace_array_put(topt->tr);
9074         return 0;
9075 }
9076
9077 static const struct file_operations trace_options_fops = {
9078         .open = tracing_open_options,
9079         .read = trace_options_read,
9080         .write = trace_options_write,
9081         .llseek = generic_file_llseek,
9082         .release = tracing_release_options,
9083 };
9084
9085 /*
9086  * In order to pass in both the trace_array descriptor and the index
9087  * to the flag that the trace option file represents, the trace_array
9088  * has a character array of trace_flags_index[], which holds the index
9089  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9090  * The address of this character array is passed to the flag option file
9091  * read/write callbacks.
9092  *
9093  * In order to extract both the index and the trace_array descriptor,
9094  * get_tr_index() uses the following algorithm.
9095  *
9096  *   idx = *ptr;
9097  *
9098  * The pointer points at one entry of the index array, and because
9099  * index[i] == i, dereferencing it yields the index itself.
9100  *
9101  * To get the trace_array descriptor, subtract that index from the
9102  * pointer to reach the start of the array:
9103  *
9104  *   ptr - idx == &index[0]
9105  *
9106  * Then a simple container_of() from that pointer gets us to the
9107  * trace_array descriptor.
9108  */
9109 static void get_tr_index(void *data, struct trace_array **ptr,
9110                          unsigned int *pindex)
9111 {
9112         *pindex = *(unsigned char *)data;
9113
9114         *ptr = container_of(data - *pindex, struct trace_array,
9115                             trace_flags_index);
9116 }
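
/*
 * Worked example: if data == &tr->trace_flags_index[5], then *pindex is 5,
 * data - 5 is &tr->trace_flags_index[0], and container_of() on that address
 * recovers the enclosing trace_array.
 */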
9117
9118 static ssize_t
9119 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9120                         loff_t *ppos)
9121 {
9122         void *tr_index = filp->private_data;
9123         struct trace_array *tr;
9124         unsigned int index;
9125         char *buf;
9126
9127         get_tr_index(tr_index, &tr, &index);
9128
9129         if (tr->trace_flags & (1 << index))
9130                 buf = "1\n";
9131         else
9132                 buf = "0\n";
9133
9134         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9135 }
9136
9137 static ssize_t
9138 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9139                          loff_t *ppos)
9140 {
9141         void *tr_index = filp->private_data;
9142         struct trace_array *tr;
9143         unsigned int index;
9144         unsigned long val;
9145         int ret;
9146
9147         get_tr_index(tr_index, &tr, &index);
9148
9149         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9150         if (ret)
9151                 return ret;
9152
9153         if (val != 0 && val != 1)
9154                 return -EINVAL;
9155
9156         mutex_lock(&event_mutex);
9157         mutex_lock(&trace_types_lock);
9158         ret = set_tracer_flag(tr, 1 << index, val);
9159         mutex_unlock(&trace_types_lock);
9160         mutex_unlock(&event_mutex);
9161
9162         if (ret < 0)
9163                 return ret;
9164
9165         *ppos += cnt;
9166
9167         return cnt;
9168 }
9169
9170 static const struct file_operations trace_options_core_fops = {
9171         .open = tracing_open_generic,
9172         .read = trace_options_core_read,
9173         .write = trace_options_core_write,
9174         .llseek = generic_file_llseek,
9175 };
9176
9177 struct dentry *trace_create_file(const char *name,
9178                                  umode_t mode,
9179                                  struct dentry *parent,
9180                                  void *data,
9181                                  const struct file_operations *fops)
9182 {
9183         struct dentry *ret;
9184
9185         ret = tracefs_create_file(name, mode, parent, data, fops);
9186         if (!ret)
9187                 pr_warn("Could not create tracefs '%s' entry\n", name);
9188
9189         return ret;
9190 }
9191
9192
9193 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9194 {
9195         struct dentry *d_tracer;
9196
9197         if (tr->options)
9198                 return tr->options;
9199
9200         d_tracer = tracing_get_dentry(tr);
9201         if (IS_ERR(d_tracer))
9202                 return NULL;
9203
9204         tr->options = tracefs_create_dir("options", d_tracer);
9205         if (!tr->options) {
9206                 pr_warn("Could not create tracefs directory 'options'\n");
9207                 return NULL;
9208         }
9209
9210         return tr->options;
9211 }
9212
9213 static void
9214 create_trace_option_file(struct trace_array *tr,
9215                          struct trace_option_dentry *topt,
9216                          struct tracer_flags *flags,
9217                          struct tracer_opt *opt)
9218 {
9219         struct dentry *t_options;
9220
9221         t_options = trace_options_init_dentry(tr);
9222         if (!t_options)
9223                 return;
9224
9225         topt->flags = flags;
9226         topt->opt = opt;
9227         topt->tr = tr;
9228
9229         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9230                                         t_options, topt, &trace_options_fops);
9231
9232 }
9233
9234 static void
9235 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9236 {
9237         struct trace_option_dentry *topts;
9238         struct trace_options *tr_topts;
9239         struct tracer_flags *flags;
9240         struct tracer_opt *opts;
9241         int cnt;
9242         int i;
9243
9244         if (!tracer)
9245                 return;
9246
9247         flags = tracer->flags;
9248
9249         if (!flags || !flags->opts)
9250                 return;
9251
9252         /*
9253          * If this is an instance, only create flags for tracers
9254          * the instance may have.
9255          */
9256         if (!trace_ok_for_array(tracer, tr))
9257                 return;
9258
9259         for (i = 0; i < tr->nr_topts; i++) {
9260                 /* Make sure there are no duplicate flags. */
9261                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9262                         return;
9263         }
9264
9265         opts = flags->opts;
9266
9267         for (cnt = 0; opts[cnt].name; cnt++)
9268                 ;
9269
9270         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9271         if (!topts)
9272                 return;
9273
9274         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9275                             GFP_KERNEL);
9276         if (!tr_topts) {
9277                 kfree(topts);
9278                 return;
9279         }
9280
9281         tr->topts = tr_topts;
9282         tr->topts[tr->nr_topts].tracer = tracer;
9283         tr->topts[tr->nr_topts].topts = topts;
9284         tr->nr_topts++;
9285
9286         for (cnt = 0; opts[cnt].name; cnt++) {
9287                 create_trace_option_file(tr, &topts[cnt], flags,
9288                                          &opts[cnt]);
9289                 MEM_FAIL(topts[cnt].entry == NULL,
9290                           "Failed to create trace option: %s",
9291                           opts[cnt].name);
9292         }
9293 }
9294
9295 static struct dentry *
9296 create_trace_option_core_file(struct trace_array *tr,
9297                               const char *option, long index)
9298 {
9299         struct dentry *t_options;
9300
9301         t_options = trace_options_init_dentry(tr);
9302         if (!t_options)
9303                 return NULL;
9304
9305         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9306                                  (void *)&tr->trace_flags_index[index],
9307                                  &trace_options_core_fops);
9308 }
9309
9310 static void create_trace_options_dir(struct trace_array *tr)
9311 {
9312         struct dentry *t_options;
9313         bool top_level = tr == &global_trace;
9314         int i;
9315
9316         t_options = trace_options_init_dentry(tr);
9317         if (!t_options)
9318                 return;
9319
9320         for (i = 0; trace_options[i]; i++) {
9321                 if (top_level ||
9322                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9323                         create_trace_option_core_file(tr, trace_options[i], i);
9324         }
9325 }
9326
9327 static ssize_t
9328 rb_simple_read(struct file *filp, char __user *ubuf,
9329                size_t cnt, loff_t *ppos)
9330 {
9331         struct trace_array *tr = filp->private_data;
9332         char buf[64];
9333         int r;
9334
9335         r = tracer_tracing_is_on(tr);
9336         r = sprintf(buf, "%d\n", r);
9337
9338         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9339 }
9340
9341 static ssize_t
9342 rb_simple_write(struct file *filp, const char __user *ubuf,
9343                 size_t cnt, loff_t *ppos)
9344 {
9345         struct trace_array *tr = filp->private_data;
9346         struct trace_buffer *buffer = tr->array_buffer.buffer;
9347         unsigned long val;
9348         int ret;
9349
9350         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9351         if (ret)
9352                 return ret;
9353
9354         if (buffer) {
9355                 mutex_lock(&trace_types_lock);
9356                 if (!!val == tracer_tracing_is_on(tr)) {
9357                         val = 0; /* do nothing */
9358                 } else if (val) {
9359                         tracer_tracing_on(tr);
9360                         if (tr->current_trace->start)
9361                                 tr->current_trace->start(tr);
9362                 } else {
9363                         tracer_tracing_off(tr);
9364                         if (tr->current_trace->stop)
9365                                 tr->current_trace->stop(tr);
9366                         /* Wake up any waiters */
9367                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9368                 }
9369                 mutex_unlock(&trace_types_lock);
9370         }
9371
9372         (*ppos)++;
9373
9374         return cnt;
9375 }
9376
9377 static const struct file_operations rb_simple_fops = {
9378         .open           = tracing_open_generic_tr,
9379         .read           = rb_simple_read,
9380         .write          = rb_simple_write,
9381         .release        = tracing_release_generic_tr,
9382         .llseek         = default_llseek,
9383 };
9384
9385 static ssize_t
9386 buffer_percent_read(struct file *filp, char __user *ubuf,
9387                     size_t cnt, loff_t *ppos)
9388 {
9389         struct trace_array *tr = filp->private_data;
9390         char buf[64];
9391         int r;
9392
9393         r = tr->buffer_percent;
9394         r = sprintf(buf, "%d\n", r);
9395
9396         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9397 }
9398
9399 static ssize_t
9400 buffer_percent_write(struct file *filp, const char __user *ubuf,
9401                      size_t cnt, loff_t *ppos)
9402 {
9403         struct trace_array *tr = filp->private_data;
9404         unsigned long val;
9405         int ret;
9406
9407         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9408         if (ret)
9409                 return ret;
9410
9411         if (val > 100)
9412                 return -EINVAL;
9413
9414         tr->buffer_percent = val;
9415
9416         (*ppos)++;
9417
9418         return cnt;
9419 }
9420
9421 static const struct file_operations buffer_percent_fops = {
9422         .open           = tracing_open_generic_tr,
9423         .read           = buffer_percent_read,
9424         .write          = buffer_percent_write,
9425         .release        = tracing_release_generic_tr,
9426         .llseek         = default_llseek,
9427 };
9428
9429 static ssize_t
9430 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9431 {
9432         struct trace_array *tr = filp->private_data;
9433         size_t size;
9434         char buf[64];
9435         int order;
9436         int r;
9437
9438         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9439         size = (PAGE_SIZE << order) / 1024;
9440
9441         r = sprintf(buf, "%zd\n", size);
9442
9443         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9444 }
9445
9446 static ssize_t
9447 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9448                          size_t cnt, loff_t *ppos)
9449 {
9450         struct trace_array *tr = filp->private_data;
9451         unsigned long val;
9452         int old_order;
9453         int order;
9454         int pages;
9455         int ret;
9456
9457         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9458         if (ret)
9459                 return ret;
9460
9461         val *= 1024; /* value passed in is in KB */
9462
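	/*
	 * Worked example (assuming 4K system pages): writing "8" means
	 * 8K, so pages = DIV_ROUND_UP(8192, 4096) = 2 and
	 * order = fls(2 - 1) = 1, i.e. a sub-buffer of two pages.
	 */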
9463         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9464         order = fls(pages - 1);
9465
9466         /* limit between 1 and 128 system pages */
9467         if (order < 0 || order > 7)
9468                 return -EINVAL;
9469
9470         /* Do not allow tracing while changing the order of the ring buffer */
9471         tracing_stop_tr(tr);
9472
9473         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9474         if (old_order == order)
9475                 goto out;
9476
9477         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9478         if (ret)
9479                 goto out;
9480
9481 #ifdef CONFIG_TRACER_MAX_TRACE
9482
9483         if (!tr->allocated_snapshot)
9484                 goto out_max;
9485
9486         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9487         if (ret) {
9488                 /* Put back the old order */
9489                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9490                 if (WARN_ON_ONCE(cnt)) {
9491                         /*
9492                          * AARGH! We are left with different orders!
9493                          * The max buffer is our "snapshot" buffer.
9494                          * When a tracer needs a snapshot (one of the
9495                          * latency tracers), it swaps the max buffer
9496                          * with the saved snapshot. We succeeded in
9497                          * updating the order of the main buffer, but failed
9498                          * to update the order of the max buffer. And when we
9499                          * tried to reset the main buffer to its original order, we
9500                          * failed there too. This is very unlikely to
9501                          * happen, but if it does, warn and kill all
9502                          * tracing.
9503                          */
9504                         tracing_disabled = 1;
9505                 }
9506                 goto out;
9507         }
9508  out_max:
9509 #endif
9510         (*ppos)++;
9511  out:
9512         if (ret)
9513                 cnt = ret;
9514         tracing_start_tr(tr);
9515         return cnt;
9516 }
9517
9518 static const struct file_operations buffer_subbuf_size_fops = {
9519         .open           = tracing_open_generic_tr,
9520         .read           = buffer_subbuf_size_read,
9521         .write          = buffer_subbuf_size_write,
9522         .release        = tracing_release_generic_tr,
9523         .llseek         = default_llseek,
9524 };
9525
9526 static struct dentry *trace_instance_dir;
9527
9528 static void
9529 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9530
9531 static int
9532 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9533 {
9534         enum ring_buffer_flags rb_flags;
9535
9536         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9537
9538         buf->tr = tr;
9539
9540         buf->buffer = ring_buffer_alloc(size, rb_flags);
9541         if (!buf->buffer)
9542                 return -ENOMEM;
9543
9544         buf->data = alloc_percpu(struct trace_array_cpu);
9545         if (!buf->data) {
9546                 ring_buffer_free(buf->buffer);
9547                 buf->buffer = NULL;
9548                 return -ENOMEM;
9549         }
9550
9551         /* Allocate the first page for all buffers */
9552         set_buffer_entries(&tr->array_buffer,
9553                            ring_buffer_size(tr->array_buffer.buffer, 0));
9554
9555         return 0;
9556 }
9557
9558 static void free_trace_buffer(struct array_buffer *buf)
9559 {
9560         if (buf->buffer) {
9561                 ring_buffer_free(buf->buffer);
9562                 buf->buffer = NULL;
9563                 free_percpu(buf->data);
9564                 buf->data = NULL;
9565         }
9566 }
9567
9568 static int allocate_trace_buffers(struct trace_array *tr, int size)
9569 {
9570         int ret;
9571
9572         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9573         if (ret)
9574                 return ret;
9575
9576 #ifdef CONFIG_TRACER_MAX_TRACE
9577         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9578                                     allocate_snapshot ? size : 1);
9579         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9580                 free_trace_buffer(&tr->array_buffer);
9581                 return -ENOMEM;
9582         }
9583         tr->allocated_snapshot = allocate_snapshot;
9584
9585         allocate_snapshot = false;
9586 #endif
9587
9588         return 0;
9589 }
9590
9591 static void free_trace_buffers(struct trace_array *tr)
9592 {
9593         if (!tr)
9594                 return;
9595
9596         free_trace_buffer(&tr->array_buffer);
9597
9598 #ifdef CONFIG_TRACER_MAX_TRACE
9599         free_trace_buffer(&tr->max_buffer);
9600 #endif
9601 }
9602
9603 static void init_trace_flags_index(struct trace_array *tr)
9604 {
9605         int i;
9606
9607         /* Used by the trace options files */
9608         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9609                 tr->trace_flags_index[i] = i;
9610 }
9611
9612 static void __update_tracer_options(struct trace_array *tr)
9613 {
9614         struct tracer *t;
9615
9616         for (t = trace_types; t; t = t->next)
9617                 add_tracer_options(tr, t);
9618 }
9619
9620 static void update_tracer_options(struct trace_array *tr)
9621 {
9622         mutex_lock(&trace_types_lock);
9623         tracer_options_updated = true;
9624         __update_tracer_options(tr);
9625         mutex_unlock(&trace_types_lock);
9626 }
9627
9628 /* Must have trace_types_lock held */
9629 struct trace_array *trace_array_find(const char *instance)
9630 {
9631         struct trace_array *tr, *found = NULL;
9632
9633         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9634                 if (tr->name && strcmp(tr->name, instance) == 0) {
9635                         found = tr;
9636                         break;
9637                 }
9638         }
9639
9640         return found;
9641 }
9642
9643 struct trace_array *trace_array_find_get(const char *instance)
9644 {
9645         struct trace_array *tr;
9646
9647         mutex_lock(&trace_types_lock);
9648         tr = trace_array_find(instance);
9649         if (tr)
9650                 tr->ref++;
9651         mutex_unlock(&trace_types_lock);
9652
9653         return tr;
9654 }
9655
9656 static int trace_array_create_dir(struct trace_array *tr)
9657 {
9658         int ret;
9659
9660         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9661         if (!tr->dir)
9662                 return -EINVAL;
9663
9664         ret = event_trace_add_tracer(tr->dir, tr);
9665         if (ret) {
9666                 tracefs_remove(tr->dir);
9667                 return ret;
9668         }
9669
9670         init_tracer_tracefs(tr, tr->dir);
9671         __update_tracer_options(tr);
9672
9673         return ret;
9674 }
9675
9676 static struct trace_array *
9677 trace_array_create_systems(const char *name, const char *systems)
9678 {
9679         struct trace_array *tr;
9680         int ret;
9681
9682         ret = -ENOMEM;
9683         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9684         if (!tr)
9685                 return ERR_PTR(ret);
9686
9687         tr->name = kstrdup(name, GFP_KERNEL);
9688         if (!tr->name)
9689                 goto out_free_tr;
9690
9691         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9692                 goto out_free_tr;
9693
9694         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9695                 goto out_free_tr;
9696
9697         if (systems) {
9698                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9699                 if (!tr->system_names)
9700                         goto out_free_tr;
9701         }
9702
9703         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9704
9705         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9706
9707         raw_spin_lock_init(&tr->start_lock);
9708
9709         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9710
9711         tr->current_trace = &nop_trace;
9712
9713         INIT_LIST_HEAD(&tr->systems);
9714         INIT_LIST_HEAD(&tr->events);
9715         INIT_LIST_HEAD(&tr->hist_vars);
9716         INIT_LIST_HEAD(&tr->err_log);
9717
9718         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9719                 goto out_free_tr;
9720
9721         /* The ring buffer is expanded by default */
9722         trace_set_ring_buffer_expanded(tr);
9723
9724         if (ftrace_allocate_ftrace_ops(tr) < 0)
9725                 goto out_free_tr;
9726
9727         ftrace_init_trace_array(tr);
9728
9729         init_trace_flags_index(tr);
9730
9731         if (trace_instance_dir) {
9732                 ret = trace_array_create_dir(tr);
9733                 if (ret)
9734                         goto out_free_tr;
9735         } else
9736                 __trace_early_add_events(tr);
9737
9738         list_add(&tr->list, &ftrace_trace_arrays);
9739
9740         tr->ref++;
9741
9742         return tr;
9743
9744  out_free_tr:
9745         ftrace_free_ftrace_ops(tr);
9746         free_trace_buffers(tr);
9747         free_cpumask_var(tr->pipe_cpumask);
9748         free_cpumask_var(tr->tracing_cpumask);
9749         kfree_const(tr->system_names);
9750         kfree(tr->name);
9751         kfree(tr);
9752
9753         return ERR_PTR(ret);
9754 }
9755
9756 static struct trace_array *trace_array_create(const char *name)
9757 {
9758         return trace_array_create_systems(name, NULL);
9759 }
9760
9761 static int instance_mkdir(const char *name)
9762 {
9763         struct trace_array *tr;
9764         int ret;
9765
9766         mutex_lock(&event_mutex);
9767         mutex_lock(&trace_types_lock);
9768
9769         ret = -EEXIST;
9770         if (trace_array_find(name))
9771                 goto out_unlock;
9772
9773         tr = trace_array_create(name);
9774
9775         ret = PTR_ERR_OR_ZERO(tr);
9776
9777 out_unlock:
9778         mutex_unlock(&trace_types_lock);
9779         mutex_unlock(&event_mutex);
9780         return ret;
9781 }
9782
9783 /**
9784  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9785  * @name: The name of the trace array to be looked up/created.
9786  * @systems: A list of systems to create event directories for (NULL for all)
9787  *
9788  * Returns a pointer to the trace array with the given name, or
9789  * NULL if it cannot be created.
9790  *
9791  * NOTE: This function increments the reference counter associated with the
9792  * trace array returned. This makes sure it cannot be freed while in use.
9793  * Use trace_array_put() once the trace array is no longer needed.
9794  * If the trace_array is to be freed, trace_array_destroy() needs to
9795  * be called after the trace_array_put(), or simply let user space delete
9796  * it from the tracefs instances directory. But until the
9797  * trace_array_put() is called, user space can not delete it.
9798  *
9799  */
9800 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9801 {
9802         struct trace_array *tr;
9803
9804         mutex_lock(&event_mutex);
9805         mutex_lock(&trace_types_lock);
9806
9807         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9808                 if (tr->name && strcmp(tr->name, name) == 0)
9809                         goto out_unlock;
9810         }
9811
9812         tr = trace_array_create_systems(name, systems);
9813
9814         if (IS_ERR(tr))
9815                 tr = NULL;
9816 out_unlock:
9817         if (tr)
9818                 tr->ref++;
9819
9820         mutex_unlock(&trace_types_lock);
9821         mutex_unlock(&event_mutex);
9822         return tr;
9823 }
9824 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
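/*
 * Example usage (a minimal sketch, not taken from an in-tree user; the
 * instance name is illustrative only):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *
 * If the caller itself is responsible for tearing the instance down,
 * trace_array_destroy() may then be called as described above.
 */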
9825
9826 static int __remove_instance(struct trace_array *tr)
9827 {
9828         int i;
9829
9830         /* Reference counter for a newly created trace array = 1. */
9831         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9832                 return -EBUSY;
9833
9834         list_del(&tr->list);
9835
9836         /* Disable all the flags that were enabled coming in */
9837         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9838                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9839                         set_tracer_flag(tr, 1 << i, 0);
9840         }
9841
9842         tracing_set_nop(tr);
9843         clear_ftrace_function_probes(tr);
9844         event_trace_del_tracer(tr);
9845         ftrace_clear_pids(tr);
9846         ftrace_destroy_function_files(tr);
9847         tracefs_remove(tr->dir);
9848         free_percpu(tr->last_func_repeats);
9849         free_trace_buffers(tr);
9850         clear_tracing_err_log(tr);
9851
9852         for (i = 0; i < tr->nr_topts; i++) {
9853                 kfree(tr->topts[i].topts);
9854         }
9855         kfree(tr->topts);
9856
9857         free_cpumask_var(tr->pipe_cpumask);
9858         free_cpumask_var(tr->tracing_cpumask);
9859         kfree_const(tr->system_names);
9860         kfree(tr->name);
9861         kfree(tr);
9862
9863         return 0;
9864 }
9865
9866 int trace_array_destroy(struct trace_array *this_tr)
9867 {
9868         struct trace_array *tr;
9869         int ret;
9870
9871         if (!this_tr)
9872                 return -EINVAL;
9873
9874         mutex_lock(&event_mutex);
9875         mutex_lock(&trace_types_lock);
9876
9877         ret = -ENODEV;
9878
9879         /* Make sure the trace array exists before destroying it. */
9880         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9881                 if (tr == this_tr) {
9882                         ret = __remove_instance(tr);
9883                         break;
9884                 }
9885         }
9886
9887         mutex_unlock(&trace_types_lock);
9888         mutex_unlock(&event_mutex);
9889
9890         return ret;
9891 }
9892 EXPORT_SYMBOL_GPL(trace_array_destroy);
9893
9894 static int instance_rmdir(const char *name)
9895 {
9896         struct trace_array *tr;
9897         int ret;
9898
9899         mutex_lock(&event_mutex);
9900         mutex_lock(&trace_types_lock);
9901
9902         ret = -ENODEV;
9903         tr = trace_array_find(name);
9904         if (tr)
9905                 ret = __remove_instance(tr);
9906
9907         mutex_unlock(&trace_types_lock);
9908         mutex_unlock(&event_mutex);
9909
9910         return ret;
9911 }
9912
9913 static __init void create_trace_instances(struct dentry *d_tracer)
9914 {
9915         struct trace_array *tr;
9916
9917         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9918                                                          instance_mkdir,
9919                                                          instance_rmdir);
9920         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9921                 return;
9922
9923         mutex_lock(&event_mutex);
9924         mutex_lock(&trace_types_lock);
9925
9926         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9927                 if (!tr->name)
9928                         continue;
9929                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9930                              "Failed to create instance directory\n"))
9931                         break;
9932         }
9933
9934         mutex_unlock(&trace_types_lock);
9935         mutex_unlock(&event_mutex);
9936 }
9937
9938 static void
9939 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9940 {
9941         int cpu;
9942
9943         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9944                         tr, &show_traces_fops);
9945
9946         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9947                         tr, &set_tracer_fops);
9948
9949         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9950                           tr, &tracing_cpumask_fops);
9951
9952         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9953                           tr, &tracing_iter_fops);
9954
9955         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9956                           tr, &tracing_fops);
9957
9958         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9959                           tr, &tracing_pipe_fops);
9960
9961         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9962                           tr, &tracing_entries_fops);
9963
9964         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9965                           tr, &tracing_total_entries_fops);
9966
9967         trace_create_file("free_buffer", 0200, d_tracer,
9968                           tr, &tracing_free_buffer_fops);
9969
9970         trace_create_file("trace_marker", 0220, d_tracer,
9971                           tr, &tracing_mark_fops);
9972
9973         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9974
9975         trace_create_file("trace_marker_raw", 0220, d_tracer,
9976                           tr, &tracing_mark_raw_fops);
9977
9978         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9979                           &trace_clock_fops);
9980
9981         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9982                           tr, &rb_simple_fops);
9983
9984         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9985                           &trace_time_stamp_mode_fops);
9986
9987         tr->buffer_percent = 50;
9988
9989         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9990                         tr, &buffer_percent_fops);
9991
9992         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9993                           tr, &buffer_subbuf_size_fops);
9994
9995         create_trace_options_dir(tr);
9996
9997 #ifdef CONFIG_TRACER_MAX_TRACE
9998         trace_create_maxlat_file(tr, d_tracer);
9999 #endif
10000
10001         if (ftrace_create_function_files(tr, d_tracer))
10002                 MEM_FAIL(1, "Could not allocate function filter files");
10003
10004 #ifdef CONFIG_TRACER_SNAPSHOT
10005         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10006                           tr, &snapshot_fops);
10007 #endif
10008
10009         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10010                           tr, &tracing_err_log_fops);
10011
10012         for_each_tracing_cpu(cpu)
10013                 tracing_init_tracefs_percpu(tr, cpu);
10014
10015         ftrace_init_tracefs(tr, d_tracer);
10016 }
10017
10018 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10019 {
10020         struct vfsmount *mnt;
10021         struct file_system_type *type;
10022
10023         /*
10024          * To maintain backward compatibility for tools that mount
10025          * debugfs to get to the tracing facility, tracefs is automatically
10026          * mounted to the debugfs/tracing directory.
10027          */
10028         type = get_fs_type("tracefs");
10029         if (!type)
10030                 return NULL;
10031         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10032         put_filesystem(type);
10033         if (IS_ERR(mnt))
10034                 return NULL;
10035         mntget(mnt);
10036
10037         return mnt;
10038 }
10039
10040 /**
10041  * tracing_init_dentry - initialize top level trace array
10042  *
10043  * This is called when creating files or directories in the tracing
10044  * directory. It is called via fs_initcall() by the boot up code
10045  * and returns 0 once the top level tracing directory has been set up.
10046  */
10047 int tracing_init_dentry(void)
10048 {
10049         struct trace_array *tr = &global_trace;
10050
10051         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10052                 pr_warn("Tracing disabled due to lockdown\n");
10053                 return -EPERM;
10054         }
10055
10056         /* The top level trace array uses NULL as parent */
10057         if (tr->dir)
10058                 return 0;
10059
10060         if (WARN_ON(!tracefs_initialized()))
10061                 return -ENODEV;
10062
10063         /*
10064          * As there may still be users that expect the tracing
10065          * files to exist in debugfs/tracing, we must automount
10066          * the tracefs file system there, so older tools still
10067          * work with the newer kernel.
10068          */
10069         tr->dir = debugfs_create_automount("tracing", NULL,
10070                                            trace_automount, NULL);
10071
10072         return 0;
10073 }
10074
10075 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10076 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10077
10078 static struct workqueue_struct *eval_map_wq __initdata;
10079 static struct work_struct eval_map_work __initdata;
10080 static struct work_struct tracerfs_init_work __initdata;
10081
10082 static void __init eval_map_work_func(struct work_struct *work)
10083 {
10084         int len;
10085
10086         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10087         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10088 }
10089
10090 static int __init trace_eval_init(void)
10091 {
10092         INIT_WORK(&eval_map_work, eval_map_work_func);
10093
10094         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10095         if (!eval_map_wq) {
10096                 pr_err("Unable to allocate eval_map_wq\n");
10097                 /* Fall back to doing the work synchronously here */
10098                 eval_map_work_func(&eval_map_work);
10099                 return -ENOMEM;
10100         }
10101
10102         queue_work(eval_map_wq, &eval_map_work);
10103         return 0;
10104 }
10105
10106 subsys_initcall(trace_eval_init);
10107
10108 static int __init trace_eval_sync(void)
10109 {
10110         /* Make sure the eval map updates are finished */
10111         if (eval_map_wq)
10112                 destroy_workqueue(eval_map_wq);
10113         return 0;
10114 }
10115
10116 late_initcall_sync(trace_eval_sync);
10117
10118
10119 #ifdef CONFIG_MODULES
10120 static void trace_module_add_evals(struct module *mod)
10121 {
10122         if (!mod->num_trace_evals)
10123                 return;
10124
10125         /*
10126          * Modules with bad taint do not have events created;
10127          * do not bother with their enums either.
10128          */
10129         if (trace_module_has_bad_taint(mod))
10130                 return;
10131
10132         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10133 }
10134
10135 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10136 static void trace_module_remove_evals(struct module *mod)
10137 {
10138         union trace_eval_map_item *map;
10139         union trace_eval_map_item **last = &trace_eval_maps;
10140
10141         if (!mod->num_trace_evals)
10142                 return;
10143
10144         mutex_lock(&trace_eval_mutex);
10145
10146         map = trace_eval_maps;
10147
10148         while (map) {
10149                 if (map->head.mod == mod)
10150                         break;
10151                 map = trace_eval_jmp_to_tail(map);
10152                 last = &map->tail.next;
10153                 map = map->tail.next;
10154         }
10155         if (!map)
10156                 goto out;
10157
10158         *last = trace_eval_jmp_to_tail(map)->tail.next;
10159         kfree(map);
10160  out:
10161         mutex_unlock(&trace_eval_mutex);
10162 }
10163 #else
10164 static inline void trace_module_remove_evals(struct module *mod) { }
10165 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10166
10167 static int trace_module_notify(struct notifier_block *self,
10168                                unsigned long val, void *data)
10169 {
10170         struct module *mod = data;
10171
10172         switch (val) {
10173         case MODULE_STATE_COMING:
10174                 trace_module_add_evals(mod);
10175                 break;
10176         case MODULE_STATE_GOING:
10177                 trace_module_remove_evals(mod);
10178                 break;
10179         }
10180
10181         return NOTIFY_OK;
10182 }
10183
10184 static struct notifier_block trace_module_nb = {
10185         .notifier_call = trace_module_notify,
10186         .priority = 0,
10187 };
10188 #endif /* CONFIG_MODULES */
10189
10190 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10191 {
10192
10193         event_trace_init();
10194
10195         init_tracer_tracefs(&global_trace, NULL);
10196         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10197
10198         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10199                         &global_trace, &tracing_thresh_fops);
10200
10201         trace_create_file("README", TRACE_MODE_READ, NULL,
10202                         NULL, &tracing_readme_fops);
10203
10204         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10205                         NULL, &tracing_saved_cmdlines_fops);
10206
10207         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10208                           NULL, &tracing_saved_cmdlines_size_fops);
10209
10210         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10211                         NULL, &tracing_saved_tgids_fops);
10212
10213         trace_create_eval_file(NULL);
10214
10215 #ifdef CONFIG_MODULES
10216         register_module_notifier(&trace_module_nb);
10217 #endif
10218
10219 #ifdef CONFIG_DYNAMIC_FTRACE
10220         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10221                         NULL, &tracing_dyn_info_fops);
10222 #endif
10223
10224         create_trace_instances(NULL);
10225
10226         update_tracer_options(&global_trace);
10227 }
10228
10229 static __init int tracer_init_tracefs(void)
10230 {
10231         int ret;
10232
10233         trace_access_lock_init();
10234
10235         ret = tracing_init_dentry();
10236         if (ret)
10237                 return 0;
10238
10239         if (eval_map_wq) {
10240                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10241                 queue_work(eval_map_wq, &tracerfs_init_work);
10242         } else {
10243                 tracer_init_tracefs_work_func(NULL);
10244         }
10245
10246         rv_init_interface();
10247
10248         return 0;
10249 }
10250
10251 fs_initcall(tracer_init_tracefs);
10252
10253 static int trace_die_panic_handler(struct notifier_block *self,
10254                                 unsigned long ev, void *unused);
10255
10256 static struct notifier_block trace_panic_notifier = {
10257         .notifier_call = trace_die_panic_handler,
10258         .priority = INT_MAX - 1,
10259 };
10260
10261 static struct notifier_block trace_die_notifier = {
10262         .notifier_call = trace_die_panic_handler,
10263         .priority = INT_MAX - 1,
10264 };
10265
10266 /*
10267  * The idea is to execute the following die/panic callback early, in order
10268  * to avoid showing irrelevant information in the trace (like other panic
10269  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10270  * warnings get disabled (to prevent potential log flooding).
10271  */
10272 static int trace_die_panic_handler(struct notifier_block *self,
10273                                 unsigned long ev, void *unused)
10274 {
10275         if (!ftrace_dump_on_oops)
10276                 return NOTIFY_DONE;
10277
10278         /* The die notifier requires DIE_OOPS to trigger */
10279         if (self == &trace_die_notifier && ev != DIE_OOPS)
10280                 return NOTIFY_DONE;
10281
10282         ftrace_dump(ftrace_dump_on_oops);
10283
10284         return NOTIFY_DONE;
10285 }
10286
10287 /*
10288  * printk is set to a max of 1024, but we really don't need it that big.
10289  * Nothing should be printing 1000 characters anyway.
10290  */
10291 #define TRACE_MAX_PRINT         1000
10292
10293 /*
10294  * Define here KERN_TRACE so that we have one place to modify
10295  * it if we decide to change what log level the ftrace dump
10296  * should be at.
10297  */
10298 #define KERN_TRACE              KERN_EMERG
10299
10300 void
10301 trace_printk_seq(struct trace_seq *s)
10302 {
10303         /* Probably should print a warning here. */
10304         if (s->seq.len >= TRACE_MAX_PRINT)
10305                 s->seq.len = TRACE_MAX_PRINT;
10306
10307         /*
10308          * More paranoid code. Although the buffer size is set to
10309          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10310          * an extra layer of protection.
10311          */
10312         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10313                 s->seq.len = s->seq.size - 1;
10314
10315         /* Should already be NUL terminated, but we are paranoid. */
10316         s->buffer[s->seq.len] = 0;
10317
10318         printk(KERN_TRACE "%s", s->buffer);
10319
10320         trace_seq_init(s);
10321 }
10322
10323 void trace_init_global_iter(struct trace_iterator *iter)
10324 {
10325         iter->tr = &global_trace;
10326         iter->trace = iter->tr->current_trace;
10327         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10328         iter->array_buffer = &global_trace.array_buffer;
10329
10330         if (iter->trace && iter->trace->open)
10331                 iter->trace->open(iter);
10332
10333         /* Annotate start of buffers if we had overruns */
10334         if (ring_buffer_overruns(iter->array_buffer->buffer))
10335                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10336
10337         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10338         if (trace_clocks[iter->tr->clock_id].in_ns)
10339                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10340
10341         /* Can not use kmalloc for iter.temp and iter.fmt */
10342         iter->temp = static_temp_buf;
10343         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10344         iter->fmt = static_fmt_buf;
10345         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10346 }
10347
10348 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10349 {
10350         /* use static because iter can be a bit big for the stack */
10351         static struct trace_iterator iter;
10352         static atomic_t dump_running;
10353         struct trace_array *tr = &global_trace;
10354         unsigned int old_userobj;
10355         unsigned long flags;
10356         int cnt = 0, cpu;
10357
10358         /* Only allow one dump user at a time. */
10359         if (atomic_inc_return(&dump_running) != 1) {
10360                 atomic_dec(&dump_running);
10361                 return;
10362         }
10363
10364         /*
10365          * Always turn off tracing when we dump.
10366          * We don't need to show trace output of what happens
10367          * between multiple crashes.
10368          *
10369          * If the user does a sysrq-z, then they can re-enable
10370          * tracing with echo 1 > tracing_on.
10371          */
10372         tracing_off();
10373
10374         local_irq_save(flags);
10375
10376         /* Simulate the iterator */
10377         trace_init_global_iter(&iter);
10378
10379         for_each_tracing_cpu(cpu) {
10380                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10381         }
10382
10383         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10384
10385         /* don't look at user memory in panic mode */
10386         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10387
10388         switch (oops_dump_mode) {
10389         case DUMP_ALL:
10390                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10391                 break;
10392         case DUMP_ORIG:
10393                 iter.cpu_file = raw_smp_processor_id();
10394                 break;
10395         case DUMP_NONE:
10396                 goto out_enable;
10397         default:
10398                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10399                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10400         }
10401
10402         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10403
10404         /* Did function tracer already get disabled? */
10405         if (ftrace_is_dead()) {
10406                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10407                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10408         }
10409
10410         /*
10411          * We need to stop all tracing on all CPUs to read
10412          * the next buffer. This is a bit expensive, but it is
10413          * not done often. We read all that we can,
10414          * and then release the locks again.
10415          */
10416
10417         while (!trace_empty(&iter)) {
10418
10419                 if (!cnt)
10420                         printk(KERN_TRACE "---------------------------------\n");
10421
10422                 cnt++;
10423
10424                 trace_iterator_reset(&iter);
10425                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10426
10427                 if (trace_find_next_entry_inc(&iter) != NULL) {
10428                         int ret;
10429
10430                         ret = print_trace_line(&iter);
10431                         if (ret != TRACE_TYPE_NO_CONSUME)
10432                                 trace_consume(&iter);
10433                 }
10434                 touch_nmi_watchdog();
10435
10436                 trace_printk_seq(&iter.seq);
10437         }
10438
10439         if (!cnt)
10440                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10441         else
10442                 printk(KERN_TRACE "---------------------------------\n");
10443
10444  out_enable:
10445         tr->trace_flags |= old_userobj;
10446
10447         for_each_tracing_cpu(cpu) {
10448                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10449         }
10450         atomic_dec(&dump_running);
10451         local_irq_restore(flags);
10452 }
10453 EXPORT_SYMBOL_GPL(ftrace_dump);
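/*
 * Example usage (a minimal sketch): code that wants the ftrace buffers
 * dumped to the kernel log on a fatal error can call
 *
 *	ftrace_dump(DUMP_ALL);
 *
 * to dump every CPU's buffer, or ftrace_dump(DUMP_ORIG) to dump only
 * the buffer of the CPU that triggered the dump.
 */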
10454
10455 #define WRITE_BUFSIZE  4096
10456
10457 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10458                                 size_t count, loff_t *ppos,
10459                                 int (*createfn)(const char *))
10460 {
10461         char *kbuf, *buf, *tmp;
10462         int ret = 0;
10463         size_t done = 0;
10464         size_t size;
10465
10466         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10467         if (!kbuf)
10468                 return -ENOMEM;
10469
10470         while (done < count) {
10471                 size = count - done;
10472
10473                 if (size >= WRITE_BUFSIZE)
10474                         size = WRITE_BUFSIZE - 1;
10475
10476                 if (copy_from_user(kbuf, buffer + done, size)) {
10477                         ret = -EFAULT;
10478                         goto out;
10479                 }
10480                 kbuf[size] = '\0';
10481                 buf = kbuf;
10482                 do {
10483                         tmp = strchr(buf, '\n');
10484                         if (tmp) {
10485                                 *tmp = '\0';
10486                                 size = tmp - buf + 1;
10487                         } else {
10488                                 size = strlen(buf);
10489                                 if (done + size < count) {
10490                                         if (buf != kbuf)
10491                                                 break;
10492                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10493                                         pr_warn("Line length is too long: Should be less than %d\n",
10494                                                 WRITE_BUFSIZE - 2);
10495                                         ret = -EINVAL;
10496                                         goto out;
10497                                 }
10498                         }
10499                         done += size;
10500
10501                         /* Remove comments */
10502                         tmp = strchr(buf, '#');
10503
10504                         if (tmp)
10505                                 *tmp = '\0';
10506
10507                         ret = createfn(buf);
10508                         if (ret)
10509                                 goto out;
10510                         buf += size;
10511
10512                 } while (done < count);
10513         }
10514         ret = done;
10515
10516 out:
10517         kfree(kbuf);
10518
10519         return ret;
10520 }
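/*
 * A minimal sketch of a createfn callback (hypothetical, for
 * illustration only): it is handed one parsed, comment-stripped line at
 * a time and returns 0 on success.
 *
 *	static int example_create_cmd(const char *raw_command)
 *	{
 *		pr_info("parsed command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 * A tracefs file's .write handler would then forward user data with:
 *
 *	return trace_parse_run_command(file, buffer, count, ppos,
 *				       example_create_cmd);
 */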
10521
10522 #ifdef CONFIG_TRACER_MAX_TRACE
10523 __init static bool tr_needs_alloc_snapshot(const char *name)
10524 {
10525         char *test;
10526         int len = strlen(name);
10527         bool ret;
10528
10529         if (!boot_snapshot_index)
10530                 return false;
10531
10532         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10533             boot_snapshot_info[len] == '\t')
10534                 return true;
10535
10536         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10537         if (!test)
10538                 return false;
10539
10540         sprintf(test, "\t%s\t", name);
10541         ret = strstr(boot_snapshot_info, test) != NULL;
10542         kfree(test);
10543         return ret;
10544 }
10545
10546 __init static void do_allocate_snapshot(const char *name)
10547 {
10548         if (!tr_needs_alloc_snapshot(name))
10549                 return;
10550
10551         /*
10552          * When allocate_snapshot is set, the next call to
10553          * allocate_trace_buffers() (called by trace_array_get_by_name())
10554          * will allocate the snapshot buffer. That will also clear
10555          * this flag.
10556          */
10557         allocate_snapshot = true;
10558 }
10559 #else
10560 static inline void do_allocate_snapshot(const char *name) { }
10561 #endif
10562
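/*
 * boot_instance_info holds the instance descriptions collected from the
 * kernel command line (the "trace_instance=" option), separated by
 * tabs. Each entry is "name[,event]...", so for example an entry of
 * "foo,sched:sched_switch" creates instance "foo" and enables the
 * sched_switch event in it (the name and event here are illustrative
 * only).
 */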
10563 __init static void enable_instances(void)
10564 {
10565         struct trace_array *tr;
10566         char *curr_str;
10567         char *str;
10568         char *tok;
10569
10570         /* A tab is always appended */
10571         boot_instance_info[boot_instance_index - 1] = '\0';
10572         str = boot_instance_info;
10573
10574         while ((curr_str = strsep(&str, "\t"))) {
10575
10576                 tok = strsep(&curr_str, ",");
10577
10578                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10579                         do_allocate_snapshot(tok);
10580
10581                 tr = trace_array_get_by_name(tok, NULL);
10582                 if (!tr) {
10583                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10584                         continue;
10585                 }
10586                 /* Allow user space to delete it */
10587                 trace_array_put(tr);
10588
10589                 while ((tok = strsep(&curr_str, ","))) {
10590                         early_enable_events(tr, tok, true);
10591                 }
10592         }
10593 }
10594
10595 __init static int tracer_alloc_buffers(void)
10596 {
10597         int ring_buf_size;
10598         int ret = -ENOMEM;
10599
10600
10601         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10602                 pr_warn("Tracing disabled due to lockdown\n");
10603                 return -EPERM;
10604         }
10605
10606         /*
10607          * Make sure we don't accidentally add more trace options
10608          * than we have bits for.
10609          */
10610         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10611
10612         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10613                 goto out;
10614
10615         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10616                 goto out_free_buffer_mask;
10617
10618         /* Only allocate trace_printk buffers if a trace_printk exists */
10619         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10620                 /* Must be called before global_trace.buffer is allocated */
10621                 trace_printk_init_buffers();
10622
10623         /* To save memory, keep the ring buffer size to its minimum */
10624         if (global_trace.ring_buffer_expanded)
10625                 ring_buf_size = trace_buf_size;
10626         else
10627                 ring_buf_size = 1;
10628
10629         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10630         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10631
10632         raw_spin_lock_init(&global_trace.start_lock);
10633
10634         /*
10635          * The prepare callback allocates some memory for the ring buffer. We
10636          * don't free the buffer if the CPU goes down. If we were to free
10637          * the buffer, then the user would lose any trace that was in the
10638          * buffer. The memory will be removed once the "instance" is removed.
10639          */
10640         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10641                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10642                                       NULL);
10643         if (ret < 0)
10644                 goto out_free_cpumask;
10645         /* Used for event triggers */
10646         ret = -ENOMEM;
10647         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10648         if (!temp_buffer)
10649                 goto out_rm_hp_state;
10650
10651         if (trace_create_savedcmd() < 0)
10652                 goto out_free_temp_buffer;
10653
10654         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10655                 goto out_free_savedcmd;
10656
10657         /* TODO: make the number of buffers hot pluggable with CPUS */
10658         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10659                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10660                 goto out_free_pipe_cpumask;
10661         }
10662         if (global_trace.buffer_disabled)
10663                 tracing_off();
10664
10665         if (trace_boot_clock) {
10666                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10667                 if (ret < 0)
10668                         pr_warn("Trace clock %s not defined, going back to default\n",
10669                                 trace_boot_clock);
10670         }
10671
10672         /*
10673          * register_tracer() might reference current_trace, so it
10674          * needs to be set before we register anything. This is
10675          * just a bootstrap of current_trace anyway.
10676          */
10677         global_trace.current_trace = &nop_trace;
10678
10679         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10680
10681         ftrace_init_global_array_ops(&global_trace);
10682
10683         init_trace_flags_index(&global_trace);
10684
10685         register_tracer(&nop_trace);
10686
10687         /* Function tracing may start here (via kernel command line) */
10688         init_function_trace();
10689
10690         /* All seems OK, enable tracing */
10691         tracing_disabled = 0;
10692
10693         atomic_notifier_chain_register(&panic_notifier_list,
10694                                        &trace_panic_notifier);
10695
10696         register_die_notifier(&trace_die_notifier);
10697
10698         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10699
10700         INIT_LIST_HEAD(&global_trace.systems);
10701         INIT_LIST_HEAD(&global_trace.events);
10702         INIT_LIST_HEAD(&global_trace.hist_vars);
10703         INIT_LIST_HEAD(&global_trace.err_log);
10704         list_add(&global_trace.list, &ftrace_trace_arrays);
10705
10706         apply_trace_boot_options();
10707
10708         register_snapshot_cmd();
10709
10710         test_can_verify();
10711
10712         return 0;
10713
10714 out_free_pipe_cpumask:
10715         free_cpumask_var(global_trace.pipe_cpumask);
10716 out_free_savedcmd:
10717         free_saved_cmdlines_buffer(savedcmd);
10718 out_free_temp_buffer:
10719         ring_buffer_free(temp_buffer);
10720 out_rm_hp_state:
10721         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10722 out_free_cpumask:
10723         free_cpumask_var(global_trace.tracing_cpumask);
10724 out_free_buffer_mask:
10725         free_cpumask_var(tracing_buffer_mask);
10726 out:
10727         return ret;
10728 }
10729
10730 void __init ftrace_boot_snapshot(void)
10731 {
10732 #ifdef CONFIG_TRACER_MAX_TRACE
10733         struct trace_array *tr;
10734
10735         if (!snapshot_at_boot)
10736                 return;
10737
10738         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10739                 if (!tr->allocated_snapshot)
10740                         continue;
10741
10742                 tracing_snapshot_instance(tr);
10743                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10744         }
10745 #endif
10746 }
10747
10748 void __init early_trace_init(void)
10749 {
10750         if (tracepoint_printk) {
10751                 tracepoint_print_iter =
10752                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10753                 if (MEM_FAIL(!tracepoint_print_iter,
10754                              "Failed to allocate trace iterator\n"))
10755                         tracepoint_printk = 0;
10756                 else
10757                         static_key_enable(&tracepoint_printk_key.key);
10758         }
10759         tracer_alloc_buffers();
10760
10761         init_events();
10762 }
10763
10764 void __init trace_init(void)
10765 {
10766         trace_event_init();
10767
10768         if (boot_instance_index)
10769                 enable_instances();
10770 }
10771
10772 __init static void clear_boot_tracer(void)
10773 {
10774         /*
10775          * The default bootup tracer name lives in an init section
10776          * that is freed after boot. This function runs at late
10777          * initcall time: if the boot tracer was never found and
10778          * registered, clear the pointer so a later registration
10779          * does not access the buffer that is about to be freed.
10780          */
10781         if (!default_bootup_tracer)
10782                 return;
10783
10784         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10785                default_bootup_tracer);
10786         default_bootup_tracer = NULL;
10787 }
10788
10789 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10790 __init static void tracing_set_default_clock(void)
10791 {
10792         /* sched_clock_stable() is determined in late_initcall */
10793         if (!trace_boot_clock && !sched_clock_stable()) {
10794                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10795                         pr_warn("Can not set tracing clock due to lockdown\n");
10796                         return;
10797                 }
10798
10799                 printk(KERN_WARNING
10800                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10801                        "If you want to keep using the local clock, then add:\n"
10802                        "  \"trace_clock=local\"\n"
10803                        "on the kernel command line\n");
10804                 tracing_set_clock(&global_trace, "global");
10805         }
10806 }
10807 #else
10808 static inline void tracing_set_default_clock(void) { }
10809 #endif
10810
10811 __init static int late_trace_init(void)
10812 {
10813         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10814                 static_key_disable(&tracepoint_printk_key.key);
10815                 tracepoint_printk = 0;
10816         }
10817
10818         tracing_set_default_clock();
10819         clear_boot_tracer();
10820         return 0;
10821 }
10822
10823 late_initcall_sync(late_trace_init);