kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will be set back to zero if the
114  * initialization of the tracer is successful. That is the only place
115  * that sets it back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
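
/*
 * Example: the boot parameters handled above can be combined on the
 * kernel command line. An illustrative (hypothetical) invocation that
 * selects a tracer, dumps all CPU buffers on an oops, stops tracing on
 * the first warning, and pipes tracepoints to printk could look like:
 *
 *      ftrace=function_graph ftrace_dump_on_oops traceoff_on_warning tp_printk
 *
 * The set of available tracer names depends on the kernel configuration.
 */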
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are entering export into the list but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
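
/*
 * Example: a minimal trace_export consumer. This is an illustrative
 * sketch only; the example_* names are hypothetical and not part of the
 * kernel API. A real consumer forwards the raw entry bytes handed to its
 * ->write() callback to some external transport.
 */
static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int size)
{
        /* Forward @size bytes of @entry to an external sink here. */
}

static struct trace_export example_export __maybe_unused = {
        .write  = example_export_write,
        .flags  = TRACE_EXPORT_FUNCTION,
};

/*
 * Registration and removal then pair up as:
 *
 *      register_ftrace_export(&example_export);
 *      ...
 *      unregister_ftrace_export(&example_export);
 */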
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
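
/*
 * Example: the pattern tracefs open callbacks use with the check above
 * (an illustrative sketch; "example_open" is a hypothetical name). The
 * matching release callback must drop the reference again with
 * trace_array_put().
 */
static int __maybe_unused example_open(struct inode *inode, struct file *filp)
{
        struct trace_array *tr = inode->i_private;
        int ret;

        ret = tracing_check_open_get_tr(tr);
        if (ret)
                return ret;

        filp->private_data = tr;

        return 0;
}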
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
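
/*
 * Example: how fork/exit hooks use the helper above (an illustrative
 * sketch; the example_* callbacks are hypothetical). A real user, such
 * as the event pid-filtering code, looks the pid list up from its
 * trace_array under RCU before calling in.
 */
static void __maybe_unused
example_handle_fork(struct trace_pid_list *pid_list,
                    struct task_struct *self, struct task_struct *task)
{
        /* The child is added only if the forking parent is being traced. */
        trace_filter_add_remove_task(pid_list, self, task);
}

static void __maybe_unused
example_handle_exit(struct trace_pid_list *pid_list, struct task_struct *task)
{
        /* Passing NULL for @self removes the exiting task from the list. */
        trace_filter_add_remove_task(pid_list, NULL, task);
}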
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
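
/*
 * Example: wiring the iterators above into seq_file operations (an
 * illustrative sketch; the example_* names are hypothetical). A real
 * implementation also takes the appropriate locks in .start/.stop
 * before dereferencing the pid list.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops __maybe_unused = {
        .start  = example_pids_start,
        .next   = example_pids_next,
        .stop   = example_pids_stop,
        .show   = trace_pid_show,
};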
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new list. The write is an all-or-nothing
698          * operation: a new list is built from the pids written by the
699          * user, and if the operation fails, the current list is not
700          * modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * mirror flag "buffer_disabled" so it can be used in fast paths such as
792  * the irqsoff tracer. But it may be inaccurate due to races. If you
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If a dump on oops happens, it is much appreciated not to have
813  * to wait for all that output. In any case, this is configurable
814  * at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (returned by ring_buffer_peek(), etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the event producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different CPU ring
844  * buffers concurrently.
845  *
846  * These primitives don't distinguish read-only from read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
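
/*
 * Example: the consuming-read pattern these locks protect (an
 * illustrative sketch; "example_consume_cpu" is a hypothetical name).
 * Readers later in this file, such as the trace_pipe code, wrap their
 * per-CPU ring buffer consumption in this way.
 */
static void __maybe_unused example_consume_cpu(int cpu)
{
        trace_access_lock(cpu);
        /* Consume or splice events from @cpu's ring buffer here. */
        trace_access_unlock(cpu);
}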
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned long trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff), that just want to
965          * know if the ring buffer has been disabled, but it can handle
966          * races of where it gets disabled but we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013         struct ring_buffer_event *event;
1014         struct trace_buffer *buffer;
1015         struct print_entry *entry;
1016         unsigned int trace_ctx;
1017         int alloc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         if (unlikely(tracing_selftest_running || tracing_disabled))
1023                 return 0;
1024
1025         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027         trace_ctx = tracing_gen_ctx();
1028         buffer = global_trace.array_buffer.buffer;
1029         ring_buffer_nest_start(buffer);
1030         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                             trace_ctx);
1032         if (!event) {
1033                 size = 0;
1034                 goto out;
1035         }
1036
1037         entry = ring_buffer_event_data(event);
1038         entry->ip = ip;
1039
1040         memcpy(&entry->buf, str, size);
1041
1042         /* Add a newline if necessary */
1043         if (entry->buf[size - 1] != '\n') {
1044                 entry->buf[size] = '\n';
1045                 entry->buf[size + 1] = '\0';
1046         } else
1047                 entry->buf[size] = '\0';
1048
1049         __buffer_unlock_commit(buffer, event);
1050         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052         ring_buffer_nest_end(buffer);
1053         return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
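
/*
 * Example: kernel code does not usually call __trace_puts() directly;
 * it goes through the trace_puts() wrapper macro from the tracing
 * headers, which selects the cheaper __trace_bputs() variant for string
 * literals. An illustrative sketch ("example_checkpoint" is a
 * hypothetical name):
 */
static void __maybe_unused example_checkpoint(void)
{
        trace_puts("example: reached checkpoint\n");
}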
1056
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064         struct ring_buffer_event *event;
1065         struct trace_buffer *buffer;
1066         struct bputs_entry *entry;
1067         unsigned int trace_ctx;
1068         int size = sizeof(struct bputs_entry);
1069         int ret = 0;
1070
1071         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         trace_ctx = tracing_gen_ctx();
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             trace_ctx);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, snapshot can not be used when the tracer uses it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing.
1149  * Basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         arch_spin_lock(&tr->max_lock);
1197
1198         if (tr->cond_snapshot)
1199                 cond_data = tr->cond_snapshot->cond_data;
1200
1201         arch_spin_unlock(&tr->max_lock);
1202
1203         return cond_data;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206
1207 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1208                                         struct array_buffer *size_buf, int cpu_id);
1209 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1210
1211 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 {
1213         int ret;
1214
1215         if (!tr->allocated_snapshot) {
1216
1217                 /* allocate spare buffer */
1218                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1219                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220                 if (ret < 0)
1221                         return ret;
1222
1223                 tr->allocated_snapshot = true;
1224         }
1225
1226         return 0;
1227 }
1228
1229 static void free_snapshot(struct trace_array *tr)
1230 {
1231         /*
1232          * We don't free the ring buffer; instead, we resize it because
1233          * the max_tr ring buffer has some state (e.g. ring->clock) and
1234          * we want to preserve it.
1235          */
1236         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1237         set_buffer_entries(&tr->max_buffer, 1);
1238         tracing_reset_online_cpus(&tr->max_buffer);
1239         tr->allocated_snapshot = false;
1240 }
1241
1242 /**
1243  * tracing_alloc_snapshot - allocate snapshot buffer.
1244  *
1245  * This only allocates the snapshot buffer if it isn't already
1246  * allocated - it doesn't also take a snapshot.
1247  *
1248  * This is meant to be used in cases where the snapshot buffer needs
1249  * to be set up for events that can't sleep but need to be able to
1250  * trigger a snapshot.
1251  */
1252 int tracing_alloc_snapshot(void)
1253 {
1254         struct trace_array *tr = &global_trace;
1255         int ret;
1256
1257         ret = tracing_alloc_snapshot_instance(tr);
1258         WARN_ON(ret < 0);
1259
1260         return ret;
1261 }
1262 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1263
1264 /**
1265  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1266  *
1267  * This is similar to tracing_snapshot(), but it will allocate the
1268  * snapshot buffer if it isn't already allocated. Use this only
1269  * where it is safe to sleep, as the allocation may sleep.
1270  *
1271  * This causes a swap between the snapshot buffer and the current live
1272  * tracing buffer. You can use this to take snapshots of the live
1273  * trace when some condition is triggered, but continue to trace.
1274  */
1275 void tracing_snapshot_alloc(void)
1276 {
1277         int ret;
1278
1279         ret = tracing_alloc_snapshot();
1280         if (ret < 0)
1281                 return;
1282
1283         tracing_snapshot();
1284 }
1285 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
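
/*
 * Example: typical in-kernel use of the snapshot API (an illustrative
 * sketch; "example_capture_on_condition" is a hypothetical name). The
 * allocation must be done from a context that may sleep; the snapshot
 * itself can then be taken wherever the interesting condition fires.
 */
static void __maybe_unused example_capture_on_condition(bool hit)
{
        /* Done once, early, from a sleepable context: */
        if (tracing_alloc_snapshot() < 0)
                return;

        /* Later, at the point the condition of interest is detected: */
        if (hit)
                tracing_snapshot();
}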
1286
1287 /**
1288  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1289  * @tr:         The tracing instance
1290  * @cond_data:  User data to associate with the snapshot
1291  * @update:     Implementation of the cond_snapshot update function
1292  *
1293  * Check whether the conditional snapshot for the given instance has
1294  * already been enabled, or if the current tracer is already using a
1295  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1296  * save the cond_data and update function inside.
1297  *
1298  * Returns 0 if successful, error otherwise.
1299  */
1300 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1301                                  cond_update_fn_t update)
1302 {
1303         struct cond_snapshot *cond_snapshot;
1304         int ret = 0;
1305
1306         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307         if (!cond_snapshot)
1308                 return -ENOMEM;
1309
1310         cond_snapshot->cond_data = cond_data;
1311         cond_snapshot->update = update;
1312
1313         mutex_lock(&trace_types_lock);
1314
1315         ret = tracing_alloc_snapshot_instance(tr);
1316         if (ret)
1317                 goto fail_unlock;
1318
1319         if (tr->current_trace->use_max_tr) {
1320                 ret = -EBUSY;
1321                 goto fail_unlock;
1322         }
1323
1324         /*
1325          * The cond_snapshot can only change to NULL without the
1326          * trace_types_lock. We don't care if we race with it going
1327          * to NULL, but we want to make sure that it's not set to
1328          * something other than NULL when we get here, which we can
1329          * do safely with only holding the trace_types_lock and not
1330          * having to take the max_lock.
1331          */
1332         if (tr->cond_snapshot) {
1333                 ret = -EBUSY;
1334                 goto fail_unlock;
1335         }
1336
1337         arch_spin_lock(&tr->max_lock);
1338         tr->cond_snapshot = cond_snapshot;
1339         arch_spin_unlock(&tr->max_lock);
1340
1341         mutex_unlock(&trace_types_lock);
1342
1343         return ret;
1344
1345  fail_unlock:
1346         mutex_unlock(&trace_types_lock);
1347         kfree(cond_snapshot);
1348         return ret;
1349 }
1350 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
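
/*
 * Example: a conditional-snapshot user supplies an update callback and
 * then calls tracing_snapshot_cond() from the site it is monitoring.
 * This is an illustrative sketch; the example_* names and the threshold
 * logic are hypothetical.
 */
struct example_cond_data {
        u64     threshold;
        u64     last_value;
};

static bool __maybe_unused
example_cond_update(struct trace_array *tr, void *cond_data)
{
        struct example_cond_data *d = cond_data;

        /* Swap in the snapshot only when the latest value is too large. */
        return d->last_value > d->threshold;
}

/*
 * Enabling, triggering and tearing down then look like:
 *
 *      tracing_snapshot_cond_enable(tr, &data, example_cond_update);
 *      ...
 *      data.last_value = measured_value;
 *      tracing_snapshot_cond(tr, &data);
 *      ...
 *      tracing_snapshot_cond_disable(tr);
 */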
1351
1352 /**
1353  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1354  * @tr:         The tracing instance
1355  *
1356  * Check whether the conditional snapshot for the given instance is
1357  * enabled; if so, free the cond_snapshot associated with it,
1358  * otherwise return -EINVAL.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 {
1364         int ret = 0;
1365
1366         arch_spin_lock(&tr->max_lock);
1367
1368         if (!tr->cond_snapshot)
1369                 ret = -EINVAL;
1370         else {
1371                 kfree(tr->cond_snapshot);
1372                 tr->cond_snapshot = NULL;
1373         }
1374
1375         arch_spin_unlock(&tr->max_lock);
1376
1377         return ret;
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1380 #else
1381 void tracing_snapshot(void)
1382 {
1383         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot);
1386 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1387 {
1388         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1391 int tracing_alloc_snapshot(void)
1392 {
1393         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1394         return -ENODEV;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1397 void tracing_snapshot_alloc(void)
1398 {
1399         /* Give warning */
1400         tracing_snapshot();
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1403 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 {
1405         return NULL;
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1408 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 {
1410         return -ENODEV;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 {
1415         return false;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1418 #endif /* CONFIG_TRACER_SNAPSHOT */
1419
1420 void tracer_tracing_off(struct trace_array *tr)
1421 {
1422         if (tr->array_buffer.buffer)
1423                 ring_buffer_record_off(tr->array_buffer.buffer);
1424         /*
1425          * This flag is looked at when buffers haven't been allocated
1426          * yet, or by some tracers (like irqsoff), that just want to
1427          * know if the ring buffer has been disabled, but it can handle
1428          * races of where it gets disabled but we still do a record.
1429          * As the check is in the fast path of the tracers, it is more
1430          * important to be fast than accurate.
1431          */
1432         tr->buffer_disabled = 1;
1433         /* Make the flag seen by readers */
1434         smp_wmb();
1435 }
1436
1437 /**
1438  * tracing_off - turn off tracing buffers
1439  *
1440  * This function stops the tracing buffers from recording data.
1441  * It does not disable any overhead the tracers themselves may
1442  * be causing. This function simply causes all recording to
1443  * the ring buffers to fail.
1444  */
1445 void tracing_off(void)
1446 {
1447         tracer_tracing_off(&global_trace);
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_off);
1450
1451 void disable_trace_on_warning(void)
1452 {
1453         if (__disable_trace_on_warning) {
1454                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1455                         "Disabling tracing due to warning\n");
1456                 tracing_off();
1457         }
1458 }
1459
1460 /**
1461  * tracer_tracing_is_on - show real state of ring buffer enabled
1462  * @tr : the trace array to know if ring buffer is enabled
1463  *
1464  * Shows real state of the ring buffer if it is enabled or not.
1465  */
1466 bool tracer_tracing_is_on(struct trace_array *tr)
1467 {
1468         if (tr->array_buffer.buffer)
1469                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1470         return !tr->buffer_disabled;
1471 }
1472
1473 /**
1474  * tracing_is_on - show state of ring buffers enabled
1475  */
1476 int tracing_is_on(void)
1477 {
1478         return tracer_tracing_is_on(&global_trace);
1479 }
1480 EXPORT_SYMBOL_GPL(tracing_is_on);
1481
1482 static int __init set_buf_size(char *str)
1483 {
1484         unsigned long buf_size;
1485
1486         if (!str)
1487                 return 0;
1488         buf_size = memparse(str, &str);
1489         /*
1490          * nr_entries can not be zero and the startup
1491          * tests require some buffer space. Therefore
1492          * ensure we have at least 4096 bytes of buffer.
1493          */
1494         trace_buf_size = max(4096UL, buf_size);
1495         return 1;
1496 }
1497 __setup("trace_buf_size=", set_buf_size);
1498
1499 static int __init set_tracing_thresh(char *str)
1500 {
1501         unsigned long threshold;
1502         int ret;
1503
1504         if (!str)
1505                 return 0;
1506         ret = kstrtoul(str, 0, &threshold);
1507         if (ret < 0)
1508                 return 0;
1509         tracing_thresh = threshold * 1000;
1510         return 1;
1511 }
1512 __setup("tracing_thresh=", set_tracing_thresh);
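
/*
 * Example: both of the parameters above are given on the kernel command
 * line, e.g. (illustrative values):
 *
 *      trace_buf_size=4096k tracing_thresh=100
 *
 * trace_buf_size= accepts memparse() suffixes (k, M, G) and sets the
 * per-CPU buffer size in bytes; tracing_thresh= is in microseconds.
 */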
1513
1514 unsigned long nsecs_to_usecs(unsigned long nsecs)
1515 {
1516         return nsecs / 1000;
1517 }
1518
1519 /*
1520  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1521  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1522  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1523  * of strings in the order that the evals (enum) were defined.
1524  */
1525 #undef C
1526 #define C(a, b) b
1527
1528 /* These must match the bit positions in trace_iterator_flags */
1529 static const char *trace_options[] = {
1530         TRACE_FLAGS
1531         NULL
1532 };
1533
1534 static struct {
1535         u64 (*func)(void);
1536         const char *name;
1537         int in_ns;              /* is this clock in nanoseconds? */
1538 } trace_clocks[] = {
1539         { trace_clock_local,            "local",        1 },
1540         { trace_clock_global,           "global",       1 },
1541         { trace_clock_counter,          "counter",      0 },
1542         { trace_clock_jiffies,          "uptime",       0 },
1543         { trace_clock,                  "perf",         1 },
1544         { ktime_get_mono_fast_ns,       "mono",         1 },
1545         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1546         { ktime_get_boot_fast_ns,       "boot",         1 },
1547         { ktime_get_tai_fast_ns,        "tai",          1 },
1548         ARCH_TRACE_CLOCKS
1549 };
1550
1551 bool trace_clock_in_ns(struct trace_array *tr)
1552 {
1553         if (trace_clocks[tr->clock_id].in_ns)
1554                 return true;
1555
1556         return false;
1557 }
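
/*
 * Example: a clock from the table above is selected at run time by
 * writing its name to the "trace_clock" tracefs file, e.g.
 * (illustrative):
 *
 *      echo global > /sys/kernel/tracing/trace_clock
 *
 * or at boot time via the trace_clock= parameter handled earlier.
 */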
1558
1559 /*
1560  * trace_parser_get_init - gets the buffer for trace parser
1561  */
1562 int trace_parser_get_init(struct trace_parser *parser, int size)
1563 {
1564         memset(parser, 0, sizeof(*parser));
1565
1566         parser->buffer = kmalloc(size, GFP_KERNEL);
1567         if (!parser->buffer)
1568                 return 1;
1569
1570         parser->size = size;
1571         return 0;
1572 }
1573
1574 /*
1575  * trace_parser_put - frees the buffer for trace parser
1576  */
1577 void trace_parser_put(struct trace_parser *parser)
1578 {
1579         kfree(parser->buffer);
1580         parser->buffer = NULL;
1581 }
1582
1583 /*
1584  * trace_get_user - reads the user input string separated by space
1585  * (matched by isspace(ch))
1586  *
1587  * For each string found the 'struct trace_parser' is updated,
1588  * and the function returns.
1589  *
1590  * Returns number of bytes read.
1591  *
1592  * See kernel/trace/trace.h for 'struct trace_parser' details.
1593  */
1594 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1595         size_t cnt, loff_t *ppos)
1596 {
1597         char ch;
1598         size_t read = 0;
1599         ssize_t ret;
1600
1601         if (!*ppos)
1602                 trace_parser_clear(parser);
1603
1604         ret = get_user(ch, ubuf++);
1605         if (ret)
1606                 goto out;
1607
1608         read++;
1609         cnt--;
1610
1611         /*
1612          * The parser is not finished with the last write,
1613          * continue reading the user input without skipping spaces.
1614          */
1615         if (!parser->cont) {
1616                 /* skip white space */
1617                 while (cnt && isspace(ch)) {
1618                         ret = get_user(ch, ubuf++);
1619                         if (ret)
1620                                 goto out;
1621                         read++;
1622                         cnt--;
1623                 }
1624
1625                 parser->idx = 0;
1626
1627                 /* only spaces were written */
1628                 if (isspace(ch) || !ch) {
1629                         *ppos += read;
1630                         ret = read;
1631                         goto out;
1632                 }
1633         }
1634
1635         /* read the non-space input */
1636         while (cnt && !isspace(ch) && ch) {
1637                 if (parser->idx < parser->size - 1)
1638                         parser->buffer[parser->idx++] = ch;
1639                 else {
1640                         ret = -EINVAL;
1641                         goto out;
1642                 }
1643                 ret = get_user(ch, ubuf++);
1644                 if (ret)
1645                         goto out;
1646                 read++;
1647                 cnt--;
1648         }
1649
1650         /* We either got finished input or we have to wait for another call. */
1651         if (isspace(ch) || !ch) {
1652                 parser->buffer[parser->idx] = 0;
1653                 parser->cont = false;
1654         } else if (parser->idx < parser->size - 1) {
1655                 parser->cont = true;
1656                 parser->buffer[parser->idx++] = ch;
1657                 /* Make sure the parsed string always terminates with '\0'. */
1658                 parser->buffer[parser->idx] = 0;
1659         } else {
1660                 ret = -EINVAL;
1661                 goto out;
1662         }
1663
1664         *ppos += read;
1665         ret = read;
1666
1667 out:
1668         return ret;
1669 }
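/*
 * Illustrative sketch (not part of this file, hypothetical names): a typical
 * ->write() handler built on the parser consumes one whitespace-separated
 * token per call, roughly:
 *
 *	ssize_t example_write(struct file *filp, const char __user *ubuf,
 *			      size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			example_handle_token(parser.buffer);  (hypothetical helper)
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */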
1670
1671 /* TODO add a seq_buf_to_buffer() */
1672 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1673 {
1674         int len;
1675
1676         if (trace_seq_used(s) <= s->seq.readpos)
1677                 return -EBUSY;
1678
1679         len = trace_seq_used(s) - s->seq.readpos;
1680         if (cnt > len)
1681                 cnt = len;
1682         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1683
1684         s->seq.readpos += cnt;
1685         return cnt;
1686 }
1687
1688 unsigned long __read_mostly     tracing_thresh;
1689 static const struct file_operations tracing_max_lat_fops;
1690
1691 #ifdef LATENCY_FS_NOTIFY
1692
1693 static struct workqueue_struct *fsnotify_wq;
1694
1695 static void latency_fsnotify_workfn(struct work_struct *work)
1696 {
1697         struct trace_array *tr = container_of(work, struct trace_array,
1698                                               fsnotify_work);
1699         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1700 }
1701
1702 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1703 {
1704         struct trace_array *tr = container_of(iwork, struct trace_array,
1705                                               fsnotify_irqwork);
1706         queue_work(fsnotify_wq, &tr->fsnotify_work);
1707 }
1708
1709 static void trace_create_maxlat_file(struct trace_array *tr,
1710                                      struct dentry *d_tracer)
1711 {
1712         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1713         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1714         tr->d_max_latency = trace_create_file("tracing_max_latency",
1715                                               TRACE_MODE_WRITE,
1716                                               d_tracer, &tr->max_latency,
1717                                               &tracing_max_lat_fops);
1718 }
1719
1720 __init static int latency_fsnotify_init(void)
1721 {
1722         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1723                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1724         if (!fsnotify_wq) {
1725                 pr_err("Unable to allocate tr_max_lat_wq\n");
1726                 return -ENOMEM;
1727         }
1728         return 0;
1729 }
1730
1731 late_initcall_sync(latency_fsnotify_init);
1732
1733 void latency_fsnotify(struct trace_array *tr)
1734 {
1735         if (!fsnotify_wq)
1736                 return;
1737         /*
1738          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1739          * possible that we are called from __schedule() or do_idle(), which
1740          * could cause a deadlock.
1741          */
1742         irq_work_queue(&tr->fsnotify_irqwork);
1743 }
1744
1745 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1746         || defined(CONFIG_OSNOISE_TRACER)
1747
1748 #define trace_create_maxlat_file(tr, d_tracer)                          \
1749         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1750                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1751
1752 #else
1753 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1754 #endif
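/*
 * Note on the LATENCY_FS_NOTIFY variant above: latency_fsnotify() defers the
 * fsnotify_inode() call in two stages (an irq_work, whose handler then queues
 * onto fsnotify_wq) so that it can be invoked from __schedule() or do_idle()
 * without queueing work directly, which could deadlock.
 */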
1755
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 /*
1758  * Copy the new maximum trace into the separate maximum-trace
1759  * structure. (this way the maximum trace is permanently saved,
1760  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1761  */
1762 static void
1763 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1764 {
1765         struct array_buffer *trace_buf = &tr->array_buffer;
1766         struct array_buffer *max_buf = &tr->max_buffer;
1767         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1768         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1769
1770         max_buf->cpu = cpu;
1771         max_buf->time_start = data->preempt_timestamp;
1772
1773         max_data->saved_latency = tr->max_latency;
1774         max_data->critical_start = data->critical_start;
1775         max_data->critical_end = data->critical_end;
1776
1777         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1778         max_data->pid = tsk->pid;
1779         /*
1780          * If tsk == current, then use current_uid(), as that does not use
1781          * RCU. The irq tracer can be called out of RCU scope.
1782          */
1783         if (tsk == current)
1784                 max_data->uid = current_uid();
1785         else
1786                 max_data->uid = task_uid(tsk);
1787
1788         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1789         max_data->policy = tsk->policy;
1790         max_data->rt_priority = tsk->rt_priority;
1791
1792         /* record this task's comm */
1793         tracing_record_cmdline(tsk);
1794         latency_fsnotify(tr);
1795 }
1796
1797 /**
1798  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1799  * @tr: tracer
1800  * @tsk: the task with the latency
1801  * @cpu: The cpu that initiated the trace.
1802  * @cond_data: User data associated with a conditional snapshot
1803  *
1804  * Flip the buffers between the @tr and the max_tr and record information
1805  * about which task was the cause of this latency.
1806  */
1807 void
1808 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809               void *cond_data)
1810 {
1811         if (tr->stop_count)
1812                 return;
1813
1814         WARN_ON_ONCE(!irqs_disabled());
1815
1816         if (!tr->allocated_snapshot) {
1817                 /* Only the nop tracer should hit this when disabling */
1818                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1819                 return;
1820         }
1821
1822         arch_spin_lock(&tr->max_lock);
1823
1824         /* Inherit the recordable setting from array_buffer */
1825         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1826                 ring_buffer_record_on(tr->max_buffer.buffer);
1827         else
1828                 ring_buffer_record_off(tr->max_buffer.buffer);
1829
1830 #ifdef CONFIG_TRACER_SNAPSHOT
1831         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1832                 goto out_unlock;
1833 #endif
1834         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1835
1836         __update_max_tr(tr, tsk, cpu);
1837
1838  out_unlock:
1839         arch_spin_unlock(&tr->max_lock);
1840 }
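/*
 * Illustrative sketch (not part of this file): a latency tracer typically
 * calls this from its own maximum-check path, roughly:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * The real callers live in the individual tracers (wakeup, irqsoff, etc.).
 */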
1841
1842 /**
1843  * update_max_tr_single - only copy one trace over, and reset the rest
1844  * @tr: tracer
1845  * @tsk: task with the latency
1846  * @cpu: the cpu of the buffer to copy.
1847  *
1848  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1849  */
1850 void
1851 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1852 {
1853         int ret;
1854
1855         if (tr->stop_count)
1856                 return;
1857
1858         WARN_ON_ONCE(!irqs_disabled());
1859         if (!tr->allocated_snapshot) {
1860                 /* Only the nop tracer should hit this when disabling */
1861                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1862                 return;
1863         }
1864
1865         arch_spin_lock(&tr->max_lock);
1866
1867         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1868
1869         if (ret == -EBUSY) {
1870                 /*
1871                  * We failed to swap the buffer due to a commit taking
1872                  * place on this CPU. We fail to record, but we reset
1873                  * the max trace buffer (no one writes directly to it)
1874                  * and flag that it failed.
1875                  */
1876                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1877                         "Failed to swap buffers due to commit in progress\n");
1878         }
1879
1880         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1881
1882         __update_max_tr(tr, tsk, cpu);
1883         arch_spin_unlock(&tr->max_lock);
1884 }
1885 #endif /* CONFIG_TRACER_MAX_TRACE */
1886
1887 static int wait_on_pipe(struct trace_iterator *iter, int full)
1888 {
1889         /* Iterators are static, they should be filled or empty */
1890         if (trace_buffer_iter(iter, iter->cpu_file))
1891                 return 0;
1892
1893         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1894                                 full);
1895 }
1896
1897 #ifdef CONFIG_FTRACE_STARTUP_TEST
1898 static bool selftests_can_run;
1899
1900 struct trace_selftests {
1901         struct list_head                list;
1902         struct tracer                   *type;
1903 };
1904
1905 static LIST_HEAD(postponed_selftests);
1906
1907 static int save_selftest(struct tracer *type)
1908 {
1909         struct trace_selftests *selftest;
1910
1911         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1912         if (!selftest)
1913                 return -ENOMEM;
1914
1915         selftest->type = type;
1916         list_add(&selftest->list, &postponed_selftests);
1917         return 0;
1918 }
1919
1920 static int run_tracer_selftest(struct tracer *type)
1921 {
1922         struct trace_array *tr = &global_trace;
1923         struct tracer *saved_tracer = tr->current_trace;
1924         int ret;
1925
1926         if (!type->selftest || tracing_selftest_disabled)
1927                 return 0;
1928
1929         /*
1930          * If a tracer registers early in boot up (before scheduling is
1931          * initialized and such), then do not run its selftests yet.
1932          * Instead, run it a little later in the boot process.
1933          */
1934         if (!selftests_can_run)
1935                 return save_selftest(type);
1936
1937         if (!tracing_is_on()) {
1938                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1939                         type->name);
1940                 return 0;
1941         }
1942
1943         /*
1944          * Run a selftest on this tracer.
1945          * Here we reset the trace buffer, and set the current
1946          * tracer to be this tracer. The tracer can then run some
1947          * internal tracing to verify that everything is in order.
1948          * If we fail, we do not register this tracer.
1949          */
1950         tracing_reset_online_cpus(&tr->array_buffer);
1951
1952         tr->current_trace = type;
1953
1954 #ifdef CONFIG_TRACER_MAX_TRACE
1955         if (type->use_max_tr) {
1956                 /* If we expanded the buffers, make sure the max is expanded too */
1957                 if (ring_buffer_expanded)
1958                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1959                                            RING_BUFFER_ALL_CPUS);
1960                 tr->allocated_snapshot = true;
1961         }
1962 #endif
1963
1964         /* the test is responsible for initializing and enabling */
1965         pr_info("Testing tracer %s: ", type->name);
1966         ret = type->selftest(type, tr);
1967         /* the test is responsible for resetting too */
1968         tr->current_trace = saved_tracer;
1969         if (ret) {
1970                 printk(KERN_CONT "FAILED!\n");
1971                 /* Add the warning after printing 'FAILED' */
1972                 WARN_ON(1);
1973                 return -1;
1974         }
1975         /* Only reset on passing, to avoid touching corrupted buffers */
1976         tracing_reset_online_cpus(&tr->array_buffer);
1977
1978 #ifdef CONFIG_TRACER_MAX_TRACE
1979         if (type->use_max_tr) {
1980                 tr->allocated_snapshot = false;
1981
1982                 /* Shrink the max buffer again */
1983                 if (ring_buffer_expanded)
1984                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1985                                            RING_BUFFER_ALL_CPUS);
1986         }
1987 #endif
1988
1989         printk(KERN_CONT "PASSED\n");
1990         return 0;
1991 }
1992
1993 static __init int init_trace_selftests(void)
1994 {
1995         struct trace_selftests *p, *n;
1996         struct tracer *t, **last;
1997         int ret;
1998
1999         selftests_can_run = true;
2000
2001         mutex_lock(&trace_types_lock);
2002
2003         if (list_empty(&postponed_selftests))
2004                 goto out;
2005
2006         pr_info("Running postponed tracer tests:\n");
2007
2008         tracing_selftest_running = true;
2009         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2010                 /* This loop can take minutes when sanitizers are enabled, so
2011                  * let's make sure we allow RCU processing.
2012                  */
2013                 cond_resched();
2014                 ret = run_tracer_selftest(p->type);
2015                 /* If the test fails, then warn and remove from available_tracers */
2016                 if (ret < 0) {
2017                         WARN(1, "tracer: %s failed selftest, disabling\n",
2018                              p->type->name);
2019                         last = &trace_types;
2020                         for (t = trace_types; t; t = t->next) {
2021                                 if (t == p->type) {
2022                                         *last = t->next;
2023                                         break;
2024                                 }
2025                                 last = &t->next;
2026                         }
2027                 }
2028                 list_del(&p->list);
2029                 kfree(p);
2030         }
2031         tracing_selftest_running = false;
2032
2033  out:
2034         mutex_unlock(&trace_types_lock);
2035
2036         return 0;
2037 }
2038 core_initcall(init_trace_selftests);
2039 #else
2040 static inline int run_tracer_selftest(struct tracer *type)
2041 {
2042         return 0;
2043 }
2044 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2045
2046 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2047
2048 static void __init apply_trace_boot_options(void);
2049
2050 /**
2051  * register_tracer - register a tracer with the ftrace system.
2052  * @type: the plugin for the tracer
2053  *
2054  * Register a new plugin tracer.
2055  */
2056 int __init register_tracer(struct tracer *type)
2057 {
2058         struct tracer *t;
2059         int ret = 0;
2060
2061         if (!type->name) {
2062                 pr_info("Tracer must have a name\n");
2063                 return -1;
2064         }
2065
2066         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2067                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2068                 return -1;
2069         }
2070
2071         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2072                 pr_warn("Can not register tracer %s due to lockdown\n",
2073                            type->name);
2074                 return -EPERM;
2075         }
2076
2077         mutex_lock(&trace_types_lock);
2078
2079         tracing_selftest_running = true;
2080
2081         for (t = trace_types; t; t = t->next) {
2082                 if (strcmp(type->name, t->name) == 0) {
2083                         /* already found */
2084                         pr_info("Tracer %s already registered\n",
2085                                 type->name);
2086                         ret = -1;
2087                         goto out;
2088                 }
2089         }
2090
2091         if (!type->set_flag)
2092                 type->set_flag = &dummy_set_flag;
2093         if (!type->flags) {
2094                 /* allocate a dummy tracer_flags */
2095                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2096                 if (!type->flags) {
2097                         ret = -ENOMEM;
2098                         goto out;
2099                 }
2100                 type->flags->val = 0;
2101                 type->flags->opts = dummy_tracer_opt;
2102         } else
2103                 if (!type->flags->opts)
2104                         type->flags->opts = dummy_tracer_opt;
2105
2106         /* store the tracer for __set_tracer_option */
2107         type->flags->trace = type;
2108
2109         ret = run_tracer_selftest(type);
2110         if (ret < 0)
2111                 goto out;
2112
2113         type->next = trace_types;
2114         trace_types = type;
2115         add_tracer_options(&global_trace, type);
2116
2117  out:
2118         tracing_selftest_running = false;
2119         mutex_unlock(&trace_types_lock);
2120
2121         if (ret || !default_bootup_tracer)
2122                 goto out_unlock;
2123
2124         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2125                 goto out_unlock;
2126
2127         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2128         /* Do we want this tracer to start on bootup? */
2129         tracing_set_tracer(&global_trace, type->name);
2130         default_bootup_tracer = NULL;
2131
2132         apply_trace_boot_options();
2133
2134         /* disable other selftests, since this will break it. */
2135         disable_tracing_selftest("running a tracer");
2136
2137  out_unlock:
2138         return ret;
2139 }
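/*
 * Illustrative sketch (not part of this file, hypothetical names): a minimal
 * tracer plugin registers itself from an initcall, e.g.:
 *
 *	static struct tracer example_trace __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_trace(void)
 *	{
 *		return register_tracer(&example_trace);
 *	}
 *	core_initcall(init_example_trace);
 */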
2140
2141 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2142 {
2143         struct trace_buffer *buffer = buf->buffer;
2144
2145         if (!buffer)
2146                 return;
2147
2148         ring_buffer_record_disable(buffer);
2149
2150         /* Make sure all commits have finished */
2151         synchronize_rcu();
2152         ring_buffer_reset_cpu(buffer, cpu);
2153
2154         ring_buffer_record_enable(buffer);
2155 }
2156
2157 void tracing_reset_online_cpus(struct array_buffer *buf)
2158 {
2159         struct trace_buffer *buffer = buf->buffer;
2160
2161         if (!buffer)
2162                 return;
2163
2164         ring_buffer_record_disable(buffer);
2165
2166         /* Make sure all commits have finished */
2167         synchronize_rcu();
2168
2169         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2170
2171         ring_buffer_reset_online_cpus(buffer);
2172
2173         ring_buffer_record_enable(buffer);
2174 }
2175
2176 /* Must have trace_types_lock held */
2177 void tracing_reset_all_online_cpus(void)
2178 {
2179         struct trace_array *tr;
2180
2181         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2182                 if (!tr->clear_trace)
2183                         continue;
2184                 tr->clear_trace = false;
2185                 tracing_reset_online_cpus(&tr->array_buffer);
2186 #ifdef CONFIG_TRACER_MAX_TRACE
2187                 tracing_reset_online_cpus(&tr->max_buffer);
2188 #endif
2189         }
2190 }
2191
2192 /*
2193  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2194  * is the tgid last observed corresponding to pid=i.
2195  */
2196 static int *tgid_map;
2197
2198 /* The maximum valid index into tgid_map. */
2199 static size_t tgid_map_max;
2200
2201 #define SAVED_CMDLINES_DEFAULT 128
2202 #define NO_CMDLINE_MAP UINT_MAX
2203 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2204 struct saved_cmdlines_buffer {
2205         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2206         unsigned *map_cmdline_to_pid;
2207         unsigned cmdline_num;
2208         int cmdline_idx;
2209         char *saved_cmdlines;
2210 };
2211 static struct saved_cmdlines_buffer *savedcmd;
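/*
 * The mapping works in two levels: map_pid_to_cmdline[] is indexed by the
 * masked pid and yields a slot index into saved_cmdlines[] (each slot is
 * TASK_COMM_LEN bytes), while map_cmdline_to_pid[] records which pid
 * currently owns each slot, so lookups can detect when a slot has been
 * recycled for a different task.
 */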
2212
2213 static inline char *get_saved_cmdlines(int idx)
2214 {
2215         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2216 }
2217
2218 static inline void set_cmdline(int idx, const char *cmdline)
2219 {
2220         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2221 }
2222
2223 static int allocate_cmdlines_buffer(unsigned int val,
2224                                     struct saved_cmdlines_buffer *s)
2225 {
2226         s->map_cmdline_to_pid = kmalloc_array(val,
2227                                               sizeof(*s->map_cmdline_to_pid),
2228                                               GFP_KERNEL);
2229         if (!s->map_cmdline_to_pid)
2230                 return -ENOMEM;
2231
2232         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2233         if (!s->saved_cmdlines) {
2234                 kfree(s->map_cmdline_to_pid);
2235                 return -ENOMEM;
2236         }
2237
2238         s->cmdline_idx = 0;
2239         s->cmdline_num = val;
2240         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2241                sizeof(s->map_pid_to_cmdline));
2242         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2243                val * sizeof(*s->map_cmdline_to_pid));
2244
2245         return 0;
2246 }
2247
2248 static int trace_create_savedcmd(void)
2249 {
2250         int ret;
2251
2252         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2253         if (!savedcmd)
2254                 return -ENOMEM;
2255
2256         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2257         if (ret < 0) {
2258                 kfree(savedcmd);
2259                 savedcmd = NULL;
2260                 return -ENOMEM;
2261         }
2262
2263         return 0;
2264 }
2265
2266 int is_tracing_stopped(void)
2267 {
2268         return global_trace.stop_count;
2269 }
2270
2271 /**
2272  * tracing_start - quick start of the tracer
2273  *
2274  * If tracing is enabled but was stopped by tracing_stop,
2275  * this will start the tracer back up.
2276  */
2277 void tracing_start(void)
2278 {
2279         struct trace_buffer *buffer;
2280         unsigned long flags;
2281
2282         if (tracing_disabled)
2283                 return;
2284
2285         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2286         if (--global_trace.stop_count) {
2287                 if (global_trace.stop_count < 0) {
2288                         /* Someone screwed up their debugging */
2289                         WARN_ON_ONCE(1);
2290                         global_trace.stop_count = 0;
2291                 }
2292                 goto out;
2293         }
2294
2295         /* Prevent the buffers from switching */
2296         arch_spin_lock(&global_trace.max_lock);
2297
2298         buffer = global_trace.array_buffer.buffer;
2299         if (buffer)
2300                 ring_buffer_record_enable(buffer);
2301
2302 #ifdef CONFIG_TRACER_MAX_TRACE
2303         buffer = global_trace.max_buffer.buffer;
2304         if (buffer)
2305                 ring_buffer_record_enable(buffer);
2306 #endif
2307
2308         arch_spin_unlock(&global_trace.max_lock);
2309
2310  out:
2311         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2312 }
2313
2314 static void tracing_start_tr(struct trace_array *tr)
2315 {
2316         struct trace_buffer *buffer;
2317         unsigned long flags;
2318
2319         if (tracing_disabled)
2320                 return;
2321
2322         /* If global, we need to also start the max tracer */
2323         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2324                 return tracing_start();
2325
2326         raw_spin_lock_irqsave(&tr->start_lock, flags);
2327
2328         if (--tr->stop_count) {
2329                 if (tr->stop_count < 0) {
2330                         /* Someone screwed up their debugging */
2331                         WARN_ON_ONCE(1);
2332                         tr->stop_count = 0;
2333                 }
2334                 goto out;
2335         }
2336
2337         buffer = tr->array_buffer.buffer;
2338         if (buffer)
2339                 ring_buffer_record_enable(buffer);
2340
2341  out:
2342         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2343 }
2344
2345 /**
2346  * tracing_stop - quick stop of the tracer
2347  *
2348  * Light weight way to stop tracing. Use in conjunction with
2349  * tracing_start.
2350  */
2351 void tracing_stop(void)
2352 {
2353         struct trace_buffer *buffer;
2354         unsigned long flags;
2355
2356         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2357         if (global_trace.stop_count++)
2358                 goto out;
2359
2360         /* Prevent the buffers from switching */
2361         arch_spin_lock(&global_trace.max_lock);
2362
2363         buffer = global_trace.array_buffer.buffer;
2364         if (buffer)
2365                 ring_buffer_record_disable(buffer);
2366
2367 #ifdef CONFIG_TRACER_MAX_TRACE
2368         buffer = global_trace.max_buffer.buffer;
2369         if (buffer)
2370                 ring_buffer_record_disable(buffer);
2371 #endif
2372
2373         arch_spin_unlock(&global_trace.max_lock);
2374
2375  out:
2376         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2377 }
2378
2379 static void tracing_stop_tr(struct trace_array *tr)
2380 {
2381         struct trace_buffer *buffer;
2382         unsigned long flags;
2383
2384         /* If global, we need to also stop the max tracer */
2385         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2386                 return tracing_stop();
2387
2388         raw_spin_lock_irqsave(&tr->start_lock, flags);
2389         if (tr->stop_count++)
2390                 goto out;
2391
2392         buffer = tr->array_buffer.buffer;
2393         if (buffer)
2394                 ring_buffer_record_disable(buffer);
2395
2396  out:
2397         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2398 }
2399
2400 static int trace_save_cmdline(struct task_struct *tsk)
2401 {
2402         unsigned tpid, idx;
2403
2404         /* treat recording of idle task as a success */
2405         if (!tsk->pid)
2406                 return 1;
2407
2408         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2409
2410         /*
2411          * It's not the end of the world if we don't get
2412          * the lock, but we also don't want to spin
2413          * nor do we want to disable interrupts,
2414          * so if we miss here, then better luck next time.
2415          */
2416         if (!arch_spin_trylock(&trace_cmdline_lock))
2417                 return 0;
2418
2419         idx = savedcmd->map_pid_to_cmdline[tpid];
2420         if (idx == NO_CMDLINE_MAP) {
2421                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2422
2423                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2424                 savedcmd->cmdline_idx = idx;
2425         }
2426
2427         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2428         set_cmdline(idx, tsk->comm);
2429
2430         arch_spin_unlock(&trace_cmdline_lock);
2431
2432         return 1;
2433 }
2434
2435 static void __trace_find_cmdline(int pid, char comm[])
2436 {
2437         unsigned map;
2438         int tpid;
2439
2440         if (!pid) {
2441                 strcpy(comm, "<idle>");
2442                 return;
2443         }
2444
2445         if (WARN_ON_ONCE(pid < 0)) {
2446                 strcpy(comm, "<XXX>");
2447                 return;
2448         }
2449
2450         tpid = pid & (PID_MAX_DEFAULT - 1);
2451         map = savedcmd->map_pid_to_cmdline[tpid];
2452         if (map != NO_CMDLINE_MAP) {
2453                 tpid = savedcmd->map_cmdline_to_pid[map];
2454                 if (tpid == pid) {
2455                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2456                         return;
2457                 }
2458         }
2459         strcpy(comm, "<...>");
2460 }
2461
2462 void trace_find_cmdline(int pid, char comm[])
2463 {
2464         preempt_disable();
2465         arch_spin_lock(&trace_cmdline_lock);
2466
2467         __trace_find_cmdline(pid, comm);
2468
2469         arch_spin_unlock(&trace_cmdline_lock);
2470         preempt_enable();
2471 }
2472
2473 static int *trace_find_tgid_ptr(int pid)
2474 {
2475         /*
2476          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2477          * if we observe a non-NULL tgid_map then we also observe the correct
2478          * tgid_map_max.
2479          */
2480         int *map = smp_load_acquire(&tgid_map);
2481
2482         if (unlikely(!map || pid > tgid_map_max))
2483                 return NULL;
2484
2485         return &map[pid];
2486 }
2487
2488 int trace_find_tgid(int pid)
2489 {
2490         int *ptr = trace_find_tgid_ptr(pid);
2491
2492         return ptr ? *ptr : 0;
2493 }
2494
2495 static int trace_save_tgid(struct task_struct *tsk)
2496 {
2497         int *ptr;
2498
2499         /* treat recording of idle task as a success */
2500         if (!tsk->pid)
2501                 return 1;
2502
2503         ptr = trace_find_tgid_ptr(tsk->pid);
2504         if (!ptr)
2505                 return 0;
2506
2507         *ptr = tsk->tgid;
2508         return 1;
2509 }
2510
2511 static bool tracing_record_taskinfo_skip(int flags)
2512 {
2513         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2514                 return true;
2515         if (!__this_cpu_read(trace_taskinfo_save))
2516                 return true;
2517         return false;
2518 }
2519
2520 /**
2521  * tracing_record_taskinfo - record the task info of a task
2522  *
2523  * @task:  task to record
2524  * @flags: TRACE_RECORD_CMDLINE for recording comm
2525  *         TRACE_RECORD_TGID for recording tgid
2526  */
2527 void tracing_record_taskinfo(struct task_struct *task, int flags)
2528 {
2529         bool done;
2530
2531         if (tracing_record_taskinfo_skip(flags))
2532                 return;
2533
2534         /*
2535          * Record as much task information as possible. If some fail, continue
2536          * to try to record the others.
2537          */
2538         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2539         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2540
2541         /* If recording any information failed, retry again soon. */
2542         if (!done)
2543                 return;
2544
2545         __this_cpu_write(trace_taskinfo_save, false);
2546 }
2547
2548 /**
2549  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2550  *
2551  * @prev: previous task during sched_switch
2552  * @next: next task during sched_switch
2553  * @flags: TRACE_RECORD_CMDLINE for recording comm
2554  *         TRACE_RECORD_TGID for recording tgid
2555  */
2556 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2557                                           struct task_struct *next, int flags)
2558 {
2559         bool done;
2560
2561         if (tracing_record_taskinfo_skip(flags))
2562                 return;
2563
2564         /*
2565          * Record as much task information as possible. If some fail, continue
2566          * to try to record the others.
2567          */
2568         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2569         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2570         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2571         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2572
2573         /* If recording any information failed, retry again soon. */
2574         if (!done)
2575                 return;
2576
2577         __this_cpu_write(trace_taskinfo_save, false);
2578 }
2579
2580 /* Helpers to record a specific task information */
2581 void tracing_record_cmdline(struct task_struct *task)
2582 {
2583         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2584 }
2585
2586 void tracing_record_tgid(struct task_struct *task)
2587 {
2588         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2589 }
2590
2591 /*
2592  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2593  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2594  * simplifies those functions and keeps them in sync.
2595  */
2596 enum print_line_t trace_handle_return(struct trace_seq *s)
2597 {
2598         return trace_seq_has_overflowed(s) ?
2599                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2600 }
2601 EXPORT_SYMBOL_GPL(trace_handle_return);
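/*
 * Illustrative sketch (not part of this file, hypothetical handler): an
 * event's print callback typically ends with this helper, e.g.:
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: ...\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */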
2602
2603 static unsigned short migration_disable_value(void)
2604 {
2605 #if defined(CONFIG_SMP)
2606         return current->migration_disabled;
2607 #else
2608         return 0;
2609 #endif
2610 }
2611
2612 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2613 {
2614         unsigned int trace_flags = irqs_status;
2615         unsigned int pc;
2616
2617         pc = preempt_count();
2618
2619         if (pc & NMI_MASK)
2620                 trace_flags |= TRACE_FLAG_NMI;
2621         if (pc & HARDIRQ_MASK)
2622                 trace_flags |= TRACE_FLAG_HARDIRQ;
2623         if (in_serving_softirq())
2624                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2625         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2626                 trace_flags |= TRACE_FLAG_BH_OFF;
2627
2628         if (tif_need_resched())
2629                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630         if (test_preempt_need_resched())
2631                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634 }
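/*
 * Layout note: the word returned above packs the context as
 *   bits  0-3: preempt count (clamped to 15)
 *   bits  4-7: migration-disable depth (clamped to 15)
 *   bits 16+ : the TRACE_FLAG_* bits assembled above
 * so a single unsigned int carries everything the event header needs.
 */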
2635
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638                           int type,
2639                           unsigned long len,
2640                           unsigned int trace_ctx)
2641 {
2642         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643 }
2644
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648
2649 /**
2650  * trace_buffered_event_enable - enable buffering events
2651  *
2652  * When events are being filtered, it is quicker to use a temporary
2653  * buffer to write the event data into if there's a likely chance
2654  * that it will not be committed. Discarding an event from the ring
2655  * buffer is not as fast as committing one, and is much slower than
2656  * copying the data and then committing the copy.
2657  *
2658  * When an event is to be filtered, allocate per cpu buffers to
2659  * write the event data into, and if the event is filtered and discarded
2660  * it is simply dropped, otherwise, the entire data is to be committed
2661  * in one shot.
2662  */
2663 void trace_buffered_event_enable(void)
2664 {
2665         struct ring_buffer_event *event;
2666         struct page *page;
2667         int cpu;
2668
2669         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670
2671         if (trace_buffered_event_ref++)
2672                 return;
2673
2674         for_each_tracing_cpu(cpu) {
2675                 page = alloc_pages_node(cpu_to_node(cpu),
2676                                         GFP_KERNEL | __GFP_NORETRY, 0);
2677                 if (!page)
2678                         goto failed;
2679
2680                 event = page_address(page);
2681                 memset(event, 0, sizeof(*event));
2682
2683                 per_cpu(trace_buffered_event, cpu) = event;
2684
2685                 preempt_disable();
2686                 if (cpu == smp_processor_id() &&
2687                     __this_cpu_read(trace_buffered_event) !=
2688                     per_cpu(trace_buffered_event, cpu))
2689                         WARN_ON_ONCE(1);
2690                 preempt_enable();
2691         }
2692
2693         return;
2694  failed:
2695         trace_buffered_event_disable();
2696 }
2697
2698 static void enable_trace_buffered_event(void *data)
2699 {
2700         /* Probably not needed, but do it anyway */
2701         smp_rmb();
2702         this_cpu_dec(trace_buffered_event_cnt);
2703 }
2704
2705 static void disable_trace_buffered_event(void *data)
2706 {
2707         this_cpu_inc(trace_buffered_event_cnt);
2708 }
2709
2710 /**
2711  * trace_buffered_event_disable - disable buffering events
2712  *
2713  * When a filter is removed, it is faster to not use the buffered
2714  * events, and to commit directly into the ring buffer. Free up
2715  * the temp buffers when there are no more users. This requires
2716  * special synchronization with current events.
2717  */
2718 void trace_buffered_event_disable(void)
2719 {
2720         int cpu;
2721
2722         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723
2724         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725                 return;
2726
2727         if (--trace_buffered_event_ref)
2728                 return;
2729
2730         preempt_disable();
2731         /* For each CPU, set the buffer as used. */
2732         smp_call_function_many(tracing_buffer_mask,
2733                                disable_trace_buffered_event, NULL, 1);
2734         preempt_enable();
2735
2736         /* Wait for all current users to finish */
2737         synchronize_rcu();
2738
2739         for_each_tracing_cpu(cpu) {
2740                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741                 per_cpu(trace_buffered_event, cpu) = NULL;
2742         }
2743         /*
2744          * Make sure trace_buffered_event is NULL before clearing
2745          * trace_buffered_event_cnt.
2746          */
2747         smp_wmb();
2748
2749         preempt_disable();
2750         /* Do the work on each cpu */
2751         smp_call_function_many(tracing_buffer_mask,
2752                                enable_trace_buffered_event, NULL, 1);
2753         preempt_enable();
2754 }
2755
2756 static struct trace_buffer *temp_buffer;
2757
2758 struct ring_buffer_event *
2759 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760                           struct trace_event_file *trace_file,
2761                           int type, unsigned long len,
2762                           unsigned int trace_ctx)
2763 {
2764         struct ring_buffer_event *entry;
2765         struct trace_array *tr = trace_file->tr;
2766         int val;
2767
2768         *current_rb = tr->array_buffer.buffer;
2769
2770         if (!tr->no_filter_buffering_ref &&
2771             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2772                 preempt_disable_notrace();
2773                 /*
2774                  * Filtering is on, so try to use the per cpu buffer first.
2775                  * This buffer will simulate a ring_buffer_event,
2776                  * where the type_len is zero and the array[0] will
2777                  * hold the full length.
2778                  * (see include/linux/ring_buffer.h for details on
2779                  *  how the ring_buffer_event is structured).
2780                  *
2781                  * Using a temp buffer during filtering and copying it
2782                  * on a matched filter is quicker than writing directly
2783                  * into the ring buffer and then discarding it when
2784                  * it doesn't match. That is because the discard
2785                  * requires several atomic operations to get right.
2786                  * Copying on a match and doing nothing on a failed match
2787                  * is still quicker than skipping the copy but then having
2788                  * to discard from the ring buffer on a failed match.
2789                  */
2790                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2791                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2792
2793                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2794
2795                         /*
2796                          * Preemption is disabled, but interrupts and NMIs
2797                          * can still come in now. If that happens after
2798                          * the above increment, then it will have to go
2799                          * back to the old method of allocating the event
2800                          * on the ring buffer, and if the filter fails, it
2801                          * will have to call ring_buffer_discard_commit()
2802                          * to remove it.
2803                          *
2804                          * Need to also check the unlikely case that the
2805                          * length is bigger than the temp buffer size.
2806                          * If that happens, then the reserve is pretty much
2807                          * guaranteed to fail, as the ring buffer currently
2808                          * only allows events less than a page. But that may
2809                          * change in the future, so let the ring buffer reserve
2810                          * handle the failure in that case.
2811                          */
2812                         if (val == 1 && likely(len <= max_len)) {
2813                                 trace_event_setup(entry, type, trace_ctx);
2814                                 entry->array[0] = len;
2815                                 /* Return with preemption disabled */
2816                                 return entry;
2817                         }
2818                         this_cpu_dec(trace_buffered_event_cnt);
2819                 }
2820                 /* __trace_buffer_lock_reserve() disables preemption */
2821                 preempt_enable_notrace();
2822         }
2823
2824         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825                                             trace_ctx);
2826         /*
2827          * If tracing is off, but we have triggers enabled
2828          * we still need to look at the event data. Use the temp_buffer
2829          * to store the trace event for the trigger to use. It's recursive
2830          * safe and will not be recorded anywhere.
2831          */
2832         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2833                 *current_rb = temp_buffer;
2834                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2835                                                     trace_ctx);
2836         }
2837         return entry;
2838 }
2839 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2840
2841 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2842 static DEFINE_MUTEX(tracepoint_printk_mutex);
2843
2844 static void output_printk(struct trace_event_buffer *fbuffer)
2845 {
2846         struct trace_event_call *event_call;
2847         struct trace_event_file *file;
2848         struct trace_event *event;
2849         unsigned long flags;
2850         struct trace_iterator *iter = tracepoint_print_iter;
2851
2852         /* We should never get here if iter is NULL */
2853         if (WARN_ON_ONCE(!iter))
2854                 return;
2855
2856         event_call = fbuffer->trace_file->event_call;
2857         if (!event_call || !event_call->event.funcs ||
2858             !event_call->event.funcs->trace)
2859                 return;
2860
2861         file = fbuffer->trace_file;
2862         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2863             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2864              !filter_match_preds(file->filter, fbuffer->entry)))
2865                 return;
2866
2867         event = &fbuffer->trace_file->event_call->event;
2868
2869         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2870         trace_seq_init(&iter->seq);
2871         iter->ent = fbuffer->entry;
2872         event_call->event.funcs->trace(iter, 0, event);
2873         trace_seq_putc(&iter->seq, 0);
2874         printk("%s", iter->seq.buffer);
2875
2876         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2877 }
2878
2879 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2880                              void *buffer, size_t *lenp,
2881                              loff_t *ppos)
2882 {
2883         int save_tracepoint_printk;
2884         int ret;
2885
2886         mutex_lock(&tracepoint_printk_mutex);
2887         save_tracepoint_printk = tracepoint_printk;
2888
2889         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2890
2891         /*
2892          * This will force exiting early, as tracepoint_printk
2893          * is always zero when tracepoint_print_iter is not allocated.
2894          */
2895         if (!tracepoint_print_iter)
2896                 tracepoint_printk = 0;
2897
2898         if (save_tracepoint_printk == tracepoint_printk)
2899                 goto out;
2900
2901         if (tracepoint_printk)
2902                 static_key_enable(&tracepoint_printk_key.key);
2903         else
2904                 static_key_disable(&tracepoint_printk_key.key);
2905
2906  out:
2907         mutex_unlock(&tracepoint_printk_mutex);
2908
2909         return ret;
2910 }
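/*
 * Usage note (sketch): this handler is wired up as the proc handler for the
 * tracepoint_printk sysctl (kernel.tracepoint_printk), so run-time toggling
 * is typically done with something like
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * which flips the static key that gates output_printk() above.
 */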
2911
2912 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2913 {
2914         enum event_trigger_type tt = ETT_NONE;
2915         struct trace_event_file *file = fbuffer->trace_file;
2916
2917         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2918                         fbuffer->entry, &tt))
2919                 goto discard;
2920
2921         if (static_key_false(&tracepoint_printk_key.key))
2922                 output_printk(fbuffer);
2923
2924         if (static_branch_unlikely(&trace_event_exports_enabled))
2925                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2926
2927         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2928                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2929
2930 discard:
2931         if (tt)
2932                 event_triggers_post_call(file, tt);
2933
2934 }
2935 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2936
2937 /*
2938  * Skip 3:
2939  *
2940  *   trace_buffer_unlock_commit_regs()
2941  *   trace_event_buffer_commit()
2942  *   trace_event_raw_event_xxx()
2943  */
2944 # define STACK_SKIP 3
2945
2946 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2947                                      struct trace_buffer *buffer,
2948                                      struct ring_buffer_event *event,
2949                                      unsigned int trace_ctx,
2950                                      struct pt_regs *regs)
2951 {
2952         __buffer_unlock_commit(buffer, event);
2953
2954         /*
2955          * If regs is not set, then skip the necessary functions.
2956          * Note, we can still get here via blktrace, wakeup tracer
2957          * and mmiotrace, but that's ok if they lose a function or
2958          * two. They are not that meaningful.
2959          */
2960         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2961         ftrace_trace_userstack(tr, buffer, trace_ctx);
2962 }
2963
2964 /*
2965  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2966  */
2967 void
2968 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2969                                    struct ring_buffer_event *event)
2970 {
2971         __buffer_unlock_commit(buffer, event);
2972 }
2973
2974 void
2975 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2976                parent_ip, unsigned int trace_ctx)
2977 {
2978         struct trace_event_call *call = &event_function;
2979         struct trace_buffer *buffer = tr->array_buffer.buffer;
2980         struct ring_buffer_event *event;
2981         struct ftrace_entry *entry;
2982
2983         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2984                                             trace_ctx);
2985         if (!event)
2986                 return;
2987         entry   = ring_buffer_event_data(event);
2988         entry->ip                       = ip;
2989         entry->parent_ip                = parent_ip;
2990
2991         if (!call_filter_check_discard(call, entry, buffer, event)) {
2992                 if (static_branch_unlikely(&trace_function_exports_enabled))
2993                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2994                 __buffer_unlock_commit(buffer, event);
2995         }
2996 }
2997
2998 #ifdef CONFIG_STACKTRACE
2999
3000 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3001 #define FTRACE_KSTACK_NESTING   4
3002
3003 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3004
3005 struct ftrace_stack {
3006         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3007 };
3008
3009
3010 struct ftrace_stacks {
3011         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3012 };
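/*
 * Sizing note (assuming 4K pages and 64-bit longs): FTRACE_KSTACK_ENTRIES is
 * 4096 / 4 = 1024 entries, so each ftrace_stack is 8 KB and the four nesting
 * levels add up to 32 KB of per-CPU stack-trace storage.
 */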
3013
3014 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3015 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3016
3017 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3018                                  unsigned int trace_ctx,
3019                                  int skip, struct pt_regs *regs)
3020 {
3021         struct trace_event_call *call = &event_kernel_stack;
3022         struct ring_buffer_event *event;
3023         unsigned int size, nr_entries;
3024         struct ftrace_stack *fstack;
3025         struct stack_entry *entry;
3026         int stackidx;
3027
3028         /*
3029          * Add one, for this function and the call to save_stack_trace().
3030          * If regs is set, then these functions will not be in the way.
3031          */
3032 #ifndef CONFIG_UNWINDER_ORC
3033         if (!regs)
3034                 skip++;
3035 #endif
3036
3037         preempt_disable_notrace();
3038
3039         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3040
3041         /* This should never happen. If it does, yell once and skip */
3042         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3043                 goto out;
3044
3045         /*
3046          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3047          * interrupt will either see the value pre increment or post
3048          * increment. If the interrupt happens pre increment it will have
3049          * restored the counter when it returns.  We just need a barrier to
3050          * keep gcc from moving things around.
3051          */
3052         barrier();
3053
3054         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3055         size = ARRAY_SIZE(fstack->calls);
3056
3057         if (regs) {
3058                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3059                                                    size, skip);
3060         } else {
3061                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3062         }
3063
3064         size = nr_entries * sizeof(unsigned long);
3065         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3066                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3067                                     trace_ctx);
3068         if (!event)
3069                 goto out;
3070         entry = ring_buffer_event_data(event);
3071
3072         memcpy(&entry->caller, fstack->calls, size);
3073         entry->size = nr_entries;
3074
3075         if (!call_filter_check_discard(call, entry, buffer, event))
3076                 __buffer_unlock_commit(buffer, event);
3077
3078  out:
3079         /* Again, don't let gcc optimize things here */
3080         barrier();
3081         __this_cpu_dec(ftrace_stack_reserve);
3082         preempt_enable_notrace();
3083
3084 }
3085
3086 static inline void ftrace_trace_stack(struct trace_array *tr,
3087                                       struct trace_buffer *buffer,
3088                                       unsigned int trace_ctx,
3089                                       int skip, struct pt_regs *regs)
3090 {
3091         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3092                 return;
3093
3094         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3095 }
3096
3097 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3098                    int skip)
3099 {
3100         struct trace_buffer *buffer = tr->array_buffer.buffer;
3101
3102         if (rcu_is_watching()) {
3103                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104                 return;
3105         }
3106
3107         /*
3108          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3109          * but if the above rcu_is_watching() failed, then the NMI
3110          * triggered someplace critical, and ct_irq_enter() should
3111          * not be called from NMI.
3112          */
3113         if (unlikely(in_nmi()))
3114                 return;
3115
3116         ct_irq_enter_irqson();
3117         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3118         ct_irq_exit_irqson();
3119 }
3120
3121 /**
3122  * trace_dump_stack - record a stack back trace in the trace buffer
3123  * @skip: Number of functions to skip (helper handlers)
3124  */
3125 void trace_dump_stack(int skip)
3126 {
3127         if (tracing_disabled || tracing_selftest_running)
3128                 return;
3129
3130 #ifndef CONFIG_UNWINDER_ORC
3131         /* Skip 1 to skip this function. */
3132         skip++;
3133 #endif
3134         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3135                              tracing_gen_ctx(), skip, NULL);
3136 }
3137 EXPORT_SYMBOL_GPL(trace_dump_stack);
3138
3139 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3140 static DEFINE_PER_CPU(int, user_stack_count);
3141
3142 static void
3143 ftrace_trace_userstack(struct trace_array *tr,
3144                        struct trace_buffer *buffer, unsigned int trace_ctx)
3145 {
3146         struct trace_event_call *call = &event_user_stack;
3147         struct ring_buffer_event *event;
3148         struct userstack_entry *entry;
3149
3150         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3151                 return;
3152
3153         /*
3154          * NMIs cannot handle page faults, even with fixups.
3155          * Saving the user stack can (and often does) fault.
3156          */
3157         if (unlikely(in_nmi()))
3158                 return;
3159
3160         /*
3161          * prevent recursion, since the user stack tracing may
3162          * trigger other kernel events.
3163          */
3164         preempt_disable();
3165         if (__this_cpu_read(user_stack_count))
3166                 goto out;
3167
3168         __this_cpu_inc(user_stack_count);
3169
3170         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3171                                             sizeof(*entry), trace_ctx);
3172         if (!event)
3173                 goto out_drop_count;
3174         entry   = ring_buffer_event_data(event);
3175
3176         entry->tgid             = current->tgid;
3177         memset(&entry->caller, 0, sizeof(entry->caller));
3178
3179         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3180         if (!call_filter_check_discard(call, entry, buffer, event))
3181                 __buffer_unlock_commit(buffer, event);
3182
3183  out_drop_count:
3184         __this_cpu_dec(user_stack_count);
3185  out:
3186         preempt_enable();
3187 }
3188 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3189 static void ftrace_trace_userstack(struct trace_array *tr,
3190                                    struct trace_buffer *buffer,
3191                                    unsigned int trace_ctx)
3192 {
3193 }
3194 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3195
3196 #endif /* CONFIG_STACKTRACE */
3197
3198 static inline void
3199 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3200                           unsigned long long delta)
3201 {
3202         entry->bottom_delta_ts = delta & U32_MAX;
3203         entry->top_delta_ts = (delta >> 32);
3204 }
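/*
 * Worked example: a delta of 0x100000005 ns is stored as
 * bottom_delta_ts = 0x00000005 and top_delta_ts = 0x00000001, and readers
 * reassemble it as ((u64)top << 32) | bottom.
 */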
3205
3206 void trace_last_func_repeats(struct trace_array *tr,
3207                              struct trace_func_repeats *last_info,
3208                              unsigned int trace_ctx)
3209 {
3210         struct trace_buffer *buffer = tr->array_buffer.buffer;
3211         struct func_repeats_entry *entry;
3212         struct ring_buffer_event *event;
3213         u64 delta;
3214
3215         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3216                                             sizeof(*entry), trace_ctx);
3217         if (!event)
3218                 return;
3219
3220         delta = ring_buffer_event_time_stamp(buffer, event) -
3221                 last_info->ts_last_call;
3222
3223         entry = ring_buffer_event_data(event);
3224         entry->ip = last_info->ip;
3225         entry->parent_ip = last_info->parent_ip;
3226         entry->count = last_info->count;
3227         func_repeats_set_delta_ts(entry, delta);
3228
3229         __buffer_unlock_commit(buffer, event);
3230 }
3231
3232 /* created for use with alloc_percpu */
3233 struct trace_buffer_struct {
3234         int nesting;
3235         char buffer[4][TRACE_BUF_SIZE];
3236 };
3237
3238 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3239
3240 /*
3241  * This allows for lockless recording.  If we're nested too deeply, then
3242  * this returns NULL.
3243  */
3244 static char *get_trace_buf(void)
3245 {
3246         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3247
3248         if (!trace_percpu_buffer || buffer->nesting >= 4)
3249                 return NULL;
3250
3251         buffer->nesting++;
3252
3253         /* Interrupts must see nesting incremented before we use the buffer */
3254         barrier();
3255         return &buffer->buffer[buffer->nesting - 1][0];
3256 }
3257
3258 static void put_trace_buf(void)
3259 {
3260         /* Don't let the decrement of nesting leak before this */
3261         barrier();
3262         this_cpu_dec(trace_percpu_buffer->nesting);
3263 }
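
/*
 * Illustrative sketch of the intended pairing: users bracket their
 * formatting between get_trace_buf() and put_trace_buf() with preemption
 * disabled, roughly as __trace_array_vprintk() below does:
 *
 *	buf = get_trace_buf();
 *	if (buf) {
 *		len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		put_trace_buf();
 *	}
 *
 * The four nesting levels are meant to cover normal, softirq, irq and
 * NMI context on the same CPU.
 */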
3264
3265 static int alloc_percpu_trace_buffer(void)
3266 {
3267         struct trace_buffer_struct __percpu *buffers;
3268
3269         if (trace_percpu_buffer)
3270                 return 0;
3271
3272         buffers = alloc_percpu(struct trace_buffer_struct);
3273         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3274                 return -ENOMEM;
3275
3276         trace_percpu_buffer = buffers;
3277         return 0;
3278 }
3279
3280 static int buffers_allocated;
3281
3282 void trace_printk_init_buffers(void)
3283 {
3284         if (buffers_allocated)
3285                 return;
3286
3287         if (alloc_percpu_trace_buffer())
3288                 return;
3289
3290         /* trace_printk() is for debug use only. Don't use it in production. */
3291
3292         pr_warn("\n");
3293         pr_warn("**********************************************************\n");
3294         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3295         pr_warn("**                                                      **\n");
3296         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3297         pr_warn("**                                                      **\n");
3298         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3299         pr_warn("** unsafe for production use.                           **\n");
3300         pr_warn("**                                                      **\n");
3301         pr_warn("** If you see this message and you are not debugging    **\n");
3302         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3303         pr_warn("**                                                      **\n");
3304         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3305         pr_warn("**********************************************************\n");
3306
3307         /* Expand the buffers to set size */
3308         tracing_update_buffers();
3309
3310         buffers_allocated = 1;
3311
3312         /*
3313          * trace_printk_init_buffers() can be called by modules.
3314          * If that happens, then we need to start cmdline recording
3315          * directly here. If the global_trace.array_buffer.buffer is
3316          * already allocated here, then this was called by module code.
3317          */
3318         if (global_trace.array_buffer.buffer)
3319                 tracing_start_cmdline_record();
3320 }
3321 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
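
/*
 * Illustrative sketch: as the comment above notes, this can be called by
 * modules.  A module init function could do, for example:
 *
 *	static int __init example_init(void)
 *	{
 *		trace_printk_init_buffers();
 *		return 0;
 *	}
 *
 * "example_init" is a hypothetical name used only for illustration.
 */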
3322
3323 void trace_printk_start_comm(void)
3324 {
3325         /* Start tracing comms if trace printk is set */
3326         if (!buffers_allocated)
3327                 return;
3328         tracing_start_cmdline_record();
3329 }
3330
3331 static void trace_printk_start_stop_comm(int enabled)
3332 {
3333         if (!buffers_allocated)
3334                 return;
3335
3336         if (enabled)
3337                 tracing_start_cmdline_record();
3338         else
3339                 tracing_stop_cmdline_record();
3340 }
3341
3342 /**
3343  * trace_vbprintk - write binary msg to tracing buffer
3344  * @ip:    The address of the caller
3345  * @fmt:   The string format to write to the buffer
3346  * @args:  Arguments for @fmt
3347  */
3348 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3349 {
3350         struct trace_event_call *call = &event_bprint;
3351         struct ring_buffer_event *event;
3352         struct trace_buffer *buffer;
3353         struct trace_array *tr = &global_trace;
3354         struct bprint_entry *entry;
3355         unsigned int trace_ctx;
3356         char *tbuffer;
3357         int len = 0, size;
3358
3359         if (unlikely(tracing_selftest_running || tracing_disabled))
3360                 return 0;
3361
3362         /* Don't pollute graph traces with trace_vprintk internals */
3363         pause_graph_tracing();
3364
3365         trace_ctx = tracing_gen_ctx();
3366         preempt_disable_notrace();
3367
3368         tbuffer = get_trace_buf();
3369         if (!tbuffer) {
3370                 len = 0;
3371                 goto out_nobuffer;
3372         }
3373
3374         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375
3376         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377                 goto out_put;
3378
3379         size = sizeof(*entry) + sizeof(u32) * len;
3380         buffer = tr->array_buffer.buffer;
3381         ring_buffer_nest_start(buffer);
3382         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383                                             trace_ctx);
3384         if (!event)
3385                 goto out;
3386         entry = ring_buffer_event_data(event);
3387         entry->ip                       = ip;
3388         entry->fmt                      = fmt;
3389
3390         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391         if (!call_filter_check_discard(call, entry, buffer, event)) {
3392                 __buffer_unlock_commit(buffer, event);
3393                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3394         }
3395
3396 out:
3397         ring_buffer_nest_end(buffer);
3398 out_put:
3399         put_trace_buf();
3400
3401 out_nobuffer:
3402         preempt_enable_notrace();
3403         unpause_graph_tracing();
3404
3405         return len;
3406 }
3407 EXPORT_SYMBOL_GPL(trace_vbprintk);
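
/*
 * Illustrative sketch: trace_vbprintk() is normally reached through a
 * varargs wrapper that forwards its va_list, roughly:
 *
 *	int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 *
 * "example_bprintk" is a hypothetical name; the real entry points are the
 * trace_printk() family of helpers.
 */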
3408
3409 __printf(3, 0)
3410 static int
3411 __trace_array_vprintk(struct trace_buffer *buffer,
3412                       unsigned long ip, const char *fmt, va_list args)
3413 {
3414         struct trace_event_call *call = &event_print;
3415         struct ring_buffer_event *event;
3416         int len = 0, size;
3417         struct print_entry *entry;
3418         unsigned int trace_ctx;
3419         char *tbuffer;
3420
3421         if (tracing_disabled || tracing_selftest_running)
3422                 return 0;
3423
3424         /* Don't pollute graph traces with trace_vprintk internals */
3425         pause_graph_tracing();
3426
3427         trace_ctx = tracing_gen_ctx();
3428         preempt_disable_notrace();
3429
3431         tbuffer = get_trace_buf();
3432         if (!tbuffer) {
3433                 len = 0;
3434                 goto out_nobuffer;
3435         }
3436
3437         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3438
3439         size = sizeof(*entry) + len + 1;
3440         ring_buffer_nest_start(buffer);
3441         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3442                                             trace_ctx);
3443         if (!event)
3444                 goto out;
3445         entry = ring_buffer_event_data(event);
3446         entry->ip = ip;
3447
3448         memcpy(&entry->buf, tbuffer, len + 1);
3449         if (!call_filter_check_discard(call, entry, buffer, event)) {
3450                 __buffer_unlock_commit(buffer, event);
3451                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3452         }
3453
3454 out:
3455         ring_buffer_nest_end(buffer);
3456         put_trace_buf();
3457
3458 out_nobuffer:
3459         preempt_enable_notrace();
3460         unpause_graph_tracing();
3461
3462         return len;
3463 }
3464
3465 __printf(3, 0)
3466 int trace_array_vprintk(struct trace_array *tr,
3467                         unsigned long ip, const char *fmt, va_list args)
3468 {
3469         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3470 }
3471
3472 /**
3473  * trace_array_printk - Print a message to a specific instance
3474  * @tr: The instance trace_array descriptor
3475  * @ip: The instruction pointer that this is called from.
3476  * @fmt: The format to print (printf format)
3477  *
3478  * If a subsystem sets up its own instance, they have the right to
3479  * printk strings into their tracing instance buffer using this
3480  * function. Note, this function will not write into the top level
3481  * buffer (use trace_printk() for that), as writing into the top level
3482  * buffer should only have events that can be individually disabled.
3483  * trace_printk() is only used for debugging a kernel, and should not
3484  * trace_printk() is only used for debugging a kernel, and should never
3485  * be incorporated into normal use.
3486  * trace_array_printk() can be used, as it will not add noise to the
3487  * top level tracing buffer.
3488  *
3489  * Note, trace_array_init_printk() must be called on @tr before this
3490  * can be used.
3491  */
3492 __printf(3, 0)
3493 int trace_array_printk(struct trace_array *tr,
3494                        unsigned long ip, const char *fmt, ...)
3495 {
3496         int ret;
3497         va_list ap;
3498
3499         if (!tr)
3500                 return -ENOENT;
3501
3502         /* This is only allowed for created instances */
3503         if (tr == &global_trace)
3504                 return 0;
3505
3506         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3507                 return 0;
3508
3509         va_start(ap, fmt);
3510         ret = trace_array_vprintk(tr, ip, fmt, ap);
3511         va_end(ap);
3512         return ret;
3513 }
3514 EXPORT_SYMBOL_GPL(trace_array_printk);
3515
3516 /**
3517  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3518  * @tr: The trace array to initialize the buffers for
3519  *
3520  * As trace_array_printk() only writes into instances, calls to it are OK
3521  * to have in the kernel (unlike trace_printk()). This needs to be called
3522  * before trace_array_printk() can be used on a trace_array.
3523  */
3524 int trace_array_init_printk(struct trace_array *tr)
3525 {
3526         if (!tr)
3527                 return -ENOENT;
3528
3529         /* This is only allowed for created instances */
3530         if (tr == &global_trace)
3531                 return -EINVAL;
3532
3533         return alloc_percpu_trace_buffer();
3534 }
3535 EXPORT_SYMBOL_GPL(trace_array_init_printk);
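
/*
 * Illustrative sketch tying the two calls above together for a subsystem
 * with its own trace instance.  It assumes "tr" was obtained elsewhere
 * (for example via trace_array_get_by_name(), whose exact signature
 * varies between kernel versions):
 *
 *	if (trace_array_init_printk(tr))
 *		return;
 *	trace_array_printk(tr, _THIS_IP_, "widget %d ready\n", id);
 *
 * "widget" and "id" are placeholders for the subsystem's own data.
 */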
3536
3537 __printf(3, 4)
3538 int trace_array_printk_buf(struct trace_buffer *buffer,
3539                            unsigned long ip, const char *fmt, ...)
3540 {
3541         int ret;
3542         va_list ap;
3543
3544         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3545                 return 0;
3546
3547         va_start(ap, fmt);
3548         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3549         va_end(ap);
3550         return ret;
3551 }
3552
3553 __printf(2, 0)
3554 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3555 {
3556         return trace_array_vprintk(&global_trace, ip, fmt, args);
3557 }
3558 EXPORT_SYMBOL_GPL(trace_vprintk);
3559
3560 static void trace_iterator_increment(struct trace_iterator *iter)
3561 {
3562         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3563
3564         iter->idx++;
3565         if (buf_iter)
3566                 ring_buffer_iter_advance(buf_iter);
3567 }
3568
3569 static struct trace_entry *
3570 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3571                 unsigned long *lost_events)
3572 {
3573         struct ring_buffer_event *event;
3574         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3575
3576         if (buf_iter) {
3577                 event = ring_buffer_iter_peek(buf_iter, ts);
3578                 if (lost_events)
3579                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3580                                 (unsigned long)-1 : 0;
3581         } else {
3582                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3583                                          lost_events);
3584         }
3585
3586         if (event) {
3587                 iter->ent_size = ring_buffer_event_length(event);
3588                 return ring_buffer_event_data(event);
3589         }
3590         iter->ent_size = 0;
3591         return NULL;
3592 }
3593
3594 static struct trace_entry *
3595 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3596                   unsigned long *missing_events, u64 *ent_ts)
3597 {
3598         struct trace_buffer *buffer = iter->array_buffer->buffer;
3599         struct trace_entry *ent, *next = NULL;
3600         unsigned long lost_events = 0, next_lost = 0;
3601         int cpu_file = iter->cpu_file;
3602         u64 next_ts = 0, ts;
3603         int next_cpu = -1;
3604         int next_size = 0;
3605         int cpu;
3606
3607         /*
3608          * If we are in a per_cpu trace file, don't bother iterating over
3609          * all CPUs; peek at that CPU directly.
3610          */
3611         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3612                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3613                         return NULL;
3614                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3615                 if (ent_cpu)
3616                         *ent_cpu = cpu_file;
3617
3618                 return ent;
3619         }
3620
3621         for_each_tracing_cpu(cpu) {
3622
3623                 if (ring_buffer_empty_cpu(buffer, cpu))
3624                         continue;
3625
3626                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3627
3628                 /*
3629                  * Pick the entry with the smallest timestamp:
3630                  */
3631                 if (ent && (!next || ts < next_ts)) {
3632                         next = ent;
3633                         next_cpu = cpu;
3634                         next_ts = ts;
3635                         next_lost = lost_events;
3636                         next_size = iter->ent_size;
3637                 }
3638         }
3639
3640         iter->ent_size = next_size;
3641
3642         if (ent_cpu)
3643                 *ent_cpu = next_cpu;
3644
3645         if (ent_ts)
3646                 *ent_ts = next_ts;
3647
3648         if (missing_events)
3649                 *missing_events = next_lost;
3650
3651         return next;
3652 }
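
/*
 * Note: the loop above is effectively a k-way merge across the per-CPU
 * buffers, always returning the oldest pending entry.  For example, if
 * CPU0's next entry has ts=2000 and CPU1's has ts=1500, the CPU1 entry is
 * returned first and *ent_cpu is set to 1.
 */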
3653
3654 #define STATIC_FMT_BUF_SIZE     128
3655 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3656
3657 static char *trace_iter_expand_format(struct trace_iterator *iter)
3658 {
3659         char *tmp;
3660
3661         /*
3662          * iter->tr is NULL when used with tp_printk, in which case
3663          * this can be called where it is not safe to call krealloc().
3664          */
3665         if (!iter->tr || iter->fmt == static_fmt_buf)
3666                 return NULL;
3667
3668         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3669                        GFP_KERNEL);
3670         if (tmp) {
3671                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3672                 iter->fmt = tmp;
3673         }
3674
3675         return tmp;
3676 }
3677
3678 /* Returns true if the string is safe to dereference from an event */
3679 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3680                            bool star, int len)
3681 {
3682         unsigned long addr = (unsigned long)str;
3683         struct trace_event *trace_event;
3684         struct trace_event_call *event;
3685
3686         /* Ignore strings with no length */
3687         if (star && !len)
3688                 return true;
3689
3690         /* OK if part of the event data */
3691         if ((addr >= (unsigned long)iter->ent) &&
3692             (addr < (unsigned long)iter->ent + iter->ent_size))
3693                 return true;
3694
3695         /* OK if part of the temp seq buffer */
3696         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3697             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3698                 return true;
3699
3700         /* Core rodata can not be freed */
3701         if (is_kernel_rodata(addr))
3702                 return true;
3703
3704         if (trace_is_tracepoint_string(str))
3705                 return true;
3706
3707         /*
3708          * Now this could be a module event, referencing core module
3709          * data, which is OK.
3710          */
3711         if (!iter->ent)
3712                 return false;
3713
3714         trace_event = ftrace_find_event(iter->ent->type);
3715         if (!trace_event)
3716                 return false;
3717
3718         event = container_of(trace_event, struct trace_event_call, event);
3719         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3720                 return false;
3721
3722         /* Would rather have rodata, but this will suffice */
3723         if (within_module_core(addr, event->module))
3724                 return true;
3725
3726         return false;
3727 }
3728
3729 static const char *show_buffer(struct trace_seq *s)
3730 {
3731         struct seq_buf *seq = &s->seq;
3732
3733         seq_buf_terminate(seq);
3734
3735         return seq->buffer;
3736 }
3737
3738 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3739
3740 static int test_can_verify_check(const char *fmt, ...)
3741 {
3742         char buf[16];
3743         va_list ap;
3744         int ret;
3745
3746         /*
3747          * The verifier depends on vsnprintf() modifying the va_list
3748          * passed to it, i.e. on the va_list being passed by reference.
3749          * Some architectures (like x86_32) pass it by value, which means
3750          * that vsnprintf() does not modify the va_list passed to it, and
3751          * the verifier would then need to be able to understand all the
3752          * values that vsnprintf can use. If it is passed by value, the
3753          * verifier is disabled.
3754          */
3755         va_start(ap, fmt);
3756         vsnprintf(buf, 16, "%d", ap);
3757         ret = va_arg(ap, int);
3758         va_end(ap);
3759
3760         return ret;
3761 }
3762
3763 static void test_can_verify(void)
3764 {
3765         if (!test_can_verify_check("%d %d", 0, 1)) {
3766                 pr_info("trace event string verifier disabled\n");
3767                 static_branch_inc(&trace_no_verify);
3768         }
3769 }
3770
3771 /**
3772  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3773  * @iter: The iterator that holds the seq buffer and the event being printed
3774  * @fmt: The format used to print the event
3775  * @ap: The va_list holding the data to print from @fmt.
3776  *
3777  * This writes the data into the @iter->seq buffer using the data from
3778  * @fmt and @ap. If the format has a %s, then the source of the string
3779  * is examined to make sure it is safe to print, otherwise it will
3780  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3781  * pointer.
3782  */
3783 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3784                          va_list ap)
3785 {
3786         const char *p = fmt;
3787         const char *str;
3788         int i, j;
3789
3790         if (WARN_ON_ONCE(!fmt))
3791                 return;
3792
3793         if (static_branch_unlikely(&trace_no_verify))
3794                 goto print;
3795
3796         /* Don't bother checking when doing a ftrace_dump() */
3797         if (iter->fmt == static_fmt_buf)
3798                 goto print;
3799
3800         while (*p) {
3801                 bool star = false;
3802                 int len = 0;
3803
3804                 j = 0;
3805
3806                 /* We only care about %s and variants */
3807                 for (i = 0; p[i]; i++) {
3808                         if (i + 1 >= iter->fmt_size) {
3809                                 /*
3810                                  * If we can't expand the copy buffer,
3811                                  * just print it.
3812                                  */
3813                                 if (!trace_iter_expand_format(iter))
3814                                         goto print;
3815                         }
3816
3817                         if (p[i] == '\\' && p[i+1]) {
3818                                 i++;
3819                                 continue;
3820                         }
3821                         if (p[i] == '%') {
3822                                 /* Need to test cases like %08.*s */
3823                                 for (j = 1; p[i+j]; j++) {
3824                                         if (isdigit(p[i+j]) ||
3825                                             p[i+j] == '.')
3826                                                 continue;
3827                                         if (p[i+j] == '*') {
3828                                                 star = true;
3829                                                 continue;
3830                                         }
3831                                         break;
3832                                 }
3833                                 if (p[i+j] == 's')
3834                                         break;
3835                                 star = false;
3836                         }
3837                         j = 0;
3838                 }
3839                 /* If no %s found then just print normally */
3840                 if (!p[i])
3841                         break;
3842
3843                 /* Copy up to the %s, and print that */
3844                 strncpy(iter->fmt, p, i);
3845                 iter->fmt[i] = '\0';
3846                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3847
3848                 /*
3849                  * If iter->seq is full, the above call no longer guarantees
3850                  * that ap is in sync with fmt processing, and further calls
3851                  * to va_arg() can return wrong positional arguments.
3852                  *
3853                  * Ensure that ap is no longer used in this case.
3854                  */
3855                 if (iter->seq.full) {
3856                         p = "";
3857                         break;
3858                 }
3859
3860                 if (star)
3861                         len = va_arg(ap, int);
3862
3863                 /* The ap now points to the string data of the %s */
3864                 str = va_arg(ap, const char *);
3865
3866                 /*
3867                  * If you hit this warning, it is likely that the
3868                  * trace event in question used %s on a string that
3869                  * was saved at the time of the event, but may not be
3870                  * around when the trace is read. Use __string(),
3871                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3872                  * instead. See samples/trace_events/trace-events-sample.h
3873                  * for reference.
3874                  */
3875                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3876                               "fmt: '%s' current_buffer: '%s'",
3877                               fmt, show_buffer(&iter->seq))) {
3878                         int ret;
3879
3880                         /* Try to safely read the string */
3881                         if (star) {
3882                                 if (len + 1 > iter->fmt_size)
3883                                         len = iter->fmt_size - 1;
3884                                 if (len < 0)
3885                                         len = 0;
3886                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3887                                 iter->fmt[len] = 0;
3888                                 star = false;
3889                         } else {
3890                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3891                                                                   iter->fmt_size);
3892                         }
3893                         if (ret < 0)
3894                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3895                         else
3896                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3897                                                  str, iter->fmt);
3898                         str = "[UNSAFE-MEMORY]";
3899                         strcpy(iter->fmt, "%s");
3900                 } else {
3901                         strncpy(iter->fmt, p + i, j + 1);
3902                         iter->fmt[j+1] = '\0';
3903                 }
3904                 if (star)
3905                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3906                 else
3907                         trace_seq_printf(&iter->seq, iter->fmt, str);
3908
3909                 p += i + j + 1;
3910         }
3911  print:
3912         if (*p)
3913                 trace_seq_vprintf(&iter->seq, p, ap);
3914 }
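
/*
 * Illustrative sketch of the fix suggested by the warning above: copy the
 * string into the event at record time with the __string()/__assign_str()/
 * __get_str() helpers.  The names below are hypothetical; see
 * samples/trace_events/trace-events-sample.h for the real reference:
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */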
3915
3916 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3917 {
3918         const char *p, *new_fmt;
3919         char *q;
3920
3921         if (WARN_ON_ONCE(!fmt))
3922                 return fmt;
3923
3924         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3925                 return fmt;
3926
3927         p = fmt;
3928         new_fmt = q = iter->fmt;
3929         while (*p) {
3930                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3931                         if (!trace_iter_expand_format(iter))
3932                                 return fmt;
3933
3934                         q += iter->fmt - new_fmt;
3935                         new_fmt = iter->fmt;
3936                 }
3937
3938                 *q++ = *p++;
3939
3940                 /* Replace %p with %px */
3941                 if (p[-1] == '%') {
3942                         if (p[0] == '%') {
3943                                 *q++ = *p++;
3944                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3945                                 *q++ = *p++;
3946                                 *q++ = 'x';
3947                         }
3948                 }
3949         }
3950         *q = '\0';
3951
3952         return new_fmt;
3953 }
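
/*
 * Example of the rewrite above when pointer hashing is turned off: a
 * format such as "ptr=%p flags=%%p" becomes "ptr=%px flags=%%p".  Only a
 * bare %p (not extensions like %pS) is expanded; "%%" and all other
 * specifiers pass through untouched.
 */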
3954
3955 #define STATIC_TEMP_BUF_SIZE    128
3956 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3957
3958 /* Find the next real entry, without updating the iterator itself */
3959 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3960                                           int *ent_cpu, u64 *ent_ts)
3961 {
3962         /* __find_next_entry will reset ent_size */
3963         int ent_size = iter->ent_size;
3964         struct trace_entry *entry;
3965
3966         /*
3967          * If called from ftrace_dump(), then the iter->temp buffer
3968          * will be the static_temp_buf and not one created by kmalloc().
3969          * If the entry size is greater than the buffer, we can
3970          * not save it. Just return NULL in that case. This is only
3971          * used to add markers when two consecutive events' time
3972          * stamps have a large delta. See trace_print_lat_context().
3973          */
3974         if (iter->temp == static_temp_buf &&
3975             STATIC_TEMP_BUF_SIZE < ent_size)
3976                 return NULL;
3977
3978         /*
3979          * The __find_next_entry() may call peek_next_entry(), which may
3980          * call ring_buffer_peek() that may make the contents of iter->ent
3981          * undefined. Need to copy iter->ent now.
3982          */
3983         if (iter->ent && iter->ent != iter->temp) {
3984                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3985                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3986                         void *temp;
3987                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3988                         if (!temp)
3989                                 return NULL;
3990                         kfree(iter->temp);
3991                         iter->temp = temp;
3992                         iter->temp_size = iter->ent_size;
3993                 }
3994                 memcpy(iter->temp, iter->ent, iter->ent_size);
3995                 iter->ent = iter->temp;
3996         }
3997         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3998         /* Put back the original ent_size */
3999         iter->ent_size = ent_size;
4000
4001         return entry;
4002 }
4003
4004 /* Find the next real entry, and increment the iterator to the next entry */
4005 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4006 {
4007         iter->ent = __find_next_entry(iter, &iter->cpu,
4008                                       &iter->lost_events, &iter->ts);
4009
4010         if (iter->ent)
4011                 trace_iterator_increment(iter);
4012
4013         return iter->ent ? iter : NULL;
4014 }
4015
4016 static void trace_consume(struct trace_iterator *iter)
4017 {
4018         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4019                             &iter->lost_events);
4020 }
4021
4022 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4023 {
4024         struct trace_iterator *iter = m->private;
4025         int i = (int)*pos;
4026         void *ent;
4027
4028         WARN_ON_ONCE(iter->leftover);
4029
4030         (*pos)++;
4031
4032         /* can't go backwards */
4033         if (iter->idx > i)
4034                 return NULL;
4035
4036         if (iter->idx < 0)
4037                 ent = trace_find_next_entry_inc(iter);
4038         else
4039                 ent = iter;
4040
4041         while (ent && iter->idx < i)
4042                 ent = trace_find_next_entry_inc(iter);
4043
4044         iter->pos = *pos;
4045
4046         return ent;
4047 }
4048
4049 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4050 {
4051         struct ring_buffer_iter *buf_iter;
4052         unsigned long entries = 0;
4053         u64 ts;
4054
4055         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4056
4057         buf_iter = trace_buffer_iter(iter, cpu);
4058         if (!buf_iter)
4059                 return;
4060
4061         ring_buffer_iter_reset(buf_iter);
4062
4063         /*
4064          * With the max latency tracers, it is possible that a reset
4065          * never took place on a cpu. This is evident by the timestamp
4066          * being before the start of the buffer.
4067          */
4068         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4069                 if (ts >= iter->array_buffer->time_start)
4070                         break;
4071                 entries++;
4072                 ring_buffer_iter_advance(buf_iter);
4073         }
4074
4075         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4076 }
4077
4078 /*
4079  * The current tracer is copied to avoid taking a global lock
4080  * all around.
4081  */
4082 static void *s_start(struct seq_file *m, loff_t *pos)
4083 {
4084         struct trace_iterator *iter = m->private;
4085         struct trace_array *tr = iter->tr;
4086         int cpu_file = iter->cpu_file;
4087         void *p = NULL;
4088         loff_t l = 0;
4089         int cpu;
4090
4091         /*
4092          * Copy the tracer to avoid using a global lock all around.
4093          * iter->trace is a copy of current_trace; the name pointer may
4094          * be compared instead of using strcmp(), as iter->trace->name
4095          * will point to the same string as current_trace->name.
4096          */
4097         mutex_lock(&trace_types_lock);
4098         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4099                 *iter->trace = *tr->current_trace;
4100         mutex_unlock(&trace_types_lock);
4101
4102 #ifdef CONFIG_TRACER_MAX_TRACE
4103         if (iter->snapshot && iter->trace->use_max_tr)
4104                 return ERR_PTR(-EBUSY);
4105 #endif
4106
4107         if (*pos != iter->pos) {
4108                 iter->ent = NULL;
4109                 iter->cpu = 0;
4110                 iter->idx = -1;
4111
4112                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4113                         for_each_tracing_cpu(cpu)
4114                                 tracing_iter_reset(iter, cpu);
4115                 } else
4116                         tracing_iter_reset(iter, cpu_file);
4117
4118                 iter->leftover = 0;
4119                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4120                         ;
4121
4122         } else {
4123                 /*
4124                  * If we overflowed the seq_file before, then we want
4125                  * to just reuse the trace_seq buffer again.
4126                  */
4127                 if (iter->leftover)
4128                         p = iter;
4129                 else {
4130                         l = *pos - 1;
4131                         p = s_next(m, p, &l);
4132                 }
4133         }
4134
4135         trace_event_read_lock();
4136         trace_access_lock(cpu_file);
4137         return p;
4138 }
4139
4140 static void s_stop(struct seq_file *m, void *p)
4141 {
4142         struct trace_iterator *iter = m->private;
4143
4144 #ifdef CONFIG_TRACER_MAX_TRACE
4145         if (iter->snapshot && iter->trace->use_max_tr)
4146                 return;
4147 #endif
4148
4149         trace_access_unlock(iter->cpu_file);
4150         trace_event_read_unlock();
4151 }
4152
4153 static void
4154 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4155                       unsigned long *entries, int cpu)
4156 {
4157         unsigned long count;
4158
4159         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4160         /*
4161          * If this buffer has skipped entries, then we hold all
4162          * entries for the trace and we need to ignore the
4163          * ones before the time stamp.
4164          */
4165         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4166                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4167                 /* total is the same as the entries */
4168                 *total = count;
4169         } else
4170                 *total = count +
4171                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4172         *entries = count;
4173 }
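
/*
 * Worked example: if a CPU buffer holds 100 entries of which 10 were
 * skipped (recorded before the time stamp), both *entries and *total end
 * up as 90.  Without skipped entries, 100 entries plus an overrun of 5
 * gives *entries = 100 and *total = 105.
 */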
4174
4175 static void
4176 get_total_entries(struct array_buffer *buf,
4177                   unsigned long *total, unsigned long *entries)
4178 {
4179         unsigned long t, e;
4180         int cpu;
4181
4182         *total = 0;
4183         *entries = 0;
4184
4185         for_each_tracing_cpu(cpu) {
4186                 get_total_entries_cpu(buf, &t, &e, cpu);
4187                 *total += t;
4188                 *entries += e;
4189         }
4190 }
4191
4192 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4193 {
4194         unsigned long total, entries;
4195
4196         if (!tr)
4197                 tr = &global_trace;
4198
4199         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4200
4201         return entries;
4202 }
4203
4204 unsigned long trace_total_entries(struct trace_array *tr)
4205 {
4206         unsigned long total, entries;
4207
4208         if (!tr)
4209                 tr = &global_trace;
4210
4211         get_total_entries(&tr->array_buffer, &total, &entries);
4212
4213         return entries;
4214 }
4215
4216 static void print_lat_help_header(struct seq_file *m)
4217 {
4218         seq_puts(m, "#                    _------=> CPU#            \n"
4219                     "#                   / _-----=> irqs-off/BH-disabled\n"
4220                     "#                  | / _----=> need-resched    \n"
4221                     "#                  || / _---=> hardirq/softirq \n"
4222                     "#                  ||| / _--=> preempt-depth   \n"
4223                     "#                  |||| / _-=> migrate-disable \n"
4224                     "#                  ||||| /     delay           \n"
4225                     "#  cmd     pid     |||||| time  |   caller     \n"
4226                     "#     \\   /        ||||||  \\    |    /       \n");
4227 }
4228
4229 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4230 {
4231         unsigned long total;
4232         unsigned long entries;
4233
4234         get_total_entries(buf, &total, &entries);
4235         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4236                    entries, total, num_online_cpus());
4237         seq_puts(m, "#\n");
4238 }
4239
4240 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4241                                    unsigned int flags)
4242 {
4243         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4244
4245         print_event_info(buf, m);
4246
4247         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4248         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4249 }
4250
4251 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4252                                        unsigned int flags)
4253 {
4254         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4255         static const char space[] = "            ";
4256         int prec = tgid ? 12 : 2;
4257
4258         print_event_info(buf, m);
4259
4260         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4261         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4262         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4263         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4264         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4265         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4266         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4267         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4268 }
4269
4270 void
4271 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4272 {
4273         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4274         struct array_buffer *buf = iter->array_buffer;
4275         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4276         struct tracer *type = iter->trace;
4277         unsigned long entries;
4278         unsigned long total;
4279         const char *name = type->name;
4280
4281         get_total_entries(buf, &total, &entries);
4282
4283         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4284                    name, UTS_RELEASE);
4285         seq_puts(m, "# -----------------------------------"
4286                  "---------------------------------\n");
4287         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4288                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4289                    nsecs_to_usecs(data->saved_latency),
4290                    entries,
4291                    total,
4292                    buf->cpu,
4293                    preempt_model_none()      ? "server" :
4294                    preempt_model_voluntary() ? "desktop" :
4295                    preempt_model_full()      ? "preempt" :
4296                    preempt_model_rt()        ? "preempt_rt" :
4297                    "unknown",
4298                    /* These are reserved for later use */
4299                    0, 0, 0, 0);
4300 #ifdef CONFIG_SMP
4301         seq_printf(m, " #P:%d)\n", num_online_cpus());
4302 #else
4303         seq_puts(m, ")\n");
4304 #endif
4305         seq_puts(m, "#    -----------------\n");
4306         seq_printf(m, "#    | task: %.16s-%d "
4307                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4308                    data->comm, data->pid,
4309                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4310                    data->policy, data->rt_priority);
4311         seq_puts(m, "#    -----------------\n");
4312
4313         if (data->critical_start) {
4314                 seq_puts(m, "#  => started at: ");
4315                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4316                 trace_print_seq(m, &iter->seq);
4317                 seq_puts(m, "\n#  => ended at:   ");
4318                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4319                 trace_print_seq(m, &iter->seq);
4320                 seq_puts(m, "\n#\n");
4321         }
4322
4323         seq_puts(m, "#\n");
4324 }
4325
4326 static void test_cpu_buff_start(struct trace_iterator *iter)
4327 {
4328         struct trace_seq *s = &iter->seq;
4329         struct trace_array *tr = iter->tr;
4330
4331         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4332                 return;
4333
4334         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4335                 return;
4336
4337         if (cpumask_available(iter->started) &&
4338             cpumask_test_cpu(iter->cpu, iter->started))
4339                 return;
4340
4341         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4342                 return;
4343
4344         if (cpumask_available(iter->started))
4345                 cpumask_set_cpu(iter->cpu, iter->started);
4346
4347         /* Don't print started cpu buffer for the first entry of the trace */
4348         if (iter->idx > 1)
4349                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4350                                 iter->cpu);
4351 }
4352
4353 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4354 {
4355         struct trace_array *tr = iter->tr;
4356         struct trace_seq *s = &iter->seq;
4357         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4358         struct trace_entry *entry;
4359         struct trace_event *event;
4360
4361         entry = iter->ent;
4362
4363         test_cpu_buff_start(iter);
4364
4365         event = ftrace_find_event(entry->type);
4366
4367         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4368                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4369                         trace_print_lat_context(iter);
4370                 else
4371                         trace_print_context(iter);
4372         }
4373
4374         if (trace_seq_has_overflowed(s))
4375                 return TRACE_TYPE_PARTIAL_LINE;
4376
4377         if (event)
4378                 return event->funcs->trace(iter, sym_flags, event);
4379
4380         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4381
4382         return trace_handle_return(s);
4383 }
4384
4385 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4386 {
4387         struct trace_array *tr = iter->tr;
4388         struct trace_seq *s = &iter->seq;
4389         struct trace_entry *entry;
4390         struct trace_event *event;
4391
4392         entry = iter->ent;
4393
4394         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4395                 trace_seq_printf(s, "%d %d %llu ",
4396                                  entry->pid, iter->cpu, iter->ts);
4397
4398         if (trace_seq_has_overflowed(s))
4399                 return TRACE_TYPE_PARTIAL_LINE;
4400
4401         event = ftrace_find_event(entry->type);
4402         if (event)
4403                 return event->funcs->raw(iter, 0, event);
4404
4405         trace_seq_printf(s, "%d ?\n", entry->type);
4406
4407         return trace_handle_return(s);
4408 }
4409
4410 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4411 {
4412         struct trace_array *tr = iter->tr;
4413         struct trace_seq *s = &iter->seq;
4414         unsigned char newline = '\n';
4415         struct trace_entry *entry;
4416         struct trace_event *event;
4417
4418         entry = iter->ent;
4419
4420         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4421                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4422                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4423                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4424                 if (trace_seq_has_overflowed(s))
4425                         return TRACE_TYPE_PARTIAL_LINE;
4426         }
4427
4428         event = ftrace_find_event(entry->type);
4429         if (event) {
4430                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4431                 if (ret != TRACE_TYPE_HANDLED)
4432                         return ret;
4433         }
4434
4435         SEQ_PUT_FIELD(s, newline);
4436
4437         return trace_handle_return(s);
4438 }
4439
4440 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4441 {
4442         struct trace_array *tr = iter->tr;
4443         struct trace_seq *s = &iter->seq;
4444         struct trace_entry *entry;
4445         struct trace_event *event;
4446
4447         entry = iter->ent;
4448
4449         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4450                 SEQ_PUT_FIELD(s, entry->pid);
4451                 SEQ_PUT_FIELD(s, iter->cpu);
4452                 SEQ_PUT_FIELD(s, iter->ts);
4453                 if (trace_seq_has_overflowed(s))
4454                         return TRACE_TYPE_PARTIAL_LINE;
4455         }
4456
4457         event = ftrace_find_event(entry->type);
4458         return event ? event->funcs->binary(iter, 0, event) :
4459                 TRACE_TYPE_HANDLED;
4460 }
4461
4462 int trace_empty(struct trace_iterator *iter)
4463 {
4464         struct ring_buffer_iter *buf_iter;
4465         int cpu;
4466
4467         /* If we are looking at one CPU buffer, only check that one */
4468         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4469                 cpu = iter->cpu_file;
4470                 buf_iter = trace_buffer_iter(iter, cpu);
4471                 if (buf_iter) {
4472                         if (!ring_buffer_iter_empty(buf_iter))
4473                                 return 0;
4474                 } else {
4475                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476                                 return 0;
4477                 }
4478                 return 1;
4479         }
4480
4481         for_each_tracing_cpu(cpu) {
4482                 buf_iter = trace_buffer_iter(iter, cpu);
4483                 if (buf_iter) {
4484                         if (!ring_buffer_iter_empty(buf_iter))
4485                                 return 0;
4486                 } else {
4487                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4488                                 return 0;
4489                 }
4490         }
4491
4492         return 1;
4493 }
4494
4495 /*  Called with trace_event_read_lock() held. */
4496 enum print_line_t print_trace_line(struct trace_iterator *iter)
4497 {
4498         struct trace_array *tr = iter->tr;
4499         unsigned long trace_flags = tr->trace_flags;
4500         enum print_line_t ret;
4501
4502         if (iter->lost_events) {
4503                 if (iter->lost_events == (unsigned long)-1)
4504                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4505                                          iter->cpu);
4506                 else
4507                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4508                                          iter->cpu, iter->lost_events);
4509                 if (trace_seq_has_overflowed(&iter->seq))
4510                         return TRACE_TYPE_PARTIAL_LINE;
4511         }
4512
4513         if (iter->trace && iter->trace->print_line) {
4514                 ret = iter->trace->print_line(iter);
4515                 if (ret != TRACE_TYPE_UNHANDLED)
4516                         return ret;
4517         }
4518
4519         if (iter->ent->type == TRACE_BPUTS &&
4520                         trace_flags & TRACE_ITER_PRINTK &&
4521                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4522                 return trace_print_bputs_msg_only(iter);
4523
4524         if (iter->ent->type == TRACE_BPRINT &&
4525                         trace_flags & TRACE_ITER_PRINTK &&
4526                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4527                 return trace_print_bprintk_msg_only(iter);
4528
4529         if (iter->ent->type == TRACE_PRINT &&
4530                         trace_flags & TRACE_ITER_PRINTK &&
4531                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532                 return trace_print_printk_msg_only(iter);
4533
4534         if (trace_flags & TRACE_ITER_BIN)
4535                 return print_bin_fmt(iter);
4536
4537         if (trace_flags & TRACE_ITER_HEX)
4538                 return print_hex_fmt(iter);
4539
4540         if (trace_flags & TRACE_ITER_RAW)
4541                 return print_raw_fmt(iter);
4542
4543         return print_trace_fmt(iter);
4544 }
4545
4546 void trace_latency_header(struct seq_file *m)
4547 {
4548         struct trace_iterator *iter = m->private;
4549         struct trace_array *tr = iter->tr;
4550
4551         /* print nothing if the buffers are empty */
4552         if (trace_empty(iter))
4553                 return;
4554
4555         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4556                 print_trace_header(m, iter);
4557
4558         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4559                 print_lat_help_header(m);
4560 }
4561
4562 void trace_default_header(struct seq_file *m)
4563 {
4564         struct trace_iterator *iter = m->private;
4565         struct trace_array *tr = iter->tr;
4566         unsigned long trace_flags = tr->trace_flags;
4567
4568         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4569                 return;
4570
4571         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4572                 /* print nothing if the buffers are empty */
4573                 if (trace_empty(iter))
4574                         return;
4575                 print_trace_header(m, iter);
4576                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4577                         print_lat_help_header(m);
4578         } else {
4579                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4580                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4581                                 print_func_help_header_irq(iter->array_buffer,
4582                                                            m, trace_flags);
4583                         else
4584                                 print_func_help_header(iter->array_buffer, m,
4585                                                        trace_flags);
4586                 }
4587         }
4588 }
4589
4590 static void test_ftrace_alive(struct seq_file *m)
4591 {
4592         if (!ftrace_is_dead())
4593                 return;
4594         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4595                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4596 }
4597
4598 #ifdef CONFIG_TRACER_MAX_TRACE
4599 static void show_snapshot_main_help(struct seq_file *m)
4600 {
4601         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4602                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4603                     "#                      Takes a snapshot of the main buffer.\n"
4604                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4605                     "#                      (Doesn't have to be '2'; works with any number that\n"
4606                     "#                       is not a '0' or '1')\n");
4607 }
4608
4609 static void show_snapshot_percpu_help(struct seq_file *m)
4610 {
4611         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4612 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4613         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4614                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4615 #else
4616         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4617                     "#                     Must use main snapshot file to allocate.\n");
4618 #endif
4619         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4620                     "#                      (Doesn't have to be '2'; works with any number that\n"
4621                     "#                       is not a '0' or '1')\n");
4622 }
4623
4624 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4625 {
4626         if (iter->tr->allocated_snapshot)
4627                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4628         else
4629                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4630
4631         seq_puts(m, "# Snapshot commands:\n");
4632         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4633                 show_snapshot_main_help(m);
4634         else
4635                 show_snapshot_percpu_help(m);
4636 }
4637 #else
4638 /* Should never be called */
4639 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4640 #endif
4641
4642 static int s_show(struct seq_file *m, void *v)
4643 {
4644         struct trace_iterator *iter = v;
4645         int ret;
4646
4647         if (iter->ent == NULL) {
4648                 if (iter->tr) {
4649                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4650                         seq_puts(m, "#\n");
4651                         test_ftrace_alive(m);
4652                 }
4653                 if (iter->snapshot && trace_empty(iter))
4654                         print_snapshot_help(m, iter);
4655                 else if (iter->trace && iter->trace->print_header)
4656                         iter->trace->print_header(m);
4657                 else
4658                         trace_default_header(m);
4659
4660         } else if (iter->leftover) {
4661                 /*
4662                  * If we filled the seq_file buffer earlier, we
4663                  * want to just show it now.
4664                  */
4665                 ret = trace_print_seq(m, &iter->seq);
4666
4667                 /* ret should this time be zero, but you never know */
4668                 iter->leftover = ret;
4669
4670         } else {
4671                 print_trace_line(iter);
4672                 ret = trace_print_seq(m, &iter->seq);
4673                 /*
4674                  * If we overflow the seq_file buffer, then it will
4675                  * ask us for this data again at start up.
4676                  * Use that instead.
4677                  *  ret is 0 if seq_file write succeeded.
4678                  *        -1 otherwise.
4679                  */
4680                 iter->leftover = ret;
4681         }
4682
4683         return 0;
4684 }
4685
4686 /*
4687  * Should be used after trace_array_get(); trace_types_lock
4688  * ensures that i_cdev was already initialized.
4689  */
4690 static inline int tracing_get_cpu(struct inode *inode)
4691 {
4692         if (inode->i_cdev) /* See trace_create_cpu_file() */
4693                 return (long)inode->i_cdev - 1;
4694         return RING_BUFFER_ALL_CPUS;
4695 }
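
/*
 * Example of the i_cdev encoding set up by trace_create_cpu_file(): a
 * NULL i_cdev means the whole-buffer file and yields RING_BUFFER_ALL_CPUS,
 * while an i_cdev of (void *)5 refers to CPU 4.
 */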
4696
4697 static const struct seq_operations tracer_seq_ops = {
4698         .start          = s_start,
4699         .next           = s_next,
4700         .stop           = s_stop,
4701         .show           = s_show,
4702 };
4703
4704 static struct trace_iterator *
4705 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4706 {
4707         struct trace_array *tr = inode->i_private;
4708         struct trace_iterator *iter;
4709         int cpu;
4710
4711         if (tracing_disabled)
4712                 return ERR_PTR(-ENODEV);
4713
4714         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4715         if (!iter)
4716                 return ERR_PTR(-ENOMEM);
4717
4718         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4719                                     GFP_KERNEL);
4720         if (!iter->buffer_iter)
4721                 goto release;
4722
4723         /*
4724          * trace_find_next_entry() may need to save off iter->ent.
4725          * It will place it into the iter->temp buffer. As most
4726          * events are less than 128 bytes, allocate a buffer of that size.
4727          * If one is larger, trace_find_next_entry() will allocate a new
4728          * buffer to fit the bigger iter->ent.
4729          * It's not critical if this allocation fails here.
4730          */
4731         iter->temp = kmalloc(128, GFP_KERNEL);
4732         if (iter->temp)
4733                 iter->temp_size = 128;
4734
4735         /*
4736          * trace_event_printf() may need to modify the given format
4737          * string to replace %p with %px so that it shows the real address
4738          * instead of a hashed value. However, that is only needed for
4739          * event tracing; other tracers may not need it. Defer the
4740          * allocation until it is needed.
4741          */
4742         iter->fmt = NULL;
4743         iter->fmt_size = 0;
4744
4745         /*
4746          * We make a copy of the current tracer to avoid concurrent
4747          * changes on it while we are reading.
4748          */
4749         mutex_lock(&trace_types_lock);
4750         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4751         if (!iter->trace)
4752                 goto fail;
4753
4754         *iter->trace = *tr->current_trace;
4755
4756         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4757                 goto fail;
4758
4759         iter->tr = tr;
4760
4761 #ifdef CONFIG_TRACER_MAX_TRACE
4762         /* Currently only the top directory has a snapshot */
4763         if (tr->current_trace->print_max || snapshot)
4764                 iter->array_buffer = &tr->max_buffer;
4765         else
4766 #endif
4767                 iter->array_buffer = &tr->array_buffer;
4768         iter->snapshot = snapshot;
4769         iter->pos = -1;
4770         iter->cpu_file = tracing_get_cpu(inode);
4771         mutex_init(&iter->mutex);
4772
4773         /* Notify the tracer early; before we stop tracing. */
4774         if (iter->trace->open)
4775                 iter->trace->open(iter);
4776
4777         /* Annotate start of buffers if we had overruns */
4778         if (ring_buffer_overruns(iter->array_buffer->buffer))
4779                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4780
4781         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4782         if (trace_clocks[tr->clock_id].in_ns)
4783                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4784
4785         /*
4786          * If pause-on-trace is enabled, then stop the trace while
4787          * dumping, unless this is the "snapshot" file
4788          */
4789         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4790                 tracing_stop_tr(tr);
4791
4792         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4793                 for_each_tracing_cpu(cpu) {
4794                         iter->buffer_iter[cpu] =
4795                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4796                                                          cpu, GFP_KERNEL);
4797                 }
4798                 ring_buffer_read_prepare_sync();
4799                 for_each_tracing_cpu(cpu) {
4800                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4801                         tracing_iter_reset(iter, cpu);
4802                 }
4803         } else {
4804                 cpu = iter->cpu_file;
4805                 iter->buffer_iter[cpu] =
4806                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4807                                                  cpu, GFP_KERNEL);
4808                 ring_buffer_read_prepare_sync();
4809                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4810                 tracing_iter_reset(iter, cpu);
4811         }
4812
4813         mutex_unlock(&trace_types_lock);
4814
4815         return iter;
4816
4817  fail:
4818         mutex_unlock(&trace_types_lock);
4819         kfree(iter->trace);
4820         kfree(iter->temp);
4821         kfree(iter->buffer_iter);
4822 release:
4823         seq_release_private(inode, file);
4824         return ERR_PTR(-ENOMEM);
4825 }
4826
4827 int tracing_open_generic(struct inode *inode, struct file *filp)
4828 {
4829         int ret;
4830
4831         ret = tracing_check_open_get_tr(NULL);
4832         if (ret)
4833                 return ret;
4834
4835         filp->private_data = inode->i_private;
4836         return 0;
4837 }
4838
4839 bool tracing_is_disabled(void)
4840 {
4841         return tracing_disabled ? true : false;
4842 }
4843
4844 /*
4845  * Open and update trace_array ref count.
4846  * Must have the current trace_array passed to it.
4847  */
4848 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4849 {
4850         struct trace_array *tr = inode->i_private;
4851         int ret;
4852
4853         ret = tracing_check_open_get_tr(tr);
4854         if (ret)
4855                 return ret;
4856
4857         filp->private_data = inode->i_private;
4858
4859         return 0;
4860 }
4861
4862 static int tracing_mark_open(struct inode *inode, struct file *filp)
4863 {
4864         stream_open(inode, filp);
4865         return tracing_open_generic_tr(inode, filp);
4866 }
4867
4868 static int tracing_release(struct inode *inode, struct file *file)
4869 {
4870         struct trace_array *tr = inode->i_private;
4871         struct seq_file *m = file->private_data;
4872         struct trace_iterator *iter;
4873         int cpu;
4874
4875         if (!(file->f_mode & FMODE_READ)) {
4876                 trace_array_put(tr);
4877                 return 0;
4878         }
4879
4880         /* Writes do not use seq_file */
4881         iter = m->private;
4882         mutex_lock(&trace_types_lock);
4883
4884         for_each_tracing_cpu(cpu) {
4885                 if (iter->buffer_iter[cpu])
4886                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4887         }
4888
4889         if (iter->trace && iter->trace->close)
4890                 iter->trace->close(iter);
4891
4892         if (!iter->snapshot && tr->stop_count)
4893                 /* reenable tracing if it was previously enabled */
4894                 tracing_start_tr(tr);
4895
4896         __trace_array_put(tr);
4897
4898         mutex_unlock(&trace_types_lock);
4899
4900         mutex_destroy(&iter->mutex);
4901         free_cpumask_var(iter->started);
4902         kfree(iter->fmt);
4903         kfree(iter->temp);
4904         kfree(iter->trace);
4905         kfree(iter->buffer_iter);
4906         seq_release_private(inode, file);
4907
4908         return 0;
4909 }
4910
4911 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4912 {
4913         struct trace_array *tr = inode->i_private;
4914
4915         trace_array_put(tr);
4916         return 0;
4917 }
4918
4919 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4920 {
4921         struct trace_array *tr = inode->i_private;
4922
4923         trace_array_put(tr);
4924
4925         return single_release(inode, file);
4926 }
4927
4928 static int tracing_open(struct inode *inode, struct file *file)
4929 {
4930         struct trace_array *tr = inode->i_private;
4931         struct trace_iterator *iter;
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(tr);
4935         if (ret)
4936                 return ret;
4937
4938         /* If this file was open for write, then erase contents */
4939         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4940                 int cpu = tracing_get_cpu(inode);
4941                 struct array_buffer *trace_buf = &tr->array_buffer;
4942
4943 #ifdef CONFIG_TRACER_MAX_TRACE
4944                 if (tr->current_trace->print_max)
4945                         trace_buf = &tr->max_buffer;
4946 #endif
4947
4948                 if (cpu == RING_BUFFER_ALL_CPUS)
4949                         tracing_reset_online_cpus(trace_buf);
4950                 else
4951                         tracing_reset_cpu(trace_buf, cpu);
4952         }
4953
4954         if (file->f_mode & FMODE_READ) {
4955                 iter = __tracing_open(inode, file, false);
4956                 if (IS_ERR(iter))
4957                         ret = PTR_ERR(iter);
4958                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4959                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4960         }
4961
4962         if (ret < 0)
4963                 trace_array_put(tr);
4964
4965         return ret;
4966 }
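/*
 * Usage sketch (paths assume tracefs at /sys/kernel/tracing): an O_TRUNC
 * open of "trace" is what triggers the reset above, which is why the idiom
 * from the README works, while a plain read goes through __tracing_open():
 *
 *   echo > trace        # truncating open: clears the buffer(s)
 *   cat trace           # read-only open: iterates the current contents
 */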
4967
4968 /*
4969  * Some tracers are not suitable for instance buffers.
4970  * A tracer is always available for the global array (toplevel)
4971  * or if it explicitly states that it is.
4972  */
4973 static bool
4974 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4975 {
4976         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4977 }
4978
4979 /* Find the next tracer that this trace array may use */
4980 static struct tracer *
4981 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4982 {
4983         while (t && !trace_ok_for_array(t, tr))
4984                 t = t->next;
4985
4986         return t;
4987 }
4988
4989 static void *
4990 t_next(struct seq_file *m, void *v, loff_t *pos)
4991 {
4992         struct trace_array *tr = m->private;
4993         struct tracer *t = v;
4994
4995         (*pos)++;
4996
4997         if (t)
4998                 t = get_tracer_for_array(tr, t->next);
4999
5000         return t;
5001 }
5002
5003 static void *t_start(struct seq_file *m, loff_t *pos)
5004 {
5005         struct trace_array *tr = m->private;
5006         struct tracer *t;
5007         loff_t l = 0;
5008
5009         mutex_lock(&trace_types_lock);
5010
5011         t = get_tracer_for_array(tr, trace_types);
5012         for (; t && l < *pos; t = t_next(m, t, &l))
5013                 ;
5014
5015         return t;
5016 }
5017
5018 static void t_stop(struct seq_file *m, void *p)
5019 {
5020         mutex_unlock(&trace_types_lock);
5021 }
5022
5023 static int t_show(struct seq_file *m, void *v)
5024 {
5025         struct tracer *t = v;
5026
5027         if (!t)
5028                 return 0;
5029
5030         seq_puts(m, t->name);
5031         if (t->next)
5032                 seq_putc(m, ' ');
5033         else
5034                 seq_putc(m, '\n');
5035
5036         return 0;
5037 }
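/*
 * Example of what the available_tracers seq_file ends up looking like
 * (illustrative only; the real list depends on the kernel config and on
 * which tracers the instance allows):
 *
 *   # cat available_tracers
 *   blk function_graph wakeup function nop
 */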
5038
5039 static const struct seq_operations show_traces_seq_ops = {
5040         .start          = t_start,
5041         .next           = t_next,
5042         .stop           = t_stop,
5043         .show           = t_show,
5044 };
5045
5046 static int show_traces_open(struct inode *inode, struct file *file)
5047 {
5048         struct trace_array *tr = inode->i_private;
5049         struct seq_file *m;
5050         int ret;
5051
5052         ret = tracing_check_open_get_tr(tr);
5053         if (ret)
5054                 return ret;
5055
5056         ret = seq_open(file, &show_traces_seq_ops);
5057         if (ret) {
5058                 trace_array_put(tr);
5059                 return ret;
5060         }
5061
5062         m = file->private_data;
5063         m->private = tr;
5064
5065         return 0;
5066 }
5067
5068 static int show_traces_release(struct inode *inode, struct file *file)
5069 {
5070         struct trace_array *tr = inode->i_private;
5071
5072         trace_array_put(tr);
5073         return seq_release(inode, file);
5074 }
5075
5076 static ssize_t
5077 tracing_write_stub(struct file *filp, const char __user *ubuf,
5078                    size_t count, loff_t *ppos)
5079 {
5080         return count;
5081 }
5082
5083 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5084 {
5085         int ret;
5086
5087         if (file->f_mode & FMODE_READ)
5088                 ret = seq_lseek(file, offset, whence);
5089         else
5090                 file->f_pos = ret = 0;
5091
5092         return ret;
5093 }
5094
5095 static const struct file_operations tracing_fops = {
5096         .open           = tracing_open,
5097         .read           = seq_read,
5098         .write          = tracing_write_stub,
5099         .llseek         = tracing_lseek,
5100         .release        = tracing_release,
5101 };
5102
5103 static const struct file_operations show_traces_fops = {
5104         .open           = show_traces_open,
5105         .read           = seq_read,
5106         .llseek         = seq_lseek,
5107         .release        = show_traces_release,
5108 };
5109
5110 static ssize_t
5111 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5112                      size_t count, loff_t *ppos)
5113 {
5114         struct trace_array *tr = file_inode(filp)->i_private;
5115         char *mask_str;
5116         int len;
5117
5118         len = snprintf(NULL, 0, "%*pb\n",
5119                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5120         mask_str = kmalloc(len, GFP_KERNEL);
5121         if (!mask_str)
5122                 return -ENOMEM;
5123
5124         len = snprintf(mask_str, len, "%*pb\n",
5125                        cpumask_pr_args(tr->tracing_cpumask));
5126         if (len >= count) {
5127                 count = -EINVAL;
5128                 goto out_err;
5129         }
5130         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5131
5132 out_err:
5133         kfree(mask_str);
5134
5135         return count;
5136 }
5137
5138 int tracing_set_cpumask(struct trace_array *tr,
5139                         cpumask_var_t tracing_cpumask_new)
5140 {
5141         int cpu;
5142
5143         if (!tr)
5144                 return -EINVAL;
5145
5146         local_irq_disable();
5147         arch_spin_lock(&tr->max_lock);
5148         for_each_tracing_cpu(cpu) {
5149                 /*
5150                  * Increase/decrease the disabled counter if we are
5151                  * about to flip a bit in the cpumask:
5152                  */
5153                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5154                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5155                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5156                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5157                 }
5158                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5159                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5160                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5161                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5162                 }
5163         }
5164         arch_spin_unlock(&tr->max_lock);
5165         local_irq_enable();
5166
5167         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5168
5169         return 0;
5170 }
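/*
 * Illustrative tracing_cpumask usage (a sketch; the mask width depends on
 * nr_cpu_ids). Clearing a bit disables recording on that CPU's buffer, as
 * implemented above:
 *
 *   # cat tracing_cpumask
 *   ff
 *   # echo 3 > tracing_cpumask      # trace only CPUs 0 and 1
 */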
5171
5172 static ssize_t
5173 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5174                       size_t count, loff_t *ppos)
5175 {
5176         struct trace_array *tr = file_inode(filp)->i_private;
5177         cpumask_var_t tracing_cpumask_new;
5178         int err;
5179
5180         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5181                 return -ENOMEM;
5182
5183         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5184         if (err)
5185                 goto err_free;
5186
5187         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5188         if (err)
5189                 goto err_free;
5190
5191         free_cpumask_var(tracing_cpumask_new);
5192
5193         return count;
5194
5195 err_free:
5196         free_cpumask_var(tracing_cpumask_new);
5197
5198         return err;
5199 }
5200
5201 static const struct file_operations tracing_cpumask_fops = {
5202         .open           = tracing_open_generic_tr,
5203         .read           = tracing_cpumask_read,
5204         .write          = tracing_cpumask_write,
5205         .release        = tracing_release_generic_tr,
5206         .llseek         = generic_file_llseek,
5207 };
5208
5209 static int tracing_trace_options_show(struct seq_file *m, void *v)
5210 {
5211         struct tracer_opt *trace_opts;
5212         struct trace_array *tr = m->private;
5213         u32 tracer_flags;
5214         int i;
5215
5216         mutex_lock(&trace_types_lock);
5217         tracer_flags = tr->current_trace->flags->val;
5218         trace_opts = tr->current_trace->flags->opts;
5219
5220         for (i = 0; trace_options[i]; i++) {
5221                 if (tr->trace_flags & (1 << i))
5222                         seq_printf(m, "%s\n", trace_options[i]);
5223                 else
5224                         seq_printf(m, "no%s\n", trace_options[i]);
5225         }
5226
5227         for (i = 0; trace_opts[i].name; i++) {
5228                 if (tracer_flags & trace_opts[i].bit)
5229                         seq_printf(m, "%s\n", trace_opts[i].name);
5230                 else
5231                         seq_printf(m, "no%s\n", trace_opts[i].name);
5232         }
5233         mutex_unlock(&trace_types_lock);
5234
5235         return 0;
5236 }
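/*
 * Example of the output produced above (illustrative; the exact option set
 * varies): every global flag and every tracer-specific option is printed on
 * its own line, prefixed with "no" when it is cleared:
 *
 *   print-parent
 *   nosym-offset
 *   nosym-addr
 *   ...
 */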
5237
5238 static int __set_tracer_option(struct trace_array *tr,
5239                                struct tracer_flags *tracer_flags,
5240                                struct tracer_opt *opts, int neg)
5241 {
5242         struct tracer *trace = tracer_flags->trace;
5243         int ret;
5244
5245         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5246         if (ret)
5247                 return ret;
5248
5249         if (neg)
5250                 tracer_flags->val &= ~opts->bit;
5251         else
5252                 tracer_flags->val |= opts->bit;
5253         return 0;
5254 }
5255
5256 /* Try to assign a tracer specific option */
5257 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5258 {
5259         struct tracer *trace = tr->current_trace;
5260         struct tracer_flags *tracer_flags = trace->flags;
5261         struct tracer_opt *opts = NULL;
5262         int i;
5263
5264         for (i = 0; tracer_flags->opts[i].name; i++) {
5265                 opts = &tracer_flags->opts[i];
5266
5267                 if (strcmp(cmp, opts->name) == 0)
5268                         return __set_tracer_option(tr, trace->flags, opts, neg);
5269         }
5270
5271         return -EINVAL;
5272 }
5273
5274 /* Some tracers require overwrite to stay enabled */
5275 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5276 {
5277         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5278                 return -1;
5279
5280         return 0;
5281 }
5282
5283 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5284 {
5285         int *map;
5286
5287         if ((mask == TRACE_ITER_RECORD_TGID) ||
5288             (mask == TRACE_ITER_RECORD_CMD))
5289                 lockdep_assert_held(&event_mutex);
5290
5291         /* do nothing if the flag is already in the requested state */
5292         if (!!(tr->trace_flags & mask) == !!enabled)
5293                 return 0;
5294
5295         /* Give the tracer a chance to approve the change */
5296         if (tr->current_trace->flag_changed)
5297                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5298                         return -EINVAL;
5299
5300         if (enabled)
5301                 tr->trace_flags |= mask;
5302         else
5303                 tr->trace_flags &= ~mask;
5304
5305         if (mask == TRACE_ITER_RECORD_CMD)
5306                 trace_event_enable_cmd_record(enabled);
5307
5308         if (mask == TRACE_ITER_RECORD_TGID) {
5309                 if (!tgid_map) {
5310                         tgid_map_max = pid_max;
5311                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5312                                        GFP_KERNEL);
5313
5314                         /*
5315                          * Pairs with smp_load_acquire() in
5316                          * trace_find_tgid_ptr() to ensure that if it observes
5317                          * the tgid_map we just allocated then it also observes
5318                          * the corresponding tgid_map_max value.
5319                          */
5320                         smp_store_release(&tgid_map, map);
5321                 }
5322                 if (!tgid_map) {
5323                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5324                         return -ENOMEM;
5325                 }
5326
5327                 trace_event_enable_tgid_record(enabled);
5328         }
5329
5330         if (mask == TRACE_ITER_EVENT_FORK)
5331                 trace_event_follow_fork(tr, enabled);
5332
5333         if (mask == TRACE_ITER_FUNC_FORK)
5334                 ftrace_pid_follow_fork(tr, enabled);
5335
5336         if (mask == TRACE_ITER_OVERWRITE) {
5337                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5340 #endif
5341         }
5342
5343         if (mask == TRACE_ITER_PRINTK) {
5344                 trace_printk_start_stop_comm(enabled);
5345                 trace_printk_control(enabled);
5346         }
5347
5348         return 0;
5349 }
5350
5351 int trace_set_options(struct trace_array *tr, char *option)
5352 {
5353         char *cmp;
5354         int neg = 0;
5355         int ret;
5356         size_t orig_len = strlen(option);
5357         int len;
5358
5359         cmp = strstrip(option);
5360
5361         len = str_has_prefix(cmp, "no");
5362         if (len)
5363                 neg = 1;
5364
5365         cmp += len;
5366
5367         mutex_lock(&event_mutex);
5368         mutex_lock(&trace_types_lock);
5369
5370         ret = match_string(trace_options, -1, cmp);
5371         /* If it is not a global option, try the tracer-specific options */
5372         if (ret < 0)
5373                 ret = set_tracer_option(tr, cmp, neg);
5374         else
5375                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5376
5377         mutex_unlock(&trace_types_lock);
5378         mutex_unlock(&event_mutex);
5379
5380         /*
5381          * If the first trailing whitespace is replaced with '\0' by strstrip,
5382          * turn it back into a space.
5383          */
5384         if (orig_len > strlen(option))
5385                 option[strlen(option)] = ' ';
5386
5387         return ret;
5388 }
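/*
 * Usage sketch for the trace_options file that feeds this function
 * (option names are examples; "funcgraph-proc" assumes the function_graph
 * tracer is current):
 *
 *   echo sym-offset > trace_options        # set a global flag
 *   echo nosym-offset > trace_options      # clear it again ("no" prefix)
 *   echo funcgraph-proc > trace_options    # tracer-specific option
 */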
5389
5390 static void __init apply_trace_boot_options(void)
5391 {
5392         char *buf = trace_boot_options_buf;
5393         char *option;
5394
5395         while (true) {
5396                 option = strsep(&buf, ",");
5397
5398                 if (!option)
5399                         break;
5400
5401                 if (*option)
5402                         trace_set_options(&global_trace, option);
5403
5404                 /* Put back the comma to allow this to be called again */
5405                 if (buf)
5406                         *(buf - 1) = ',';
5407         }
5408 }
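/*
 * The buffer walked above is filled from the "trace_options=" boot
 * parameter (parsed elsewhere in this file); an illustrative command line
 * fragment:
 *
 *   trace_options=sym-offset,noprint-parent
 */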
5409
5410 static ssize_t
5411 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5412                         size_t cnt, loff_t *ppos)
5413 {
5414         struct seq_file *m = filp->private_data;
5415         struct trace_array *tr = m->private;
5416         char buf[64];
5417         int ret;
5418
5419         if (cnt >= sizeof(buf))
5420                 return -EINVAL;
5421
5422         if (copy_from_user(buf, ubuf, cnt))
5423                 return -EFAULT;
5424
5425         buf[cnt] = 0;
5426
5427         ret = trace_set_options(tr, buf);
5428         if (ret < 0)
5429                 return ret;
5430
5431         *ppos += cnt;
5432
5433         return cnt;
5434 }
5435
5436 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5437 {
5438         struct trace_array *tr = inode->i_private;
5439         int ret;
5440
5441         ret = tracing_check_open_get_tr(tr);
5442         if (ret)
5443                 return ret;
5444
5445         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5446         if (ret < 0)
5447                 trace_array_put(tr);
5448
5449         return ret;
5450 }
5451
5452 static const struct file_operations tracing_iter_fops = {
5453         .open           = tracing_trace_options_open,
5454         .read           = seq_read,
5455         .llseek         = seq_lseek,
5456         .release        = tracing_single_release_tr,
5457         .write          = tracing_trace_options_write,
5458 };
5459
5460 static const char readme_msg[] =
5461         "tracing mini-HOWTO:\n\n"
5462         "# echo 0 > tracing_on : quick way to disable tracing\n"
5463         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5464         " Important files:\n"
5465         "  trace\t\t\t- The static contents of the buffer\n"
5466         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5467         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5468         "  current_tracer\t- function and latency tracers\n"
5469         "  available_tracers\t- list of configured tracers for current_tracer\n"
5470         "  error_log\t- error log for failed commands (that support it)\n"
5471         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5472         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5473         "  trace_clock\t\t- change the clock used to order events\n"
5474         "       local:   Per cpu clock but may not be synced across CPUs\n"
5475         "      global:   Synced across CPUs but slows tracing down.\n"
5476         "     counter:   Not a clock, but just an increment\n"
5477         "      uptime:   Jiffy counter from time of boot\n"
5478         "        perf:   Same clock that perf events use\n"
5479 #ifdef CONFIG_X86_64
5480         "     x86-tsc:   TSC cycle counter\n"
5481 #endif
5482         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5483         "       delta:   Delta difference against a buffer-wide timestamp\n"
5484         "    absolute:   Absolute (standalone) timestamp\n"
5485         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5486         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5487         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5488         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5489         "\t\t\t  Remove sub-buffer with rmdir\n"
5490         "  trace_options\t\t- Set format or modify how tracing happens\n"
5491         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5492         "\t\t\t  option name\n"
5493         "  saved_cmdlines_size\t- echo the number of entries to keep in the saved comm-pid list\n"
5494 #ifdef CONFIG_DYNAMIC_FTRACE
5495         "\n  available_filter_functions - list of functions that can be filtered on\n"
5496         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5497         "\t\t\t  functions\n"
5498         "\t     accepts: func_full_name or glob-matching-pattern\n"
5499         "\t     modules: Can select a group via module\n"
5500         "\t      Format: :mod:<module-name>\n"
5501         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5502         "\t    triggers: a command to perform when function is hit\n"
5503         "\t      Format: <function>:<trigger>[:count]\n"
5504         "\t     trigger: traceon, traceoff\n"
5505         "\t\t      enable_event:<system>:<event>\n"
5506         "\t\t      disable_event:<system>:<event>\n"
5507 #ifdef CONFIG_STACKTRACE
5508         "\t\t      stacktrace\n"
5509 #endif
5510 #ifdef CONFIG_TRACER_SNAPSHOT
5511         "\t\t      snapshot\n"
5512 #endif
5513         "\t\t      dump\n"
5514         "\t\t      cpudump\n"
5515         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5516         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5517         "\t     The first one will disable tracing every time do_fault is hit\n"
5518         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5519         "\t       The first time do_trap is hit and it disables tracing, the\n"
5520         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5521         "\t       the counter will not decrement. It only decrements when the\n"
5522         "\t       trigger did work\n"
5523         "\t     To remove trigger without count:\n"
5524         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5525         "\t     To remove trigger with a count:\n"
5526         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5527         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5528         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5529         "\t    modules: Can select a group via module command :mod:\n"
5530         "\t    Does not accept triggers\n"
5531 #endif /* CONFIG_DYNAMIC_FTRACE */
5532 #ifdef CONFIG_FUNCTION_TRACER
5533         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5534         "\t\t    (function)\n"
5535         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5536         "\t\t    (function)\n"
5537 #endif
5538 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5539         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5540         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5541         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5542 #endif
5543 #ifdef CONFIG_TRACER_SNAPSHOT
5544         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5545         "\t\t\t  snapshot buffer. Read the contents for more\n"
5546         "\t\t\t  information\n"
5547 #endif
5548 #ifdef CONFIG_STACK_TRACER
5549         "  stack_trace\t\t- Shows the max stack trace when active\n"
5550         "  stack_max_size\t- Shows current max stack size that was traced\n"
5551         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5552         "\t\t\t  new trace)\n"
5553 #ifdef CONFIG_DYNAMIC_FTRACE
5554         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5555         "\t\t\t  traces\n"
5556 #endif
5557 #endif /* CONFIG_STACK_TRACER */
5558 #ifdef CONFIG_DYNAMIC_EVENTS
5559         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5560         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5561 #endif
5562 #ifdef CONFIG_KPROBE_EVENTS
5563         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5564         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5565 #endif
5566 #ifdef CONFIG_UPROBE_EVENTS
5567         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5568         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5569 #endif
5570 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571         "\t  accepts: event-definitions (one definition per line)\n"
5572         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5573         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5574 #ifdef CONFIG_HIST_TRIGGERS
5575         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5576 #endif
5577         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5578         "\t           -:[<group>/]<event>\n"
5579 #ifdef CONFIG_KPROBE_EVENTS
5580         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5581   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5582 #endif
5583 #ifdef CONFIG_UPROBE_EVENTS
5584   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5585 #endif
5586         "\t     args: <name>=fetcharg[:type]\n"
5587         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5588 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5589         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5590 #else
5591         "\t           $stack<index>, $stack, $retval, $comm,\n"
5592 #endif
5593         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5594         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5595         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5596         "\t           <type>\\[<array-size>\\]\n"
5597 #ifdef CONFIG_HIST_TRIGGERS
5598         "\t    field: <stype> <name>;\n"
5599         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5600         "\t           [unsigned] char/int/long\n"
5601 #endif
5602         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5603         "\t            of the <attached-group>/<attached-event>.\n"
5604 #endif
5605         "  events/\t\t- Directory containing all trace event subsystems:\n"
5606         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5607         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5608         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5609         "\t\t\t  events\n"
5610         "      filter\t\t- If set, only events passing filter are traced\n"
5611         "  events/<system>/<event>/\t- Directory containing control files for\n"
5612         "\t\t\t  <event>:\n"
5613         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5614         "      filter\t\t- If set, only events passing filter are traced\n"
5615         "      trigger\t\t- If set, a command to perform when event is hit\n"
5616         "\t    Format: <trigger>[:count][if <filter>]\n"
5617         "\t   trigger: traceon, traceoff\n"
5618         "\t            enable_event:<system>:<event>\n"
5619         "\t            disable_event:<system>:<event>\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621         "\t            enable_hist:<system>:<event>\n"
5622         "\t            disable_hist:<system>:<event>\n"
5623 #endif
5624 #ifdef CONFIG_STACKTRACE
5625         "\t\t    stacktrace\n"
5626 #endif
5627 #ifdef CONFIG_TRACER_SNAPSHOT
5628         "\t\t    snapshot\n"
5629 #endif
5630 #ifdef CONFIG_HIST_TRIGGERS
5631         "\t\t    hist (see below)\n"
5632 #endif
5633         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5634         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5635         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5636         "\t                  events/block/block_unplug/trigger\n"
5637         "\t   The first disables tracing every time block_unplug is hit.\n"
5638         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5639         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5640         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5641         "\t   Like function triggers, the counter is only decremented if it\n"
5642         "\t    enabled or disabled tracing.\n"
5643         "\t   To remove a trigger without a count:\n"
5644         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5645         "\t   To remove a trigger with a count:\n"
5646         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5647         "\t   Filters can be ignored when removing a trigger.\n"
5648 #ifdef CONFIG_HIST_TRIGGERS
5649         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5650         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5651         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5652         "\t            [:values=<field1[,field2,...]>]\n"
5653         "\t            [:sort=<field1[,field2,...]>]\n"
5654         "\t            [:size=#entries]\n"
5655         "\t            [:pause][:continue][:clear]\n"
5656         "\t            [:name=histname1]\n"
5657         "\t            [:<handler>.<action>]\n"
5658         "\t            [if <filter>]\n\n"
5659         "\t    Note, special fields can be used as well:\n"
5660         "\t            common_timestamp - to record current timestamp\n"
5661         "\t            common_cpu - to record the CPU the event happened on\n"
5662         "\n"
5663         "\t    A hist trigger variable can be:\n"
5664         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5665         "\t        - a reference to another variable e.g. y=$x,\n"
5666         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5667         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5668         "\n"
5669         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5670         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5671         "\t    variable reference, field or numeric literal.\n"
5672         "\n"
5673         "\t    When a matching event is hit, an entry is added to a hash\n"
5674         "\t    table using the key(s) and value(s) named, and the value of a\n"
5675         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5676         "\t    correspond to fields in the event's format description.  Keys\n"
5677         "\t    can be any field, or the special string 'stacktrace'.\n"
5678         "\t    Compound keys consisting of up to two fields can be specified\n"
5679         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5680         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5681         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5682         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5683         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5684         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5685         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5686         "\t    its histogram data will be shared with other triggers of the\n"
5687         "\t    same name, and trigger hits will update this common data.\n\n"
5688         "\t    Reading the 'hist' file for the event will dump the hash\n"
5689         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5690         "\t    triggers attached to an event, there will be a table for each\n"
5691         "\t    trigger in the output.  The table displayed for a named\n"
5692         "\t    trigger will be the same as any other instance having the\n"
5693         "\t    same name.  The default format used to display a given field\n"
5694         "\t    can be modified by appending any of the following modifiers\n"
5695         "\t    to the field name, as applicable:\n\n"
5696         "\t            .hex        display a number as a hex value\n"
5697         "\t            .sym        display an address as a symbol\n"
5698         "\t            .sym-offset display an address as a symbol and offset\n"
5699         "\t            .execname   display a common_pid as a program name\n"
5700         "\t            .syscall    display a syscall id as a syscall name\n"
5701         "\t            .log2       display log2 value rather than raw number\n"
5702         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5703         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5704         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5705         "\t    trigger or to start a hist trigger but not log any events\n"
5706         "\t    until told to do so.  'continue' can be used to start or\n"
5707         "\t    restart a paused hist trigger.\n\n"
5708         "\t    The 'clear' parameter will clear the contents of a running\n"
5709         "\t    hist trigger and leave its current paused/active state\n"
5710         "\t    unchanged.\n\n"
5711         "\t    The enable_hist and disable_hist triggers can be used to\n"
5712         "\t    have one event conditionally start and stop another event's\n"
5713         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5714         "\t    the enable_event and disable_event triggers.\n\n"
5715         "\t    Hist trigger handlers and actions are executed whenever a\n"
5716         "\t    histogram entry is added or updated.  They take the form:\n\n"
5717         "\t        <handler>.<action>\n\n"
5718         "\t    The available handlers are:\n\n"
5719         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5720         "\t        onmax(var)               - invoke if var exceeds current max\n"
5721         "\t        onchange(var)            - invoke action if var changes\n\n"
5722         "\t    The available actions are:\n\n"
5723         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5724         "\t        save(field,...)                      - save current event fields\n"
5725 #ifdef CONFIG_TRACER_SNAPSHOT
5726         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5727 #endif
5728 #ifdef CONFIG_SYNTH_EVENTS
5729         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5730         "\t  Write into this file to define/undefine new synthetic events.\n"
5731         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5732 #endif
5733 #endif
5734 ;
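/*
 * A worked hist-trigger example tying the README text above together
 * (illustrative; assumes CONFIG_HIST_TRIGGERS and the kmem:kmalloc event
 * with its call_site and bytes_req fields):
 *
 *   echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *           > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist       # dump the aggregated table
 *
 * To remove it, echo the same trigger string prefixed with '!'.
 */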
5735
5736 static ssize_t
5737 tracing_readme_read(struct file *filp, char __user *ubuf,
5738                        size_t cnt, loff_t *ppos)
5739 {
5740         return simple_read_from_buffer(ubuf, cnt, ppos,
5741                                         readme_msg, strlen(readme_msg));
5742 }
5743
5744 static const struct file_operations tracing_readme_fops = {
5745         .open           = tracing_open_generic,
5746         .read           = tracing_readme_read,
5747         .llseek         = generic_file_llseek,
5748 };
5749
5750 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5751 {
5752         int pid = ++(*pos);
5753
5754         return trace_find_tgid_ptr(pid);
5755 }
5756
5757 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5758 {
5759         int pid = *pos;
5760
5761         return trace_find_tgid_ptr(pid);
5762 }
5763
5764 static void saved_tgids_stop(struct seq_file *m, void *v)
5765 {
5766 }
5767
5768 static int saved_tgids_show(struct seq_file *m, void *v)
5769 {
5770         int *entry = (int *)v;
5771         int pid = entry - tgid_map;
5772         int tgid = *entry;
5773
5774         if (tgid == 0)
5775                 return SEQ_SKIP;
5776
5777         seq_printf(m, "%d %d\n", pid, tgid);
5778         return 0;
5779 }
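/*
 * saved_tgids output format (values are illustrative): one "<pid> <tgid>"
 * pair per line, with zero-tgid entries skipped via SEQ_SKIP above:
 *
 *   1087 1087
 *   1092 1087
 */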
5780
5781 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5782         .start          = saved_tgids_start,
5783         .stop           = saved_tgids_stop,
5784         .next           = saved_tgids_next,
5785         .show           = saved_tgids_show,
5786 };
5787
5788 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5789 {
5790         int ret;
5791
5792         ret = tracing_check_open_get_tr(NULL);
5793         if (ret)
5794                 return ret;
5795
5796         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5797 }
5798
5799
5800 static const struct file_operations tracing_saved_tgids_fops = {
5801         .open           = tracing_saved_tgids_open,
5802         .read           = seq_read,
5803         .llseek         = seq_lseek,
5804         .release        = seq_release,
5805 };
5806
5807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5808 {
5809         unsigned int *ptr = v;
5810
5811         if (*pos || m->count)
5812                 ptr++;
5813
5814         (*pos)++;
5815
5816         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5817              ptr++) {
5818                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5819                         continue;
5820
5821                 return ptr;
5822         }
5823
5824         return NULL;
5825 }
5826
5827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5828 {
5829         void *v;
5830         loff_t l = 0;
5831
5832         preempt_disable();
5833         arch_spin_lock(&trace_cmdline_lock);
5834
5835         v = &savedcmd->map_cmdline_to_pid[0];
5836         while (l <= *pos) {
5837                 v = saved_cmdlines_next(m, v, &l);
5838                 if (!v)
5839                         return NULL;
5840         }
5841
5842         return v;
5843 }
5844
5845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5846 {
5847         arch_spin_unlock(&trace_cmdline_lock);
5848         preempt_enable();
5849 }
5850
5851 static int saved_cmdlines_show(struct seq_file *m, void *v)
5852 {
5853         char buf[TASK_COMM_LEN];
5854         unsigned int *pid = v;
5855
5856         __trace_find_cmdline(*pid, buf);
5857         seq_printf(m, "%d %s\n", *pid, buf);
5858         return 0;
5859 }
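/*
 * saved_cmdlines output format (values are illustrative): one
 * "<pid> <comm>" pair per cached entry:
 *
 *   1 systemd
 *   873 sshd
 */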
5860
5861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5862         .start          = saved_cmdlines_start,
5863         .next           = saved_cmdlines_next,
5864         .stop           = saved_cmdlines_stop,
5865         .show           = saved_cmdlines_show,
5866 };
5867
5868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5869 {
5870         int ret;
5871
5872         ret = tracing_check_open_get_tr(NULL);
5873         if (ret)
5874                 return ret;
5875
5876         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5877 }
5878
5879 static const struct file_operations tracing_saved_cmdlines_fops = {
5880         .open           = tracing_saved_cmdlines_open,
5881         .read           = seq_read,
5882         .llseek         = seq_lseek,
5883         .release        = seq_release,
5884 };
5885
5886 static ssize_t
5887 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5888                                  size_t cnt, loff_t *ppos)
5889 {
5890         char buf[64];
5891         int r;
5892
5893         arch_spin_lock(&trace_cmdline_lock);
5894         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5895         arch_spin_unlock(&trace_cmdline_lock);
5896
5897         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5898 }
5899
5900 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5901 {
5902         kfree(s->saved_cmdlines);
5903         kfree(s->map_cmdline_to_pid);
5904         kfree(s);
5905 }
5906
5907 static int tracing_resize_saved_cmdlines(unsigned int val)
5908 {
5909         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5910
5911         s = kmalloc(sizeof(*s), GFP_KERNEL);
5912         if (!s)
5913                 return -ENOMEM;
5914
5915         if (allocate_cmdlines_buffer(val, s) < 0) {
5916                 kfree(s);
5917                 return -ENOMEM;
5918         }
5919
5920         arch_spin_lock(&trace_cmdline_lock);
5921         savedcmd_temp = savedcmd;
5922         savedcmd = s;
5923         arch_spin_unlock(&trace_cmdline_lock);
5924         free_saved_cmdlines_buffer(savedcmd_temp);
5925
5926         return 0;
5927 }
5928
5929 static ssize_t
5930 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5931                                   size_t cnt, loff_t *ppos)
5932 {
5933         unsigned long val;
5934         int ret;
5935
5936         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5937         if (ret)
5938                 return ret;
5939
5940         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5941         if (!val || val > PID_MAX_DEFAULT)
5942                 return -EINVAL;
5943
5944         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5945         if (ret < 0)
5946                 return ret;
5947
5948         *ppos += cnt;
5949
5950         return cnt;
5951 }
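/*
 * Resizing the cmdline cache (sketch): the write path above accepts a plain
 * decimal count between 1 and PID_MAX_DEFAULT:
 *
 *   cat saved_cmdlines_size         # typically 128 by default
 *   echo 1024 > saved_cmdlines_size
 */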
5952
5953 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5954         .open           = tracing_open_generic,
5955         .read           = tracing_saved_cmdlines_size_read,
5956         .write          = tracing_saved_cmdlines_size_write,
5957 };
5958
5959 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5960 static union trace_eval_map_item *
5961 update_eval_map(union trace_eval_map_item *ptr)
5962 {
5963         if (!ptr->map.eval_string) {
5964                 if (ptr->tail.next) {
5965                         ptr = ptr->tail.next;
5966                         /* Set ptr to the next real item (skip head) */
5967                         ptr++;
5968                 } else
5969                         return NULL;
5970         }
5971         return ptr;
5972 }
5973
5974 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5975 {
5976         union trace_eval_map_item *ptr = v;
5977
5978         /*
5979          * Paranoid! If ptr points to end, we don't want to increment past it.
5980          * This really should never happen.
5981          */
5982         (*pos)++;
5983         ptr = update_eval_map(ptr);
5984         if (WARN_ON_ONCE(!ptr))
5985                 return NULL;
5986
5987         ptr++;
5988         ptr = update_eval_map(ptr);
5989
5990         return ptr;
5991 }
5992
5993 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5994 {
5995         union trace_eval_map_item *v;
5996         loff_t l = 0;
5997
5998         mutex_lock(&trace_eval_mutex);
5999
6000         v = trace_eval_maps;
6001         if (v)
6002                 v++;
6003
6004         while (v && l < *pos) {
6005                 v = eval_map_next(m, v, &l);
6006         }
6007
6008         return v;
6009 }
6010
6011 static void eval_map_stop(struct seq_file *m, void *v)
6012 {
6013         mutex_unlock(&trace_eval_mutex);
6014 }
6015
6016 static int eval_map_show(struct seq_file *m, void *v)
6017 {
6018         union trace_eval_map_item *ptr = v;
6019
6020         seq_printf(m, "%s %ld (%s)\n",
6021                    ptr->map.eval_string, ptr->map.eval_value,
6022                    ptr->map.system);
6023
6024         return 0;
6025 }
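/*
 * eval_map output format per the seq_printf() above, i.e.
 * "<eval_string> <value> (<system>)"; an illustrative entry (actual
 * contents depend on which eval maps are registered):
 *
 *   HI_SOFTIRQ 0 (irq)
 */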
6026
6027 static const struct seq_operations tracing_eval_map_seq_ops = {
6028         .start          = eval_map_start,
6029         .next           = eval_map_next,
6030         .stop           = eval_map_stop,
6031         .show           = eval_map_show,
6032 };
6033
6034 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6035 {
6036         int ret;
6037
6038         ret = tracing_check_open_get_tr(NULL);
6039         if (ret)
6040                 return ret;
6041
6042         return seq_open(filp, &tracing_eval_map_seq_ops);
6043 }
6044
6045 static const struct file_operations tracing_eval_map_fops = {
6046         .open           = tracing_eval_map_open,
6047         .read           = seq_read,
6048         .llseek         = seq_lseek,
6049         .release        = seq_release,
6050 };
6051
6052 static inline union trace_eval_map_item *
6053 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6054 {
6055         /* Return tail of array given the head */
6056         return ptr + ptr->head.length + 1;
6057 }
6058
6059 static void
6060 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6061                            int len)
6062 {
6063         struct trace_eval_map **stop;
6064         struct trace_eval_map **map;
6065         union trace_eval_map_item *map_array;
6066         union trace_eval_map_item *ptr;
6067
6068         stop = start + len;
6069
6070         /*
6071          * The trace_eval_maps contains the map plus a head and tail item,
6072          * where the head holds the module and length of array, and the
6073          * tail holds a pointer to the next list.
6074          */
6075         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6076         if (!map_array) {
6077                 pr_warn("Unable to allocate trace eval mapping\n");
6078                 return;
6079         }
6080
6081         mutex_lock(&trace_eval_mutex);
6082
6083         if (!trace_eval_maps)
6084                 trace_eval_maps = map_array;
6085         else {
6086                 ptr = trace_eval_maps;
6087                 for (;;) {
6088                         ptr = trace_eval_jmp_to_tail(ptr);
6089                         if (!ptr->tail.next)
6090                                 break;
6091                         ptr = ptr->tail.next;
6092
6093                 }
6094                 ptr->tail.next = map_array;
6095         }
6096         map_array->head.mod = mod;
6097         map_array->head.length = len;
6098         map_array++;
6099
6100         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6101                 map_array->map = **map;
6102                 map_array++;
6103         }
6104         memset(map_array, 0, sizeof(*map_array));
6105
6106         mutex_unlock(&trace_eval_mutex);
6107 }
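/*
 * Layout of one map_array chunk built above (a sketch derived from the
 * comment in this function; indices are illustrative):
 *
 *   [0]      head { .mod, .length = len }
 *   [1]      map  (copy of the first trace_eval_map)
 *   ...
 *   [len]    map  (copy of the last trace_eval_map)
 *   [len+1]  tail (zeroed terminator; .next links to the next chunk)
 */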
6108
6109 static void trace_create_eval_file(struct dentry *d_tracer)
6110 {
6111         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6112                           NULL, &tracing_eval_map_fops);
6113 }
6114
6115 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6116 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6117 static inline void trace_insert_eval_map_file(struct module *mod,
6118                               struct trace_eval_map **start, int len) { }
6119 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6120
6121 static void trace_insert_eval_map(struct module *mod,
6122                                   struct trace_eval_map **start, int len)
6123 {
6124         struct trace_eval_map **map;
6125
6126         if (len <= 0)
6127                 return;
6128
6129         map = start;
6130
6131         trace_event_eval_update(map, len);
6132
6133         trace_insert_eval_map_file(mod, start, len);
6134 }
6135
6136 static ssize_t
6137 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6138                        size_t cnt, loff_t *ppos)
6139 {
6140         struct trace_array *tr = filp->private_data;
6141         char buf[MAX_TRACER_SIZE+2];
6142         int r;
6143
6144         mutex_lock(&trace_types_lock);
6145         r = sprintf(buf, "%s\n", tr->current_trace->name);
6146         mutex_unlock(&trace_types_lock);
6147
6148         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6149 }
6150
6151 int tracer_init(struct tracer *t, struct trace_array *tr)
6152 {
6153         tracing_reset_online_cpus(&tr->array_buffer);
6154         return t->init(tr);
6155 }
6156
6157 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6158 {
6159         int cpu;
6160
6161         for_each_tracing_cpu(cpu)
6162                 per_cpu_ptr(buf->data, cpu)->entries = val;
6163 }
6164
6165 #ifdef CONFIG_TRACER_MAX_TRACE
6166 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6167 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6168                                         struct array_buffer *size_buf, int cpu_id)
6169 {
6170         int cpu, ret = 0;
6171
6172         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6173                 for_each_tracing_cpu(cpu) {
6174                         ret = ring_buffer_resize(trace_buf->buffer,
6175                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6176                         if (ret < 0)
6177                                 break;
6178                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6179                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6180                 }
6181         } else {
6182                 ret = ring_buffer_resize(trace_buf->buffer,
6183                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6184                 if (ret == 0)
6185                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6186                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6187         }
6188
6189         return ret;
6190 }
6191 #endif /* CONFIG_TRACER_MAX_TRACE */
6192
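/*
 * Resize @tr's main ring buffer, and its max/snapshot buffer when one
 * is in use, either for a single CPU or for all CPUs when @cpu is
 * RING_BUFFER_ALL_CPUS.  Called with trace_types_lock held.
 */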
6193 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6194                                         unsigned long size, int cpu)
6195 {
6196         int ret;
6197
6198         /*
6199          * If kernel or user changes the size of the ring buffer
6200          * we use the size that was given, and we can forget about
6201          * expanding it later.
6202          */
6203         ring_buffer_expanded = true;
6204
6205         /* May be called before buffers are initialized */
6206         if (!tr->array_buffer.buffer)
6207                 return 0;
6208
6209         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6210         if (ret < 0)
6211                 return ret;
6212
6213 #ifdef CONFIG_TRACER_MAX_TRACE
6214         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6215             !tr->current_trace->use_max_tr)
6216                 goto out;
6217
6218         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6219         if (ret < 0) {
6220                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6221                                                      &tr->array_buffer, cpu);
6222                 if (r < 0) {
6223                         /*
6224                          * AARGH! We are left with different
6225                          * size max buffer!!!!
6226                          * The max buffer is our "snapshot" buffer.
6227                          * When a tracer needs a snapshot (one of the
6228                          * latency tracers), it swaps the max buffer
6229                          * with the saved snapshot. We succeeded in updating
6230                          * the size of the main buffer, but failed to update
6231                          * the size of the max buffer. Then, when we tried
6232                          * to reset the main buffer to the original size, we
6233                          * failed there too. This is very unlikely to
6234                          * happen, but if it does, warn and kill all
6235                          * tracing.
6236                          */
6237                         WARN_ON(1);
6238                         tracing_disabled = 1;
6239                 }
6240                 return ret;
6241         }
6242
6243         if (cpu == RING_BUFFER_ALL_CPUS)
6244                 set_buffer_entries(&tr->max_buffer, size);
6245         else
6246                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6247
6248  out:
6249 #endif /* CONFIG_TRACER_MAX_TRACE */
6250
6251         if (cpu == RING_BUFFER_ALL_CPUS)
6252                 set_buffer_entries(&tr->array_buffer, size);
6253         else
6254                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6255
6256         return ret;
6257 }
6258
6259 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6260                                   unsigned long size, int cpu_id)
6261 {
6262         int ret;
6263
6264         mutex_lock(&trace_types_lock);
6265
6266         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6267                 /* make sure this cpu is enabled in the mask */
6268                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6269                         ret = -EINVAL;
6270                         goto out;
6271                 }
6272         }
6273
6274         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6275         if (ret < 0)
6276                 ret = -ENOMEM;
6277
6278 out:
6279         mutex_unlock(&trace_types_lock);
6280
6281         return ret;
6282 }
6283
6284
6285 /**
6286  * tracing_update_buffers - used by tracing facility to expand ring buffers
6287  *
6288  * To save memory when tracing is never used on a system that has it
6289  * configured in, the ring buffers are set to a minimum size. Once a
6290  * user starts to use the tracing facility, they are expanded to their
6291  * default size.
6292  *
6293  * This function is to be called when a tracer is about to be used.
6294  */
6295 int tracing_update_buffers(void)
6296 {
6297         int ret = 0;
6298
6299         mutex_lock(&trace_types_lock);
6300         if (!ring_buffer_expanded)
6301                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6302                                                 RING_BUFFER_ALL_CPUS);
6303         mutex_unlock(&trace_types_lock);
6304
6305         return ret;
6306 }
6307
6308 struct trace_option_dentry;
6309
6310 static void
6311 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6312
6313 /*
6314  * Used to clear out the tracer before deletion of an instance.
6315  * Must have trace_types_lock held.
6316  */
6317 static void tracing_set_nop(struct trace_array *tr)
6318 {
6319         if (tr->current_trace == &nop_trace)
6320                 return;
6321
6322         tr->current_trace->enabled--;
6323
6324         if (tr->current_trace->reset)
6325                 tr->current_trace->reset(tr);
6326
6327         tr->current_trace = &nop_trace;
6328 }
6329
6330 static bool tracer_options_updated;
6331
6332 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6333 {
6334         /* Only enable if the directory has been created already. */
6335         if (!tr->dir)
6336                 return;
6337
6338         /* Only create trace option files after update_tracer_options has finished */
6339         if (!tracer_options_updated)
6340                 return;
6341
6342         create_trace_option_files(tr, t);
6343 }
6344
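/*
 * tracing_set_tracer - switch @tr to the tracer named @buf.
 * Expands the ring buffer if needed, shuts down the current tracer
 * (dropping its snapshot buffer if the new tracer does not need one)
 * and initializes the new tracer.  Fails with -EBUSY while trace_pipe
 * readers hold a reference.  User space reaches this through the
 * current_tracer file, e.g.:
 *
 *   echo function > /sys/kernel/tracing/current_tracer
 */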
6345 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6346 {
6347         struct tracer *t;
6348 #ifdef CONFIG_TRACER_MAX_TRACE
6349         bool had_max_tr;
6350 #endif
6351         int ret = 0;
6352
6353         mutex_lock(&trace_types_lock);
6354
6355         if (!ring_buffer_expanded) {
6356                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6357                                                 RING_BUFFER_ALL_CPUS);
6358                 if (ret < 0)
6359                         goto out;
6360                 ret = 0;
6361         }
6362
6363         for (t = trace_types; t; t = t->next) {
6364                 if (strcmp(t->name, buf) == 0)
6365                         break;
6366         }
6367         if (!t) {
6368                 ret = -EINVAL;
6369                 goto out;
6370         }
6371         if (t == tr->current_trace)
6372                 goto out;
6373
6374 #ifdef CONFIG_TRACER_SNAPSHOT
6375         if (t->use_max_tr) {
6376                 arch_spin_lock(&tr->max_lock);
6377                 if (tr->cond_snapshot)
6378                         ret = -EBUSY;
6379                 arch_spin_unlock(&tr->max_lock);
6380                 if (ret)
6381                         goto out;
6382         }
6383 #endif
6384         /* Some tracers won't work on kernel command line */
6385         if (system_state < SYSTEM_RUNNING && t->noboot) {
6386                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6387                         t->name);
6388                 goto out;
6389         }
6390
6391         /* Some tracers are only allowed for the top level buffer */
6392         if (!trace_ok_for_array(t, tr)) {
6393                 ret = -EINVAL;
6394                 goto out;
6395         }
6396
6397         /* If trace pipe files are being read, we can't change the tracer */
6398         if (tr->trace_ref) {
6399                 ret = -EBUSY;
6400                 goto out;
6401         }
6402
6403         trace_branch_disable();
6404
6405         tr->current_trace->enabled--;
6406
6407         if (tr->current_trace->reset)
6408                 tr->current_trace->reset(tr);
6409
6410         /* Current trace needs to be nop_trace before synchronize_rcu */
6411         tr->current_trace = &nop_trace;
6412
6413 #ifdef CONFIG_TRACER_MAX_TRACE
6414         had_max_tr = tr->allocated_snapshot;
6415
6416         if (had_max_tr && !t->use_max_tr) {
6417                 /*
6418                  * We need to make sure that the update_max_tr sees that
6419                  * current_trace changed to nop_trace to keep it from
6420                  * swapping the buffers after we resize it.
6421                  * update_max_tr() is called with interrupts disabled,
6422                  * so a synchronize_rcu() is sufficient.
6423                  */
6424                 synchronize_rcu();
6425                 free_snapshot(tr);
6426         }
6427
6428         if (t->use_max_tr && !had_max_tr) {
6429                 ret = tracing_alloc_snapshot_instance(tr);
6430                 if (ret < 0)
6431                         goto out;
6432         }
6433 #endif
6434
6435         if (t->init) {
6436                 ret = tracer_init(t, tr);
6437                 if (ret)
6438                         goto out;
6439         }
6440
6441         tr->current_trace = t;
6442         tr->current_trace->enabled++;
6443         trace_branch_enable(tr);
6444  out:
6445         mutex_unlock(&trace_types_lock);
6446
6447         return ret;
6448 }
6449
6450 static ssize_t
6451 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6452                         size_t cnt, loff_t *ppos)
6453 {
6454         struct trace_array *tr = filp->private_data;
6455         char buf[MAX_TRACER_SIZE+1];
6456         char *name;
6457         size_t ret;
6458         int err;
6459
6460         ret = cnt;
6461
6462         if (cnt > MAX_TRACER_SIZE)
6463                 cnt = MAX_TRACER_SIZE;
6464
6465         if (copy_from_user(buf, ubuf, cnt))
6466                 return -EFAULT;
6467
6468         buf[cnt] = 0;
6469
6470         name = strim(buf);
6471
6472         err = tracing_set_tracer(tr, name);
6473         if (err)
6474                 return err;
6475
6476         *ppos += ret;
6477
6478         return ret;
6479 }
6480
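/*
 * Helpers for latency-style files: the value is stored in nanoseconds
 * but exposed to user space in microseconds.  A stored value of -1 is
 * passed through unchanged on read.
 */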
6481 static ssize_t
6482 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6483                    size_t cnt, loff_t *ppos)
6484 {
6485         char buf[64];
6486         int r;
6487
6488         r = snprintf(buf, sizeof(buf), "%ld\n",
6489                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6490         if (r > sizeof(buf))
6491                 r = sizeof(buf);
6492         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6493 }
6494
6495 static ssize_t
6496 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6497                     size_t cnt, loff_t *ppos)
6498 {
6499         unsigned long val;
6500         int ret;
6501
6502         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6503         if (ret)
6504                 return ret;
6505
6506         *ptr = val * 1000;
6507
6508         return cnt;
6509 }
6510
6511 static ssize_t
6512 tracing_thresh_read(struct file *filp, char __user *ubuf,
6513                     size_t cnt, loff_t *ppos)
6514 {
6515         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6516 }
6517
6518 static ssize_t
6519 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6520                      size_t cnt, loff_t *ppos)
6521 {
6522         struct trace_array *tr = filp->private_data;
6523         int ret;
6524
6525         mutex_lock(&trace_types_lock);
6526         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6527         if (ret < 0)
6528                 goto out;
6529
6530         if (tr->current_trace->update_thresh) {
6531                 ret = tr->current_trace->update_thresh(tr);
6532                 if (ret < 0)
6533                         goto out;
6534         }
6535
6536         ret = cnt;
6537 out:
6538         mutex_unlock(&trace_types_lock);
6539
6540         return ret;
6541 }
6542
6543 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6544
6545 static ssize_t
6546 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6547                      size_t cnt, loff_t *ppos)
6548 {
6549         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6550 }
6551
6552 static ssize_t
6553 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6554                       size_t cnt, loff_t *ppos)
6555 {
6556         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6557 }
6558
6559 #endif
6560
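/*
 * tracing_open_pipe - open handler for the consuming trace_pipe file.
 * Allocates a trace_iterator for the opened CPU (or all CPUs) and
 * takes a trace_ref so the current tracer cannot be changed while the
 * pipe is open.
 */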
6561 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6562 {
6563         struct trace_array *tr = inode->i_private;
6564         struct trace_iterator *iter;
6565         int ret;
6566
6567         ret = tracing_check_open_get_tr(tr);
6568         if (ret)
6569                 return ret;
6570
6571         mutex_lock(&trace_types_lock);
6572
6573         /* create a buffer to store the information to pass to userspace */
6574         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6575         if (!iter) {
6576                 ret = -ENOMEM;
6577                 __trace_array_put(tr);
6578                 goto out;
6579         }
6580
6581         trace_seq_init(&iter->seq);
6582         iter->trace = tr->current_trace;
6583
6584         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6585                 ret = -ENOMEM;
6586                 goto fail;
6587         }
6588
6589         /* trace pipe does not show start of buffer */
6590         cpumask_setall(iter->started);
6591
6592         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6593                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6594
6595         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6596         if (trace_clocks[tr->clock_id].in_ns)
6597                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6598
6599         iter->tr = tr;
6600         iter->array_buffer = &tr->array_buffer;
6601         iter->cpu_file = tracing_get_cpu(inode);
6602         mutex_init(&iter->mutex);
6603         filp->private_data = iter;
6604
6605         if (iter->trace->pipe_open)
6606                 iter->trace->pipe_open(iter);
6607
6608         nonseekable_open(inode, filp);
6609
6610         tr->trace_ref++;
6611 out:
6612         mutex_unlock(&trace_types_lock);
6613         return ret;
6614
6615 fail:
6616         kfree(iter);
6617         __trace_array_put(tr);
6618         mutex_unlock(&trace_types_lock);
6619         return ret;
6620 }
6621
6622 static int tracing_release_pipe(struct inode *inode, struct file *file)
6623 {
6624         struct trace_iterator *iter = file->private_data;
6625         struct trace_array *tr = inode->i_private;
6626
6627         mutex_lock(&trace_types_lock);
6628
6629         tr->trace_ref--;
6630
6631         if (iter->trace->pipe_close)
6632                 iter->trace->pipe_close(iter);
6633
6634         mutex_unlock(&trace_types_lock);
6635
6636         free_cpumask_var(iter->started);
6637         mutex_destroy(&iter->mutex);
6638         kfree(iter);
6639
6640         trace_array_put(tr);
6641
6642         return 0;
6643 }
6644
6645 static __poll_t
6646 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6647 {
6648         struct trace_array *tr = iter->tr;
6649
6650         /* Iterators are static; they should be either filled or empty */
6651         if (trace_buffer_iter(iter, iter->cpu_file))
6652                 return EPOLLIN | EPOLLRDNORM;
6653
6654         if (tr->trace_flags & TRACE_ITER_BLOCK)
6655                 /*
6656                  * Always select as readable when in blocking mode
6657                  */
6658                 return EPOLLIN | EPOLLRDNORM;
6659         else
6660                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6661                                              filp, poll_table);
6662 }
6663
6664 static __poll_t
6665 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6666 {
6667         struct trace_iterator *iter = filp->private_data;
6668
6669         return trace_poll(iter, filp, poll_table);
6670 }
6671
6672 /* Must be called with iter->mutex held. */
6673 static int tracing_wait_pipe(struct file *filp)
6674 {
6675         struct trace_iterator *iter = filp->private_data;
6676         int ret;
6677
6678         while (trace_empty(iter)) {
6679
6680                 if ((filp->f_flags & O_NONBLOCK)) {
6681                         return -EAGAIN;
6682                 }
6683
6684                 /*
6685                  * We block until we have read something and tracing is disabled.
6686                  * We still block if tracing is disabled but we have never read
6687                  * anything: this allows a user to cat this file and then enable
6688                  * tracing. But once we have read something, we return EOF when
6689                  * tracing is disabled again.
6690                  *
6691                  * iter->pos will be 0 if we haven't read anything.
6692                  */
6693                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6694                         break;
6695
6696                 mutex_unlock(&iter->mutex);
6697
6698                 ret = wait_on_pipe(iter, 0);
6699
6700                 mutex_lock(&iter->mutex);
6701
6702                 if (ret)
6703                         return ret;
6704         }
6705
6706         return 1;
6707 }
6708
6709 /*
6710  * Consumer reader.
6711  */
6712 static ssize_t
6713 tracing_read_pipe(struct file *filp, char __user *ubuf,
6714                   size_t cnt, loff_t *ppos)
6715 {
6716         struct trace_iterator *iter = filp->private_data;
6717         ssize_t sret;
6718
6719         /*
6720          * Avoid more than one consumer on a single file descriptor.
6721          * This is just a matter of trace coherency; the ring buffer itself
6722          * is protected.
6723          */
6724         mutex_lock(&iter->mutex);
6725
6726         /* return any leftover data */
6727         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6728         if (sret != -EBUSY)
6729                 goto out;
6730
6731         trace_seq_init(&iter->seq);
6732
6733         if (iter->trace->read) {
6734                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6735                 if (sret)
6736                         goto out;
6737         }
6738
6739 waitagain:
6740         sret = tracing_wait_pipe(filp);
6741         if (sret <= 0)
6742                 goto out;
6743
6744         /* stop when tracing is finished */
6745         if (trace_empty(iter)) {
6746                 sret = 0;
6747                 goto out;
6748         }
6749
6750         if (cnt >= PAGE_SIZE)
6751                 cnt = PAGE_SIZE - 1;
6752
6753         /* reset all but tr, trace, and overruns */
6754         trace_iterator_reset(iter);
6755         cpumask_clear(iter->started);
6756         trace_seq_init(&iter->seq);
6757
6758         trace_event_read_lock();
6759         trace_access_lock(iter->cpu_file);
6760         while (trace_find_next_entry_inc(iter) != NULL) {
6761                 enum print_line_t ret;
6762                 int save_len = iter->seq.seq.len;
6763
6764                 ret = print_trace_line(iter);
6765                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6766                         /* don't print partial lines */
6767                         iter->seq.seq.len = save_len;
6768                         break;
6769                 }
6770                 if (ret != TRACE_TYPE_NO_CONSUME)
6771                         trace_consume(iter);
6772
6773                 if (trace_seq_used(&iter->seq) >= cnt)
6774                         break;
6775
6776                 /*
6777                  * Setting the full flag means we reached the trace_seq buffer
6778                  * size, and we should have left via the partial-line condition
6779                  * above. One of the trace_seq_* functions was not used properly.
6780                  */
6781                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6782                           iter->ent->type);
6783         }
6784         trace_access_unlock(iter->cpu_file);
6785         trace_event_read_unlock();
6786
6787         /* Now copy what we have to the user */
6788         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6789         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6790                 trace_seq_init(&iter->seq);
6791
6792         /*
6793          * If there was nothing to send to user, in spite of consuming trace
6794          * entries, go back to wait for more entries.
6795          */
6796         if (sret == -EBUSY)
6797                 goto waitagain;
6798
6799 out:
6800         mutex_unlock(&iter->mutex);
6801
6802         return sret;
6803 }
6804
6805 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6806                                      unsigned int idx)
6807 {
6808         __free_page(spd->pages[idx]);
6809 }
6810
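/*
 * Fill iter->seq with formatted trace entries, consuming them, until
 * either the page-sized seq buffer is full or @rem bytes have been
 * produced.  Returns how many of the requested bytes are still wanted.
 */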
6811 static size_t
6812 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6813 {
6814         size_t count;
6815         int save_len;
6816         int ret;
6817
6818         /* Seq buffer is page-sized, exactly what we need. */
6819         for (;;) {
6820                 save_len = iter->seq.seq.len;
6821                 ret = print_trace_line(iter);
6822
6823                 if (trace_seq_has_overflowed(&iter->seq)) {
6824                         iter->seq.seq.len = save_len;
6825                         break;
6826                 }
6827
6828                 /*
6829                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6830                  * should only be returned if iter->seq overflowed. But
6831                  * check it anyway to be safe.
6832                  */
6833                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6834                         iter->seq.seq.len = save_len;
6835                         break;
6836                 }
6837
6838                 count = trace_seq_used(&iter->seq) - save_len;
6839                 if (rem < count) {
6840                         rem = 0;
6841                         iter->seq.seq.len = save_len;
6842                         break;
6843                 }
6844
6845                 if (ret != TRACE_TYPE_NO_CONSUME)
6846                         trace_consume(iter);
6847                 rem -= count;
6848                 if (!trace_find_next_entry_inc(iter))   {
6849                         rem = 0;
6850                         iter->ent = NULL;
6851                         break;
6852                 }
6853         }
6854
6855         return rem;
6856 }
6857
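/*
 * tracing_splice_read_pipe - splice_read handler for trace_pipe.
 * Formats consumed trace entries into freshly allocated pages and
 * feeds them to @pipe.
 */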
6858 static ssize_t tracing_splice_read_pipe(struct file *filp,
6859                                         loff_t *ppos,
6860                                         struct pipe_inode_info *pipe,
6861                                         size_t len,
6862                                         unsigned int flags)
6863 {
6864         struct page *pages_def[PIPE_DEF_BUFFERS];
6865         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6866         struct trace_iterator *iter = filp->private_data;
6867         struct splice_pipe_desc spd = {
6868                 .pages          = pages_def,
6869                 .partial        = partial_def,
6870                 .nr_pages       = 0, /* This gets updated below. */
6871                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6872                 .ops            = &default_pipe_buf_ops,
6873                 .spd_release    = tracing_spd_release_pipe,
6874         };
6875         ssize_t ret;
6876         size_t rem;
6877         unsigned int i;
6878
6879         if (splice_grow_spd(pipe, &spd))
6880                 return -ENOMEM;
6881
6882         mutex_lock(&iter->mutex);
6883
6884         if (iter->trace->splice_read) {
6885                 ret = iter->trace->splice_read(iter, filp,
6886                                                ppos, pipe, len, flags);
6887                 if (ret)
6888                         goto out_err;
6889         }
6890
6891         ret = tracing_wait_pipe(filp);
6892         if (ret <= 0)
6893                 goto out_err;
6894
6895         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6896                 ret = -EFAULT;
6897                 goto out_err;
6898         }
6899
6900         trace_event_read_lock();
6901         trace_access_lock(iter->cpu_file);
6902
6903         /* Fill as many pages as possible. */
6904         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6905                 spd.pages[i] = alloc_page(GFP_KERNEL);
6906                 if (!spd.pages[i])
6907                         break;
6908
6909                 rem = tracing_fill_pipe_page(rem, iter);
6910
6911                 /* Copy the data into the page, so we can start over. */
6912                 ret = trace_seq_to_buffer(&iter->seq,
6913                                           page_address(spd.pages[i]),
6914                                           trace_seq_used(&iter->seq));
6915                 if (ret < 0) {
6916                         __free_page(spd.pages[i]);
6917                         break;
6918                 }
6919                 spd.partial[i].offset = 0;
6920                 spd.partial[i].len = trace_seq_used(&iter->seq);
6921
6922                 trace_seq_init(&iter->seq);
6923         }
6924
6925         trace_access_unlock(iter->cpu_file);
6926         trace_event_read_unlock();
6927         mutex_unlock(&iter->mutex);
6928
6929         spd.nr_pages = i;
6930
6931         if (i)
6932                 ret = splice_to_pipe(pipe, &spd);
6933         else
6934                 ret = 0;
6935 out:
6936         splice_shrink_spd(&spd);
6937         return ret;
6938
6939 out_err:
6940         mutex_unlock(&iter->mutex);
6941         goto out;
6942 }
6943
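/*
 * tracing_entries_read - read handler for the per-instance
 * buffer_size_kb file.  Prints the per-CPU buffer size in KB, "X" if
 * the CPUs are not all the same size, and "(expanded: ...)" while the
 * buffer is still at its boot-time minimum size.
 */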
6944 static ssize_t
6945 tracing_entries_read(struct file *filp, char __user *ubuf,
6946                      size_t cnt, loff_t *ppos)
6947 {
6948         struct inode *inode = file_inode(filp);
6949         struct trace_array *tr = inode->i_private;
6950         int cpu = tracing_get_cpu(inode);
6951         char buf[64];
6952         int r = 0;
6953         ssize_t ret;
6954
6955         mutex_lock(&trace_types_lock);
6956
6957         if (cpu == RING_BUFFER_ALL_CPUS) {
6958                 int cpu, buf_size_same;
6959                 unsigned long size;
6960
6961                 size = 0;
6962                 buf_size_same = 1;
6963                 /* check if all cpu sizes are the same */
6964                 for_each_tracing_cpu(cpu) {
6965                         /* fill in the size from first enabled cpu */
6966                         if (size == 0)
6967                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6968                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6969                                 buf_size_same = 0;
6970                                 break;
6971                         }
6972                 }
6973
6974                 if (buf_size_same) {
6975                         if (!ring_buffer_expanded)
6976                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6977                                             size >> 10,
6978                                             trace_buf_size >> 10);
6979                         else
6980                                 r = sprintf(buf, "%lu\n", size >> 10);
6981                 } else
6982                         r = sprintf(buf, "X\n");
6983         } else
6984                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6985
6986         mutex_unlock(&trace_types_lock);
6987
6988         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6989         return ret;
6990 }
6991
6992 static ssize_t
6993 tracing_entries_write(struct file *filp, const char __user *ubuf,
6994                       size_t cnt, loff_t *ppos)
6995 {
6996         struct inode *inode = file_inode(filp);
6997         struct trace_array *tr = inode->i_private;
6998         unsigned long val;
6999         int ret;
7000
7001         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002         if (ret)
7003                 return ret;
7004
7005         /* must have at least 1 entry */
7006         if (!val)
7007                 return -EINVAL;
7008
7009         /* value is in KB */
7010         val <<= 10;
7011         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7012         if (ret < 0)
7013                 return ret;
7014
7015         *ppos += cnt;
7016
7017         return cnt;
7018 }
7019
7020 static ssize_t
7021 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7022                                 size_t cnt, loff_t *ppos)
7023 {
7024         struct trace_array *tr = filp->private_data;
7025         char buf[64];
7026         int r, cpu;
7027         unsigned long size = 0, expanded_size = 0;
7028
7029         mutex_lock(&trace_types_lock);
7030         for_each_tracing_cpu(cpu) {
7031                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7032                 if (!ring_buffer_expanded)
7033                         expanded_size += trace_buf_size >> 10;
7034         }
7035         if (ring_buffer_expanded)
7036                 r = sprintf(buf, "%lu\n", size);
7037         else
7038                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7039         mutex_unlock(&trace_types_lock);
7040
7041         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7042 }
7043
7044 static ssize_t
7045 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7046                           size_t cnt, loff_t *ppos)
7047 {
7048         /*
7049          * There is no need to read what the user has written; this function
7050          * only exists so that using "echo" on this file does not return an error.
7051          */
7052
7053         *ppos += cnt;
7054
7055         return cnt;
7056 }
7057
7058 static int
7059 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7060 {
7061         struct trace_array *tr = inode->i_private;
7062
7063         /* disable tracing ? */
7064         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7065                 tracer_tracing_off(tr);
7066         /* resize the ring buffer to 0 */
7067         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7068
7069         trace_array_put(tr);
7070
7071         return 0;
7072 }
7073
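/*
 * tracing_mark_write - write handler for the trace_marker file.
 * Records a user supplied string as a TRACE_PRINT event so user space
 * can annotate the trace, e.g. from a shell:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * If the string cannot be copied, "<faulted>" is recorded instead.
 */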
7074 static ssize_t
7075 tracing_mark_write(struct file *filp, const char __user *ubuf,
7076                                         size_t cnt, loff_t *fpos)
7077 {
7078         struct trace_array *tr = filp->private_data;
7079         struct ring_buffer_event *event;
7080         enum event_trigger_type tt = ETT_NONE;
7081         struct trace_buffer *buffer;
7082         struct print_entry *entry;
7083         ssize_t written;
7084         int size;
7085         int len;
7086
7087 /* Used in tracing_mark_raw_write() as well */
7088 #define FAULTED_STR "<faulted>"
7089 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7090
7091         if (tracing_disabled)
7092                 return -EINVAL;
7093
7094         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7095                 return -EINVAL;
7096
7097         if (cnt > TRACE_BUF_SIZE)
7098                 cnt = TRACE_BUF_SIZE;
7099
7100         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7101
7102         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7103
7104         /* If less than "<faulted>", then make sure we can still add that */
7105         if (cnt < FAULTED_SIZE)
7106                 size += FAULTED_SIZE - cnt;
7107
7108         buffer = tr->array_buffer.buffer;
7109         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7110                                             tracing_gen_ctx());
7111         if (unlikely(!event))
7112                 /* Ring buffer disabled, return as if not open for write */
7113                 return -EBADF;
7114
7115         entry = ring_buffer_event_data(event);
7116         entry->ip = _THIS_IP_;
7117
7118         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7119         if (len) {
7120                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7121                 cnt = FAULTED_SIZE;
7122                 written = -EFAULT;
7123         } else
7124                 written = cnt;
7125
7126         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7127                 /* do not add \n before testing triggers, but add \0 */
7128                 entry->buf[cnt] = '\0';
7129                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7130         }
7131
7132         if (entry->buf[cnt - 1] != '\n') {
7133                 entry->buf[cnt] = '\n';
7134                 entry->buf[cnt + 1] = '\0';
7135         } else
7136                 entry->buf[cnt] = '\0';
7137
7138         if (static_branch_unlikely(&trace_marker_exports_enabled))
7139                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7140         __buffer_unlock_commit(buffer, event);
7141
7142         if (tt)
7143                 event_triggers_post_call(tr->trace_marker_file, tt);
7144
7145         return written;
7146 }
7147
7148 /* Limit it for now to 3K (including tag) */
7149 #define RAW_DATA_MAX_SIZE (1024*3)
7150
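/*
 * tracing_mark_raw_write - write handler for the trace_marker_raw file.
 * The payload must begin with a binary tag id (an int) and is recorded
 * verbatim as a TRACE_RAW_DATA event.
 */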
7151 static ssize_t
7152 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7153                                         size_t cnt, loff_t *fpos)
7154 {
7155         struct trace_array *tr = filp->private_data;
7156         struct ring_buffer_event *event;
7157         struct trace_buffer *buffer;
7158         struct raw_data_entry *entry;
7159         ssize_t written;
7160         int size;
7161         int len;
7162
7163 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7164
7165         if (tracing_disabled)
7166                 return -EINVAL;
7167
7168         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7169                 return -EINVAL;
7170
7171         /* The marker must at least have a tag id */
7172         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7173                 return -EINVAL;
7174
7175         if (cnt > TRACE_BUF_SIZE)
7176                 cnt = TRACE_BUF_SIZE;
7177
7178         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7179
7180         size = sizeof(*entry) + cnt;
7181         if (cnt < FAULT_SIZE_ID)
7182                 size += FAULT_SIZE_ID - cnt;
7183
7184         buffer = tr->array_buffer.buffer;
7185         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7186                                             tracing_gen_ctx());
7187         if (!event)
7188                 /* Ring buffer disabled, return as if not open for write */
7189                 return -EBADF;
7190
7191         entry = ring_buffer_event_data(event);
7192
7193         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7194         if (len) {
7195                 entry->id = -1;
7196                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7197                 written = -EFAULT;
7198         } else
7199                 written = cnt;
7200
7201         __buffer_unlock_commit(buffer, event);
7202
7203         return written;
7204 }
7205
7206 static int tracing_clock_show(struct seq_file *m, void *v)
7207 {
7208         struct trace_array *tr = m->private;
7209         int i;
7210
7211         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7212                 seq_printf(m,
7213                         "%s%s%s%s", i ? " " : "",
7214                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7215                         i == tr->clock_id ? "]" : "");
7216         seq_putc(m, '\n');
7217
7218         return 0;
7219 }
7220
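/*
 * tracing_set_clock - select the trace clock named @clockstr for @tr,
 * e.g. "local", "global" or "mono".  User space reaches this through
 * the trace_clock file:
 *
 *   echo mono > /sys/kernel/tracing/trace_clock
 */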
7221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7222 {
7223         int i;
7224
7225         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7226                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7227                         break;
7228         }
7229         if (i == ARRAY_SIZE(trace_clocks))
7230                 return -EINVAL;
7231
7232         mutex_lock(&trace_types_lock);
7233
7234         tr->clock_id = i;
7235
7236         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7237
7238         /*
7239          * New clock may not be consistent with the previous clock.
7240          * Reset the buffer so that it doesn't have incomparable timestamps.
7241          */
7242         tracing_reset_online_cpus(&tr->array_buffer);
7243
7244 #ifdef CONFIG_TRACER_MAX_TRACE
7245         if (tr->max_buffer.buffer)
7246                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7247         tracing_reset_online_cpus(&tr->max_buffer);
7248 #endif
7249
7250         mutex_unlock(&trace_types_lock);
7251
7252         return 0;
7253 }
7254
7255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7256                                    size_t cnt, loff_t *fpos)
7257 {
7258         struct seq_file *m = filp->private_data;
7259         struct trace_array *tr = m->private;
7260         char buf[64];
7261         const char *clockstr;
7262         int ret;
7263
7264         if (cnt >= sizeof(buf))
7265                 return -EINVAL;
7266
7267         if (copy_from_user(buf, ubuf, cnt))
7268                 return -EFAULT;
7269
7270         buf[cnt] = 0;
7271
7272         clockstr = strstrip(buf);
7273
7274         ret = tracing_set_clock(tr, clockstr);
7275         if (ret)
7276                 return ret;
7277
7278         *fpos += cnt;
7279
7280         return cnt;
7281 }
7282
7283 static int tracing_clock_open(struct inode *inode, struct file *file)
7284 {
7285         struct trace_array *tr = inode->i_private;
7286         int ret;
7287
7288         ret = tracing_check_open_get_tr(tr);
7289         if (ret)
7290                 return ret;
7291
7292         ret = single_open(file, tracing_clock_show, inode->i_private);
7293         if (ret < 0)
7294                 trace_array_put(tr);
7295
7296         return ret;
7297 }
7298
7299 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7300 {
7301         struct trace_array *tr = m->private;
7302
7303         mutex_lock(&trace_types_lock);
7304
7305         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7306                 seq_puts(m, "delta [absolute]\n");
7307         else
7308                 seq_puts(m, "[delta] absolute\n");
7309
7310         mutex_unlock(&trace_types_lock);
7311
7312         return 0;
7313 }
7314
7315 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7316 {
7317         struct trace_array *tr = inode->i_private;
7318         int ret;
7319
7320         ret = tracing_check_open_get_tr(tr);
7321         if (ret)
7322                 return ret;
7323
7324         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7325         if (ret < 0)
7326                 trace_array_put(tr);
7327
7328         return ret;
7329 }
7330
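/*
 * Return the timestamp for @rbe.  If @rbe is the per-CPU buffered
 * event (not yet committed to the ring buffer), fall back to the
 * current time stamp of @buffer.
 */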
7331 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7332 {
7333         if (rbe == this_cpu_read(trace_buffered_event))
7334                 return ring_buffer_time_stamp(buffer);
7335
7336         return ring_buffer_event_time_stamp(buffer, rbe);
7337 }
7338
7339 /*
7340  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7341  */
7342 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7343 {
7344         int ret = 0;
7345
7346         mutex_lock(&trace_types_lock);
7347
7348         if (set && tr->no_filter_buffering_ref++)
7349                 goto out;
7350
7351         if (!set) {
7352                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7353                         ret = -EINVAL;
7354                         goto out;
7355                 }
7356
7357                 --tr->no_filter_buffering_ref;
7358         }
7359  out:
7360         mutex_unlock(&trace_types_lock);
7361
7362         return ret;
7363 }
7364
7365 struct ftrace_buffer_info {
7366         struct trace_iterator   iter;
7367         void                    *spare;
7368         unsigned int            spare_cpu;
7369         unsigned int            read;
7370 };
7371
7372 #ifdef CONFIG_TRACER_SNAPSHOT
7373 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7374 {
7375         struct trace_array *tr = inode->i_private;
7376         struct trace_iterator *iter;
7377         struct seq_file *m;
7378         int ret;
7379
7380         ret = tracing_check_open_get_tr(tr);
7381         if (ret)
7382                 return ret;
7383
7384         if (file->f_mode & FMODE_READ) {
7385                 iter = __tracing_open(inode, file, true);
7386                 if (IS_ERR(iter))
7387                         ret = PTR_ERR(iter);
7388         } else {
7389                 /* Writes still need the seq_file to hold the private data */
7390                 ret = -ENOMEM;
7391                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7392                 if (!m)
7393                         goto out;
7394                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7395                 if (!iter) {
7396                         kfree(m);
7397                         goto out;
7398                 }
7399                 ret = 0;
7400
7401                 iter->tr = tr;
7402                 iter->array_buffer = &tr->max_buffer;
7403                 iter->cpu_file = tracing_get_cpu(inode);
7404                 m->private = iter;
7405                 file->private_data = m;
7406         }
7407 out:
7408         if (ret < 0)
7409                 trace_array_put(tr);
7410
7411         return ret;
7412 }
7413
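/*
 * tracing_snapshot_write - write handler for the snapshot file.
 * Writing 0 frees the snapshot buffer, 1 allocates it (if needed) and
 * takes a snapshot, and any other value clears the snapshot contents,
 * e.g.:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot
 */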
7414 static ssize_t
7415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7416                        loff_t *ppos)
7417 {
7418         struct seq_file *m = filp->private_data;
7419         struct trace_iterator *iter = m->private;
7420         struct trace_array *tr = iter->tr;
7421         unsigned long val;
7422         int ret;
7423
7424         ret = tracing_update_buffers();
7425         if (ret < 0)
7426                 return ret;
7427
7428         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7429         if (ret)
7430                 return ret;
7431
7432         mutex_lock(&trace_types_lock);
7433
7434         if (tr->current_trace->use_max_tr) {
7435                 ret = -EBUSY;
7436                 goto out;
7437         }
7438
7439         arch_spin_lock(&tr->max_lock);
7440         if (tr->cond_snapshot)
7441                 ret = -EBUSY;
7442         arch_spin_unlock(&tr->max_lock);
7443         if (ret)
7444                 goto out;
7445
7446         switch (val) {
7447         case 0:
7448                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7449                         ret = -EINVAL;
7450                         break;
7451                 }
7452                 if (tr->allocated_snapshot)
7453                         free_snapshot(tr);
7454                 break;
7455         case 1:
7456 /* Only allow per-cpu swap if the ring buffer supports it */
7457 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7458                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7459                         ret = -EINVAL;
7460                         break;
7461                 }
7462 #endif
7463                 if (tr->allocated_snapshot)
7464                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7465                                         &tr->array_buffer, iter->cpu_file);
7466                 else
7467                         ret = tracing_alloc_snapshot_instance(tr);
7468                 if (ret < 0)
7469                         break;
7470                 local_irq_disable();
7471                 /* Now, we're going to swap */
7472                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7473                         update_max_tr(tr, current, smp_processor_id(), NULL);
7474                 else
7475                         update_max_tr_single(tr, current, iter->cpu_file);
7476                 local_irq_enable();
7477                 break;
7478         default:
7479                 if (tr->allocated_snapshot) {
7480                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7481                                 tracing_reset_online_cpus(&tr->max_buffer);
7482                         else
7483                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7484                 }
7485                 break;
7486         }
7487
7488         if (ret >= 0) {
7489                 *ppos += cnt;
7490                 ret = cnt;
7491         }
7492 out:
7493         mutex_unlock(&trace_types_lock);
7494         return ret;
7495 }
7496
7497 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7498 {
7499         struct seq_file *m = file->private_data;
7500         int ret;
7501
7502         ret = tracing_release(inode, file);
7503
7504         if (file->f_mode & FMODE_READ)
7505                 return ret;
7506
7507         /* If write only, the seq_file is just a stub */
7508         if (m)
7509                 kfree(m->private);
7510         kfree(m);
7511
7512         return 0;
7513 }
7514
7515 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7516 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7517                                     size_t count, loff_t *ppos);
7518 static int tracing_buffers_release(struct inode *inode, struct file *file);
7519 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7520                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7521
7522 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7523 {
7524         struct ftrace_buffer_info *info;
7525         int ret;
7526
7527         /* The following checks for tracefs lockdown */
7528         ret = tracing_buffers_open(inode, filp);
7529         if (ret < 0)
7530                 return ret;
7531
7532         info = filp->private_data;
7533
7534         if (info->iter.trace->use_max_tr) {
7535                 tracing_buffers_release(inode, filp);
7536                 return -EBUSY;
7537         }
7538
7539         info->iter.snapshot = true;
7540         info->iter.array_buffer = &info->iter.tr->max_buffer;
7541
7542         return ret;
7543 }
7544
7545 #endif /* CONFIG_TRACER_SNAPSHOT */
7546
7547
7548 static const struct file_operations tracing_thresh_fops = {
7549         .open           = tracing_open_generic,
7550         .read           = tracing_thresh_read,
7551         .write          = tracing_thresh_write,
7552         .llseek         = generic_file_llseek,
7553 };
7554
7555 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7556 static const struct file_operations tracing_max_lat_fops = {
7557         .open           = tracing_open_generic,
7558         .read           = tracing_max_lat_read,
7559         .write          = tracing_max_lat_write,
7560         .llseek         = generic_file_llseek,
7561 };
7562 #endif
7563
7564 static const struct file_operations set_tracer_fops = {
7565         .open           = tracing_open_generic,
7566         .read           = tracing_set_trace_read,
7567         .write          = tracing_set_trace_write,
7568         .llseek         = generic_file_llseek,
7569 };
7570
7571 static const struct file_operations tracing_pipe_fops = {
7572         .open           = tracing_open_pipe,
7573         .poll           = tracing_poll_pipe,
7574         .read           = tracing_read_pipe,
7575         .splice_read    = tracing_splice_read_pipe,
7576         .release        = tracing_release_pipe,
7577         .llseek         = no_llseek,
7578 };
7579
7580 static const struct file_operations tracing_entries_fops = {
7581         .open           = tracing_open_generic_tr,
7582         .read           = tracing_entries_read,
7583         .write          = tracing_entries_write,
7584         .llseek         = generic_file_llseek,
7585         .release        = tracing_release_generic_tr,
7586 };
7587
7588 static const struct file_operations tracing_total_entries_fops = {
7589         .open           = tracing_open_generic_tr,
7590         .read           = tracing_total_entries_read,
7591         .llseek         = generic_file_llseek,
7592         .release        = tracing_release_generic_tr,
7593 };
7594
7595 static const struct file_operations tracing_free_buffer_fops = {
7596         .open           = tracing_open_generic_tr,
7597         .write          = tracing_free_buffer_write,
7598         .release        = tracing_free_buffer_release,
7599 };
7600
7601 static const struct file_operations tracing_mark_fops = {
7602         .open           = tracing_mark_open,
7603         .write          = tracing_mark_write,
7604         .release        = tracing_release_generic_tr,
7605 };
7606
7607 static const struct file_operations tracing_mark_raw_fops = {
7608         .open           = tracing_mark_open,
7609         .write          = tracing_mark_raw_write,
7610         .release        = tracing_release_generic_tr,
7611 };
7612
7613 static const struct file_operations trace_clock_fops = {
7614         .open           = tracing_clock_open,
7615         .read           = seq_read,
7616         .llseek         = seq_lseek,
7617         .release        = tracing_single_release_tr,
7618         .write          = tracing_clock_write,
7619 };
7620
7621 static const struct file_operations trace_time_stamp_mode_fops = {
7622         .open           = tracing_time_stamp_mode_open,
7623         .read           = seq_read,
7624         .llseek         = seq_lseek,
7625         .release        = tracing_single_release_tr,
7626 };
7627
7628 #ifdef CONFIG_TRACER_SNAPSHOT
7629 static const struct file_operations snapshot_fops = {
7630         .open           = tracing_snapshot_open,
7631         .read           = seq_read,
7632         .write          = tracing_snapshot_write,
7633         .llseek         = tracing_lseek,
7634         .release        = tracing_snapshot_release,
7635 };
7636
7637 static const struct file_operations snapshot_raw_fops = {
7638         .open           = snapshot_raw_open,
7639         .read           = tracing_buffers_read,
7640         .release        = tracing_buffers_release,
7641         .splice_read    = tracing_buffers_splice_read,
7642         .llseek         = no_llseek,
7643 };
7644
7645 #endif /* CONFIG_TRACER_SNAPSHOT */
7646
7647 /*
7648  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7649  * @filp: The active open file structure
7650  * @ubuf: The user space provided buffer containing the value to write
7651  * @cnt: The number of bytes to write from @ubuf
7652  * @ppos: The current "file" position
7653  *
7654  * This function implements the write interface for a struct trace_min_max_param.
7655  * The filp->private_data must point to a trace_min_max_param structure that
7656  * defines where to write the value, the min and the max acceptable values,
7657  * and a lock to protect the write.
7658  */
7659 static ssize_t
7660 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7661 {
7662         struct trace_min_max_param *param = filp->private_data;
7663         u64 val;
7664         int err;
7665
7666         if (!param)
7667                 return -EFAULT;
7668
7669         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7670         if (err)
7671                 return err;
7672
7673         if (param->lock)
7674                 mutex_lock(param->lock);
7675
7676         if (param->min && val < *param->min)
7677                 err = -EINVAL;
7678
7679         if (param->max && val > *param->max)
7680                 err = -EINVAL;
7681
7682         if (!err)
7683                 *param->val = val;
7684
7685         if (param->lock)
7686                 mutex_unlock(param->lock);
7687
7688         if (err)
7689                 return err;
7690
7691         return cnt;
7692 }
7693
7694 /*
7695  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7696  * @filp: The active open file structure
7697  * @ubuf: The userspace provided buffer to read value into
7698  * @cnt: The maximum number of bytes to read
7699  * @ppos: The current "file" position
7700  *
7701  * This function implements the read interface for a struct trace_min_max_param.
7702  * The filp->private_data must point to a trace_min_max_param struct with valid
7703  * data.
7704  */
7705 static ssize_t
7706 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7707 {
7708         struct trace_min_max_param *param = filp->private_data;
7709         char buf[U64_STR_SIZE];
7710         int len;
7711         u64 val;
7712
7713         if (!param)
7714                 return -EFAULT;
7715
7716         val = *param->val;
7717
7718         if (cnt > sizeof(buf))
7719                 cnt = sizeof(buf);
7720
7721         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7722
7723         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7724 }
7725
7726 const struct file_operations trace_min_max_fops = {
7727         .open           = tracing_open_generic,
7728         .read           = trace_min_max_read,
7729         .write          = trace_min_max_write,
7730 };
7731
7732 #define TRACING_LOG_ERRS_MAX    8
7733 #define TRACING_LOG_LOC_MAX     128
7734
7735 #define CMD_PREFIX "  Command: "
7736
7737 struct err_info {
7738         const char      **errs; /* ptr to loc-specific array of err strings */
7739         u8              type;   /* index into errs -> specific err string */
7740         u16             pos;    /* caret position */
7741         u64             ts;
7742 };
7743
7744 struct tracing_log_err {
7745         struct list_head        list;
7746         struct err_info         info;
7747         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7748         char                    *cmd;                     /* what caused err */
7749 };
7750
7751 static DEFINE_MUTEX(tracing_err_log_lock);
7752
7753 static struct tracing_log_err *alloc_tracing_log_err(int len)
7754 {
7755         struct tracing_log_err *err;
7756
7757         err = kzalloc(sizeof(*err), GFP_KERNEL);
7758         if (!err)
7759                 return ERR_PTR(-ENOMEM);
7760
7761         err->cmd = kzalloc(len, GFP_KERNEL);
7762         if (!err->cmd) {
7763                 kfree(err);
7764                 return ERR_PTR(-ENOMEM);
7765         }
7766
7767         return err;
7768 }
7769
7770 static void free_tracing_log_err(struct tracing_log_err *err)
7771 {
7772         kfree(err->cmd);
7773         kfree(err);
7774 }
7775
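/*
 * Return a tracing_log_err entry to fill in: allocate a new one until
 * @tr holds TRACING_LOG_ERRS_MAX entries, then recycle the oldest
 * entry on the error log list (re-allocating its cmd buffer for @len).
 */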
7776 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7777                                                    int len)
7778 {
7779         struct tracing_log_err *err;
7780
7781         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7782                 err = alloc_tracing_log_err(len);
7783                 if (PTR_ERR(err) != -ENOMEM)
7784                         tr->n_err_log_entries++;
7785
7786                 return err;
7787         }
7788
7789         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7790         kfree(err->cmd);
7791         err->cmd = kzalloc(len, GFP_KERNEL);
7792         if (!err->cmd)
7793                 return ERR_PTR(-ENOMEM);
7794         list_del(&err->list);
7795
7796         return err;
7797 }
7798
7799 /**
7800  * err_pos - find the position of a string within a command for error careting
7801  * @cmd: The tracing command that caused the error
7802  * @str: The string to position the caret at within @cmd
7803  *
7804  * Finds the position of the first occurrence of @str within @cmd.  The
7805  * return value can be passed to tracing_log_err() for caret placement
7806  * within @cmd.
7807  *
7808  * Returns the index within @cmd of the first occurrence of @str or 0
7809  * if @str was not found.
7810  */
7811 unsigned int err_pos(char *cmd, const char *str)
7812 {
7813         char *found;
7814
7815         if (WARN_ON(!strlen(cmd)))
7816                 return 0;
7817
7818         found = strstr(cmd, str);
7819         if (found)
7820                 return found - cmd;
7821
7822         return 0;
7823 }
7824
7825 /**
7826  * tracing_log_err - write an error to the tracing error log
7827  * @tr: The associated trace array for the error (NULL for top level array)
7828  * @loc: A string describing where the error occurred
7829  * @cmd: The tracing command that caused the error
7830  * @errs: The array of loc-specific static error strings
7831  * @type: The index into errs[], which produces the specific static err string
7832  * @pos: The position the caret should be placed in the cmd
7833  *
7834  * Writes an error into tracing/error_log of the form:
7835  *
7836  * <loc>: error: <text>
7837  *   Command: <cmd>
7838  *              ^
7839  *
7840  * tracing/error_log is a small log file containing the last
7841  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7842  * unless there has been a tracing error, and the error log can be
7843  * cleared, and its memory freed, by writing the empty string to it in
7844  * truncation mode, i.e. echo > tracing/error_log.
7845  *
7846  * NOTE: the @errs array along with the @type param are used to
7847  * produce a static error string - this string is not copied and saved
7848  * when the error is logged - only a pointer to it is saved.  See
7849  * existing callers for examples of how static strings are typically
7850  * defined for use with tracing_log_err().
7851  */
7852 void tracing_log_err(struct trace_array *tr,
7853                      const char *loc, const char *cmd,
7854                      const char **errs, u8 type, u16 pos)
7855 {
7856         struct tracing_log_err *err;
7857         int len = 0;
7858
7859         if (!tr)
7860                 tr = &global_trace;
7861
7862         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7863
7864         mutex_lock(&tracing_err_log_lock);
7865         err = get_tracing_log_err(tr, len);
7866         if (PTR_ERR(err) == -ENOMEM) {
7867                 mutex_unlock(&tracing_err_log_lock);
7868                 return;
7869         }
7870
7871         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7872         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7873
7874         err->info.errs = errs;
7875         err->info.type = type;
7876         err->info.pos = pos;
7877         err->info.ts = local_clock();
7878
7879         list_add_tail(&err->list, &tr->err_log);
7880         mutex_unlock(&tracing_err_log_lock);
7881 }
7882
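/*
 * Usage sketch (illustrative only, not an actual caller): a hypothetical
 * command parser that rejects a duplicate name could combine err_pos()
 * and tracing_log_err() as below.  The error strings, the type index and
 * the "my_cmd" location name are made up.
 */
#if 0
static const char *my_cmd_errs[] = {
	"Invalid argument",	/* type 0 */
	"Duplicate name",	/* type 1 */
};

static void my_cmd_log_duplicate(struct trace_array *tr, char *cmd,
				 const char *name)
{
	/*
	 * Shows up in tracing/error_log as "my_cmd: error: Duplicate name"
	 * with the caret under the first occurrence of @name within @cmd.
	 */
	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs,
			1, err_pos(cmd, name));
}
#endif
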
7883 static void clear_tracing_err_log(struct trace_array *tr)
7884 {
7885         struct tracing_log_err *err, *next;
7886
7887         mutex_lock(&tracing_err_log_lock);
7888         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7889                 list_del(&err->list);
7890                 free_tracing_log_err(err);
7891         }
7892
7893         tr->n_err_log_entries = 0;
7894         mutex_unlock(&tracing_err_log_lock);
7895 }
7896
7897 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7898 {
7899         struct trace_array *tr = m->private;
7900
7901         mutex_lock(&tracing_err_log_lock);
7902
7903         return seq_list_start(&tr->err_log, *pos);
7904 }
7905
7906 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7907 {
7908         struct trace_array *tr = m->private;
7909
7910         return seq_list_next(v, &tr->err_log, pos);
7911 }
7912
7913 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7914 {
7915         mutex_unlock(&tracing_err_log_lock);
7916 }
7917
7918 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7919 {
7920         u16 i;
7921
7922         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7923                 seq_putc(m, ' ');
7924         for (i = 0; i < pos; i++)
7925                 seq_putc(m, ' ');
7926         seq_puts(m, "^\n");
7927 }
7928
7929 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7930 {
7931         struct tracing_log_err *err = v;
7932
7933         if (err) {
7934                 const char *err_text = err->info.errs[err->info.type];
7935                 u64 sec = err->info.ts;
7936                 u32 nsec;
7937
7938                 nsec = do_div(sec, NSEC_PER_SEC);
7939                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7940                            err->loc, err_text);
7941                 seq_printf(m, "%s", err->cmd);
7942                 tracing_err_log_show_pos(m, err->info.pos);
7943         }
7944
7945         return 0;
7946 }
7947
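/*
 * For reference, tracing_err_log_seq_show() above renders each entry of
 * tracing/error_log as:
 *
 *   [<sec>.<usec>] <loc>: error: <static error text>
 *     Command: <cmd>
 *                ^
 *
 * where the caret is placed err->info.pos characters into <cmd>.
 */
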
7948 static const struct seq_operations tracing_err_log_seq_ops = {
7949         .start  = tracing_err_log_seq_start,
7950         .next   = tracing_err_log_seq_next,
7951         .stop   = tracing_err_log_seq_stop,
7952         .show   = tracing_err_log_seq_show
7953 };
7954
7955 static int tracing_err_log_open(struct inode *inode, struct file *file)
7956 {
7957         struct trace_array *tr = inode->i_private;
7958         int ret = 0;
7959
7960         ret = tracing_check_open_get_tr(tr);
7961         if (ret)
7962                 return ret;
7963
7964         /* If this file was opened for write, then erase contents */
7965         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7966                 clear_tracing_err_log(tr);
7967
7968         if (file->f_mode & FMODE_READ) {
7969                 ret = seq_open(file, &tracing_err_log_seq_ops);
7970                 if (!ret) {
7971                         struct seq_file *m = file->private_data;
7972                         m->private = tr;
7973                 } else {
7974                         trace_array_put(tr);
7975                 }
7976         }
7977         return ret;
7978 }
7979
7980 static ssize_t tracing_err_log_write(struct file *file,
7981                                      const char __user *buffer,
7982                                      size_t count, loff_t *ppos)
7983 {
7984         return count;
7985 }
7986
7987 static int tracing_err_log_release(struct inode *inode, struct file *file)
7988 {
7989         struct trace_array *tr = inode->i_private;
7990
7991         trace_array_put(tr);
7992
7993         if (file->f_mode & FMODE_READ)
7994                 seq_release(inode, file);
7995
7996         return 0;
7997 }
7998
7999 static const struct file_operations tracing_err_log_fops = {
8000         .open           = tracing_err_log_open,
8001         .write          = tracing_err_log_write,
8002         .read           = seq_read,
8003         .llseek         = seq_lseek,
8004         .release        = tracing_err_log_release,
8005 };
8006
8007 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8008 {
8009         struct trace_array *tr = inode->i_private;
8010         struct ftrace_buffer_info *info;
8011         int ret;
8012
8013         ret = tracing_check_open_get_tr(tr);
8014         if (ret)
8015                 return ret;
8016
8017         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8018         if (!info) {
8019                 trace_array_put(tr);
8020                 return -ENOMEM;
8021         }
8022
8023         mutex_lock(&trace_types_lock);
8024
8025         info->iter.tr           = tr;
8026         info->iter.cpu_file     = tracing_get_cpu(inode);
8027         info->iter.trace        = tr->current_trace;
8028         info->iter.array_buffer = &tr->array_buffer;
8029         info->spare             = NULL;
8030         /* Force reading ring buffer for first read */
8031         info->read              = (unsigned int)-1;
8032
8033         filp->private_data = info;
8034
8035         tr->trace_ref++;
8036
8037         mutex_unlock(&trace_types_lock);
8038
8039         ret = nonseekable_open(inode, filp);
8040         if (ret < 0)
8041                 trace_array_put(tr);
8042
8043         return ret;
8044 }
8045
8046 static __poll_t
8047 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8048 {
8049         struct ftrace_buffer_info *info = filp->private_data;
8050         struct trace_iterator *iter = &info->iter;
8051
8052         return trace_poll(iter, filp, poll_table);
8053 }
8054
8055 static ssize_t
8056 tracing_buffers_read(struct file *filp, char __user *ubuf,
8057                      size_t count, loff_t *ppos)
8058 {
8059         struct ftrace_buffer_info *info = filp->private_data;
8060         struct trace_iterator *iter = &info->iter;
8061         ssize_t ret = 0;
8062         ssize_t size;
8063
8064         if (!count)
8065                 return 0;
8066
8067 #ifdef CONFIG_TRACER_MAX_TRACE
8068         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8069                 return -EBUSY;
8070 #endif
8071
8072         if (!info->spare) {
8073                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8074                                                           iter->cpu_file);
8075                 if (IS_ERR(info->spare)) {
8076                         ret = PTR_ERR(info->spare);
8077                         info->spare = NULL;
8078                 } else {
8079                         info->spare_cpu = iter->cpu_file;
8080                 }
8081         }
8082         if (!info->spare)
8083                 return ret;
8084
8085         /* Do we have previous read data to read? */
8086         if (info->read < PAGE_SIZE)
8087                 goto read;
8088
8089  again:
8090         trace_access_lock(iter->cpu_file);
8091         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8092                                     &info->spare,
8093                                     count,
8094                                     iter->cpu_file, 0);
8095         trace_access_unlock(iter->cpu_file);
8096
8097         if (ret < 0) {
8098                 if (trace_empty(iter)) {
8099                         if ((filp->f_flags & O_NONBLOCK))
8100                                 return -EAGAIN;
8101
8102                         ret = wait_on_pipe(iter, 0);
8103                         if (ret)
8104                                 return ret;
8105
8106                         goto again;
8107                 }
8108                 return 0;
8109         }
8110
8111         info->read = 0;
8112  read:
8113         size = PAGE_SIZE - info->read;
8114         if (size > count)
8115                 size = count;
8116
8117         ret = copy_to_user(ubuf, info->spare + info->read, size);
8118         if (ret == size)
8119                 return -EFAULT;
8120
8121         size -= ret;
8122
8123         *ppos += size;
8124         info->read += size;
8125
8126         return size;
8127 }
8128
8129 static int tracing_buffers_release(struct inode *inode, struct file *file)
8130 {
8131         struct ftrace_buffer_info *info = file->private_data;
8132         struct trace_iterator *iter = &info->iter;
8133
8134         mutex_lock(&trace_types_lock);
8135
8136         iter->tr->trace_ref--;
8137
8138         __trace_array_put(iter->tr);
8139
8140         if (info->spare)
8141                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8142                                            info->spare_cpu, info->spare);
8143         kvfree(info);
8144
8145         mutex_unlock(&trace_types_lock);
8146
8147         return 0;
8148 }
8149
8150 struct buffer_ref {
8151         struct trace_buffer     *buffer;
8152         void                    *page;
8153         int                     cpu;
8154         refcount_t              refcount;
8155 };
8156
8157 static void buffer_ref_release(struct buffer_ref *ref)
8158 {
8159         if (!refcount_dec_and_test(&ref->refcount))
8160                 return;
8161         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8162         kfree(ref);
8163 }
8164
8165 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8166                                     struct pipe_buffer *buf)
8167 {
8168         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8169
8170         buffer_ref_release(ref);
8171         buf->private = 0;
8172 }
8173
8174 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8175                                 struct pipe_buffer *buf)
8176 {
8177         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8178
8179         if (refcount_read(&ref->refcount) > INT_MAX/2)
8180                 return false;
8181
8182         refcount_inc(&ref->refcount);
8183         return true;
8184 }
8185
8186 /* Pipe buffer operations for a buffer. */
8187 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8188         .release                = buffer_pipe_buf_release,
8189         .get                    = buffer_pipe_buf_get,
8190 };
8191
8192 /*
8193  * Callback from splice_to_pipe(): release any pages still attached to
8194  * the spd at the end, in case we errored out while filling the pipe.
8195  */
8196 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8197 {
8198         struct buffer_ref *ref =
8199                 (struct buffer_ref *)spd->partial[i].private;
8200
8201         buffer_ref_release(ref);
8202         spd->partial[i].private = 0;
8203 }
8204
8205 static ssize_t
8206 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8207                             struct pipe_inode_info *pipe, size_t len,
8208                             unsigned int flags)
8209 {
8210         struct ftrace_buffer_info *info = file->private_data;
8211         struct trace_iterator *iter = &info->iter;
8212         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8213         struct page *pages_def[PIPE_DEF_BUFFERS];
8214         struct splice_pipe_desc spd = {
8215                 .pages          = pages_def,
8216                 .partial        = partial_def,
8217                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8218                 .ops            = &buffer_pipe_buf_ops,
8219                 .spd_release    = buffer_spd_release,
8220         };
8221         struct buffer_ref *ref;
8222         int entries, i;
8223         ssize_t ret = 0;
8224
8225 #ifdef CONFIG_TRACER_MAX_TRACE
8226         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8227                 return -EBUSY;
8228 #endif
8229
8230         if (*ppos & (PAGE_SIZE - 1))
8231                 return -EINVAL;
8232
8233         if (len & (PAGE_SIZE - 1)) {
8234                 if (len < PAGE_SIZE)
8235                         return -EINVAL;
8236                 len &= PAGE_MASK;
8237         }
8238
8239         if (splice_grow_spd(pipe, &spd))
8240                 return -ENOMEM;
8241
8242  again:
8243         trace_access_lock(iter->cpu_file);
8244         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8245
8246         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8247                 struct page *page;
8248                 int r;
8249
8250                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8251                 if (!ref) {
8252                         ret = -ENOMEM;
8253                         break;
8254                 }
8255
8256                 refcount_set(&ref->refcount, 1);
8257                 ref->buffer = iter->array_buffer->buffer;
8258                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8259                 if (IS_ERR(ref->page)) {
8260                         ret = PTR_ERR(ref->page);
8261                         ref->page = NULL;
8262                         kfree(ref);
8263                         break;
8264                 }
8265                 ref->cpu = iter->cpu_file;
8266
8267                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8268                                           len, iter->cpu_file, 1);
8269                 if (r < 0) {
8270                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8271                                                    ref->page);
8272                         kfree(ref);
8273                         break;
8274                 }
8275
8276                 page = virt_to_page(ref->page);
8277
8278                 spd.pages[i] = page;
8279                 spd.partial[i].len = PAGE_SIZE;
8280                 spd.partial[i].offset = 0;
8281                 spd.partial[i].private = (unsigned long)ref;
8282                 spd.nr_pages++;
8283                 *ppos += PAGE_SIZE;
8284
8285                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8286         }
8287
8288         trace_access_unlock(iter->cpu_file);
8289         spd.nr_pages = i;
8290
8291         /* did we read anything? */
8292         if (!spd.nr_pages) {
8293                 if (ret)
8294                         goto out;
8295
8296                 ret = -EAGAIN;
8297                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8298                         goto out;
8299
8300                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8301                 if (ret)
8302                         goto out;
8303
8304                 goto again;
8305         }
8306
8307         ret = splice_to_pipe(pipe, &spd);
8308 out:
8309         splice_shrink_spd(&spd);
8310
8311         return ret;
8312 }
8313
8314 static const struct file_operations tracing_buffers_fops = {
8315         .open           = tracing_buffers_open,
8316         .read           = tracing_buffers_read,
8317         .poll           = tracing_buffers_poll,
8318         .release        = tracing_buffers_release,
8319         .splice_read    = tracing_buffers_splice_read,
8320         .llseek         = no_llseek,
8321 };
8322
8323 static ssize_t
8324 tracing_stats_read(struct file *filp, char __user *ubuf,
8325                    size_t count, loff_t *ppos)
8326 {
8327         struct inode *inode = file_inode(filp);
8328         struct trace_array *tr = inode->i_private;
8329         struct array_buffer *trace_buf = &tr->array_buffer;
8330         int cpu = tracing_get_cpu(inode);
8331         struct trace_seq *s;
8332         unsigned long cnt;
8333         unsigned long long t;
8334         unsigned long usec_rem;
8335
8336         s = kmalloc(sizeof(*s), GFP_KERNEL);
8337         if (!s)
8338                 return -ENOMEM;
8339
8340         trace_seq_init(s);
8341
8342         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343         trace_seq_printf(s, "entries: %ld\n", cnt);
8344
8345         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346         trace_seq_printf(s, "overrun: %ld\n", cnt);
8347
8348         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350
8351         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352         trace_seq_printf(s, "bytes: %ld\n", cnt);
8353
8354         if (trace_clocks[tr->clock_id].in_ns) {
8355                 /* local or global for trace_clock */
8356                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357                 usec_rem = do_div(t, USEC_PER_SEC);
8358                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359                                                                 t, usec_rem);
8360
8361                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362                 usec_rem = do_div(t, USEC_PER_SEC);
8363                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364         } else {
8365                 /* counter or tsc mode for trace_clock */
8366                 trace_seq_printf(s, "oldest event ts: %llu\n",
8367                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368
8369                 trace_seq_printf(s, "now ts: %llu\n",
8370                                 ring_buffer_time_stamp(trace_buf->buffer));
8371         }
8372
8373         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375
8376         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377         trace_seq_printf(s, "read events: %ld\n", cnt);
8378
8379         count = simple_read_from_buffer(ubuf, count, ppos,
8380                                         s->buffer, trace_seq_used(s));
8381
8382         kfree(s);
8383
8384         return count;
8385 }
8386
8387 static const struct file_operations tracing_stats_fops = {
8388         .open           = tracing_open_generic_tr,
8389         .read           = tracing_stats_read,
8390         .llseek         = generic_file_llseek,
8391         .release        = tracing_release_generic_tr,
8392 };
8393
8394 #ifdef CONFIG_DYNAMIC_FTRACE
8395
8396 static ssize_t
8397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398                   size_t cnt, loff_t *ppos)
8399 {
8400         ssize_t ret;
8401         char *buf;
8402         int r;
8403
8404         /* 256 should be plenty to hold the amount needed */
8405         buf = kmalloc(256, GFP_KERNEL);
8406         if (!buf)
8407                 return -ENOMEM;
8408
8409         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8410                       ftrace_update_tot_cnt,
8411                       ftrace_number_of_pages,
8412                       ftrace_number_of_groups);
8413
8414         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8415         kfree(buf);
8416         return ret;
8417 }
8418
8419 static const struct file_operations tracing_dyn_info_fops = {
8420         .open           = tracing_open_generic,
8421         .read           = tracing_read_dyn_info,
8422         .llseek         = generic_file_llseek,
8423 };
8424 #endif /* CONFIG_DYNAMIC_FTRACE */
8425
8426 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8427 static void
8428 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8429                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8430                 void *data)
8431 {
8432         tracing_snapshot_instance(tr);
8433 }
8434
8435 static void
8436 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8437                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8438                       void *data)
8439 {
8440         struct ftrace_func_mapper *mapper = data;
8441         long *count = NULL;
8442
8443         if (mapper)
8444                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8445
8446         if (count) {
8447
8448                 if (*count <= 0)
8449                         return;
8450
8451                 (*count)--;
8452         }
8453
8454         tracing_snapshot_instance(tr);
8455 }
8456
8457 static int
8458 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8459                       struct ftrace_probe_ops *ops, void *data)
8460 {
8461         struct ftrace_func_mapper *mapper = data;
8462         long *count = NULL;
8463
8464         seq_printf(m, "%ps:", (void *)ip);
8465
8466         seq_puts(m, "snapshot");
8467
8468         if (mapper)
8469                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8470
8471         if (count)
8472                 seq_printf(m, ":count=%ld\n", *count);
8473         else
8474                 seq_puts(m, ":unlimited\n");
8475
8476         return 0;
8477 }
8478
8479 static int
8480 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8481                      unsigned long ip, void *init_data, void **data)
8482 {
8483         struct ftrace_func_mapper *mapper = *data;
8484
8485         if (!mapper) {
8486                 mapper = allocate_ftrace_func_mapper();
8487                 if (!mapper)
8488                         return -ENOMEM;
8489                 *data = mapper;
8490         }
8491
8492         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8493 }
8494
8495 static void
8496 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8497                      unsigned long ip, void *data)
8498 {
8499         struct ftrace_func_mapper *mapper = data;
8500
8501         if (!ip) {
8502                 if (!mapper)
8503                         return;
8504                 free_ftrace_func_mapper(mapper, NULL);
8505                 return;
8506         }
8507
8508         ftrace_func_mapper_remove_ip(mapper, ip);
8509 }
8510
8511 static struct ftrace_probe_ops snapshot_probe_ops = {
8512         .func                   = ftrace_snapshot,
8513         .print                  = ftrace_snapshot_print,
8514 };
8515
8516 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8517         .func                   = ftrace_count_snapshot,
8518         .print                  = ftrace_snapshot_print,
8519         .init                   = ftrace_snapshot_init,
8520         .free                   = ftrace_snapshot_free,
8521 };
8522
8523 static int
8524 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8525                                char *glob, char *cmd, char *param, int enable)
8526 {
8527         struct ftrace_probe_ops *ops;
8528         void *count = (void *)-1;
8529         char *number;
8530         int ret;
8531
8532         if (!tr)
8533                 return -ENODEV;
8534
8535         /* hash funcs only work with set_ftrace_filter */
8536         if (!enable)
8537                 return -EINVAL;
8538
8539         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8540
8541         if (glob[0] == '!')
8542                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8543
8544         if (!param)
8545                 goto out_reg;
8546
8547         number = strsep(&param, ":");
8548
8549         if (!strlen(number))
8550                 goto out_reg;
8551
8552         /*
8553          * We use the callback data field (which is a pointer)
8554          * as our counter.
8555          */
8556         ret = kstrtoul(number, 0, (unsigned long *)&count);
8557         if (ret)
8558                 return ret;
8559
8560  out_reg:
8561         ret = tracing_alloc_snapshot_instance(tr);
8562         if (ret < 0)
8563                 goto out;
8564
8565         ret = register_ftrace_function_probe(glob, tr, ops, count);
8566
8567  out:
8568         return ret < 0 ? ret : 0;
8569 }
8570
8571 static struct ftrace_func_command ftrace_snapshot_cmd = {
8572         .name                   = "snapshot",
8573         .func                   = ftrace_trace_snapshot_callback,
8574 };
8575
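/*
 * For reference, the resulting "snapshot" function command is used via
 * set_ftrace_filter in tracefs, e.g.:
 *
 *   # take a snapshot each time do_sys_open() is hit, at most 5 times
 *   # (leave out ":5" for no limit)
 *   echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *
 *   # remove the probe again
 *   echo '!do_sys_open:snapshot' > set_ftrace_filter
 */
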
8576 static __init int register_snapshot_cmd(void)
8577 {
8578         return register_ftrace_command(&ftrace_snapshot_cmd);
8579 }
8580 #else
8581 static inline __init int register_snapshot_cmd(void) { return 0; }
8582 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8583
8584 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8585 {
8586         if (WARN_ON(!tr->dir))
8587                 return ERR_PTR(-ENODEV);
8588
8589         /* Top directory uses NULL as the parent */
8590         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8591                 return NULL;
8592
8593         /* All sub buffers have a descriptor */
8594         return tr->dir;
8595 }
8596
8597 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8598 {
8599         struct dentry *d_tracer;
8600
8601         if (tr->percpu_dir)
8602                 return tr->percpu_dir;
8603
8604         d_tracer = tracing_get_dentry(tr);
8605         if (IS_ERR(d_tracer))
8606                 return NULL;
8607
8608         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8609
8610         MEM_FAIL(!tr->percpu_dir,
8611                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8612
8613         return tr->percpu_dir;
8614 }
8615
8616 static struct dentry *
8617 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8618                       void *data, long cpu, const struct file_operations *fops)
8619 {
8620         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8621
8622         if (ret) /* See tracing_get_cpu() */
8623                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8624         return ret;
8625 }
8626
8627 static void
8628 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8629 {
8630         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8631         struct dentry *d_cpu;
8632         char cpu_dir[30]; /* 30 characters should be more than enough */
8633
8634         if (!d_percpu)
8635                 return;
8636
8637         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8638         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8639         if (!d_cpu) {
8640                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8641                 return;
8642         }
8643
8644         /* per cpu trace_pipe */
8645         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8646                                 tr, cpu, &tracing_pipe_fops);
8647
8648         /* per cpu trace */
8649         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8650                                 tr, cpu, &tracing_fops);
8651
8652         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8653                                 tr, cpu, &tracing_buffers_fops);
8654
8655         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8656                                 tr, cpu, &tracing_stats_fops);
8657
8658         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8659                                 tr, cpu, &tracing_entries_fops);
8660
8661 #ifdef CONFIG_TRACER_SNAPSHOT
8662         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8663                                 tr, cpu, &snapshot_fops);
8664
8665         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8666                                 tr, cpu, &snapshot_raw_fops);
8667 #endif
8668 }
8669
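/*
 * The function above gives each CPU its own directory under the
 * instance, i.e. (with CONFIG_TRACER_SNAPSHOT enabled):
 *
 *   per_cpu/cpu0/trace_pipe
 *   per_cpu/cpu0/trace
 *   per_cpu/cpu0/trace_pipe_raw
 *   per_cpu/cpu0/stats
 *   per_cpu/cpu0/buffer_size_kb
 *   per_cpu/cpu0/snapshot
 *   per_cpu/cpu0/snapshot_raw
 */
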
8670 #ifdef CONFIG_FTRACE_SELFTEST
8671 /* Let selftest have access to static functions in this file */
8672 #include "trace_selftest.c"
8673 #endif
8674
8675 static ssize_t
8676 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8677                         loff_t *ppos)
8678 {
8679         struct trace_option_dentry *topt = filp->private_data;
8680         char *buf;
8681
8682         if (topt->flags->val & topt->opt->bit)
8683                 buf = "1\n";
8684         else
8685                 buf = "0\n";
8686
8687         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8688 }
8689
8690 static ssize_t
8691 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8692                          loff_t *ppos)
8693 {
8694         struct trace_option_dentry *topt = filp->private_data;
8695         unsigned long val;
8696         int ret;
8697
8698         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8699         if (ret)
8700                 return ret;
8701
8702         if (val != 0 && val != 1)
8703                 return -EINVAL;
8704
8705         if (!!(topt->flags->val & topt->opt->bit) != val) {
8706                 mutex_lock(&trace_types_lock);
8707                 ret = __set_tracer_option(topt->tr, topt->flags,
8708                                           topt->opt, !val);
8709                 mutex_unlock(&trace_types_lock);
8710                 if (ret)
8711                         return ret;
8712         }
8713
8714         *ppos += cnt;
8715
8716         return cnt;
8717 }
8718
8719
8720 static const struct file_operations trace_options_fops = {
8721         .open = tracing_open_generic,
8722         .read = trace_options_read,
8723         .write = trace_options_write,
8724         .llseek = generic_file_llseek,
8725 };
8726
8727 /*
8728  * In order to pass in both the trace_array descriptor as well as the index
8729  * to the flag that the trace option file represents, the trace_array
8730  * has a character array of trace_flags_index[], which holds the index
8731  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8732  * The address of this character array is passed to the flag option file
8733  * read/write callbacks.
8734  *
8735  * In order to extract both the index and the trace_array descriptor,
8736  * get_tr_index() uses the following algorithm.
8737  *
8738  *   idx = *ptr;
8739  *
8740  * This works because the pointer holds the address of an entry in the
8741  * index array whose value is its own index (remember index[1] == 1).
8742  *
8743  * Then, to get the trace_array descriptor, subtract that index from
8744  * the ptr to get back to the start of the index array:
8745  *
8746  *   ptr - idx == &index[0]
8747  *
8748  * Then a simple container_of() from that pointer gets us to the
8749  * trace_array descriptor.
8750  */
8751 static void get_tr_index(void *data, struct trace_array **ptr,
8752                          unsigned int *pindex)
8753 {
8754         *pindex = *(unsigned char *)data;
8755
8756         *ptr = container_of(data - *pindex, struct trace_array,
8757                             trace_flags_index);
8758 }
8759
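/*
 * Worked example of the recovery described above:
 * create_trace_option_core_file() passes &tr->trace_flags_index[index]
 * as the file's data, and init_trace_flags_index() initializes
 * trace_flags_index[i] = i, so for index == 3:
 *
 *   data                     == &tr->trace_flags_index[3]
 *   *(unsigned char *)data   == 3
 *   data - 3                 == &tr->trace_flags_index[0]
 *
 * and container_of() on &tr->trace_flags_index[0] gives back the
 * enclosing trace_array, i.e. get_tr_index() recovers both tr and 3.
 */
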
8760 static ssize_t
8761 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8762                         loff_t *ppos)
8763 {
8764         void *tr_index = filp->private_data;
8765         struct trace_array *tr;
8766         unsigned int index;
8767         char *buf;
8768
8769         get_tr_index(tr_index, &tr, &index);
8770
8771         if (tr->trace_flags & (1 << index))
8772                 buf = "1\n";
8773         else
8774                 buf = "0\n";
8775
8776         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8777 }
8778
8779 static ssize_t
8780 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8781                          loff_t *ppos)
8782 {
8783         void *tr_index = filp->private_data;
8784         struct trace_array *tr;
8785         unsigned int index;
8786         unsigned long val;
8787         int ret;
8788
8789         get_tr_index(tr_index, &tr, &index);
8790
8791         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8792         if (ret)
8793                 return ret;
8794
8795         if (val != 0 && val != 1)
8796                 return -EINVAL;
8797
8798         mutex_lock(&event_mutex);
8799         mutex_lock(&trace_types_lock);
8800         ret = set_tracer_flag(tr, 1 << index, val);
8801         mutex_unlock(&trace_types_lock);
8802         mutex_unlock(&event_mutex);
8803
8804         if (ret < 0)
8805                 return ret;
8806
8807         *ppos += cnt;
8808
8809         return cnt;
8810 }
8811
8812 static const struct file_operations trace_options_core_fops = {
8813         .open = tracing_open_generic,
8814         .read = trace_options_core_read,
8815         .write = trace_options_core_write,
8816         .llseek = generic_file_llseek,
8817 };
8818
8819 struct dentry *trace_create_file(const char *name,
8820                                  umode_t mode,
8821                                  struct dentry *parent,
8822                                  void *data,
8823                                  const struct file_operations *fops)
8824 {
8825         struct dentry *ret;
8826
8827         ret = tracefs_create_file(name, mode, parent, data, fops);
8828         if (!ret)
8829                 pr_warn("Could not create tracefs '%s' entry\n", name);
8830
8831         return ret;
8832 }
8833
8834
8835 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8836 {
8837         struct dentry *d_tracer;
8838
8839         if (tr->options)
8840                 return tr->options;
8841
8842         d_tracer = tracing_get_dentry(tr);
8843         if (IS_ERR(d_tracer))
8844                 return NULL;
8845
8846         tr->options = tracefs_create_dir("options", d_tracer);
8847         if (!tr->options) {
8848                 pr_warn("Could not create tracefs directory 'options'\n");
8849                 return NULL;
8850         }
8851
8852         return tr->options;
8853 }
8854
8855 static void
8856 create_trace_option_file(struct trace_array *tr,
8857                          struct trace_option_dentry *topt,
8858                          struct tracer_flags *flags,
8859                          struct tracer_opt *opt)
8860 {
8861         struct dentry *t_options;
8862
8863         t_options = trace_options_init_dentry(tr);
8864         if (!t_options)
8865                 return;
8866
8867         topt->flags = flags;
8868         topt->opt = opt;
8869         topt->tr = tr;
8870
8871         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8872                                         t_options, topt, &trace_options_fops);
8873
8874 }
8875
8876 static void
8877 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8878 {
8879         struct trace_option_dentry *topts;
8880         struct trace_options *tr_topts;
8881         struct tracer_flags *flags;
8882         struct tracer_opt *opts;
8883         int cnt;
8884         int i;
8885
8886         if (!tracer)
8887                 return;
8888
8889         flags = tracer->flags;
8890
8891         if (!flags || !flags->opts)
8892                 return;
8893
8894         /*
8895          * If this is an instance, only create flags for tracers
8896          * the instance may have.
8897          */
8898         if (!trace_ok_for_array(tracer, tr))
8899                 return;
8900
8901         for (i = 0; i < tr->nr_topts; i++) {
8902                 /* Make sure there are no duplicate flags. */
8903                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8904                         return;
8905         }
8906
8907         opts = flags->opts;
8908
8909         for (cnt = 0; opts[cnt].name; cnt++)
8910                 ;
8911
8912         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8913         if (!topts)
8914                 return;
8915
8916         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8917                             GFP_KERNEL);
8918         if (!tr_topts) {
8919                 kfree(topts);
8920                 return;
8921         }
8922
8923         tr->topts = tr_topts;
8924         tr->topts[tr->nr_topts].tracer = tracer;
8925         tr->topts[tr->nr_topts].topts = topts;
8926         tr->nr_topts++;
8927
8928         for (cnt = 0; opts[cnt].name; cnt++) {
8929                 create_trace_option_file(tr, &topts[cnt], flags,
8930                                          &opts[cnt]);
8931                 MEM_FAIL(topts[cnt].entry == NULL,
8932                           "Failed to create trace option: %s",
8933                           opts[cnt].name);
8934         }
8935 }
8936
8937 static struct dentry *
8938 create_trace_option_core_file(struct trace_array *tr,
8939                               const char *option, long index)
8940 {
8941         struct dentry *t_options;
8942
8943         t_options = trace_options_init_dentry(tr);
8944         if (!t_options)
8945                 return NULL;
8946
8947         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8948                                  (void *)&tr->trace_flags_index[index],
8949                                  &trace_options_core_fops);
8950 }
8951
8952 static void create_trace_options_dir(struct trace_array *tr)
8953 {
8954         struct dentry *t_options;
8955         bool top_level = tr == &global_trace;
8956         int i;
8957
8958         t_options = trace_options_init_dentry(tr);
8959         if (!t_options)
8960                 return;
8961
8962         for (i = 0; trace_options[i]; i++) {
8963                 if (top_level ||
8964                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8965                         create_trace_option_core_file(tr, trace_options[i], i);
8966         }
8967 }
8968
8969 static ssize_t
8970 rb_simple_read(struct file *filp, char __user *ubuf,
8971                size_t cnt, loff_t *ppos)
8972 {
8973         struct trace_array *tr = filp->private_data;
8974         char buf[64];
8975         int r;
8976
8977         r = tracer_tracing_is_on(tr);
8978         r = sprintf(buf, "%d\n", r);
8979
8980         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8981 }
8982
8983 static ssize_t
8984 rb_simple_write(struct file *filp, const char __user *ubuf,
8985                 size_t cnt, loff_t *ppos)
8986 {
8987         struct trace_array *tr = filp->private_data;
8988         struct trace_buffer *buffer = tr->array_buffer.buffer;
8989         unsigned long val;
8990         int ret;
8991
8992         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8993         if (ret)
8994                 return ret;
8995
8996         if (buffer) {
8997                 mutex_lock(&trace_types_lock);
8998                 if (!!val == tracer_tracing_is_on(tr)) {
8999                         val = 0; /* do nothing */
9000                 } else if (val) {
9001                         tracer_tracing_on(tr);
9002                         if (tr->current_trace->start)
9003                                 tr->current_trace->start(tr);
9004                 } else {
9005                         tracer_tracing_off(tr);
9006                         if (tr->current_trace->stop)
9007                                 tr->current_trace->stop(tr);
9008                 }
9009                 mutex_unlock(&trace_types_lock);
9010         }
9011
9012         (*ppos)++;
9013
9014         return cnt;
9015 }
9016
9017 static const struct file_operations rb_simple_fops = {
9018         .open           = tracing_open_generic_tr,
9019         .read           = rb_simple_read,
9020         .write          = rb_simple_write,
9021         .release        = tracing_release_generic_tr,
9022         .llseek         = default_llseek,
9023 };
9024
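/*
 * rb_simple_fops backs the per-instance "tracing_on" file (created in
 * init_tracer_tracefs() below), so recording can be toggled at run time:
 *
 *   echo 0 > tracing_on	# stop writing to the ring buffer
 *   echo 1 > tracing_on	# resume writing
 */
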
9025 static ssize_t
9026 buffer_percent_read(struct file *filp, char __user *ubuf,
9027                     size_t cnt, loff_t *ppos)
9028 {
9029         struct trace_array *tr = filp->private_data;
9030         char buf[64];
9031         int r;
9032
9033         r = tr->buffer_percent;
9034         r = sprintf(buf, "%d\n", r);
9035
9036         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9037 }
9038
9039 static ssize_t
9040 buffer_percent_write(struct file *filp, const char __user *ubuf,
9041                      size_t cnt, loff_t *ppos)
9042 {
9043         struct trace_array *tr = filp->private_data;
9044         unsigned long val;
9045         int ret;
9046
9047         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9048         if (ret)
9049                 return ret;
9050
9051         if (val > 100)
9052                 return -EINVAL;
9053
9054         if (!val)
9055                 val = 1;
9056
9057         tr->buffer_percent = val;
9058
9059         (*ppos)++;
9060
9061         return cnt;
9062 }
9063
9064 static const struct file_operations buffer_percent_fops = {
9065         .open           = tracing_open_generic_tr,
9066         .read           = buffer_percent_read,
9067         .write          = buffer_percent_write,
9068         .release        = tracing_release_generic_tr,
9069         .llseek         = default_llseek,
9070 };
9071
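/*
 * buffer_percent controls how full the per-cpu ring buffer must be
 * before blocked trace_pipe_raw readers are woken (see the
 * wait_on_pipe() call in tracing_buffers_splice_read() above).  The
 * default of 50 is set in init_tracer_tracefs() below, e.g.:
 *
 *   echo 100 > buffer_percent	# wake readers only when the buffer is full
 */
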
9072 static struct dentry *trace_instance_dir;
9073
9074 static void
9075 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9076
9077 static int
9078 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9079 {
9080         enum ring_buffer_flags rb_flags;
9081
9082         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9083
9084         buf->tr = tr;
9085
9086         buf->buffer = ring_buffer_alloc(size, rb_flags);
9087         if (!buf->buffer)
9088                 return -ENOMEM;
9089
9090         buf->data = alloc_percpu(struct trace_array_cpu);
9091         if (!buf->data) {
9092                 ring_buffer_free(buf->buffer);
9093                 buf->buffer = NULL;
9094                 return -ENOMEM;
9095         }
9096
9097         /* Allocate the first page for all buffers */
9098         set_buffer_entries(&tr->array_buffer,
9099                            ring_buffer_size(tr->array_buffer.buffer, 0));
9100
9101         return 0;
9102 }
9103
9104 static int allocate_trace_buffers(struct trace_array *tr, int size)
9105 {
9106         int ret;
9107
9108         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9109         if (ret)
9110                 return ret;
9111
9112 #ifdef CONFIG_TRACER_MAX_TRACE
9113         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9114                                     allocate_snapshot ? size : 1);
9115         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9116                 ring_buffer_free(tr->array_buffer.buffer);
9117                 tr->array_buffer.buffer = NULL;
9118                 free_percpu(tr->array_buffer.data);
9119                 tr->array_buffer.data = NULL;
9120                 return -ENOMEM;
9121         }
9122         tr->allocated_snapshot = allocate_snapshot;
9123
9124         /*
9125          * Only the top level trace array gets its snapshot allocated
9126          * from the kernel command line.
9127          */
9128         allocate_snapshot = false;
9129 #endif
9130
9131         return 0;
9132 }
9133
9134 static void free_trace_buffer(struct array_buffer *buf)
9135 {
9136         if (buf->buffer) {
9137                 ring_buffer_free(buf->buffer);
9138                 buf->buffer = NULL;
9139                 free_percpu(buf->data);
9140                 buf->data = NULL;
9141         }
9142 }
9143
9144 static void free_trace_buffers(struct trace_array *tr)
9145 {
9146         if (!tr)
9147                 return;
9148
9149         free_trace_buffer(&tr->array_buffer);
9150
9151 #ifdef CONFIG_TRACER_MAX_TRACE
9152         free_trace_buffer(&tr->max_buffer);
9153 #endif
9154 }
9155
9156 static void init_trace_flags_index(struct trace_array *tr)
9157 {
9158         int i;
9159
9160         /* Used by the trace options files */
9161         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9162                 tr->trace_flags_index[i] = i;
9163 }
9164
9165 static void __update_tracer_options(struct trace_array *tr)
9166 {
9167         struct tracer *t;
9168
9169         for (t = trace_types; t; t = t->next)
9170                 add_tracer_options(tr, t);
9171 }
9172
9173 static void update_tracer_options(struct trace_array *tr)
9174 {
9175         mutex_lock(&trace_types_lock);
9176         tracer_options_updated = true;
9177         __update_tracer_options(tr);
9178         mutex_unlock(&trace_types_lock);
9179 }
9180
9181 /* Must have trace_types_lock held */
9182 struct trace_array *trace_array_find(const char *instance)
9183 {
9184         struct trace_array *tr, *found = NULL;
9185
9186         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9187                 if (tr->name && strcmp(tr->name, instance) == 0) {
9188                         found = tr;
9189                         break;
9190                 }
9191         }
9192
9193         return found;
9194 }
9195
9196 struct trace_array *trace_array_find_get(const char *instance)
9197 {
9198         struct trace_array *tr;
9199
9200         mutex_lock(&trace_types_lock);
9201         tr = trace_array_find(instance);
9202         if (tr)
9203                 tr->ref++;
9204         mutex_unlock(&trace_types_lock);
9205
9206         return tr;
9207 }
9208
9209 static int trace_array_create_dir(struct trace_array *tr)
9210 {
9211         int ret;
9212
9213         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9214         if (!tr->dir)
9215                 return -EINVAL;
9216
9217         ret = event_trace_add_tracer(tr->dir, tr);
9218         if (ret) {
9219                 tracefs_remove(tr->dir);
9220                 return ret;
9221         }
9222
9223         init_tracer_tracefs(tr, tr->dir);
9224         __update_tracer_options(tr);
9225
9226         return ret;
9227 }
9228
9229 static struct trace_array *trace_array_create(const char *name)
9230 {
9231         struct trace_array *tr;
9232         int ret;
9233
9234         ret = -ENOMEM;
9235         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9236         if (!tr)
9237                 return ERR_PTR(ret);
9238
9239         tr->name = kstrdup(name, GFP_KERNEL);
9240         if (!tr->name)
9241                 goto out_free_tr;
9242
9243         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9244                 goto out_free_tr;
9245
9246         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9247
9248         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9249
9250         raw_spin_lock_init(&tr->start_lock);
9251
9252         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9253
9254         tr->current_trace = &nop_trace;
9255
9256         INIT_LIST_HEAD(&tr->systems);
9257         INIT_LIST_HEAD(&tr->events);
9258         INIT_LIST_HEAD(&tr->hist_vars);
9259         INIT_LIST_HEAD(&tr->err_log);
9260
9261         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9262                 goto out_free_tr;
9263
9264         if (ftrace_allocate_ftrace_ops(tr) < 0)
9265                 goto out_free_tr;
9266
9267         ftrace_init_trace_array(tr);
9268
9269         init_trace_flags_index(tr);
9270
9271         if (trace_instance_dir) {
9272                 ret = trace_array_create_dir(tr);
9273                 if (ret)
9274                         goto out_free_tr;
9275         } else
9276                 __trace_early_add_events(tr);
9277
9278         list_add(&tr->list, &ftrace_trace_arrays);
9279
9280         tr->ref++;
9281
9282         return tr;
9283
9284  out_free_tr:
9285         ftrace_free_ftrace_ops(tr);
9286         free_trace_buffers(tr);
9287         free_cpumask_var(tr->tracing_cpumask);
9288         kfree(tr->name);
9289         kfree(tr);
9290
9291         return ERR_PTR(ret);
9292 }
9293
9294 static int instance_mkdir(const char *name)
9295 {
9296         struct trace_array *tr;
9297         int ret;
9298
9299         mutex_lock(&event_mutex);
9300         mutex_lock(&trace_types_lock);
9301
9302         ret = -EEXIST;
9303         if (trace_array_find(name))
9304                 goto out_unlock;
9305
9306         tr = trace_array_create(name);
9307
9308         ret = PTR_ERR_OR_ZERO(tr);
9309
9310 out_unlock:
9311         mutex_unlock(&trace_types_lock);
9312         mutex_unlock(&event_mutex);
9313         return ret;
9314 }
9315
9316 /**
9317  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9318  * @name: The name of the trace array to be looked up/created.
9319  *
9320  * Returns a pointer to the trace array with the given name, or NULL if
9321  * it cannot be created.
9322  *
9323  * NOTE: This function increments the reference counter associated with the
9324  * trace array returned. This makes sure it cannot be freed while in use.
9325  * Use trace_array_put() once the trace array is no longer needed.
9326  * If the trace_array is to be freed, trace_array_destroy() needs to
9327  * be called after the trace_array_put(), or simply let user space delete
9328  * it from the tracefs instances directory. But until the
9329  * trace_array_put() is called, user space cannot delete it.
9330  *
9331  */
9332 struct trace_array *trace_array_get_by_name(const char *name)
9333 {
9334         struct trace_array *tr;
9335
9336         mutex_lock(&event_mutex);
9337         mutex_lock(&trace_types_lock);
9338
9339         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9340                 if (tr->name && strcmp(tr->name, name) == 0)
9341                         goto out_unlock;
9342         }
9343
9344         tr = trace_array_create(name);
9345
9346         if (IS_ERR(tr))
9347                 tr = NULL;
9348 out_unlock:
9349         if (tr)
9350                 tr->ref++;
9351
9352         mutex_unlock(&trace_types_lock);
9353         mutex_unlock(&event_mutex);
9354         return tr;
9355 }
9356 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9357
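/*
 * Usage sketch for the lifetime rules described above (illustrative
 * only; the instance name and function names are made up):
 */
#if 0
static struct trace_array *my_tr;

static int my_module_init(void)
{
	/* Looks up or creates instance "my_instance" and takes a reference */
	my_tr = trace_array_get_by_name("my_instance");
	return my_tr ? 0 : -ENOMEM;
}

static void my_module_exit(void)
{
	/* Drop our reference first, then remove the instance entirely */
	trace_array_put(my_tr);
	trace_array_destroy(my_tr);
}
#endif
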
9358 static int __remove_instance(struct trace_array *tr)
9359 {
9360         int i;
9361
9362         /* Reference counter for a newly created trace array = 1. */
9363         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9364                 return -EBUSY;
9365
9366         list_del(&tr->list);
9367
9368         /* Disable all the flags that were enabled coming in */
9369         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9370                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9371                         set_tracer_flag(tr, 1 << i, 0);
9372         }
9373
9374         tracing_set_nop(tr);
9375         clear_ftrace_function_probes(tr);
9376         event_trace_del_tracer(tr);
9377         ftrace_clear_pids(tr);
9378         ftrace_destroy_function_files(tr);
9379         tracefs_remove(tr->dir);
9380         free_percpu(tr->last_func_repeats);
9381         free_trace_buffers(tr);
9382
9383         for (i = 0; i < tr->nr_topts; i++) {
9384                 kfree(tr->topts[i].topts);
9385         }
9386         kfree(tr->topts);
9387
9388         free_cpumask_var(tr->tracing_cpumask);
9389         kfree(tr->name);
9390         kfree(tr);
9391
9392         return 0;
9393 }
9394
9395 int trace_array_destroy(struct trace_array *this_tr)
9396 {
9397         struct trace_array *tr;
9398         int ret;
9399
9400         if (!this_tr)
9401                 return -EINVAL;
9402
9403         mutex_lock(&event_mutex);
9404         mutex_lock(&trace_types_lock);
9405
9406         ret = -ENODEV;
9407
9408         /* Make sure the trace array exists before destroying it. */
9409         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9410                 if (tr == this_tr) {
9411                         ret = __remove_instance(tr);
9412                         break;
9413                 }
9414         }
9415
9416         mutex_unlock(&trace_types_lock);
9417         mutex_unlock(&event_mutex);
9418
9419         return ret;
9420 }
9421 EXPORT_SYMBOL_GPL(trace_array_destroy);
9422
9423 static int instance_rmdir(const char *name)
9424 {
9425         struct trace_array *tr;
9426         int ret;
9427
9428         mutex_lock(&event_mutex);
9429         mutex_lock(&trace_types_lock);
9430
9431         ret = -ENODEV;
9432         tr = trace_array_find(name);
9433         if (tr)
9434                 ret = __remove_instance(tr);
9435
9436         mutex_unlock(&trace_types_lock);
9437         mutex_unlock(&event_mutex);
9438
9439         return ret;
9440 }
9441
9442 static __init void create_trace_instances(struct dentry *d_tracer)
9443 {
9444         struct trace_array *tr;
9445
9446         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9447                                                          instance_mkdir,
9448                                                          instance_rmdir);
9449         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9450                 return;
9451
9452         mutex_lock(&event_mutex);
9453         mutex_lock(&trace_types_lock);
9454
9455         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9456                 if (!tr->name)
9457                         continue;
9458                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9459                              "Failed to create instance directory\n"))
9460                         break;
9461         }
9462
9463         mutex_unlock(&trace_types_lock);
9464         mutex_unlock(&event_mutex);
9465 }
9466
9467 static void
9468 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9469 {
9470         struct trace_event_file *file;
9471         int cpu;
9472
9473         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9474                         tr, &show_traces_fops);
9475
9476         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9477                         tr, &set_tracer_fops);
9478
9479         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9480                           tr, &tracing_cpumask_fops);
9481
9482         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9483                           tr, &tracing_iter_fops);
9484
9485         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9486                           tr, &tracing_fops);
9487
9488         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9489                           tr, &tracing_pipe_fops);
9490
9491         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9492                           tr, &tracing_entries_fops);
9493
9494         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9495                           tr, &tracing_total_entries_fops);
9496
9497         trace_create_file("free_buffer", 0200, d_tracer,
9498                           tr, &tracing_free_buffer_fops);
9499
9500         trace_create_file("trace_marker", 0220, d_tracer,
9501                           tr, &tracing_mark_fops);
9502
9503         file = __find_event_file(tr, "ftrace", "print");
9504         if (file && file->dir)
9505                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9506                                   file, &event_trigger_fops);
9507         tr->trace_marker_file = file;
9508
9509         trace_create_file("trace_marker_raw", 0220, d_tracer,
9510                           tr, &tracing_mark_raw_fops);
9511
9512         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9513                           &trace_clock_fops);
9514
9515         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9516                           tr, &rb_simple_fops);
9517
9518         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9519                           &trace_time_stamp_mode_fops);
9520
9521         tr->buffer_percent = 50;
9522
9523         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9524                         tr, &buffer_percent_fops);
9525
9526         create_trace_options_dir(tr);
9527
9528         trace_create_maxlat_file(tr, d_tracer);
9529
9530         if (ftrace_create_function_files(tr, d_tracer))
9531                 MEM_FAIL(1, "Could not allocate function filter files");
9532
9533 #ifdef CONFIG_TRACER_SNAPSHOT
9534         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9535                           tr, &snapshot_fops);
9536 #endif
9537
9538         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9539                           tr, &tracing_err_log_fops);
9540
9541         for_each_tracing_cpu(cpu)
9542                 tracing_init_tracefs_percpu(tr, cpu);
9543
9544         ftrace_init_tracefs(tr, d_tracer);
9545 }
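/*
 * The control files created above show up in the top level tracing
 * directory as well as in every instance directory, e.g. (illustrative):
 *
 *	ls /sys/kernel/tracing/instances/foo
 *	current_tracer  trace  trace_pipe  trace_options  tracing_on ...
 *
 * so each instance can be configured independently of the global buffer.
 */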
9546
9547 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9548 {
9549         struct vfsmount *mnt;
9550         struct file_system_type *type;
9551
9552         /*
9553          * To maintain backward compatibility for tools that mount
9554          * debugfs to get to the tracing facility, tracefs is automatically
9555          * mounted to the debugfs/tracing directory.
9556          */
9557         type = get_fs_type("tracefs");
9558         if (!type)
9559                 return NULL;
9560         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9561         put_filesystem(type);
9562         if (IS_ERR(mnt))
9563                 return NULL;
9564         mntget(mnt);
9565
9566         return mnt;
9567 }
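/*
 * With this automount in place, legacy paths keep working: opening
 * /sys/kernel/debug/tracing (with debugfs mounted) transparently mounts
 * tracefs there, while /sys/kernel/tracing remains the native mount point.
 */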
9568
9569 /**
9570  * tracing_init_dentry - initialize top level trace array
9571  *
9572  * This is called when creating files or directories in the tracing
9573  * directory. It is called via fs_initcall() by any of the boot up code
9574  * and returns 0 on success or a negative errno when tracing cannot be set up.
9575  */
9576 int tracing_init_dentry(void)
9577 {
9578         struct trace_array *tr = &global_trace;
9579
9580         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9581                 pr_warn("Tracing disabled due to lockdown\n");
9582                 return -EPERM;
9583         }
9584
9585         /* The top level trace array uses NULL as parent */
9586         if (tr->dir)
9587                 return 0;
9588
9589         if (WARN_ON(!tracefs_initialized()))
9590                 return -ENODEV;
9591
9592         /*
9593          * As there may still be users that expect the tracing
9594          * files to exist in debugfs/tracing, we must automount
9595          * the tracefs file system there, so older tools still
9596          * work with the newer kernel.
9597          */
9598         tr->dir = debugfs_create_automount("tracing", NULL,
9599                                            trace_automount, NULL);
9600
9601         return 0;
9602 }
9603
9604 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9605 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9606
9607 static struct workqueue_struct *eval_map_wq __initdata;
9608 static struct work_struct eval_map_work __initdata;
9609 static struct work_struct tracerfs_init_work __initdata;
9610
9611 static void __init eval_map_work_func(struct work_struct *work)
9612 {
9613         int len;
9614
9615         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9616         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9617 }
9618
9619 static int __init trace_eval_init(void)
9620 {
9621         INIT_WORK(&eval_map_work, eval_map_work_func);
9622
9623         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9624         if (!eval_map_wq) {
9625                 pr_err("Unable to allocate eval_map_wq\n");
9626                 /* Do work here */
9627                 eval_map_work_func(&eval_map_work);
9628                 return -ENOMEM;
9629         }
9630
9631         queue_work(eval_map_wq, &eval_map_work);
9632         return 0;
9633 }
9634
9635 subsys_initcall(trace_eval_init);
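/*
 * The eval maps inserted above come from TRACE_DEFINE_ENUM() and
 * TRACE_DEFINE_SIZEOF() in trace event headers, e.g. (illustrative):
 *
 *	TRACE_DEFINE_ENUM(MY_ENUM_VALUE);
 *
 * which lets enum/sizeof symbols used in an event's print fmt be shown
 * to user space as their numeric values.
 */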
9636
9637 static int __init trace_eval_sync(void)
9638 {
9639         /* Make sure the eval map updates are finished */
9640         if (eval_map_wq)
9641                 destroy_workqueue(eval_map_wq);
9642         return 0;
9643 }
9644
9645 late_initcall_sync(trace_eval_sync);
9646
9647
9648 #ifdef CONFIG_MODULES
9649 static void trace_module_add_evals(struct module *mod)
9650 {
9651         if (!mod->num_trace_evals)
9652                 return;
9653
9654         /*
9655          * Modules with bad taint do not have events created, so do
9656          * not bother with their enums (eval maps) either.
9657          */
9658         if (trace_module_has_bad_taint(mod))
9659                 return;
9660
9661         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9662 }
9663
9664 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9665 static void trace_module_remove_evals(struct module *mod)
9666 {
9667         union trace_eval_map_item *map;
9668         union trace_eval_map_item **last = &trace_eval_maps;
9669
9670         if (!mod->num_trace_evals)
9671                 return;
9672
9673         mutex_lock(&trace_eval_mutex);
9674
9675         map = trace_eval_maps;
9676
9677         while (map) {
9678                 if (map->head.mod == mod)
9679                         break;
9680                 map = trace_eval_jmp_to_tail(map);
9681                 last = &map->tail.next;
9682                 map = map->tail.next;
9683         }
9684         if (!map)
9685                 goto out;
9686
9687         *last = trace_eval_jmp_to_tail(map)->tail.next;
9688         kfree(map);
9689  out:
9690         mutex_unlock(&trace_eval_mutex);
9691 }
9692 #else
9693 static inline void trace_module_remove_evals(struct module *mod) { }
9694 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9695
9696 static int trace_module_notify(struct notifier_block *self,
9697                                unsigned long val, void *data)
9698 {
9699         struct module *mod = data;
9700
9701         switch (val) {
9702         case MODULE_STATE_COMING:
9703                 trace_module_add_evals(mod);
9704                 break;
9705         case MODULE_STATE_GOING:
9706                 trace_module_remove_evals(mod);
9707                 break;
9708         }
9709
9710         return NOTIFY_OK;
9711 }
9712
9713 static struct notifier_block trace_module_nb = {
9714         .notifier_call = trace_module_notify,
9715         .priority = 0,
9716 };
9717 #endif /* CONFIG_MODULES */
9718
9719 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9720 {
9721
9722         event_trace_init();
9723
9724         init_tracer_tracefs(&global_trace, NULL);
9725         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9726
9727         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9728                         &global_trace, &tracing_thresh_fops);
9729
9730         trace_create_file("README", TRACE_MODE_READ, NULL,
9731                         NULL, &tracing_readme_fops);
9732
9733         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9734                         NULL, &tracing_saved_cmdlines_fops);
9735
9736         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9737                           NULL, &tracing_saved_cmdlines_size_fops);
9738
9739         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9740                         NULL, &tracing_saved_tgids_fops);
9741
9742         trace_create_eval_file(NULL);
9743
9744 #ifdef CONFIG_MODULES
9745         register_module_notifier(&trace_module_nb);
9746 #endif
9747
9748 #ifdef CONFIG_DYNAMIC_FTRACE
9749         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9750                         NULL, &tracing_dyn_info_fops);
9751 #endif
9752
9753         create_trace_instances(NULL);
9754
9755         update_tracer_options(&global_trace);
9756 }
9757
9758 static __init int tracer_init_tracefs(void)
9759 {
9760         int ret;
9761
9762         trace_access_lock_init();
9763
9764         ret = tracing_init_dentry();
9765         if (ret)
9766                 return 0;
9767
9768         if (eval_map_wq) {
9769                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9770                 queue_work(eval_map_wq, &tracerfs_init_work);
9771         } else {
9772                 tracer_init_tracefs_work_func(NULL);
9773         }
9774
9775         return 0;
9776 }
9777
9778 fs_initcall(tracer_init_tracefs);
9779
9780 static int trace_panic_handler(struct notifier_block *this,
9781                                unsigned long event, void *unused)
9782 {
9783         if (ftrace_dump_on_oops)
9784                 ftrace_dump(ftrace_dump_on_oops);
9785         return NOTIFY_OK;
9786 }
9787
9788 static struct notifier_block trace_panic_notifier = {
9789         .notifier_call  = trace_panic_handler,
9790         .next           = NULL,
9791         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9792 };
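/*
 * ftrace_dump_on_oops is normally enabled with the
 * "ftrace_dump_on_oops[=orig_cpu]" kernel command line option or via the
 * kernel.ftrace_dump_on_oops sysctl, e.g. (illustrative):
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */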
9793
9794 static int trace_die_handler(struct notifier_block *self,
9795                              unsigned long val,
9796                              void *data)
9797 {
9798         switch (val) {
9799         case DIE_OOPS:
9800                 if (ftrace_dump_on_oops)
9801                         ftrace_dump(ftrace_dump_on_oops);
9802                 break;
9803         default:
9804                 break;
9805         }
9806         return NOTIFY_OK;
9807 }
9808
9809 static struct notifier_block trace_die_notifier = {
9810         .notifier_call = trace_die_handler,
9811         .priority = 200
9812 };
9813
9814 /*
9815  * printk is limited to a max of 1024 bytes; we really don't need it
9816  * that big. Nothing should be printing 1000 characters anyway.
9817  */
9818 #define TRACE_MAX_PRINT         1000
9819
9820 /*
9821  * Define here KERN_TRACE so that we have one place to modify
9822  * it if we decide to change what log level the ftrace dump
9823  * should be at.
9824  */
9825 #define KERN_TRACE              KERN_EMERG
9826
9827 void
9828 trace_printk_seq(struct trace_seq *s)
9829 {
9830         /* Probably should print a warning here. */
9831         if (s->seq.len >= TRACE_MAX_PRINT)
9832                 s->seq.len = TRACE_MAX_PRINT;
9833
9834         /*
9835          * More paranoid code. Although the buffer size is set to
9836          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9837          * an extra layer of protection.
9838          */
9839         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9840                 s->seq.len = s->seq.size - 1;
9841
9842         /* The string should already be NUL terminated, but we are paranoid. */
9843         s->buffer[s->seq.len] = 0;
9844
9845         printk(KERN_TRACE "%s", s->buffer);
9846
9847         trace_seq_init(s);
9848 }
9849
9850 void trace_init_global_iter(struct trace_iterator *iter)
9851 {
9852         iter->tr = &global_trace;
9853         iter->trace = iter->tr->current_trace;
9854         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9855         iter->array_buffer = &global_trace.array_buffer;
9856
9857         if (iter->trace && iter->trace->open)
9858                 iter->trace->open(iter);
9859
9860         /* Annotate start of buffers if we had overruns */
9861         if (ring_buffer_overruns(iter->array_buffer->buffer))
9862                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9863
9864         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9865         if (trace_clocks[iter->tr->clock_id].in_ns)
9866                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9867
9868         /* Can not use kmalloc for iter.temp and iter.fmt */
9869         iter->temp = static_temp_buf;
9870         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9871         iter->fmt = static_fmt_buf;
9872         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9873 }
9874
9875 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9876 {
9877         /* use static because iter can be a bit big for the stack */
9878         static struct trace_iterator iter;
9879         static atomic_t dump_running;
9880         struct trace_array *tr = &global_trace;
9881         unsigned int old_userobj;
9882         unsigned long flags;
9883         int cnt = 0, cpu;
9884
9885         /* Only allow one dump user at a time. */
9886         if (atomic_inc_return(&dump_running) != 1) {
9887                 atomic_dec(&dump_running);
9888                 return;
9889         }
9890
9891         /*
9892          * Always turn off tracing when we dump.
9893          * We don't need to show trace output of what happens
9894          * between multiple crashes.
9895          *
9896          * If the user does a sysrq-z, then they can re-enable
9897          * tracing with echo 1 > tracing_on.
9898          */
9899         tracing_off();
9900
9901         local_irq_save(flags);
9902
9903         /* Simulate the iterator */
9904         trace_init_global_iter(&iter);
9905
9906         for_each_tracing_cpu(cpu) {
9907                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9908         }
9909
9910         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9911
9912         /* don't look at user memory in panic mode */
9913         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9914
9915         switch (oops_dump_mode) {
9916         case DUMP_ALL:
9917                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9918                 break;
9919         case DUMP_ORIG:
9920                 iter.cpu_file = raw_smp_processor_id();
9921                 break;
9922         case DUMP_NONE:
9923                 goto out_enable;
9924         default:
9925                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9926                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9927         }
9928
9929         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9930
9931         /* Did function tracer already get disabled? */
9932         if (ftrace_is_dead()) {
9933                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9934                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9935         }
9936
9937         /*
9938          * We need to stop all tracing on all CPUs to read
9939          * the next buffer. This is a bit expensive, but is
9940          * not done often. We print everything we can read,
9941          * and then release the locks again.
9942          */
9943
9944         while (!trace_empty(&iter)) {
9945
9946                 if (!cnt)
9947                         printk(KERN_TRACE "---------------------------------\n");
9948
9949                 cnt++;
9950
9951                 trace_iterator_reset(&iter);
9952                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9953
9954                 if (trace_find_next_entry_inc(&iter) != NULL) {
9955                         int ret;
9956
9957                         ret = print_trace_line(&iter);
9958                         if (ret != TRACE_TYPE_NO_CONSUME)
9959                                 trace_consume(&iter);
9960                 }
9961                 touch_nmi_watchdog();
9962
9963                 trace_printk_seq(&iter.seq);
9964         }
9965
9966         if (!cnt)
9967                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9968         else
9969                 printk(KERN_TRACE "---------------------------------\n");
9970
9971  out_enable:
9972         tr->trace_flags |= old_userobj;
9973
9974         for_each_tracing_cpu(cpu) {
9975                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9976         }
9977         atomic_dec(&dump_running);
9978         local_irq_restore(flags);
9979 }
9980 EXPORT_SYMBOL_GPL(ftrace_dump);
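/*
 * Besides the panic/die notifiers above, the dump can be triggered by
 * hand: with CONFIG_MAGIC_SYSRQ enabled, the 'z' sysrq key dumps the
 * ftrace buffer, e.g. (illustrative):
 *
 *	echo z > /proc/sysrq-trigger
 */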
9981
9982 #define WRITE_BUFSIZE  4096
9983
9984 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9985                                 size_t count, loff_t *ppos,
9986                                 int (*createfn)(const char *))
9987 {
9988         char *kbuf, *buf, *tmp;
9989         int ret = 0;
9990         size_t done = 0;
9991         size_t size;
9992
9993         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9994         if (!kbuf)
9995                 return -ENOMEM;
9996
9997         while (done < count) {
9998                 size = count - done;
9999
10000                 if (size >= WRITE_BUFSIZE)
10001                         size = WRITE_BUFSIZE - 1;
10002
10003                 if (copy_from_user(kbuf, buffer + done, size)) {
10004                         ret = -EFAULT;
10005                         goto out;
10006                 }
10007                 kbuf[size] = '\0';
10008                 buf = kbuf;
10009                 do {
10010                         tmp = strchr(buf, '\n');
10011                         if (tmp) {
10012                                 *tmp = '\0';
10013                                 size = tmp - buf + 1;
10014                         } else {
10015                                 size = strlen(buf);
10016                                 if (done + size < count) {
10017                                         if (buf != kbuf)
10018                                                 break;
10019                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10020                                         pr_warn("Line length is too long: Should be less than %d\n",
10021                                                 WRITE_BUFSIZE - 2);
10022                                         ret = -EINVAL;
10023                                         goto out;
10024                                 }
10025                         }
10026                         done += size;
10027
10028                         /* Remove comments */
10029                         tmp = strchr(buf, '#');
10030
10031                         if (tmp)
10032                                 *tmp = '\0';
10033
10034                         ret = createfn(buf);
10035                         if (ret)
10036                                 goto out;
10037                         buf += size;
10038
10039                 } while (done < count);
10040         }
10041         ret = done;
10042
10043 out:
10044         kfree(kbuf);
10045
10046         return ret;
10047 }
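/*
 * trace_parse_run_command() backs the "command style" tracefs files such
 * as kprobe_events and dynamic_events: the write is split into
 * newline-terminated lines, '#' starts a comment, and each remaining line
 * is handed to createfn, e.g. (illustrative):
 *
 *	echo 'p:my_open do_sys_openat2' >> /sys/kernel/tracing/kprobe_events
 */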
10048
10049 __init static int tracer_alloc_buffers(void)
10050 {
10051         int ring_buf_size;
10052         int ret = -ENOMEM;
10053
10054
10055         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10056                 pr_warn("Tracing disabled due to lockdown\n");
10057                 return -EPERM;
10058         }
10059
10060         /*
10061          * Make sure we don't accidentally add more trace options
10062          * than we have bits for.
10063          */
10064         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10065
10066         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10067                 goto out;
10068
10069         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10070                 goto out_free_buffer_mask;
10071
10072         /* Only allocate trace_printk buffers if a trace_printk exists */
10073         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10074                 /* Must be called before global_trace.buffer is allocated */
10075                 trace_printk_init_buffers();
10076
10077         /* To save memory, keep the ring buffer size to its minimum */
10078         if (ring_buffer_expanded)
10079                 ring_buf_size = trace_buf_size;
10080         else
10081                 ring_buf_size = 1;
10082
10083         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10084         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10085
10086         raw_spin_lock_init(&global_trace.start_lock);
10087
10088         /*
10089          * The prepare callback allocates some memory for the ring buffer. We
10090          * don't free the buffer if the CPU goes down. If we were to free
10091          * the buffer, then the user would lose any trace that was in the
10092          * buffer. The memory will be removed once the "instance" is removed.
10093          */
10094         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10095                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10096                                       NULL);
10097         if (ret < 0)
10098                 goto out_free_cpumask;
10099         /* Used for event triggers */
10100         ret = -ENOMEM;
10101         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10102         if (!temp_buffer)
10103                 goto out_rm_hp_state;
10104
10105         if (trace_create_savedcmd() < 0)
10106                 goto out_free_temp_buffer;
10107
10108         /* TODO: make the number of buffers hot pluggable with CPUS */
10109         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10110                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10111                 goto out_free_savedcmd;
10112         }
10113
10114         if (global_trace.buffer_disabled)
10115                 tracing_off();
10116
10117         if (trace_boot_clock) {
10118                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10119                 if (ret < 0)
10120                         pr_warn("Trace clock %s not defined, going back to default\n",
10121                                 trace_boot_clock);
10122         }
10123
10124         /*
10125          * register_tracer() might reference current_trace, so it
10126          * needs to be set before we register anything. This is
10127          * just a bootstrap of current_trace anyway.
10128          */
10129         global_trace.current_trace = &nop_trace;
10130
10131         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10132
10133         ftrace_init_global_array_ops(&global_trace);
10134
10135         init_trace_flags_index(&global_trace);
10136
10137         register_tracer(&nop_trace);
10138
10139         /* Function tracing may start here (via kernel command line) */
10140         init_function_trace();
10141
10142         /* All seems OK, enable tracing */
10143         tracing_disabled = 0;
10144
10145         atomic_notifier_chain_register(&panic_notifier_list,
10146                                        &trace_panic_notifier);
10147
10148         register_die_notifier(&trace_die_notifier);
10149
10150         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10151
10152         INIT_LIST_HEAD(&global_trace.systems);
10153         INIT_LIST_HEAD(&global_trace.events);
10154         INIT_LIST_HEAD(&global_trace.hist_vars);
10155         INIT_LIST_HEAD(&global_trace.err_log);
10156         list_add(&global_trace.list, &ftrace_trace_arrays);
10157
10158         apply_trace_boot_options();
10159
10160         register_snapshot_cmd();
10161
10162         test_can_verify();
10163
10164         return 0;
10165
10166 out_free_savedcmd:
10167         free_saved_cmdlines_buffer(savedcmd);
10168 out_free_temp_buffer:
10169         ring_buffer_free(temp_buffer);
10170 out_rm_hp_state:
10171         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10172 out_free_cpumask:
10173         free_cpumask_var(global_trace.tracing_cpumask);
10174 out_free_buffer_mask:
10175         free_cpumask_var(tracing_buffer_mask);
10176 out:
10177         return ret;
10178 }
10179
10180 void __init ftrace_boot_snapshot(void)
10181 {
10182         if (snapshot_at_boot) {
10183                 tracing_snapshot();
10184                 internal_trace_puts("** Boot snapshot taken **\n");
10185         }
10186 }
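/*
 * snapshot_at_boot is set by the "ftrace_boot_snapshot" kernel command
 * line option, which preserves whatever was traced during boot in the
 * snapshot buffer for later inspection.
 */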
10187
10188 void __init early_trace_init(void)
10189 {
10190         if (tracepoint_printk) {
10191                 tracepoint_print_iter =
10192                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10193                 if (MEM_FAIL(!tracepoint_print_iter,
10194                              "Failed to allocate trace iterator\n"))
10195                         tracepoint_printk = 0;
10196                 else
10197                         static_key_enable(&tracepoint_printk_key.key);
10198         }
10199         tracer_alloc_buffers();
10200 }
10201
10202 void __init trace_init(void)
10203 {
10204         trace_event_init();
10205 }
10206
10207 __init static void clear_boot_tracer(void)
10208 {
10209         /*
10210          * The default boot-up tracer name points into an init section.
10211          * This function is called from a late initcall. If the boot
10212          * tracer was never found, clear the pointer out to prevent a
10213          * later registration from accessing memory that is about to
10214          * be freed.
10215          */
10216         if (!default_bootup_tracer)
10217                 return;
10218
10219         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10220                default_bootup_tracer);
10221         default_bootup_tracer = NULL;
10222 }
10223
10224 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10225 __init static void tracing_set_default_clock(void)
10226 {
10227         /* sched_clock_stable() is determined in late_initcall */
10228         if (!trace_boot_clock && !sched_clock_stable()) {
10229                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10230                         pr_warn("Can not set tracing clock due to lockdown\n");
10231                         return;
10232                 }
10233
10234                 printk(KERN_WARNING
10235                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10236                        "If you want to keep using the local clock, then add:\n"
10237                        "  \"trace_clock=local\"\n"
10238                        "on the kernel command line\n");
10239                 tracing_set_clock(&global_trace, "global");
10240         }
10241 }
10242 #else
10243 static inline void tracing_set_default_clock(void) { }
10244 #endif
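/*
 * Whatever default ends up being chosen here can still be inspected or
 * changed at run time through the trace_clock file, e.g. (illustrative):
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * or pinned at boot with the trace_clock= command line option handled
 * earlier in this file.
 */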
10245
10246 __init static int late_trace_init(void)
10247 {
10248         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10249                 static_key_disable(&tracepoint_printk_key.key);
10250                 tracepoint_printk = 0;
10251         }
10252
10253         tracing_set_default_clock();
10254         clear_boot_tracer();
10255         return 0;
10256 }
10257
10258 late_initcall_sync(late_trace_init);