kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
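/*
 * Example (editor's note, illustrative only): the __setup() handlers above
 * accept a kernel command line such as
 *
 *   ftrace=function trace_options=stacktrace trace_clock=global \
 *   traceoff_on_warning tp_printk
 *
 * which selects the function tracer at boot, turns on a trace option, picks
 * the "global" trace clock, stops tracing on the first WARN*(), and pipes
 * tracepoints to printk. The particular tracer and option names are only
 * examples of valid values.
 */
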
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are entering export into the list but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
427
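/*
 * Example (editor's sketch, not part of this file): a minimal consumer of
 * the export API above. The callback name, the flags choice and the
 * pr_info() output are invented for illustration; the write() signature
 * follows the trace_export definition in <linux/trace.h>.
 */
#if 0	/* illustrative only */
static void my_export_write(struct trace_export *export,
			    const void *entry, unsigned int size)
{
	/* Forward the raw trace entry somewhere (device, network, log...). */
	pr_info("exported %u bytes of trace data\n", size);
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
};

/* Both calls below serialize on ftrace_export_lock internally. */
static int my_export_start(void)
{
	return register_ftrace_export(&my_export);
}

static void my_export_stop(void)
{
	unregister_ftrace_export(&my_export);
}
#endif
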
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
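/*
 * Example (editor's sketch): the reference counting pattern described in
 * the comment above. trace_array_get_by_name() is the getter referenced
 * there; the instance name and helper are invented for illustration.
 */
#if 0	/* illustrative only */
static void my_instance_user(void)
{
	struct trace_array *tr;

	/* Creates the "my_instance" buffer if needed and takes a reference. */
	tr = trace_array_get_by_name("my_instance");
	if (!tr)
		return;

	/* ... use the instance: enable events, write markers, ... */

	/* Drop the reference so the instance can later be destroyed. */
	trace_array_put(tr);
}
#endif
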
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
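/*
 * Worked example (editor's note): with filtered_pids = {1, 2} and
 * filtered_no_pids = {3}, a task with pid 1 is traced (returns false),
 * pid 3 is ignored (it is in the no_pids list), and pid 5 is ignored
 * (filtered_pids is set but does not contain it). With both lists NULL,
 * every task is traced.
 */
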
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
676
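/*
 * Example (editor's sketch): how the three helpers above typically plug
 * into a seq_file, in the style of the pid-filter files elsewhere in the
 * tracing code. The pid_list source (my_pid_list) is a stand-in; real
 * users fetch it from their trace_array under RCU or a mutex.
 */
#if 0	/* illustrative only */
static struct trace_pid_list *my_pid_list;

static void *my_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(my_pid_list, pos);
}

static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(my_pid_list, v, pos);
}

static void my_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations my_pids_seq_ops = {
	.start	= my_pids_start,
	.next	= my_pids_next,
	.stop	= my_pids_stop,
	.show	= trace_pid_show,
};
#endif
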
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new list. The write is an all or nothing
698          * operation: a new list is always built when the user adds
699          * pids, and if the operation fails, the current list is
700          * not modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0 || !trace_parser_loaded(&parser))
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 ret = -EINVAL;
732                 if (kstrtoul(parser.buffer, 0, &val))
733                         break;
734
735                 pid = (pid_t)val;
736
737                 if (trace_pid_list_set(pid_list, pid) < 0) {
738                         ret = -1;
739                         break;
740                 }
741                 nr_pids++;
742
743                 trace_parser_clear(&parser);
744                 ret = 0;
745         }
746         trace_parser_put(&parser);
747
748         if (ret < 0) {
749                 trace_pid_list_free(pid_list);
750                 return ret;
751         }
752
753         if (!nr_pids) {
754                 /* Cleared the list of pids */
755                 trace_pid_list_free(pid_list);
756                 read = ret;
757                 pid_list = NULL;
758         }
759
760         *new_pid_list = pid_list;
761
762         return read;
763 }
764
765 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
766 {
767         u64 ts;
768
769         /* Early boot up does not have a buffer yet */
770         if (!buf->buffer)
771                 return trace_clock_local();
772
773         ts = ring_buffer_time_stamp(buf->buffer);
774         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
775
776         return ts;
777 }
778
779 u64 ftrace_now(int cpu)
780 {
781         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
782 }
783
784 /**
785  * tracing_is_enabled - Show if global_trace has been enabled
786  *
787  * Shows if the global trace has been enabled or not. It uses the
788  * mirror flag "buffer_disabled" to be used in fast paths such as for
789  * the irqsoff tracer. But it may be inaccurate due to races. If you
790  * need to know the accurate state, use tracing_is_on() which is a little
791  * slower, but accurate.
792  */
793 int tracing_is_enabled(void)
794 {
795         /*
796          * For quick access (irqsoff uses this in fast path), just
797          * return the mirror variable of the state of the ring buffer.
798          * It's a little racy, but we don't really care.
799          */
800         smp_rmb();
801         return !global_trace.buffer_disabled;
802 }
803
804 /*
805  * trace_buf_size is the size in bytes that is allocated
806  * for a buffer. Note, the number of bytes is always rounded
807  * to page size.
808  *
809  * This number is purposely set to a low value of 16384.
810  * If a dump on oops happens, it is much appreciated not to
811  * have to wait for all that output. In any case, this is
812  * configurable at both boot time and run time.
813  */
814 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
815
816 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
817
818 /* trace_types holds a link list of available tracers. */
819 static struct tracer            *trace_types __read_mostly;
820
821 /*
822  * trace_types_lock is used to protect the trace_types list.
823  */
824 DEFINE_MUTEX(trace_types_lock);
825
826 /*
827  * Serialize access to the ring buffer.
828  *
829  * The ring buffer serializes readers, but that is only low level protection.
830  * The validity of the events (returned by ring_buffer_peek() etc.)
831  * is not protected by the ring buffer.
832  *
833  * The content of events may become garbage if we allow another process to
834  * consume these events concurrently:
835  *   A) the page of the consumed events may become a normal page
836  *      (not a reader page) in the ring buffer, and this page will be
837  *      rewritten by the events producer.
838  *   B) the page of the consumed events may become a page for splice_read,
839  *      and this page will be returned to the system.
840  *
841  * These primitives allow multiple processes to access different cpu ring
842  * buffers concurrently.
843  *
844  * These primitives don't distinguish read-only and read-consume access.
845  * Multiple read-only accesses are also serialized.
846  */
847
848 #ifdef CONFIG_SMP
849 static DECLARE_RWSEM(all_cpu_access_lock);
850 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
851
852 static inline void trace_access_lock(int cpu)
853 {
854         if (cpu == RING_BUFFER_ALL_CPUS) {
855                 /* gain it for accessing the whole ring buffer. */
856                 down_write(&all_cpu_access_lock);
857         } else {
858                 /* gain it for accessing a cpu ring buffer. */
859
860                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
861                 down_read(&all_cpu_access_lock);
862
863                 /* Secondly block other access to this @cpu ring buffer. */
864                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
865         }
866 }
867
868 static inline void trace_access_unlock(int cpu)
869 {
870         if (cpu == RING_BUFFER_ALL_CPUS) {
871                 up_write(&all_cpu_access_lock);
872         } else {
873                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
874                 up_read(&all_cpu_access_lock);
875         }
876 }
877
878 static inline void trace_access_lock_init(void)
879 {
880         int cpu;
881
882         for_each_possible_cpu(cpu)
883                 mutex_init(&per_cpu(cpu_access_lock, cpu));
884 }
885
886 #else
887
888 static DEFINE_MUTEX(access_lock);
889
890 static inline void trace_access_lock(int cpu)
891 {
892         (void)cpu;
893         mutex_lock(&access_lock);
894 }
895
896 static inline void trace_access_unlock(int cpu)
897 {
898         (void)cpu;
899         mutex_unlock(&access_lock);
900 }
901
902 static inline void trace_access_lock_init(void)
903 {
904 }
905
906 #endif
907
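/*
 * Example (editor's sketch): the intended calling pattern for the
 * primitives above. A reader of a single cpu buffer takes the per-cpu
 * lock (plus the shared rwsem for read); a whole-buffer reader passes
 * RING_BUFFER_ALL_CPUS and excludes everyone else.
 */
#if 0	/* illustrative only */
static void example_consume_cpu(int cpu)
{
	trace_access_lock(cpu);
	/* ... ring_buffer_consume()/ring_buffer_read_page() for @cpu ... */
	trace_access_unlock(cpu);
}
#endif
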
908 #ifdef CONFIG_STACKTRACE
909 static void __ftrace_trace_stack(struct trace_buffer *buffer,
910                                  unsigned int trace_ctx,
911                                  int skip, struct pt_regs *regs);
912 static inline void ftrace_trace_stack(struct trace_array *tr,
913                                       struct trace_buffer *buffer,
914                                       unsigned int trace_ctx,
915                                       int skip, struct pt_regs *regs);
916
917 #else
918 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                         unsigned int trace_ctx,
920                                         int skip, struct pt_regs *regs)
921 {
922 }
923 static inline void ftrace_trace_stack(struct trace_array *tr,
924                                       struct trace_buffer *buffer,
925                                       unsigned long trace_ctx,
926                                       int skip, struct pt_regs *regs)
927 {
928 }
929
930 #endif
931
932 static __always_inline void
933 trace_event_setup(struct ring_buffer_event *event,
934                   int type, unsigned int trace_ctx)
935 {
936         struct trace_entry *ent = ring_buffer_event_data(event);
937
938         tracing_generic_entry_update(ent, type, trace_ctx);
939 }
940
941 static __always_inline struct ring_buffer_event *
942 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
943                           int type,
944                           unsigned long len,
945                           unsigned int trace_ctx)
946 {
947         struct ring_buffer_event *event;
948
949         event = ring_buffer_lock_reserve(buffer, len);
950         if (event != NULL)
951                 trace_event_setup(event, type, trace_ctx);
952
953         return event;
954 }
955
956 void tracer_tracing_on(struct trace_array *tr)
957 {
958         if (tr->array_buffer.buffer)
959                 ring_buffer_record_on(tr->array_buffer.buffer);
960         /*
961          * This flag is looked at when buffers haven't been allocated
962          * yet, or by some tracers (like irqsoff) that just want to
963          * know if the ring buffer has been disabled, but can handle
964          * races where it gets disabled while we still do a record.
965          * As the check is in the fast path of the tracers, it is more
966          * important to be fast than accurate.
967          */
968         tr->buffer_disabled = 0;
969         /* Make the flag seen by readers */
970         smp_wmb();
971 }
972
973 /**
974  * tracing_on - enable tracing buffers
975  *
976  * This function enables tracing buffers that may have been
977  * disabled with tracing_off.
978  */
979 void tracing_on(void)
980 {
981         tracer_tracing_on(&global_trace);
982 }
983 EXPORT_SYMBOL_GPL(tracing_on);
984
985
986 static __always_inline void
987 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
988 {
989         __this_cpu_write(trace_taskinfo_save, true);
990
991         /* If this is the temp buffer, we need to commit fully */
992         if (this_cpu_read(trace_buffered_event) == event) {
993                 /* Length is in event->array[0] */
994                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
995                 /* Release the temp buffer */
996                 this_cpu_dec(trace_buffered_event_cnt);
997                 /* ring_buffer_unlock_commit() enables preemption */
998                 preempt_enable_notrace();
999         } else
1000                 ring_buffer_unlock_commit(buffer, event);
1001 }
1002
1003 /**
1004  * __trace_puts - write a constant string into the trace buffer.
1005  * @ip:    The address of the caller
1006  * @str:   The constant string to write
1007  * @size:  The size of the string.
1008  */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011         struct ring_buffer_event *event;
1012         struct trace_buffer *buffer;
1013         struct print_entry *entry;
1014         unsigned int trace_ctx;
1015         int alloc;
1016
1017         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1018                 return 0;
1019
1020         if (unlikely(tracing_selftest_running || tracing_disabled))
1021                 return 0;
1022
1023         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024
1025         trace_ctx = tracing_gen_ctx();
1026         buffer = global_trace.array_buffer.buffer;
1027         ring_buffer_nest_start(buffer);
1028         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1029                                             trace_ctx);
1030         if (!event) {
1031                 size = 0;
1032                 goto out;
1033         }
1034
1035         entry = ring_buffer_event_data(event);
1036         entry->ip = ip;
1037
1038         memcpy(&entry->buf, str, size);
1039
1040         /* Add a newline if necessary */
1041         if (entry->buf[size - 1] != '\n') {
1042                 entry->buf[size] = '\n';
1043                 entry->buf[size + 1] = '\0';
1044         } else
1045                 entry->buf[size] = '\0';
1046
1047         __buffer_unlock_commit(buffer, event);
1048         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049  out:
1050         ring_buffer_nest_end(buffer);
1051         return size;
1052 }
1053 EXPORT_SYMBOL_GPL(__trace_puts);
1054
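/*
 * Example (editor's sketch): callers normally do not use __trace_puts()
 * directly but go through the trace_puts() macro in the kernel headers,
 * which picks __trace_bputs() for compile-time constant strings and falls
 * back to this function otherwise.
 */
#if 0	/* illustrative only */
static void example_mark_point(void)
{
	/* Ends up in the ring buffer as a TRACE_PRINT/TRACE_BPUTS entry. */
	trace_puts("example: reached the interesting point\n");
}
#endif
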
1055 /**
1056  * __trace_bputs - write the pointer to a constant string into trace buffer
1057  * @ip:    The address of the caller
1058  * @str:   The constant string to write to the buffer to
1059  */
1060 int __trace_bputs(unsigned long ip, const char *str)
1061 {
1062         struct ring_buffer_event *event;
1063         struct trace_buffer *buffer;
1064         struct bputs_entry *entry;
1065         unsigned int trace_ctx;
1066         int size = sizeof(struct bputs_entry);
1067         int ret = 0;
1068
1069         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070                 return 0;
1071
1072         if (unlikely(tracing_selftest_running || tracing_disabled))
1073                 return 0;
1074
1075         trace_ctx = tracing_gen_ctx();
1076         buffer = global_trace.array_buffer.buffer;
1077
1078         ring_buffer_nest_start(buffer);
1079         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1080                                             trace_ctx);
1081         if (!event)
1082                 goto out;
1083
1084         entry = ring_buffer_event_data(event);
1085         entry->ip                       = ip;
1086         entry->str                      = str;
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1090
1091         ret = 1;
1092  out:
1093         ring_buffer_nest_end(buffer);
1094         return ret;
1095 }
1096 EXPORT_SYMBOL_GPL(__trace_bputs);
1097
1098 #ifdef CONFIG_TRACER_SNAPSHOT
1099 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1100                                            void *cond_data)
1101 {
1102         struct tracer *tracer = tr->current_trace;
1103         unsigned long flags;
1104
1105         if (in_nmi()) {
1106                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1107                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1108                 return;
1109         }
1110
1111         if (!tr->allocated_snapshot) {
1112                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1113                 internal_trace_puts("*** stopping trace here!   ***\n");
1114                 tracing_off();
1115                 return;
1116         }
1117
1118         /* Note, snapshot can not be used when the tracer uses it */
1119         if (tracer->use_max_tr) {
1120                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1121                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1122                 return;
1123         }
1124
1125         local_irq_save(flags);
1126         update_max_tr(tr, current, smp_processor_id(), cond_data);
1127         local_irq_restore(flags);
1128 }
1129
1130 void tracing_snapshot_instance(struct trace_array *tr)
1131 {
1132         tracing_snapshot_instance_cond(tr, NULL);
1133 }
1134
1135 /**
1136  * tracing_snapshot - take a snapshot of the current buffer.
1137  *
1138  * This causes a swap between the snapshot buffer and the current live
1139  * tracing buffer. You can use this to take snapshots of the live
1140  * trace when some condition is triggered, but continue to trace.
1141  *
1142  * Note, make sure to allocate the snapshot either with
1143  * tracing_snapshot_alloc(), or by doing it manually
1144  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1145  *
1146  * If the snapshot buffer is not allocated, it will stop tracing.
1147  * Basically making a permanent snapshot.
1148  */
1149 void tracing_snapshot(void)
1150 {
1151         struct trace_array *tr = &global_trace;
1152
1153         tracing_snapshot_instance(tr);
1154 }
1155 EXPORT_SYMBOL_GPL(tracing_snapshot);
1156
1157 /**
1158  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1159  * @tr:         The tracing instance to snapshot
1160  * @cond_data:  The data to be tested conditionally, and possibly saved
1161  *
1162  * This is the same as tracing_snapshot() except that the snapshot is
1163  * conditional - the snapshot will only happen if the
1164  * cond_snapshot.update() implementation receiving the cond_data
1165  * returns true, which means that the trace array's cond_snapshot
1166  * update() operation used the cond_data to determine whether the
1167  * snapshot should be taken, and if it was, presumably saved it along
1168  * with the snapshot.
1169  */
1170 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 {
1172         tracing_snapshot_instance_cond(tr, cond_data);
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1175
1176 /**
1177  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1178  * @tr:         The tracing instance
1179  *
1180  * When the user enables a conditional snapshot using
1181  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1182  * with the snapshot.  This accessor is used to retrieve it.
1183  *
1184  * Should not be called from cond_snapshot.update(), since it takes
1185  * the tr->max_lock lock, which the code calling
1186  * cond_snapshot.update() has already taken.
1187  *
1188  * Returns the cond_data associated with the trace array's snapshot.
1189  */
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192         void *cond_data = NULL;
1193
1194         arch_spin_lock(&tr->max_lock);
1195
1196         if (tr->cond_snapshot)
1197                 cond_data = tr->cond_snapshot->cond_data;
1198
1199         arch_spin_unlock(&tr->max_lock);
1200
1201         return cond_data;
1202 }
1203 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204
1205 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1206                                         struct array_buffer *size_buf, int cpu_id);
1207 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208
1209 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1210 {
1211         int ret;
1212
1213         if (!tr->allocated_snapshot) {
1214
1215                 /* allocate spare buffer */
1216                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1217                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1218                 if (ret < 0)
1219                         return ret;
1220
1221                 tr->allocated_snapshot = true;
1222         }
1223
1224         return 0;
1225 }
1226
1227 static void free_snapshot(struct trace_array *tr)
1228 {
1229         /*
1230          * We don't free the ring buffer. Instead, we resize it because
1231          * the max_tr ring buffer has some state (e.g. ring->clock) and
1232          * we want to preserve it.
1233          */
1234         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1235         set_buffer_entries(&tr->max_buffer, 1);
1236         tracing_reset_online_cpus(&tr->max_buffer);
1237         tr->allocated_snapshot = false;
1238 }
1239
1240 /**
1241  * tracing_alloc_snapshot - allocate snapshot buffer.
1242  *
1243  * This only allocates the snapshot buffer if it isn't already
1244  * allocated - it doesn't also take a snapshot.
1245  *
1246  * This is meant to be used in cases where the snapshot buffer needs
1247  * to be set up for events that can't sleep but need to be able to
1248  * trigger a snapshot.
1249  */
1250 int tracing_alloc_snapshot(void)
1251 {
1252         struct trace_array *tr = &global_trace;
1253         int ret;
1254
1255         ret = tracing_alloc_snapshot_instance(tr);
1256         WARN_ON(ret < 0);
1257
1258         return ret;
1259 }
1260 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1261
1262 /**
1263  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264  *
1265  * This is similar to tracing_snapshot(), but it will allocate the
1266  * snapshot buffer if it isn't already allocated. Use this only
1267  * where it is safe to sleep, as the allocation may sleep.
1268  *
1269  * This causes a swap between the snapshot buffer and the current live
1270  * tracing buffer. You can use this to take snapshots of the live
1271  * trace when some condition is triggered, but continue to trace.
1272  */
1273 void tracing_snapshot_alloc(void)
1274 {
1275         int ret;
1276
1277         ret = tracing_alloc_snapshot();
1278         if (ret < 0)
1279                 return;
1280
1281         tracing_snapshot();
1282 }
1283 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1284
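/*
 * Example (editor's sketch): typical use of the snapshot API from code
 * that can sleep at setup time but must not allocate in the trigger path.
 * The helper names are invented for illustration.
 */
#if 0	/* illustrative only */
static int example_snapshot_setup(void)
{
	/* May sleep: make sure the spare buffer exists up front. */
	return tracing_alloc_snapshot();
}

static void example_on_error(void)
{
	/* No allocation here: just swaps the already-allocated buffers. */
	tracing_snapshot();
}
#endif
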
1285 /**
1286  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1287  * @tr:         The tracing instance
1288  * @cond_data:  User data to associate with the snapshot
1289  * @update:     Implementation of the cond_snapshot update function
1290  *
1291  * Check whether the conditional snapshot for the given instance has
1292  * already been enabled, or if the current tracer is already using a
1293  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1294  * save the cond_data and update function inside.
1295  *
1296  * Returns 0 if successful, error otherwise.
1297  */
1298 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1299                                  cond_update_fn_t update)
1300 {
1301         struct cond_snapshot *cond_snapshot;
1302         int ret = 0;
1303
1304         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1305         if (!cond_snapshot)
1306                 return -ENOMEM;
1307
1308         cond_snapshot->cond_data = cond_data;
1309         cond_snapshot->update = update;
1310
1311         mutex_lock(&trace_types_lock);
1312
1313         ret = tracing_alloc_snapshot_instance(tr);
1314         if (ret)
1315                 goto fail_unlock;
1316
1317         if (tr->current_trace->use_max_tr) {
1318                 ret = -EBUSY;
1319                 goto fail_unlock;
1320         }
1321
1322         /*
1323          * The cond_snapshot can only change to NULL without the
1324          * trace_types_lock. We don't care if we race with it going
1325          * to NULL, but we want to make sure that it's not set to
1326          * something other than NULL when we get here, which we can
1327          * do safely with only holding the trace_types_lock and not
1328          * having to take the max_lock.
1329          */
1330         if (tr->cond_snapshot) {
1331                 ret = -EBUSY;
1332                 goto fail_unlock;
1333         }
1334
1335         arch_spin_lock(&tr->max_lock);
1336         tr->cond_snapshot = cond_snapshot;
1337         arch_spin_unlock(&tr->max_lock);
1338
1339         mutex_unlock(&trace_types_lock);
1340
1341         return ret;
1342
1343  fail_unlock:
1344         mutex_unlock(&trace_types_lock);
1345         kfree(cond_snapshot);
1346         return ret;
1347 }
1348 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1349
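/*
 * Example (editor's sketch): a conditional-snapshot user. It assumes the
 * cond_update_fn_t callback takes (tr, cond_data) and returns bool, as
 * implied by the description above; the threshold value and helper names
 * are invented for illustration.
 */
#if 0	/* illustrative only */
static bool example_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *seen = cond_data;

	/* Only let the snapshot happen when the measured value is large. */
	return *seen > 100;
}

static int example_enable(struct trace_array *tr)
{
	/* NULL: nothing extra to save alongside the snapshot. */
	return tracing_snapshot_cond_enable(tr, NULL, example_update);
}

static void example_maybe_snapshot(struct trace_array *tr, unsigned long seen)
{
	/* example_update() receives &seen and decides whether to swap. */
	tracing_snapshot_cond(tr, &seen);
}
#endif
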
1350 /**
1351  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1352  * @tr:         The tracing instance
1353  *
1354  * Check whether the conditional snapshot for the given instance is
1355  * enabled; if so, free the cond_snapshot associated with it,
1356  * otherwise return -EINVAL.
1357  *
1358  * Returns 0 if successful, error otherwise.
1359  */
1360 int tracing_snapshot_cond_disable(struct trace_array *tr)
1361 {
1362         int ret = 0;
1363
1364         arch_spin_lock(&tr->max_lock);
1365
1366         if (!tr->cond_snapshot)
1367                 ret = -EINVAL;
1368         else {
1369                 kfree(tr->cond_snapshot);
1370                 tr->cond_snapshot = NULL;
1371         }
1372
1373         arch_spin_unlock(&tr->max_lock);
1374
1375         return ret;
1376 }
1377 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1378 #else
1379 void tracing_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 }
1383 EXPORT_SYMBOL_GPL(tracing_snapshot);
1384 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 {
1386         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1389 int tracing_alloc_snapshot(void)
1390 {
1391         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1392         return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1395 void tracing_snapshot_alloc(void)
1396 {
1397         /* Give warning */
1398         tracing_snapshot();
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1401 void *tracing_cond_snapshot_data(struct trace_array *tr)
1402 {
1403         return NULL;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1406 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1407 {
1408         return -ENODEV;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1411 int tracing_snapshot_cond_disable(struct trace_array *tr)
1412 {
1413         return false;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1416 #endif /* CONFIG_TRACER_SNAPSHOT */
1417
1418 void tracer_tracing_off(struct trace_array *tr)
1419 {
1420         if (tr->array_buffer.buffer)
1421                 ring_buffer_record_off(tr->array_buffer.buffer);
1422         /*
1423          * This flag is looked at when buffers haven't been allocated
1424          * yet, or by some tracers (like irqsoff) that just want to
1425          * know if the ring buffer has been disabled, but can handle
1426          * races where it gets disabled while we still do a record.
1427          * As the check is in the fast path of the tracers, it is more
1428          * important to be fast than accurate.
1429          */
1430         tr->buffer_disabled = 1;
1431         /* Make the flag seen by readers */
1432         smp_wmb();
1433 }
1434
1435 /**
1436  * tracing_off - turn off tracing buffers
1437  *
1438  * This function stops the tracing buffers from recording data.
1439  * It does not disable any overhead the tracers themselves may
1440  * be causing. This function simply causes all recording to
1441  * the ring buffers to fail.
1442  */
1443 void tracing_off(void)
1444 {
1445         tracer_tracing_off(&global_trace);
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_off);
1448
1449 void disable_trace_on_warning(void)
1450 {
1451         if (__disable_trace_on_warning) {
1452                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1453                         "Disabling tracing due to warning\n");
1454                 tracing_off();
1455         }
1456 }
1457
1458 /**
1459  * tracer_tracing_is_on - show real state of ring buffer enabled
1460  * @tr : the trace array to know if ring buffer is enabled
1461  *
1462  * Shows real state of the ring buffer if it is enabled or not.
1463  */
1464 bool tracer_tracing_is_on(struct trace_array *tr)
1465 {
1466         if (tr->array_buffer.buffer)
1467                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1468         return !tr->buffer_disabled;
1469 }
1470
1471 /**
1472  * tracing_is_on - show state of ring buffers enabled
1473  */
1474 int tracing_is_on(void)
1475 {
1476         return tracer_tracing_is_on(&global_trace);
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_is_on);
1479
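/*
 * Example (editor's sketch): bracketing a region of interest with the
 * on/off helpers above, a common debugging pattern; the buffer keeps
 * whatever was recorded up to tracing_off().
 */
#if 0	/* illustrative only */
static void example_capture_window(void)
{
	tracing_on();
	/* ... run the code path being debugged ... */
	tracing_off();
}
#endif
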
1480 static int __init set_buf_size(char *str)
1481 {
1482         unsigned long buf_size;
1483
1484         if (!str)
1485                 return 0;
1486         buf_size = memparse(str, &str);
1487         /*
1488          * nr_entries can not be zero and the startup
1489          * tests require some buffer space. Therefore
1490          * ensure we have at least 4096 bytes of buffer.
1491          */
1492         trace_buf_size = max(4096UL, buf_size);
1493         return 1;
1494 }
1495 __setup("trace_buf_size=", set_buf_size);
1496
1497 static int __init set_tracing_thresh(char *str)
1498 {
1499         unsigned long threshold;
1500         int ret;
1501
1502         if (!str)
1503                 return 0;
1504         ret = kstrtoul(str, 0, &threshold);
1505         if (ret < 0)
1506                 return 0;
1507         tracing_thresh = threshold * 1000;
1508         return 1;
1509 }
1510 __setup("tracing_thresh=", set_tracing_thresh);
1511
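/*
 * Example (editor's note, illustrative only): "trace_buf_size=16M" on the
 * kernel command line sets the per-cpu buffer size to 16 megabytes
 * (memparse() understands K/M/G suffixes), and "tracing_thresh=100" only
 * records latencies above 100 microseconds (the value is stored internally
 * in nanoseconds, hence the * 1000 above).
 */
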
1512 unsigned long nsecs_to_usecs(unsigned long nsecs)
1513 {
1514         return nsecs / 1000;
1515 }
1516
1517 /*
1518  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1519  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1520  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1521  * of strings in the order that the evals (enum) were defined.
1522  */
1523 #undef C
1524 #define C(a, b) b
1525
1526 /* These must match the bit positions in trace_iterator_flags */
1527 static const char *trace_options[] = {
1528         TRACE_FLAGS
1529         NULL
1530 };
1531
1532 static struct {
1533         u64 (*func)(void);
1534         const char *name;
1535         int in_ns;              /* is this clock in nanoseconds? */
1536 } trace_clocks[] = {
1537         { trace_clock_local,            "local",        1 },
1538         { trace_clock_global,           "global",       1 },
1539         { trace_clock_counter,          "counter",      0 },
1540         { trace_clock_jiffies,          "uptime",       0 },
1541         { trace_clock,                  "perf",         1 },
1542         { ktime_get_mono_fast_ns,       "mono",         1 },
1543         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1544         { ktime_get_boot_fast_ns,       "boot",         1 },
1545         ARCH_TRACE_CLOCKS
1546 };
1547
1548 bool trace_clock_in_ns(struct trace_array *tr)
1549 {
1550         if (trace_clocks[tr->clock_id].in_ns)
1551                 return true;
1552
1553         return false;
1554 }
1555
1556 /*
1557  * trace_parser_get_init - gets the buffer for trace parser
1558  */
1559 int trace_parser_get_init(struct trace_parser *parser, int size)
1560 {
1561         memset(parser, 0, sizeof(*parser));
1562
1563         parser->buffer = kmalloc(size, GFP_KERNEL);
1564         if (!parser->buffer)
1565                 return 1;
1566
1567         parser->size = size;
1568         return 0;
1569 }
1570
1571 /*
1572  * trace_parser_put - frees the buffer for trace parser
1573  */
1574 void trace_parser_put(struct trace_parser *parser)
1575 {
1576         kfree(parser->buffer);
1577         parser->buffer = NULL;
1578 }
1579
1580 /*
1581  * trace_get_user - reads the user input string separated by  space
1582  * (matched by isspace(ch))
1583  *
1584  * For each string found the 'struct trace_parser' is updated,
1585  * and the function returns.
1586  *
1587  * Returns number of bytes read.
1588  *
1589  * See kernel/trace/trace.h for 'struct trace_parser' details.
1590  */
1591 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1592         size_t cnt, loff_t *ppos)
1593 {
1594         char ch;
1595         size_t read = 0;
1596         ssize_t ret;
1597
1598         if (!*ppos)
1599                 trace_parser_clear(parser);
1600
1601         ret = get_user(ch, ubuf++);
1602         if (ret)
1603                 goto out;
1604
1605         read++;
1606         cnt--;
1607
1608         /*
1609          * The parser is not finished with the last write,
1610          * continue reading the user input without skipping spaces.
1611          */
1612         if (!parser->cont) {
1613                 /* skip white space */
1614                 while (cnt && isspace(ch)) {
1615                         ret = get_user(ch, ubuf++);
1616                         if (ret)
1617                                 goto out;
1618                         read++;
1619                         cnt--;
1620                 }
1621
1622                 parser->idx = 0;
1623
1624                 /* only spaces were written */
1625                 if (isspace(ch) || !ch) {
1626                         *ppos += read;
1627                         ret = read;
1628                         goto out;
1629                 }
1630         }
1631
1632         /* read the non-space input */
1633         while (cnt && !isspace(ch) && ch) {
1634                 if (parser->idx < parser->size - 1)
1635                         parser->buffer[parser->idx++] = ch;
1636                 else {
1637                         ret = -EINVAL;
1638                         goto out;
1639                 }
1640                 ret = get_user(ch, ubuf++);
1641                 if (ret)
1642                         goto out;
1643                 read++;
1644                 cnt--;
1645         }
1646
1647         /* We either got finished input or we have to wait for another call. */
1648         if (isspace(ch) || !ch) {
1649                 parser->buffer[parser->idx] = 0;
1650                 parser->cont = false;
1651         } else if (parser->idx < parser->size - 1) {
1652                 parser->cont = true;
1653                 parser->buffer[parser->idx++] = ch;
1654                 /* Make sure the parsed string always terminates with '\0'. */
1655                 parser->buffer[parser->idx] = 0;
1656         } else {
1657                 ret = -EINVAL;
1658                 goto out;
1659         }
1660
1661         *ppos += read;
1662         ret = read;
1663
1664 out:
1665         return ret;
1666 }
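
/*
 * Illustrative sketch only (hypothetical, not used by this file): a tracefs
 * write() handler consuming one space-separated token per call with the
 * parser above. The name example_token_write() is made up; the
 * trace_parser_*() and trace_get_user() calls reflect the helpers defined
 * in this file and in trace.h.
 */
static ssize_t __maybe_unused
example_token_write(struct file *filp, const char __user *ubuf,
                    size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t ret;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        ret = trace_get_user(&parser, ubuf, cnt, ppos);
        if (ret >= 0 && trace_parser_loaded(&parser)) {
                /* parser.buffer now holds a single NUL-terminated token */
                pr_debug("parsed token: %s\n", parser.buffer);
        }

        trace_parser_put(&parser);
        return ret;
}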
1667
1668 /* TODO add a seq_buf_to_buffer() */
1669 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 {
1671         int len;
1672
1673         if (trace_seq_used(s) <= s->seq.readpos)
1674                 return -EBUSY;
1675
1676         len = trace_seq_used(s) - s->seq.readpos;
1677         if (cnt > len)
1678                 cnt = len;
1679         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1680
1681         s->seq.readpos += cnt;
1682         return cnt;
1683 }
1684
1685 unsigned long __read_mostly     tracing_thresh;
1686 static const struct file_operations tracing_max_lat_fops;
1687
1688 #ifdef LATENCY_FS_NOTIFY
1689
1690 static struct workqueue_struct *fsnotify_wq;
1691
1692 static void latency_fsnotify_workfn(struct work_struct *work)
1693 {
1694         struct trace_array *tr = container_of(work, struct trace_array,
1695                                               fsnotify_work);
1696         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1697 }
1698
1699 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1700 {
1701         struct trace_array *tr = container_of(iwork, struct trace_array,
1702                                               fsnotify_irqwork);
1703         queue_work(fsnotify_wq, &tr->fsnotify_work);
1704 }
1705
1706 static void trace_create_maxlat_file(struct trace_array *tr,
1707                                      struct dentry *d_tracer)
1708 {
1709         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1710         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1711         tr->d_max_latency = trace_create_file("tracing_max_latency",
1712                                               TRACE_MODE_WRITE,
1713                                               d_tracer, &tr->max_latency,
1714                                               &tracing_max_lat_fops);
1715 }
1716
1717 __init static int latency_fsnotify_init(void)
1718 {
1719         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1720                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1721         if (!fsnotify_wq) {
1722                 pr_err("Unable to allocate tr_max_lat_wq\n");
1723                 return -ENOMEM;
1724         }
1725         return 0;
1726 }
1727
1728 late_initcall_sync(latency_fsnotify_init);
1729
1730 void latency_fsnotify(struct trace_array *tr)
1731 {
1732         if (!fsnotify_wq)
1733                 return;
1734         /*
1735          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1736          * possible that we are called from __schedule() or do_idle(), which
1737          * could cause a deadlock.
1738          */
1739         irq_work_queue(&tr->fsnotify_irqwork);
1740 }
1741
1742 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1743         || defined(CONFIG_OSNOISE_TRACER)
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1747                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #else
1750 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1751 #endif
1752
1753 #ifdef CONFIG_TRACER_MAX_TRACE
1754 /*
1755  * Copy the new maximum trace into the separate maximum-trace
1756  * structure. (this way the maximum trace is permanently saved,
1757  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1758  */
1759 static void
1760 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1761 {
1762         struct array_buffer *trace_buf = &tr->array_buffer;
1763         struct array_buffer *max_buf = &tr->max_buffer;
1764         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1765         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1766
1767         max_buf->cpu = cpu;
1768         max_buf->time_start = data->preempt_timestamp;
1769
1770         max_data->saved_latency = tr->max_latency;
1771         max_data->critical_start = data->critical_start;
1772         max_data->critical_end = data->critical_end;
1773
1774         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1775         max_data->pid = tsk->pid;
1776         /*
1777          * If tsk == current, then use current_uid(), as that does not use
1778          * RCU. The irq tracer can be called out of RCU scope.
1779          */
1780         if (tsk == current)
1781                 max_data->uid = current_uid();
1782         else
1783                 max_data->uid = task_uid(tsk);
1784
1785         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1786         max_data->policy = tsk->policy;
1787         max_data->rt_priority = tsk->rt_priority;
1788
1789         /* record this task's comm */
1790         tracing_record_cmdline(tsk);
1791         latency_fsnotify(tr);
1792 }
1793
1794 /**
1795  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1796  * @tr: tracer
1797  * @tsk: the task with the latency
1798  * @cpu: The cpu that initiated the trace.
1799  * @cond_data: User data associated with a conditional snapshot
1800  *
1801  * Flip the buffers between the @tr and the max_tr and record information
1802  * about which task was the cause of this latency.
1803  */
1804 void
1805 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1806               void *cond_data)
1807 {
1808         if (tr->stop_count)
1809                 return;
1810
1811         WARN_ON_ONCE(!irqs_disabled());
1812
1813         if (!tr->allocated_snapshot) {
1814                 /* Only the nop tracer should hit this when disabling */
1815                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1816                 return;
1817         }
1818
1819         arch_spin_lock(&tr->max_lock);
1820
1821         /* Inherit the recordable setting from array_buffer */
1822         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1823                 ring_buffer_record_on(tr->max_buffer.buffer);
1824         else
1825                 ring_buffer_record_off(tr->max_buffer.buffer);
1826
1827 #ifdef CONFIG_TRACER_SNAPSHOT
1828         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1829                 goto out_unlock;
1830 #endif
1831         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1832
1833         __update_max_tr(tr, tsk, cpu);
1834
1835  out_unlock:
1836         arch_spin_unlock(&tr->max_lock);
1837 }
1838
1839 /**
1840  * update_max_tr_single - only copy one trace over, and reset the rest
1841  * @tr: tracer
1842  * @tsk: task with the latency
1843  * @cpu: the cpu of the buffer to copy.
1844  *
1845  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1846  */
1847 void
1848 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1849 {
1850         int ret;
1851
1852         if (tr->stop_count)
1853                 return;
1854
1855         WARN_ON_ONCE(!irqs_disabled());
1856         if (!tr->allocated_snapshot) {
1857                 /* Only the nop tracer should hit this when disabling */
1858                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1859                 return;
1860         }
1861
1862         arch_spin_lock(&tr->max_lock);
1863
1864         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1865
1866         if (ret == -EBUSY) {
1867                 /*
1868                  * We failed to swap the buffer due to a commit taking
1869                  * place on this CPU. We fail to record, but we reset
1870                  * the max trace buffer (no one writes directly to it)
1871                  * and flag that it failed.
1872                  */
1873                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1874                         "Failed to swap buffers due to commit in progress\n");
1875         }
1876
1877         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1878
1879         __update_max_tr(tr, tsk, cpu);
1880         arch_spin_unlock(&tr->max_lock);
1881 }
1882 #endif /* CONFIG_TRACER_MAX_TRACE */
1883
1884 static int wait_on_pipe(struct trace_iterator *iter, int full)
1885 {
1886         /* Iterators are static, they should be filled or empty */
1887         if (trace_buffer_iter(iter, iter->cpu_file))
1888                 return 0;
1889
1890         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1891                                 full);
1892 }
1893
1894 #ifdef CONFIG_FTRACE_STARTUP_TEST
1895 static bool selftests_can_run;
1896
1897 struct trace_selftests {
1898         struct list_head                list;
1899         struct tracer                   *type;
1900 };
1901
1902 static LIST_HEAD(postponed_selftests);
1903
1904 static int save_selftest(struct tracer *type)
1905 {
1906         struct trace_selftests *selftest;
1907
1908         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1909         if (!selftest)
1910                 return -ENOMEM;
1911
1912         selftest->type = type;
1913         list_add(&selftest->list, &postponed_selftests);
1914         return 0;
1915 }
1916
1917 static int run_tracer_selftest(struct tracer *type)
1918 {
1919         struct trace_array *tr = &global_trace;
1920         struct tracer *saved_tracer = tr->current_trace;
1921         int ret;
1922
1923         if (!type->selftest || tracing_selftest_disabled)
1924                 return 0;
1925
1926         /*
1927          * If a tracer registers early in boot up (before scheduling is
1928          * initialized and such), then do not run its selftests yet.
1929          * Instead, run it a little later in the boot process.
1930          */
1931         if (!selftests_can_run)
1932                 return save_selftest(type);
1933
1934         if (!tracing_is_on()) {
1935                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1936                         type->name);
1937                 return 0;
1938         }
1939
1940         /*
1941          * Run a selftest on this tracer.
1942          * Here we reset the trace buffer, and set the current
1943          * tracer to be this tracer. The tracer can then run some
1944          * internal tracing to verify that everything is in order.
1945          * If we fail, we do not register this tracer.
1946          */
1947         tracing_reset_online_cpus(&tr->array_buffer);
1948
1949         tr->current_trace = type;
1950
1951 #ifdef CONFIG_TRACER_MAX_TRACE
1952         if (type->use_max_tr) {
1953                 /* If we expanded the buffers, make sure the max is expanded too */
1954                 if (ring_buffer_expanded)
1955                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1956                                            RING_BUFFER_ALL_CPUS);
1957                 tr->allocated_snapshot = true;
1958         }
1959 #endif
1960
1961         /* the test is responsible for initializing and enabling */
1962         pr_info("Testing tracer %s: ", type->name);
1963         ret = type->selftest(type, tr);
1964         /* the test is responsible for resetting too */
1965         tr->current_trace = saved_tracer;
1966         if (ret) {
1967                 printk(KERN_CONT "FAILED!\n");
1968                 /* Add the warning after printing 'FAILED' */
1969                 WARN_ON(1);
1970                 return -1;
1971         }
1972         /* Only reset on passing, to avoid touching corrupted buffers */
1973         tracing_reset_online_cpus(&tr->array_buffer);
1974
1975 #ifdef CONFIG_TRACER_MAX_TRACE
1976         if (type->use_max_tr) {
1977                 tr->allocated_snapshot = false;
1978
1979                 /* Shrink the max buffer again */
1980                 if (ring_buffer_expanded)
1981                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1982                                            RING_BUFFER_ALL_CPUS);
1983         }
1984 #endif
1985
1986         printk(KERN_CONT "PASSED\n");
1987         return 0;
1988 }
1989
1990 static __init int init_trace_selftests(void)
1991 {
1992         struct trace_selftests *p, *n;
1993         struct tracer *t, **last;
1994         int ret;
1995
1996         selftests_can_run = true;
1997
1998         mutex_lock(&trace_types_lock);
1999
2000         if (list_empty(&postponed_selftests))
2001                 goto out;
2002
2003         pr_info("Running postponed tracer tests:\n");
2004
2005         tracing_selftest_running = true;
2006         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2007                 /* This loop can take minutes when sanitizers are enabled, so
2008                  * let's make sure we allow RCU processing.
2009                  */
2010                 cond_resched();
2011                 ret = run_tracer_selftest(p->type);
2012                 /* If the test fails, then warn and remove from available_tracers */
2013                 if (ret < 0) {
2014                         WARN(1, "tracer: %s failed selftest, disabling\n",
2015                              p->type->name);
2016                         last = &trace_types;
2017                         for (t = trace_types; t; t = t->next) {
2018                                 if (t == p->type) {
2019                                         *last = t->next;
2020                                         break;
2021                                 }
2022                                 last = &t->next;
2023                         }
2024                 }
2025                 list_del(&p->list);
2026                 kfree(p);
2027         }
2028         tracing_selftest_running = false;
2029
2030  out:
2031         mutex_unlock(&trace_types_lock);
2032
2033         return 0;
2034 }
2035 core_initcall(init_trace_selftests);
2036 #else
2037 static inline int run_tracer_selftest(struct tracer *type)
2038 {
2039         return 0;
2040 }
2041 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2042
2043 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2044
2045 static void __init apply_trace_boot_options(void);
2046
2047 /**
2048  * register_tracer - register a tracer with the ftrace system.
2049  * @type: the plugin for the tracer
2050  *
2051  * Register a new plugin tracer.
2052  */
2053 int __init register_tracer(struct tracer *type)
2054 {
2055         struct tracer *t;
2056         int ret = 0;
2057
2058         if (!type->name) {
2059                 pr_info("Tracer must have a name\n");
2060                 return -1;
2061         }
2062
2063         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2064                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2065                 return -1;
2066         }
2067
2068         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2069                 pr_warn("Can not register tracer %s due to lockdown\n",
2070                            type->name);
2071                 return -EPERM;
2072         }
2073
2074         mutex_lock(&trace_types_lock);
2075
2076         tracing_selftest_running = true;
2077
2078         for (t = trace_types; t; t = t->next) {
2079                 if (strcmp(type->name, t->name) == 0) {
2080                         /* already found */
2081                         pr_info("Tracer %s already registered\n",
2082                                 type->name);
2083                         ret = -1;
2084                         goto out;
2085                 }
2086         }
2087
2088         if (!type->set_flag)
2089                 type->set_flag = &dummy_set_flag;
2090         if (!type->flags) {
2091                 /* allocate a dummy tracer_flags */
2092                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2093                 if (!type->flags) {
2094                         ret = -ENOMEM;
2095                         goto out;
2096                 }
2097                 type->flags->val = 0;
2098                 type->flags->opts = dummy_tracer_opt;
2099         } else
2100                 if (!type->flags->opts)
2101                         type->flags->opts = dummy_tracer_opt;
2102
2103         /* store the tracer for __set_tracer_option */
2104         type->flags->trace = type;
2105
2106         ret = run_tracer_selftest(type);
2107         if (ret < 0)
2108                 goto out;
2109
2110         type->next = trace_types;
2111         trace_types = type;
2112         add_tracer_options(&global_trace, type);
2113
2114  out:
2115         tracing_selftest_running = false;
2116         mutex_unlock(&trace_types_lock);
2117
2118         if (ret || !default_bootup_tracer)
2119                 goto out_unlock;
2120
2121         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2122                 goto out_unlock;
2123
2124         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2125         /* Do we want this tracer to start on bootup? */
2126         tracing_set_tracer(&global_trace, type->name);
2127         default_bootup_tracer = NULL;
2128
2129         apply_trace_boot_options();
2130
2131         /* disable other selftests, since this will break them. */
2132         disable_tracing_selftest("running a tracer");
2133
2134  out_unlock:
2135         return ret;
2136 }
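
/*
 * Illustrative sketch (hypothetical, not part of this file): the minimal
 * shape of a tracer plugin. A real plugin lives in its own file and wires
 * example_tracer_register() up via an initcall such as core_initcall();
 * the "example" name and both callbacks are made up.
 */
static int example_tracer_init(struct trace_array *tr)
{
        /* arm whatever hooks the tracer needs */
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* undo everything done in example_tracer_init() */
}

static struct tracer example_tracer __maybe_unused __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init __maybe_unused int example_tracer_register(void)
{
        return register_tracer(&example_tracer);
}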
2137
2138 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2139 {
2140         struct trace_buffer *buffer = buf->buffer;
2141
2142         if (!buffer)
2143                 return;
2144
2145         ring_buffer_record_disable(buffer);
2146
2147         /* Make sure all commits have finished */
2148         synchronize_rcu();
2149         ring_buffer_reset_cpu(buffer, cpu);
2150
2151         ring_buffer_record_enable(buffer);
2152 }
2153
2154 void tracing_reset_online_cpus(struct array_buffer *buf)
2155 {
2156         struct trace_buffer *buffer = buf->buffer;
2157
2158         if (!buffer)
2159                 return;
2160
2161         ring_buffer_record_disable(buffer);
2162
2163         /* Make sure all commits have finished */
2164         synchronize_rcu();
2165
2166         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2167
2168         ring_buffer_reset_online_cpus(buffer);
2169
2170         ring_buffer_record_enable(buffer);
2171 }
2172
2173 /* Must have trace_types_lock held */
2174 void tracing_reset_all_online_cpus(void)
2175 {
2176         struct trace_array *tr;
2177
2178         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2179                 if (!tr->clear_trace)
2180                         continue;
2181                 tr->clear_trace = false;
2182                 tracing_reset_online_cpus(&tr->array_buffer);
2183 #ifdef CONFIG_TRACER_MAX_TRACE
2184                 tracing_reset_online_cpus(&tr->max_buffer);
2185 #endif
2186         }
2187 }
2188
2189 /*
2190  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2191  * is the tgid last observed corresponding to pid=i.
2192  */
2193 static int *tgid_map;
2194
2195 /* The maximum valid index into tgid_map. */
2196 static size_t tgid_map_max;
2197
2198 #define SAVED_CMDLINES_DEFAULT 128
2199 #define NO_CMDLINE_MAP UINT_MAX
2200 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2201 struct saved_cmdlines_buffer {
2202         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2203         unsigned *map_cmdline_to_pid;
2204         unsigned cmdline_num;
2205         int cmdline_idx;
2206         char *saved_cmdlines;
2207 };
2208 static struct saved_cmdlines_buffer *savedcmd;
2209
2210 static inline char *get_saved_cmdlines(int idx)
2211 {
2212         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2213 }
2214
2215 static inline void set_cmdline(int idx, const char *cmdline)
2216 {
2217         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2218 }
2219
2220 static int allocate_cmdlines_buffer(unsigned int val,
2221                                     struct saved_cmdlines_buffer *s)
2222 {
2223         s->map_cmdline_to_pid = kmalloc_array(val,
2224                                               sizeof(*s->map_cmdline_to_pid),
2225                                               GFP_KERNEL);
2226         if (!s->map_cmdline_to_pid)
2227                 return -ENOMEM;
2228
2229         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2230         if (!s->saved_cmdlines) {
2231                 kfree(s->map_cmdline_to_pid);
2232                 return -ENOMEM;
2233         }
2234
2235         s->cmdline_idx = 0;
2236         s->cmdline_num = val;
2237         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2238                sizeof(s->map_pid_to_cmdline));
2239         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2240                val * sizeof(*s->map_cmdline_to_pid));
2241
2242         return 0;
2243 }
2244
2245 static int trace_create_savedcmd(void)
2246 {
2247         int ret;
2248
2249         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2250         if (!savedcmd)
2251                 return -ENOMEM;
2252
2253         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2254         if (ret < 0) {
2255                 kfree(savedcmd);
2256                 savedcmd = NULL;
2257                 return -ENOMEM;
2258         }
2259
2260         return 0;
2261 }
2262
2263 int is_tracing_stopped(void)
2264 {
2265         return global_trace.stop_count;
2266 }
2267
2268 /**
2269  * tracing_start - quick start of the tracer
2270  *
2271  * If tracing is enabled but was stopped by tracing_stop,
2272  * this will start the tracer back up.
2273  */
2274 void tracing_start(void)
2275 {
2276         struct trace_buffer *buffer;
2277         unsigned long flags;
2278
2279         if (tracing_disabled)
2280                 return;
2281
2282         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2283         if (--global_trace.stop_count) {
2284                 if (global_trace.stop_count < 0) {
2285                         /* Someone screwed up their debugging */
2286                         WARN_ON_ONCE(1);
2287                         global_trace.stop_count = 0;
2288                 }
2289                 goto out;
2290         }
2291
2292         /* Prevent the buffers from switching */
2293         arch_spin_lock(&global_trace.max_lock);
2294
2295         buffer = global_trace.array_buffer.buffer;
2296         if (buffer)
2297                 ring_buffer_record_enable(buffer);
2298
2299 #ifdef CONFIG_TRACER_MAX_TRACE
2300         buffer = global_trace.max_buffer.buffer;
2301         if (buffer)
2302                 ring_buffer_record_enable(buffer);
2303 #endif
2304
2305         arch_spin_unlock(&global_trace.max_lock);
2306
2307  out:
2308         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2309 }
2310
2311 static void tracing_start_tr(struct trace_array *tr)
2312 {
2313         struct trace_buffer *buffer;
2314         unsigned long flags;
2315
2316         if (tracing_disabled)
2317                 return;
2318
2319         /* If global, we need to also start the max tracer */
2320         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2321                 return tracing_start();
2322
2323         raw_spin_lock_irqsave(&tr->start_lock, flags);
2324
2325         if (--tr->stop_count) {
2326                 if (tr->stop_count < 0) {
2327                         /* Someone screwed up their debugging */
2328                         WARN_ON_ONCE(1);
2329                         tr->stop_count = 0;
2330                 }
2331                 goto out;
2332         }
2333
2334         buffer = tr->array_buffer.buffer;
2335         if (buffer)
2336                 ring_buffer_record_enable(buffer);
2337
2338  out:
2339         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2340 }
2341
2342 /**
2343  * tracing_stop - quick stop of the tracer
2344  *
2345  * Light weight way to stop tracing. Use in conjunction with
2346  * tracing_start.
2347  */
2348 void tracing_stop(void)
2349 {
2350         struct trace_buffer *buffer;
2351         unsigned long flags;
2352
2353         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2354         if (global_trace.stop_count++)
2355                 goto out;
2356
2357         /* Prevent the buffers from switching */
2358         arch_spin_lock(&global_trace.max_lock);
2359
2360         buffer = global_trace.array_buffer.buffer;
2361         if (buffer)
2362                 ring_buffer_record_disable(buffer);
2363
2364 #ifdef CONFIG_TRACER_MAX_TRACE
2365         buffer = global_trace.max_buffer.buffer;
2366         if (buffer)
2367                 ring_buffer_record_disable(buffer);
2368 #endif
2369
2370         arch_spin_unlock(&global_trace.max_lock);
2371
2372  out:
2373         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2374 }
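
/*
 * Illustrative sketch (hypothetical): tracing_stop()/tracing_start() are
 * meant to be used as a pair around a window in which no new events should
 * be recorded, e.g. while dumping the buffers. example_quiesce() is made up.
 */
static void __maybe_unused example_quiesce(void)
{
        tracing_stop();
        /* ... read or dump the now-quiescent buffers here ... */
        tracing_start();
}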
2375
2376 static void tracing_stop_tr(struct trace_array *tr)
2377 {
2378         struct trace_buffer *buffer;
2379         unsigned long flags;
2380
2381         /* If global, we need to also stop the max tracer */
2382         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2383                 return tracing_stop();
2384
2385         raw_spin_lock_irqsave(&tr->start_lock, flags);
2386         if (tr->stop_count++)
2387                 goto out;
2388
2389         buffer = tr->array_buffer.buffer;
2390         if (buffer)
2391                 ring_buffer_record_disable(buffer);
2392
2393  out:
2394         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2395 }
2396
2397 static int trace_save_cmdline(struct task_struct *tsk)
2398 {
2399         unsigned tpid, idx;
2400
2401         /* treat recording of idle task as a success */
2402         if (!tsk->pid)
2403                 return 1;
2404
2405         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2406
2407         /*
2408          * It's not the end of the world if we don't get
2409          * the lock, but we also don't want to spin
2410          * nor do we want to disable interrupts,
2411          * so if we miss here, then better luck next time.
2412          */
2413         if (!arch_spin_trylock(&trace_cmdline_lock))
2414                 return 0;
2415
2416         idx = savedcmd->map_pid_to_cmdline[tpid];
2417         if (idx == NO_CMDLINE_MAP) {
2418                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2419
2420                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2421                 savedcmd->cmdline_idx = idx;
2422         }
2423
2424         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2425         set_cmdline(idx, tsk->comm);
2426
2427         arch_spin_unlock(&trace_cmdline_lock);
2428
2429         return 1;
2430 }
2431
2432 static void __trace_find_cmdline(int pid, char comm[])
2433 {
2434         unsigned map;
2435         int tpid;
2436
2437         if (!pid) {
2438                 strcpy(comm, "<idle>");
2439                 return;
2440         }
2441
2442         if (WARN_ON_ONCE(pid < 0)) {
2443                 strcpy(comm, "<XXX>");
2444                 return;
2445         }
2446
2447         tpid = pid & (PID_MAX_DEFAULT - 1);
2448         map = savedcmd->map_pid_to_cmdline[tpid];
2449         if (map != NO_CMDLINE_MAP) {
2450                 tpid = savedcmd->map_cmdline_to_pid[map];
2451                 if (tpid == pid) {
2452                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2453                         return;
2454                 }
2455         }
2456         strcpy(comm, "<...>");
2457 }
2458
2459 void trace_find_cmdline(int pid, char comm[])
2460 {
2461         preempt_disable();
2462         arch_spin_lock(&trace_cmdline_lock);
2463
2464         __trace_find_cmdline(pid, comm);
2465
2466         arch_spin_unlock(&trace_cmdline_lock);
2467         preempt_enable();
2468 }
2469
2470 static int *trace_find_tgid_ptr(int pid)
2471 {
2472         /*
2473          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2474          * if we observe a non-NULL tgid_map then we also observe the correct
2475          * tgid_map_max.
2476          */
2477         int *map = smp_load_acquire(&tgid_map);
2478
2479         if (unlikely(!map || pid > tgid_map_max))
2480                 return NULL;
2481
2482         return &map[pid];
2483 }
2484
2485 int trace_find_tgid(int pid)
2486 {
2487         int *ptr = trace_find_tgid_ptr(pid);
2488
2489         return ptr ? *ptr : 0;
2490 }
2491
2492 static int trace_save_tgid(struct task_struct *tsk)
2493 {
2494         int *ptr;
2495
2496         /* treat recording of idle task as a success */
2497         if (!tsk->pid)
2498                 return 1;
2499
2500         ptr = trace_find_tgid_ptr(tsk->pid);
2501         if (!ptr)
2502                 return 0;
2503
2504         *ptr = tsk->tgid;
2505         return 1;
2506 }
2507
2508 static bool tracing_record_taskinfo_skip(int flags)
2509 {
2510         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2511                 return true;
2512         if (!__this_cpu_read(trace_taskinfo_save))
2513                 return true;
2514         return false;
2515 }
2516
2517 /**
2518  * tracing_record_taskinfo - record the task info of a task
2519  *
2520  * @task:  task to record
2521  * @flags: TRACE_RECORD_CMDLINE for recording comm
2522  *         TRACE_RECORD_TGID for recording tgid
2523  */
2524 void tracing_record_taskinfo(struct task_struct *task, int flags)
2525 {
2526         bool done;
2527
2528         if (tracing_record_taskinfo_skip(flags))
2529                 return;
2530
2531         /*
2532          * Record as much task information as possible. If some fail, continue
2533          * to try to record the others.
2534          */
2535         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2536         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2537
2538         /* If recording any information failed, try again soon. */
2539         if (!done)
2540                 return;
2541
2542         __this_cpu_write(trace_taskinfo_save, false);
2543 }
2544
2545 /**
2546  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2547  *
2548  * @prev: previous task during sched_switch
2549  * @next: next task during sched_switch
2550  * @flags: TRACE_RECORD_CMDLINE for recording comm
2551  *         TRACE_RECORD_TGID for recording tgid
2552  */
2553 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2554                                           struct task_struct *next, int flags)
2555 {
2556         bool done;
2557
2558         if (tracing_record_taskinfo_skip(flags))
2559                 return;
2560
2561         /*
2562          * Record as much task information as possible. If some fail, continue
2563          * to try to record the others.
2564          */
2565         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2566         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2567         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2568         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2569
2570         /* If recording any information failed, try again soon. */
2571         if (!done)
2572                 return;
2573
2574         __this_cpu_write(trace_taskinfo_save, false);
2575 }
2576
2577 /* Helpers to record a specific task information */
2578 void tracing_record_cmdline(struct task_struct *task)
2579 {
2580         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2581 }
2582
2583 void tracing_record_tgid(struct task_struct *task)
2584 {
2585         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2586 }
2587
2588 /*
2589  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2590  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2591  * simplifies those functions and keeps them in sync.
2592  */
2593 enum print_line_t trace_handle_return(struct trace_seq *s)
2594 {
2595         return trace_seq_has_overflowed(s) ?
2596                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2597 }
2598 EXPORT_SYMBOL_GPL(trace_handle_return);
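
/*
 * Illustrative sketch (hypothetical): a trace_event output callback ends
 * with trace_handle_return() so that an overflowing trace_seq is turned
 * into TRACE_TYPE_PARTIAL_LINE automatically. example_output() is made up.
 */
static enum print_line_t __maybe_unused
example_output(struct trace_iterator *iter, int flags,
               struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;

        trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);
        return trace_handle_return(s);
}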
2599
2600 static unsigned short migration_disable_value(void)
2601 {
2602 #if defined(CONFIG_SMP)
2603         return current->migration_disabled;
2604 #else
2605         return 0;
2606 #endif
2607 }
2608
2609 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2610 {
2611         unsigned int trace_flags = irqs_status;
2612         unsigned int pc;
2613
2614         pc = preempt_count();
2615
2616         if (pc & NMI_MASK)
2617                 trace_flags |= TRACE_FLAG_NMI;
2618         if (pc & HARDIRQ_MASK)
2619                 trace_flags |= TRACE_FLAG_HARDIRQ;
2620         if (in_serving_softirq())
2621                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2622         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2623                 trace_flags |= TRACE_FLAG_BH_OFF;
2624
2625         if (tif_need_resched())
2626                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2627         if (test_preempt_need_resched())
2628                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2629         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2630                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2631 }
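
/*
 * Illustrative sketch (hypothetical): how the packed word built above is
 * laid out. Bits 0-3 hold the (clamped) preemption depth, bits 4-7 the
 * (clamped) migration-disable depth, and bits 16 and up the TRACE_FLAG_*
 * bits. example_decode_trace_ctx() is made up.
 */
static inline void __maybe_unused example_decode_trace_ctx(unsigned int trace_ctx)
{
        unsigned int preempt_depth = trace_ctx & 0xf;
        unsigned int migrate_depth = (trace_ctx >> 4) & 0xf;
        unsigned int flags         = trace_ctx >> 16;

        pr_debug("preempt=%u migrate=%u flags=0x%x\n",
                 preempt_depth, migrate_depth, flags);
}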
2632
2633 struct ring_buffer_event *
2634 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2635                           int type,
2636                           unsigned long len,
2637                           unsigned int trace_ctx)
2638 {
2639         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2640 }
2641
2642 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2643 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2644 static int trace_buffered_event_ref;
2645
2646 /**
2647  * trace_buffered_event_enable - enable buffering events
2648  *
2649  * When events are being filtered, it is quicker to use a temporary
2650  * buffer to write the event data into if there's a likely chance
2651  * that it will not be committed. The discard of the ring buffer
2652  * is not as fast as committing, and is much slower than copying
2653  * a commit.
2654  *
2655  * When an event is to be filtered, allocate per cpu buffers to
2656  * write the event data into, and if the event is filtered and discarded
2657  * it is simply dropped, otherwise, the entire data is to be committed
2658  * in one shot.
2659  */
2660 void trace_buffered_event_enable(void)
2661 {
2662         struct ring_buffer_event *event;
2663         struct page *page;
2664         int cpu;
2665
2666         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2667
2668         if (trace_buffered_event_ref++)
2669                 return;
2670
2671         for_each_tracing_cpu(cpu) {
2672                 page = alloc_pages_node(cpu_to_node(cpu),
2673                                         GFP_KERNEL | __GFP_NORETRY, 0);
2674                 if (!page)
2675                         goto failed;
2676
2677                 event = page_address(page);
2678                 memset(event, 0, sizeof(*event));
2679
2680                 per_cpu(trace_buffered_event, cpu) = event;
2681
2682                 preempt_disable();
2683                 if (cpu == smp_processor_id() &&
2684                     __this_cpu_read(trace_buffered_event) !=
2685                     per_cpu(trace_buffered_event, cpu))
2686                         WARN_ON_ONCE(1);
2687                 preempt_enable();
2688         }
2689
2690         return;
2691  failed:
2692         trace_buffered_event_disable();
2693 }
2694
2695 static void enable_trace_buffered_event(void *data)
2696 {
2697         /* Probably not needed, but do it anyway */
2698         smp_rmb();
2699         this_cpu_dec(trace_buffered_event_cnt);
2700 }
2701
2702 static void disable_trace_buffered_event(void *data)
2703 {
2704         this_cpu_inc(trace_buffered_event_cnt);
2705 }
2706
2707 /**
2708  * trace_buffered_event_disable - disable buffering events
2709  *
2710  * When a filter is removed, it is faster to not use the buffered
2711  * events, and to commit directly into the ring buffer. Free up
2712  * the temp buffers when there are no more users. This requires
2713  * special synchronization with current events.
2714  */
2715 void trace_buffered_event_disable(void)
2716 {
2717         int cpu;
2718
2719         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2720
2721         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2722                 return;
2723
2724         if (--trace_buffered_event_ref)
2725                 return;
2726
2727         preempt_disable();
2728         /* For each CPU, set the buffer as used. */
2729         smp_call_function_many(tracing_buffer_mask,
2730                                disable_trace_buffered_event, NULL, 1);
2731         preempt_enable();
2732
2733         /* Wait for all current users to finish */
2734         synchronize_rcu();
2735
2736         for_each_tracing_cpu(cpu) {
2737                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2738                 per_cpu(trace_buffered_event, cpu) = NULL;
2739         }
2740         /*
2741          * Make sure trace_buffered_event is NULL before clearing
2742          * trace_buffered_event_cnt.
2743          */
2744         smp_wmb();
2745
2746         preempt_disable();
2747         /* Do the work on each cpu */
2748         smp_call_function_many(tracing_buffer_mask,
2749                                enable_trace_buffered_event, NULL, 1);
2750         preempt_enable();
2751 }
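
/*
 * Illustrative sketch (hypothetical): callers bracket the window in which
 * the buffered path is wanted (e.g. while an event is filtered or soft
 * disabled) with the enable/disable pair above, under event_mutex as the
 * WARN_ON_ONCE() checks require. example_use_buffered_events() is made up.
 */
static void __maybe_unused example_use_buffered_events(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();
        /* ... install the filter / soft-disable state on the event files ... */
        mutex_unlock(&event_mutex);

        /* ... and later, when that state is torn down again: */
        mutex_lock(&event_mutex);
        trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}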
2752
2753 static struct trace_buffer *temp_buffer;
2754
2755 struct ring_buffer_event *
2756 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2757                           struct trace_event_file *trace_file,
2758                           int type, unsigned long len,
2759                           unsigned int trace_ctx)
2760 {
2761         struct ring_buffer_event *entry;
2762         struct trace_array *tr = trace_file->tr;
2763         int val;
2764
2765         *current_rb = tr->array_buffer.buffer;
2766
2767         if (!tr->no_filter_buffering_ref &&
2768             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2769                 preempt_disable_notrace();
2770                 /*
2771                  * Filtering is on, so try to use the per cpu buffer first.
2772                  * This buffer will simulate a ring_buffer_event,
2773                  * where the type_len is zero and the array[0] will
2774                  * hold the full length.
2775                  * (see include/linux/ring_buffer.h for details on
2776                  *  how the ring_buffer_event is structured).
2777                  *
2778                  * Using a temp buffer during filtering and copying it
2779                  * on a matched filter is quicker than writing directly
2780                  * into the ring buffer and then discarding it when
2781                  * it doesn't match. That is because the discard
2782                  * requires several atomic operations to get right.
2783                  * Copying on match and doing nothing on a failed match
2784                  * is still quicker than no copy on match, but having
2785                  * to discard out of the ring buffer on a failed match.
2786                  */
2787                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2788                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2789
2790                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2791
2792                         /*
2793                          * Preemption is disabled, but interrupts and NMIs
2794                          * can still come in now. If that happens after
2795                          * the above increment, then it will have to go
2796                          * back to the old method of allocating the event
2797                          * on the ring buffer, and if the filter fails, it
2798                          * will have to call ring_buffer_discard_commit()
2799                          * to remove it.
2800                          *
2801                          * Need to also check the unlikely case that the
2802                          * length is bigger than the temp buffer size.
2803                          * If that happens, then the reserve is pretty much
2804                          * guaranteed to fail, as the ring buffer currently
2805                          * only allows events less than a page. But that may
2806                          * change in the future, so let the ring buffer reserve
2807                          * handle the failure in that case.
2808                          */
2809                         if (val == 1 && likely(len <= max_len)) {
2810                                 trace_event_setup(entry, type, trace_ctx);
2811                                 entry->array[0] = len;
2812                                 /* Return with preemption disabled */
2813                                 return entry;
2814                         }
2815                         this_cpu_dec(trace_buffered_event_cnt);
2816                 }
2817                 /* __trace_buffer_lock_reserve() disables preemption */
2818                 preempt_enable_notrace();
2819         }
2820
2821         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2822                                             trace_ctx);
2823         /*
2824          * If tracing is off, but we have triggers enabled
2825          * we still need to look at the event data. Use the temp_buffer
2826          * to store the trace event for the trigger to use. It's recursive
2827          * safe and will not be recorded anywhere.
2828          */
2829         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2830                 *current_rb = temp_buffer;
2831                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2832                                                     trace_ctx);
2833         }
2834         return entry;
2835 }
2836 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2837
2838 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2839 static DEFINE_MUTEX(tracepoint_printk_mutex);
2840
2841 static void output_printk(struct trace_event_buffer *fbuffer)
2842 {
2843         struct trace_event_call *event_call;
2844         struct trace_event_file *file;
2845         struct trace_event *event;
2846         unsigned long flags;
2847         struct trace_iterator *iter = tracepoint_print_iter;
2848
2849         /* We should never get here if iter is NULL */
2850         if (WARN_ON_ONCE(!iter))
2851                 return;
2852
2853         event_call = fbuffer->trace_file->event_call;
2854         if (!event_call || !event_call->event.funcs ||
2855             !event_call->event.funcs->trace)
2856                 return;
2857
2858         file = fbuffer->trace_file;
2859         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2860             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2861              !filter_match_preds(file->filter, fbuffer->entry)))
2862                 return;
2863
2864         event = &fbuffer->trace_file->event_call->event;
2865
2866         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2867         trace_seq_init(&iter->seq);
2868         iter->ent = fbuffer->entry;
2869         event_call->event.funcs->trace(iter, 0, event);
2870         trace_seq_putc(&iter->seq, 0);
2871         printk("%s", iter->seq.buffer);
2872
2873         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2874 }
2875
2876 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2877                              void *buffer, size_t *lenp,
2878                              loff_t *ppos)
2879 {
2880         int save_tracepoint_printk;
2881         int ret;
2882
2883         mutex_lock(&tracepoint_printk_mutex);
2884         save_tracepoint_printk = tracepoint_printk;
2885
2886         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2887
2888         /*
2889          * This will force exiting early, as tracepoint_printk
2890          * is always zero when tracepoint_printk_iter is not allocated
2891          */
2892         if (!tracepoint_print_iter)
2893                 tracepoint_printk = 0;
2894
2895         if (save_tracepoint_printk == tracepoint_printk)
2896                 goto out;
2897
2898         if (tracepoint_printk)
2899                 static_key_enable(&tracepoint_printk_key.key);
2900         else
2901                 static_key_disable(&tracepoint_printk_key.key);
2902
2903  out:
2904         mutex_unlock(&tracepoint_printk_mutex);
2905
2906         return ret;
2907 }
2908
2909 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2910 {
2911         enum event_trigger_type tt = ETT_NONE;
2912         struct trace_event_file *file = fbuffer->trace_file;
2913
2914         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2915                         fbuffer->entry, &tt))
2916                 goto discard;
2917
2918         if (static_key_false(&tracepoint_printk_key.key))
2919                 output_printk(fbuffer);
2920
2921         if (static_branch_unlikely(&trace_event_exports_enabled))
2922                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2923
2924         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2925                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2926
2927 discard:
2928         if (tt)
2929                 event_triggers_post_call(file, tt);
2930
2931 }
2932 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
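
/*
 * Illustrative sketch (hypothetical): the reserve/fill/commit pattern that
 * TRACE_EVENT() generated probes follow. struct example_entry and
 * example_probe() are made up; trace_event_buffer_reserve() is defined in
 * trace_events.c.
 */
struct example_entry {
        struct trace_entry      ent;
        unsigned long           value;
};

static void __maybe_unused
example_probe(struct trace_event_file *trace_file, unsigned long value)
{
        struct trace_event_buffer fbuffer;
        struct example_entry *entry;

        entry = trace_event_buffer_reserve(&fbuffer, trace_file,
                                           sizeof(*entry));
        if (!entry)
                return;

        entry->value = value;
        trace_event_buffer_commit(&fbuffer);
}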
2933
2934 /*
2935  * Skip 3:
2936  *
2937  *   trace_buffer_unlock_commit_regs()
2938  *   trace_event_buffer_commit()
2939  *   trace_event_raw_event_xxx()
2940  */
2941 # define STACK_SKIP 3
2942
2943 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2944                                      struct trace_buffer *buffer,
2945                                      struct ring_buffer_event *event,
2946                                      unsigned int trace_ctx,
2947                                      struct pt_regs *regs)
2948 {
2949         __buffer_unlock_commit(buffer, event);
2950
2951         /*
2952          * If regs is not set, then skip the necessary functions.
2953          * Note, we can still get here via blktrace, wakeup tracer
2954          * and mmiotrace, but that's ok if they lose a function or
2955          * two. They are not that meaningful.
2956          */
2957         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2958         ftrace_trace_userstack(tr, buffer, trace_ctx);
2959 }
2960
2961 /*
2962  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2963  */
2964 void
2965 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2966                                    struct ring_buffer_event *event)
2967 {
2968         __buffer_unlock_commit(buffer, event);
2969 }
2970
2971 void
2972 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2973                parent_ip, unsigned int trace_ctx)
2974 {
2975         struct trace_event_call *call = &event_function;
2976         struct trace_buffer *buffer = tr->array_buffer.buffer;
2977         struct ring_buffer_event *event;
2978         struct ftrace_entry *entry;
2979
2980         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2981                                             trace_ctx);
2982         if (!event)
2983                 return;
2984         entry   = ring_buffer_event_data(event);
2985         entry->ip                       = ip;
2986         entry->parent_ip                = parent_ip;
2987
2988         if (!call_filter_check_discard(call, entry, buffer, event)) {
2989                 if (static_branch_unlikely(&trace_function_exports_enabled))
2990                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2991                 __buffer_unlock_commit(buffer, event);
2992         }
2993 }
2994
2995 #ifdef CONFIG_STACKTRACE
2996
2997 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2998 #define FTRACE_KSTACK_NESTING   4
2999
3000 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3001
3002 struct ftrace_stack {
3003         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3004 };
3005
3006
3007 struct ftrace_stacks {
3008         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3009 };
3010
3011 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3012 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3013
3014 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3015                                  unsigned int trace_ctx,
3016                                  int skip, struct pt_regs *regs)
3017 {
3018         struct trace_event_call *call = &event_kernel_stack;
3019         struct ring_buffer_event *event;
3020         unsigned int size, nr_entries;
3021         struct ftrace_stack *fstack;
3022         struct stack_entry *entry;
3023         int stackidx;
3024
3025         /*
3026          * Add one, for this function and the call to stack_trace_save().
3027          * If regs is set, then these functions will not be in the way.
3028          */
3029 #ifndef CONFIG_UNWINDER_ORC
3030         if (!regs)
3031                 skip++;
3032 #endif
3033
3034         preempt_disable_notrace();
3035
3036         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3037
3038         /* This should never happen. If it does, yell once and skip */
3039         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3040                 goto out;
3041
3042         /*
3043          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3044          * interrupt will either see the value pre increment or post
3045          * increment. If the interrupt happens pre increment it will have
3046          * restored the counter when it returns.  We just need a barrier to
3047          * keep gcc from moving things around.
3048          */
3049         barrier();
3050
3051         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3052         size = ARRAY_SIZE(fstack->calls);
3053
3054         if (regs) {
3055                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3056                                                    size, skip);
3057         } else {
3058                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3059         }
3060
3061         size = nr_entries * sizeof(unsigned long);
3062         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3063                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3064                                     trace_ctx);
3065         if (!event)
3066                 goto out;
3067         entry = ring_buffer_event_data(event);
3068
3069         memcpy(&entry->caller, fstack->calls, size);
3070         entry->size = nr_entries;
3071
3072         if (!call_filter_check_discard(call, entry, buffer, event))
3073                 __buffer_unlock_commit(buffer, event);
3074
3075  out:
3076         /* Again, don't let gcc optimize things here */
3077         barrier();
3078         __this_cpu_dec(ftrace_stack_reserve);
3079         preempt_enable_notrace();
3080
3081 }
3082
3083 static inline void ftrace_trace_stack(struct trace_array *tr,
3084                                       struct trace_buffer *buffer,
3085                                       unsigned int trace_ctx,
3086                                       int skip, struct pt_regs *regs)
3087 {
3088         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3089                 return;
3090
3091         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3092 }
3093
3094 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3095                    int skip)
3096 {
3097         struct trace_buffer *buffer = tr->array_buffer.buffer;
3098
3099         if (rcu_is_watching()) {
3100                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3101                 return;
3102         }
3103
3104         /*
3105          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3106          * but if the above rcu_is_watching() failed, then the NMI
3107          * triggered someplace critical, and rcu_irq_enter() should
3108          * not be called from NMI.
3109          */
3110         if (unlikely(in_nmi()))
3111                 return;
3112
3113         rcu_irq_enter_irqson();
3114         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3115         rcu_irq_exit_irqson();
3116 }
3117
3118 /**
3119  * trace_dump_stack - record a stack back trace in the trace buffer
3120  * @skip: Number of functions to skip (helper handlers)
3121  */
3122 void trace_dump_stack(int skip)
3123 {
3124         if (tracing_disabled || tracing_selftest_running)
3125                 return;
3126
3127 #ifndef CONFIG_UNWINDER_ORC
3128         /* Skip 1 to skip this function. */
3129         skip++;
3130 #endif
3131         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3132                              tracing_gen_ctx(), skip, NULL);
3133 }
3134 EXPORT_SYMBOL_GPL(trace_dump_stack);
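
/*
 * Illustrative sketch (hypothetical): dropping the current backtrace into
 * the trace buffer from an arbitrary debugging site; skip == 0 records
 * from the caller of trace_dump_stack(). example_mark_path() is made up.
 */
static void __maybe_unused example_mark_path(void)
{
        trace_dump_stack(0);
}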
3135
3136 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3137 static DEFINE_PER_CPU(int, user_stack_count);
3138
3139 static void
3140 ftrace_trace_userstack(struct trace_array *tr,
3141                        struct trace_buffer *buffer, unsigned int trace_ctx)
3142 {
3143         struct trace_event_call *call = &event_user_stack;
3144         struct ring_buffer_event *event;
3145         struct userstack_entry *entry;
3146
3147         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3148                 return;
3149
3150         /*
3151          * NMIs cannot handle page faults, even with fixups.
3152          * Saving the user stack can (and often does) fault.
3153          */
3154         if (unlikely(in_nmi()))
3155                 return;
3156
3157         /*
3158          * prevent recursion, since the user stack tracing may
3159          * trigger other kernel events.
3160          */
3161         preempt_disable();
3162         if (__this_cpu_read(user_stack_count))
3163                 goto out;
3164
3165         __this_cpu_inc(user_stack_count);
3166
3167         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3168                                             sizeof(*entry), trace_ctx);
3169         if (!event)
3170                 goto out_drop_count;
3171         entry   = ring_buffer_event_data(event);
3172
3173         entry->tgid             = current->tgid;
3174         memset(&entry->caller, 0, sizeof(entry->caller));
3175
3176         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3177         if (!call_filter_check_discard(call, entry, buffer, event))
3178                 __buffer_unlock_commit(buffer, event);
3179
3180  out_drop_count:
3181         __this_cpu_dec(user_stack_count);
3182  out:
3183         preempt_enable();
3184 }
3185 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3186 static void ftrace_trace_userstack(struct trace_array *tr,
3187                                    struct trace_buffer *buffer,
3188                                    unsigned int trace_ctx)
3189 {
3190 }
3191 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3192
3193 #endif /* CONFIG_STACKTRACE */
3194
3195 static inline void
3196 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3197                           unsigned long long delta)
3198 {
3199         entry->bottom_delta_ts = delta & U32_MAX;
3200         entry->top_delta_ts = (delta >> 32);
3201 }
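
/*
 * Illustrative sketch (hypothetical): the reverse operation a reader of
 * the event performs, reassembling the 64-bit delta from the two halves
 * stored above. example_func_repeats_delta() is made up.
 */
static inline u64 __maybe_unused
example_func_repeats_delta(struct func_repeats_entry *entry)
{
        return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}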
3202
3203 void trace_last_func_repeats(struct trace_array *tr,
3204                              struct trace_func_repeats *last_info,
3205                              unsigned int trace_ctx)
3206 {
3207         struct trace_buffer *buffer = tr->array_buffer.buffer;
3208         struct func_repeats_entry *entry;
3209         struct ring_buffer_event *event;
3210         u64 delta;
3211
3212         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3213                                             sizeof(*entry), trace_ctx);
3214         if (!event)
3215                 return;
3216
3217         delta = ring_buffer_event_time_stamp(buffer, event) -
3218                 last_info->ts_last_call;
3219
3220         entry = ring_buffer_event_data(event);
3221         entry->ip = last_info->ip;
3222         entry->parent_ip = last_info->parent_ip;
3223         entry->count = last_info->count;
3224         func_repeats_set_delta_ts(entry, delta);
3225
3226         __buffer_unlock_commit(buffer, event);
3227 }
3228
3229 /* created for use with alloc_percpu */
3230 struct trace_buffer_struct {
3231         int nesting;
3232         char buffer[4][TRACE_BUF_SIZE];
3233 };
3234
3235 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3236
3237 /*
3238  * This allows for lockless recording.  If we're nested too deeply, then
3239  * this returns NULL.
3240  */
3241 static char *get_trace_buf(void)
3242 {
3243         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3244
3245         if (!trace_percpu_buffer || buffer->nesting >= 4)
3246                 return NULL;
3247
3248         buffer->nesting++;
3249
3250         /* Interrupts must see nesting incremented before we use the buffer */
3251         barrier();
3252         return &buffer->buffer[buffer->nesting - 1][0];
3253 }
3254
3255 static void put_trace_buf(void)
3256 {
3257         /* Don't let the decrement of nesting leak before this */
3258         barrier();
3259         this_cpu_dec(trace_percpu_buffer->nesting);
3260 }
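
/*
 * A minimal usage sketch for the pair above (mirroring what
 * trace_vbprintk() below does): callers are expected to disable
 * preemption around get/put so the per-cpu buffer and the nesting
 * count stay on one CPU:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */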
3261
3262 static int alloc_percpu_trace_buffer(void)
3263 {
3264         struct trace_buffer_struct __percpu *buffers;
3265
3266         if (trace_percpu_buffer)
3267                 return 0;
3268
3269         buffers = alloc_percpu(struct trace_buffer_struct);
3270         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3271                 return -ENOMEM;
3272
3273         trace_percpu_buffer = buffers;
3274         return 0;
3275 }
3276
3277 static int buffers_allocated;
3278
3279 void trace_printk_init_buffers(void)
3280 {
3281         if (buffers_allocated)
3282                 return;
3283
3284         if (alloc_percpu_trace_buffer())
3285                 return;
3286
3287         /* trace_printk() is for debug use only. Don't use it in production. */
3288
3289         pr_warn("\n");
3290         pr_warn("**********************************************************\n");
3291         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3292         pr_warn("**                                                      **\n");
3293         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3294         pr_warn("**                                                      **\n");
3295         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3296         pr_warn("** unsafe for production use.                           **\n");
3297         pr_warn("**                                                      **\n");
3298         pr_warn("** If you see this message and you are not debugging    **\n");
3299         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3300         pr_warn("**                                                      **\n");
3301         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3302         pr_warn("**********************************************************\n");
3303
3304         /* Expand the buffers to the configured size */
3305         tracing_update_buffers();
3306
3307         buffers_allocated = 1;
3308
3309         /*
3310          * trace_printk_init_buffers() can be called by modules.
3311          * If that happens, then we need to start cmdline recording
3312          * directly here. If global_trace.array_buffer.buffer is already
3313          * allocated at this point, then this was called by module code.
3314          */
3315         if (global_trace.array_buffer.buffer)
3316                 tracing_start_cmdline_record();
3317 }
3318 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3319
3320 void trace_printk_start_comm(void)
3321 {
3322         /* Start tracing comms if trace printk is set */
3323         if (!buffers_allocated)
3324                 return;
3325         tracing_start_cmdline_record();
3326 }
3327
3328 static void trace_printk_start_stop_comm(int enabled)
3329 {
3330         if (!buffers_allocated)
3331                 return;
3332
3333         if (enabled)
3334                 tracing_start_cmdline_record();
3335         else
3336                 tracing_stop_cmdline_record();
3337 }
3338
3339 /**
3340  * trace_vbprintk - write binary msg to tracing buffer
3341  * @ip:    The address of the caller
3342  * @fmt:   The string format to write to the buffer
3343  * @args:  Arguments for @fmt
3344  */
3345 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3346 {
3347         struct trace_event_call *call = &event_bprint;
3348         struct ring_buffer_event *event;
3349         struct trace_buffer *buffer;
3350         struct trace_array *tr = &global_trace;
3351         struct bprint_entry *entry;
3352         unsigned int trace_ctx;
3353         char *tbuffer;
3354         int len = 0, size;
3355
3356         if (unlikely(tracing_selftest_running || tracing_disabled))
3357                 return 0;
3358
3359         /* Don't pollute graph traces with trace_vprintk internals */
3360         pause_graph_tracing();
3361
3362         trace_ctx = tracing_gen_ctx();
3363         preempt_disable_notrace();
3364
3365         tbuffer = get_trace_buf();
3366         if (!tbuffer) {
3367                 len = 0;
3368                 goto out_nobuffer;
3369         }
3370
3371         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3372
3373         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3374                 goto out_put;
3375
3376         size = sizeof(*entry) + sizeof(u32) * len;
3377         buffer = tr->array_buffer.buffer;
3378         ring_buffer_nest_start(buffer);
3379         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3380                                             trace_ctx);
3381         if (!event)
3382                 goto out;
3383         entry = ring_buffer_event_data(event);
3384         entry->ip                       = ip;
3385         entry->fmt                      = fmt;
3386
3387         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3388         if (!call_filter_check_discard(call, entry, buffer, event)) {
3389                 __buffer_unlock_commit(buffer, event);
3390                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3391         }
3392
3393 out:
3394         ring_buffer_nest_end(buffer);
3395 out_put:
3396         put_trace_buf();
3397
3398 out_nobuffer:
3399         preempt_enable_notrace();
3400         unpause_graph_tracing();
3401
3402         return len;
3403 }
3404 EXPORT_SYMBOL_GPL(trace_vbprintk);
3405
3406 __printf(3, 0)
3407 static int
3408 __trace_array_vprintk(struct trace_buffer *buffer,
3409                       unsigned long ip, const char *fmt, va_list args)
3410 {
3411         struct trace_event_call *call = &event_print;
3412         struct ring_buffer_event *event;
3413         int len = 0, size;
3414         struct print_entry *entry;
3415         unsigned int trace_ctx;
3416         char *tbuffer;
3417
3418         if (tracing_disabled || tracing_selftest_running)
3419                 return 0;
3420
3421         /* Don't pollute graph traces with trace_vprintk internals */
3422         pause_graph_tracing();
3423
3424         trace_ctx = tracing_gen_ctx();
3425         preempt_disable_notrace();
3426
3427
3428         tbuffer = get_trace_buf();
3429         if (!tbuffer) {
3430                 len = 0;
3431                 goto out_nobuffer;
3432         }
3433
3434         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3435
3436         size = sizeof(*entry) + len + 1;
3437         ring_buffer_nest_start(buffer);
3438         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3439                                             trace_ctx);
3440         if (!event)
3441                 goto out;
3442         entry = ring_buffer_event_data(event);
3443         entry->ip = ip;
3444
3445         memcpy(&entry->buf, tbuffer, len + 1);
3446         if (!call_filter_check_discard(call, entry, buffer, event)) {
3447                 __buffer_unlock_commit(buffer, event);
3448                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3449         }
3450
3451 out:
3452         ring_buffer_nest_end(buffer);
3453         put_trace_buf();
3454
3455 out_nobuffer:
3456         preempt_enable_notrace();
3457         unpause_graph_tracing();
3458
3459         return len;
3460 }
3461
3462 __printf(3, 0)
3463 int trace_array_vprintk(struct trace_array *tr,
3464                         unsigned long ip, const char *fmt, va_list args)
3465 {
3466         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3467 }
3468
3469 /**
3470  * trace_array_printk - Print a message to a specific instance
3471  * @tr: The instance trace_array descriptor
3472  * @ip: The instruction pointer that this is called from.
3473  * @fmt: The format to print (printf format)
3474  *
3475  * If a subsystem sets up its own instance, they have the right to
3476  * printk strings into their tracing instance buffer using this
3477  * function. Note, this function will not write into the top level
3478  * buffer (use trace_printk() for that), as writing into the top level
3479  * buffer should only have events that can be individually disabled.
3480  * trace_printk() is only used for debugging a kernel, and should not
3481  * be ever incorporated in normal use.
3482  *
3483  * trace_array_printk() can be used, as it will not add noise to the
3484  * top level tracing buffer.
3485  *
3486  * Note, trace_array_init_printk() must be called on @tr before this
3487  * can be used.
3488  */
3489 __printf(3, 0)
3490 int trace_array_printk(struct trace_array *tr,
3491                        unsigned long ip, const char *fmt, ...)
3492 {
3493         int ret;
3494         va_list ap;
3495
3496         if (!tr)
3497                 return -ENOENT;
3498
3499         /* This is only allowed for created instances */
3500         if (tr == &global_trace)
3501                 return 0;
3502
3503         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3504                 return 0;
3505
3506         va_start(ap, fmt);
3507         ret = trace_array_vprintk(tr, ip, fmt, ap);
3508         va_end(ap);
3509         return ret;
3510 }
3511 EXPORT_SYMBOL_GPL(trace_array_printk);
3512
3513 /**
3514  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3515  * @tr: The trace array to initialize the buffers for
3516  *
3517  * As trace_array_printk() only writes into instances, they are OK to
3518  * have in the kernel (unlike trace_printk()). This needs to be called
3519  * before trace_array_printk() can be used on a trace_array.
3520  */
3521 int trace_array_init_printk(struct trace_array *tr)
3522 {
3523         if (!tr)
3524                 return -ENOENT;
3525
3526         /* This is only allowed for created instances */
3527         if (tr == &global_trace)
3528                 return -EINVAL;
3529
3530         return alloc_percpu_trace_buffer();
3531 }
3532 EXPORT_SYMBOL_GPL(trace_array_init_printk);
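
/*
 * A minimal usage sketch for trace_array_init_printk() and
 * trace_array_printk() above, assuming a hypothetical instance name
 * "my_subsys" (error handling and the matching trace_array_put() are
 * elided; trace_array_get_by_name() is the usual way a subsystem
 * obtains its instance):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */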
3533
3534 __printf(3, 4)
3535 int trace_array_printk_buf(struct trace_buffer *buffer,
3536                            unsigned long ip, const char *fmt, ...)
3537 {
3538         int ret;
3539         va_list ap;
3540
3541         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3542                 return 0;
3543
3544         va_start(ap, fmt);
3545         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3546         va_end(ap);
3547         return ret;
3548 }
3549
3550 __printf(2, 0)
3551 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3552 {
3553         return trace_array_vprintk(&global_trace, ip, fmt, args);
3554 }
3555 EXPORT_SYMBOL_GPL(trace_vprintk);
3556
3557 static void trace_iterator_increment(struct trace_iterator *iter)
3558 {
3559         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3560
3561         iter->idx++;
3562         if (buf_iter)
3563                 ring_buffer_iter_advance(buf_iter);
3564 }
3565
3566 static struct trace_entry *
3567 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3568                 unsigned long *lost_events)
3569 {
3570         struct ring_buffer_event *event;
3571         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3572
3573         if (buf_iter) {
3574                 event = ring_buffer_iter_peek(buf_iter, ts);
3575                 if (lost_events)
3576                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3577                                 (unsigned long)-1 : 0;
3578         } else {
3579                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3580                                          lost_events);
3581         }
3582
3583         if (event) {
3584                 iter->ent_size = ring_buffer_event_length(event);
3585                 return ring_buffer_event_data(event);
3586         }
3587         iter->ent_size = 0;
3588         return NULL;
3589 }
3590
3591 static struct trace_entry *
3592 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3593                   unsigned long *missing_events, u64 *ent_ts)
3594 {
3595         struct trace_buffer *buffer = iter->array_buffer->buffer;
3596         struct trace_entry *ent, *next = NULL;
3597         unsigned long lost_events = 0, next_lost = 0;
3598         int cpu_file = iter->cpu_file;
3599         u64 next_ts = 0, ts;
3600         int next_cpu = -1;
3601         int next_size = 0;
3602         int cpu;
3603
3604         /*
3605          * If we are in a per_cpu trace file, don't bother iterating over
3606          * all CPUs; peek at that one directly.
3607          */
3608         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3609                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3610                         return NULL;
3611                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3612                 if (ent_cpu)
3613                         *ent_cpu = cpu_file;
3614
3615                 return ent;
3616         }
3617
3618         for_each_tracing_cpu(cpu) {
3619
3620                 if (ring_buffer_empty_cpu(buffer, cpu))
3621                         continue;
3622
3623                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3624
3625                 /*
3626                  * Pick the entry with the smallest timestamp:
3627                  */
3628                 if (ent && (!next || ts < next_ts)) {
3629                         next = ent;
3630                         next_cpu = cpu;
3631                         next_ts = ts;
3632                         next_lost = lost_events;
3633                         next_size = iter->ent_size;
3634                 }
3635         }
3636
3637         iter->ent_size = next_size;
3638
3639         if (ent_cpu)
3640                 *ent_cpu = next_cpu;
3641
3642         if (ent_ts)
3643                 *ent_ts = next_ts;
3644
3645         if (missing_events)
3646                 *missing_events = next_lost;
3647
3648         return next;
3649 }
3650
3651 #define STATIC_FMT_BUF_SIZE     128
3652 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3653
3654 static char *trace_iter_expand_format(struct trace_iterator *iter)
3655 {
3656         char *tmp;
3657
3658         /*
3659          * iter->tr is NULL when used with tp_printk, in which case this
3660          * can be called from a context where krealloc() is not safe.
3661          */
3662         if (!iter->tr || iter->fmt == static_fmt_buf)
3663                 return NULL;
3664
3665         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3666                        GFP_KERNEL);
3667         if (tmp) {
3668                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3669                 iter->fmt = tmp;
3670         }
3671
3672         return tmp;
3673 }
3674
3675 /* Returns true if the string is safe to dereference from an event */
3676 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3677 {
3678         unsigned long addr = (unsigned long)str;
3679         struct trace_event *trace_event;
3680         struct trace_event_call *event;
3681
3682         /* OK if part of the event data */
3683         if ((addr >= (unsigned long)iter->ent) &&
3684             (addr < (unsigned long)iter->ent + iter->ent_size))
3685                 return true;
3686
3687         /* OK if part of the temp seq buffer */
3688         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3689             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3690                 return true;
3691
3692         /* Core rodata can not be freed */
3693         if (is_kernel_rodata(addr))
3694                 return true;
3695
3696         if (trace_is_tracepoint_string(str))
3697                 return true;
3698
3699         /*
3700          * Now this could be a module event, referencing core module
3701          * data, which is OK.
3702          */
3703         if (!iter->ent)
3704                 return false;
3705
3706         trace_event = ftrace_find_event(iter->ent->type);
3707         if (!trace_event)
3708                 return false;
3709
3710         event = container_of(trace_event, struct trace_event_call, event);
3711         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3712                 return false;
3713
3714         /* Would rather have rodata, but this will suffice */
3715         if (within_module_core(addr, event->module))
3716                 return true;
3717
3718         return false;
3719 }
3720
3721 static const char *show_buffer(struct trace_seq *s)
3722 {
3723         struct seq_buf *seq = &s->seq;
3724
3725         seq_buf_terminate(seq);
3726
3727         return seq->buffer;
3728 }
3729
3730 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3731
3732 static int test_can_verify_check(const char *fmt, ...)
3733 {
3734         char buf[16];
3735         va_list ap;
3736         int ret;
3737
3738         /*
3739          * The verifier depends on vsnprintf() modifying the va_list
3740          * passed to it, i.e. on the va_list being passed by reference.
3741          * Some architectures (like x86_32) pass it by value, which means
3742          * that vsnprintf() does not modify the caller's va_list, and the
3743          * verifier would then need to understand every conversion that
3744          * vsnprintf can perform. If the va_list is passed by value, the
3745          * verifier is disabled.
3746          */
3747         va_start(ap, fmt);
3748         vsnprintf(buf, 16, "%d", ap);
3749         ret = va_arg(ap, int);
3750         va_end(ap);
3751
3752         return ret;
3753 }
3754
3755 static void test_can_verify(void)
3756 {
3757         if (!test_can_verify_check("%d %d", 0, 1)) {
3758                 pr_info("trace event string verifier disabled\n");
3759                 static_branch_inc(&trace_no_verify);
3760         }
3761 }
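
/*
 * A worked example of the check above: test_can_verify_check("%d %d", 0, 1)
 * formats just "%d", consuming the first argument (0). If the
 * architecture passes va_list by reference, the caller's ap has
 * advanced past the 0, so va_arg() returns 1 and the verifier stays
 * enabled. If va_list is passed by value, ap still points at the 0,
 * va_arg() returns 0, and test_can_verify() disables the verifier.
 */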
3762
3763 /**
3764  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3765  * @iter: The iterator that holds the seq buffer and the event being printed
3766  * @fmt: The format used to print the event
3767  * @ap: The va_list holding the data to print from @fmt.
3768  *
3769  * This writes the data into the @iter->seq buffer using the data from
3770  * @fmt and @ap. If the format has a %s, then the source of the string
3771  * is examined to make sure it is safe to print, otherwise it will
3772  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3773  * pointer.
3774  */
3775 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3776                          va_list ap)
3777 {
3778         const char *p = fmt;
3779         const char *str;
3780         int i, j;
3781
3782         if (WARN_ON_ONCE(!fmt))
3783                 return;
3784
3785         if (static_branch_unlikely(&trace_no_verify))
3786                 goto print;
3787
3788         /* Don't bother checking when doing a ftrace_dump() */
3789         if (iter->fmt == static_fmt_buf)
3790                 goto print;
3791
3792         while (*p) {
3793                 bool star = false;
3794                 int len = 0;
3795
3796                 j = 0;
3797
3798                 /* We only care about %s and variants */
3799                 for (i = 0; p[i]; i++) {
3800                         if (i + 1 >= iter->fmt_size) {
3801                                 /*
3802                                  * If we can't expand the copy buffer,
3803                                  * just print it.
3804                                  */
3805                                 if (!trace_iter_expand_format(iter))
3806                                         goto print;
3807                         }
3808
3809                         if (p[i] == '\\' && p[i+1]) {
3810                                 i++;
3811                                 continue;
3812                         }
3813                         if (p[i] == '%') {
3814                                 /* Need to test cases like %08.*s */
3815                                 for (j = 1; p[i+j]; j++) {
3816                                         if (isdigit(p[i+j]) ||
3817                                             p[i+j] == '.')
3818                                                 continue;
3819                                         if (p[i+j] == '*') {
3820                                                 star = true;
3821                                                 continue;
3822                                         }
3823                                         break;
3824                                 }
3825                                 if (p[i+j] == 's')
3826                                         break;
3827                                 star = false;
3828                         }
3829                         j = 0;
3830                 }
3831                 /* If no %s found then just print normally */
3832                 if (!p[i])
3833                         break;
3834
3835                 /* Copy up to the %s, and print that */
3836                 strncpy(iter->fmt, p, i);
3837                 iter->fmt[i] = '\0';
3838                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3839
3840                 /*
3841                  * If iter->seq is full, the above call no longer guarantees
3842                  * that ap is in sync with fmt processing, and further calls
3843                  * to va_arg() can return wrong positional arguments.
3844                  *
3845                  * Ensure that ap is no longer used in this case.
3846                  */
3847                 if (iter->seq.full) {
3848                         p = "";
3849                         break;
3850                 }
3851
3852                 if (star)
3853                         len = va_arg(ap, int);
3854
3855                 /* The ap now points to the string data of the %s */
3856                 str = va_arg(ap, const char *);
3857
3858                 /*
3859                  * If you hit this warning, it is likely that the
3860                  * trace event in question used %s on a string that
3861                  * was saved at the time of the event, but may not be
3862                  * around when the trace is read. Use __string(),
3863                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3864                  * instead. See samples/trace_events/trace-events-sample.h
3865                  * for reference.
3866                  */
3867                 if (WARN_ONCE(!trace_safe_str(iter, str),
3868                               "fmt: '%s' current_buffer: '%s'",
3869                               fmt, show_buffer(&iter->seq))) {
3870                         int ret;
3871
3872                         /* Try to safely read the string */
3873                         if (star) {
3874                                 if (len + 1 > iter->fmt_size)
3875                                         len = iter->fmt_size - 1;
3876                                 if (len < 0)
3877                                         len = 0;
3878                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3879                                 iter->fmt[len] = 0;
3880                                 star = false;
3881                         } else {
3882                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3883                                                                   iter->fmt_size);
3884                         }
3885                         if (ret < 0)
3886                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3887                         else
3888                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3889                                                  str, iter->fmt);
3890                         str = "[UNSAFE-MEMORY]";
3891                         strcpy(iter->fmt, "%s");
3892                 } else {
3893                         strncpy(iter->fmt, p + i, j + 1);
3894                         iter->fmt[j+1] = '\0';
3895                 }
3896                 if (star)
3897                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3898                 else
3899                         trace_seq_printf(&iter->seq, iter->fmt, str);
3900
3901                 p += i + j + 1;
3902         }
3903  print:
3904         if (*p)
3905                 trace_seq_vprintf(&iter->seq, p, ap);
3906 }
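
/*
 * An illustrative example of the fallback above (the address is made
 * up): when a %s argument fails the trace_safe_str() check, the
 * printed line ends up containing something like
 *
 *	(0xffff888123456789:whatever could still be read)[UNSAFE-MEMORY]
 *
 * rather than blindly dereferencing a pointer that may already have
 * been freed.
 */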
3907
3908 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3909 {
3910         const char *p, *new_fmt;
3911         char *q;
3912
3913         if (WARN_ON_ONCE(!fmt))
3914                 return fmt;
3915
3916         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3917                 return fmt;
3918
3919         p = fmt;
3920         new_fmt = q = iter->fmt;
3921         while (*p) {
3922                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3923                         if (!trace_iter_expand_format(iter))
3924                                 return fmt;
3925
3926                         q += iter->fmt - new_fmt;
3927                         new_fmt = iter->fmt;
3928                 }
3929
3930                 *q++ = *p++;
3931
3932                 /* Replace %p with %px */
3933                 if (p[-1] == '%') {
3934                         if (p[0] == '%') {
3935                                 *q++ = *p++;
3936                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3937                                 *q++ = *p++;
3938                                 *q++ = 'x';
3939                         }
3940                 }
3941         }
3942         *q = '\0';
3943
3944         return new_fmt;
3945 }
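
/*
 * A small example of the rewrite done above (illustrative only): with
 * TRACE_ITER_HASH_PTR cleared, a format such as
 *
 *	"dev=%p irq=%d 100%%"
 *
 * is copied into iter->fmt as
 *
 *	"dev=%px irq=%d 100%%"
 *
 * while "%%" and non-%p conversions pass through unchanged.
 */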
3946
3947 #define STATIC_TEMP_BUF_SIZE    128
3948 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3949
3950 /* Find the next real entry, without updating the iterator itself */
3951 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3952                                           int *ent_cpu, u64 *ent_ts)
3953 {
3954         /* __find_next_entry will reset ent_size */
3955         int ent_size = iter->ent_size;
3956         struct trace_entry *entry;
3957
3958         /*
3959          * If called from ftrace_dump(), then the iter->temp buffer
3960          * will be the static_temp_buf and not one created by kmalloc().
3961          * If the entry size is greater than the buffer, we cannot
3962          * save it; just return NULL in that case. This is only
3963          * used to add markers when two consecutive events' timestamps
3964          * have a large delta. See trace_print_lat_context().
3965          */
3966         if (iter->temp == static_temp_buf &&
3967             STATIC_TEMP_BUF_SIZE < ent_size)
3968                 return NULL;
3969
3970         /*
3971          * __find_next_entry() may call peek_next_entry(), which may
3972          * call ring_buffer_peek(), which can make the contents of
3973          * iter->ent undefined. Copy iter->ent now.
3974          */
3975         if (iter->ent && iter->ent != iter->temp) {
3976                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3977                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3978                         void *temp;
3979                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3980                         if (!temp)
3981                                 return NULL;
3982                         kfree(iter->temp);
3983                         iter->temp = temp;
3984                         iter->temp_size = iter->ent_size;
3985                 }
3986                 memcpy(iter->temp, iter->ent, iter->ent_size);
3987                 iter->ent = iter->temp;
3988         }
3989         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3990         /* Put back the original ent_size */
3991         iter->ent_size = ent_size;
3992
3993         return entry;
3994 }
3995
3996 /* Find the next real entry, and increment the iterator to the next entry */
3997 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3998 {
3999         iter->ent = __find_next_entry(iter, &iter->cpu,
4000                                       &iter->lost_events, &iter->ts);
4001
4002         if (iter->ent)
4003                 trace_iterator_increment(iter);
4004
4005         return iter->ent ? iter : NULL;
4006 }
4007
4008 static void trace_consume(struct trace_iterator *iter)
4009 {
4010         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4011                             &iter->lost_events);
4012 }
4013
4014 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4015 {
4016         struct trace_iterator *iter = m->private;
4017         int i = (int)*pos;
4018         void *ent;
4019
4020         WARN_ON_ONCE(iter->leftover);
4021
4022         (*pos)++;
4023
4024         /* can't go backwards */
4025         if (iter->idx > i)
4026                 return NULL;
4027
4028         if (iter->idx < 0)
4029                 ent = trace_find_next_entry_inc(iter);
4030         else
4031                 ent = iter;
4032
4033         while (ent && iter->idx < i)
4034                 ent = trace_find_next_entry_inc(iter);
4035
4036         iter->pos = *pos;
4037
4038         return ent;
4039 }
4040
4041 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4042 {
4043         struct ring_buffer_iter *buf_iter;
4044         unsigned long entries = 0;
4045         u64 ts;
4046
4047         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4048
4049         buf_iter = trace_buffer_iter(iter, cpu);
4050         if (!buf_iter)
4051                 return;
4052
4053         ring_buffer_iter_reset(buf_iter);
4054
4055         /*
4056          * With the max latency tracers, it is possible that a reset
4057          * never took place on a CPU. This is evidenced by the
4058          * timestamp being before the start of the buffer.
4059          */
4060         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4061                 if (ts >= iter->array_buffer->time_start)
4062                         break;
4063                 entries++;
4064                 ring_buffer_iter_advance(buf_iter);
4065         }
4066
4067         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4068 }
4069
4070 /*
4071  * The current tracer is copied to avoid taking a global lock
4072  * all around.
4073  */
4074 static void *s_start(struct seq_file *m, loff_t *pos)
4075 {
4076         struct trace_iterator *iter = m->private;
4077         struct trace_array *tr = iter->tr;
4078         int cpu_file = iter->cpu_file;
4079         void *p = NULL;
4080         loff_t l = 0;
4081         int cpu;
4082
4083         /*
4084          * Copy the tracer to avoid using a global lock all around.
4085          * iter->trace is a copy of current_trace; the name pointer
4086          * may be compared instead of using strcmp(), as iter->trace->name
4087          * will point to the same string as current_trace->name.
4088          */
4089         mutex_lock(&trace_types_lock);
4090         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4091                 *iter->trace = *tr->current_trace;
4092         mutex_unlock(&trace_types_lock);
4093
4094 #ifdef CONFIG_TRACER_MAX_TRACE
4095         if (iter->snapshot && iter->trace->use_max_tr)
4096                 return ERR_PTR(-EBUSY);
4097 #endif
4098
4099         if (*pos != iter->pos) {
4100                 iter->ent = NULL;
4101                 iter->cpu = 0;
4102                 iter->idx = -1;
4103
4104                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4105                         for_each_tracing_cpu(cpu)
4106                                 tracing_iter_reset(iter, cpu);
4107                 } else
4108                         tracing_iter_reset(iter, cpu_file);
4109
4110                 iter->leftover = 0;
4111                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4112                         ;
4113
4114         } else {
4115                 /*
4116                  * If we overflowed the seq_file before, then we want
4117                  * to just reuse the trace_seq buffer again.
4118                  */
4119                 if (iter->leftover)
4120                         p = iter;
4121                 else {
4122                         l = *pos - 1;
4123                         p = s_next(m, p, &l);
4124                 }
4125         }
4126
4127         trace_event_read_lock();
4128         trace_access_lock(cpu_file);
4129         return p;
4130 }
4131
4132 static void s_stop(struct seq_file *m, void *p)
4133 {
4134         struct trace_iterator *iter = m->private;
4135
4136 #ifdef CONFIG_TRACER_MAX_TRACE
4137         if (iter->snapshot && iter->trace->use_max_tr)
4138                 return;
4139 #endif
4140
4141         trace_access_unlock(iter->cpu_file);
4142         trace_event_read_unlock();
4143 }
4144
4145 static void
4146 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4147                       unsigned long *entries, int cpu)
4148 {
4149         unsigned long count;
4150
4151         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4152         /*
4153          * If this buffer has skipped entries, then we hold all
4154          * entries for the trace and we need to ignore the
4155          * ones before the buffer's time_start.
4156          */
4157         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4158                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4159                 /* total is the same as the entries */
4160                 *total = count;
4161         } else
4162                 *total = count +
4163                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4164         *entries = count;
4165 }
4166
4167 static void
4168 get_total_entries(struct array_buffer *buf,
4169                   unsigned long *total, unsigned long *entries)
4170 {
4171         unsigned long t, e;
4172         int cpu;
4173
4174         *total = 0;
4175         *entries = 0;
4176
4177         for_each_tracing_cpu(cpu) {
4178                 get_total_entries_cpu(buf, &t, &e, cpu);
4179                 *total += t;
4180                 *entries += e;
4181         }
4182 }
4183
4184 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4185 {
4186         unsigned long total, entries;
4187
4188         if (!tr)
4189                 tr = &global_trace;
4190
4191         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4192
4193         return entries;
4194 }
4195
4196 unsigned long trace_total_entries(struct trace_array *tr)
4197 {
4198         unsigned long total, entries;
4199
4200         if (!tr)
4201                 tr = &global_trace;
4202
4203         get_total_entries(&tr->array_buffer, &total, &entries);
4204
4205         return entries;
4206 }
4207
4208 static void print_lat_help_header(struct seq_file *m)
4209 {
4210         seq_puts(m, "#                    _------=> CPU#            \n"
4211                     "#                   / _-----=> irqs-off/BH-disabled\n"
4212                     "#                  | / _----=> need-resched    \n"
4213                     "#                  || / _---=> hardirq/softirq \n"
4214                     "#                  ||| / _--=> preempt-depth   \n"
4215                     "#                  |||| / _-=> migrate-disable \n"
4216                     "#                  ||||| /     delay           \n"
4217                     "#  cmd     pid     |||||| time  |   caller     \n"
4218                     "#     \\   /        ||||||  \\    |    /       \n");
4219 }
4220
4221 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4222 {
4223         unsigned long total;
4224         unsigned long entries;
4225
4226         get_total_entries(buf, &total, &entries);
4227         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4228                    entries, total, num_online_cpus());
4229         seq_puts(m, "#\n");
4230 }
4231
4232 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4233                                    unsigned int flags)
4234 {
4235         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4236
4237         print_event_info(buf, m);
4238
4239         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4240         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4241 }
4242
4243 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4244                                        unsigned int flags)
4245 {
4246         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4247         const char *space = "            ";
4248         int prec = tgid ? 12 : 2;
4249
4250         print_event_info(buf, m);
4251
4252         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4253         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4254         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4255         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4256         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4257         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4258         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4259         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4260 }
4261
4262 void
4263 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4264 {
4265         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4266         struct array_buffer *buf = iter->array_buffer;
4267         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4268         struct tracer *type = iter->trace;
4269         unsigned long entries;
4270         unsigned long total;
4271         const char *name = "preemption";
4272
4273         name = type->name;
4274
4275         get_total_entries(buf, &total, &entries);
4276
4277         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4278                    name, UTS_RELEASE);
4279         seq_puts(m, "# -----------------------------------"
4280                  "---------------------------------\n");
4281         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4282                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4283                    nsecs_to_usecs(data->saved_latency),
4284                    entries,
4285                    total,
4286                    buf->cpu,
4287 #if defined(CONFIG_PREEMPT_NONE)
4288                    "server",
4289 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4290                    "desktop",
4291 #elif defined(CONFIG_PREEMPT)
4292                    "preempt",
4293 #elif defined(CONFIG_PREEMPT_RT)
4294                    "preempt_rt",
4295 #else
4296                    "unknown",
4297 #endif
4298                    /* These are reserved for later use */
4299                    0, 0, 0, 0);
4300 #ifdef CONFIG_SMP
4301         seq_printf(m, " #P:%d)\n", num_online_cpus());
4302 #else
4303         seq_puts(m, ")\n");
4304 #endif
4305         seq_puts(m, "#    -----------------\n");
4306         seq_printf(m, "#    | task: %.16s-%d "
4307                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4308                    data->comm, data->pid,
4309                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4310                    data->policy, data->rt_priority);
4311         seq_puts(m, "#    -----------------\n");
4312
4313         if (data->critical_start) {
4314                 seq_puts(m, "#  => started at: ");
4315                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4316                 trace_print_seq(m, &iter->seq);
4317                 seq_puts(m, "\n#  => ended at:   ");
4318                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4319                 trace_print_seq(m, &iter->seq);
4320                 seq_puts(m, "\n#\n");
4321         }
4322
4323         seq_puts(m, "#\n");
4324 }
4325
4326 static void test_cpu_buff_start(struct trace_iterator *iter)
4327 {
4328         struct trace_seq *s = &iter->seq;
4329         struct trace_array *tr = iter->tr;
4330
4331         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4332                 return;
4333
4334         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4335                 return;
4336
4337         if (cpumask_available(iter->started) &&
4338             cpumask_test_cpu(iter->cpu, iter->started))
4339                 return;
4340
4341         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4342                 return;
4343
4344         if (cpumask_available(iter->started))
4345                 cpumask_set_cpu(iter->cpu, iter->started);
4346
4347         /* Don't print started cpu buffer for the first entry of the trace */
4348         if (iter->idx > 1)
4349                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4350                                 iter->cpu);
4351 }
4352
4353 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4354 {
4355         struct trace_array *tr = iter->tr;
4356         struct trace_seq *s = &iter->seq;
4357         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4358         struct trace_entry *entry;
4359         struct trace_event *event;
4360
4361         entry = iter->ent;
4362
4363         test_cpu_buff_start(iter);
4364
4365         event = ftrace_find_event(entry->type);
4366
4367         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4368                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4369                         trace_print_lat_context(iter);
4370                 else
4371                         trace_print_context(iter);
4372         }
4373
4374         if (trace_seq_has_overflowed(s))
4375                 return TRACE_TYPE_PARTIAL_LINE;
4376
4377         if (event)
4378                 return event->funcs->trace(iter, sym_flags, event);
4379
4380         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4381
4382         return trace_handle_return(s);
4383 }
4384
4385 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4386 {
4387         struct trace_array *tr = iter->tr;
4388         struct trace_seq *s = &iter->seq;
4389         struct trace_entry *entry;
4390         struct trace_event *event;
4391
4392         entry = iter->ent;
4393
4394         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4395                 trace_seq_printf(s, "%d %d %llu ",
4396                                  entry->pid, iter->cpu, iter->ts);
4397
4398         if (trace_seq_has_overflowed(s))
4399                 return TRACE_TYPE_PARTIAL_LINE;
4400
4401         event = ftrace_find_event(entry->type);
4402         if (event)
4403                 return event->funcs->raw(iter, 0, event);
4404
4405         trace_seq_printf(s, "%d ?\n", entry->type);
4406
4407         return trace_handle_return(s);
4408 }
4409
4410 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4411 {
4412         struct trace_array *tr = iter->tr;
4413         struct trace_seq *s = &iter->seq;
4414         unsigned char newline = '\n';
4415         struct trace_entry *entry;
4416         struct trace_event *event;
4417
4418         entry = iter->ent;
4419
4420         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4421                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4422                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4423                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4424                 if (trace_seq_has_overflowed(s))
4425                         return TRACE_TYPE_PARTIAL_LINE;
4426         }
4427
4428         event = ftrace_find_event(entry->type);
4429         if (event) {
4430                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4431                 if (ret != TRACE_TYPE_HANDLED)
4432                         return ret;
4433         }
4434
4435         SEQ_PUT_FIELD(s, newline);
4436
4437         return trace_handle_return(s);
4438 }
4439
4440 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4441 {
4442         struct trace_array *tr = iter->tr;
4443         struct trace_seq *s = &iter->seq;
4444         struct trace_entry *entry;
4445         struct trace_event *event;
4446
4447         entry = iter->ent;
4448
4449         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4450                 SEQ_PUT_FIELD(s, entry->pid);
4451                 SEQ_PUT_FIELD(s, iter->cpu);
4452                 SEQ_PUT_FIELD(s, iter->ts);
4453                 if (trace_seq_has_overflowed(s))
4454                         return TRACE_TYPE_PARTIAL_LINE;
4455         }
4456
4457         event = ftrace_find_event(entry->type);
4458         return event ? event->funcs->binary(iter, 0, event) :
4459                 TRACE_TYPE_HANDLED;
4460 }
4461
4462 int trace_empty(struct trace_iterator *iter)
4463 {
4464         struct ring_buffer_iter *buf_iter;
4465         int cpu;
4466
4467         /* If we are looking at one CPU buffer, only check that one */
4468         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4469                 cpu = iter->cpu_file;
4470                 buf_iter = trace_buffer_iter(iter, cpu);
4471                 if (buf_iter) {
4472                         if (!ring_buffer_iter_empty(buf_iter))
4473                                 return 0;
4474                 } else {
4475                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476                                 return 0;
4477                 }
4478                 return 1;
4479         }
4480
4481         for_each_tracing_cpu(cpu) {
4482                 buf_iter = trace_buffer_iter(iter, cpu);
4483                 if (buf_iter) {
4484                         if (!ring_buffer_iter_empty(buf_iter))
4485                                 return 0;
4486                 } else {
4487                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4488                                 return 0;
4489                 }
4490         }
4491
4492         return 1;
4493 }
4494
4495 /*  Called with trace_event_read_lock() held. */
4496 enum print_line_t print_trace_line(struct trace_iterator *iter)
4497 {
4498         struct trace_array *tr = iter->tr;
4499         unsigned long trace_flags = tr->trace_flags;
4500         enum print_line_t ret;
4501
4502         if (iter->lost_events) {
4503                 if (iter->lost_events == (unsigned long)-1)
4504                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4505                                          iter->cpu);
4506                 else
4507                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4508                                          iter->cpu, iter->lost_events);
4509                 if (trace_seq_has_overflowed(&iter->seq))
4510                         return TRACE_TYPE_PARTIAL_LINE;
4511         }
4512
4513         if (iter->trace && iter->trace->print_line) {
4514                 ret = iter->trace->print_line(iter);
4515                 if (ret != TRACE_TYPE_UNHANDLED)
4516                         return ret;
4517         }
4518
4519         if (iter->ent->type == TRACE_BPUTS &&
4520                         trace_flags & TRACE_ITER_PRINTK &&
4521                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4522                 return trace_print_bputs_msg_only(iter);
4523
4524         if (iter->ent->type == TRACE_BPRINT &&
4525                         trace_flags & TRACE_ITER_PRINTK &&
4526                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4527                 return trace_print_bprintk_msg_only(iter);
4528
4529         if (iter->ent->type == TRACE_PRINT &&
4530                         trace_flags & TRACE_ITER_PRINTK &&
4531                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532                 return trace_print_printk_msg_only(iter);
4533
4534         if (trace_flags & TRACE_ITER_BIN)
4535                 return print_bin_fmt(iter);
4536
4537         if (trace_flags & TRACE_ITER_HEX)
4538                 return print_hex_fmt(iter);
4539
4540         if (trace_flags & TRACE_ITER_RAW)
4541                 return print_raw_fmt(iter);
4542
4543         return print_trace_fmt(iter);
4544 }
4545
4546 void trace_latency_header(struct seq_file *m)
4547 {
4548         struct trace_iterator *iter = m->private;
4549         struct trace_array *tr = iter->tr;
4550
4551         /* print nothing if the buffers are empty */
4552         if (trace_empty(iter))
4553                 return;
4554
4555         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4556                 print_trace_header(m, iter);
4557
4558         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4559                 print_lat_help_header(m);
4560 }
4561
4562 void trace_default_header(struct seq_file *m)
4563 {
4564         struct trace_iterator *iter = m->private;
4565         struct trace_array *tr = iter->tr;
4566         unsigned long trace_flags = tr->trace_flags;
4567
4568         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4569                 return;
4570
4571         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4572                 /* print nothing if the buffers are empty */
4573                 if (trace_empty(iter))
4574                         return;
4575                 print_trace_header(m, iter);
4576                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4577                         print_lat_help_header(m);
4578         } else {
4579                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4580                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4581                                 print_func_help_header_irq(iter->array_buffer,
4582                                                            m, trace_flags);
4583                         else
4584                                 print_func_help_header(iter->array_buffer, m,
4585                                                        trace_flags);
4586                 }
4587         }
4588 }
4589
4590 static void test_ftrace_alive(struct seq_file *m)
4591 {
4592         if (!ftrace_is_dead())
4593                 return;
4594         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4595                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4596 }
4597
4598 #ifdef CONFIG_TRACER_MAX_TRACE
4599 static void show_snapshot_main_help(struct seq_file *m)
4600 {
4601         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4602                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4603                     "#                      Takes a snapshot of the main buffer.\n"
4604                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4605                     "#                      (Doesn't have to be '2'; works with any number that\n"
4606                     "#                       is not a '0' or '1')\n");
4607 }
4608
4609 static void show_snapshot_percpu_help(struct seq_file *m)
4610 {
4611         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4612 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4613         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4614                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4615 #else
4616         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4617                     "#                     Must use main snapshot file to allocate.\n");
4618 #endif
4619         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4620                     "#                      (Doesn't have to be '2'; works with any number that\n"
4621                     "#                       is not a '0' or '1')\n");
4622 }
4623
4624 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4625 {
4626         if (iter->tr->allocated_snapshot)
4627                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4628         else
4629                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4630
4631         seq_puts(m, "# Snapshot commands:\n");
4632         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4633                 show_snapshot_main_help(m);
4634         else
4635                 show_snapshot_percpu_help(m);
4636 }
4637 #else
4638 /* Should never be called */
4639 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4640 #endif
4641
4642 static int s_show(struct seq_file *m, void *v)
4643 {
4644         struct trace_iterator *iter = v;
4645         int ret;
4646
4647         if (iter->ent == NULL) {
4648                 if (iter->tr) {
4649                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4650                         seq_puts(m, "#\n");
4651                         test_ftrace_alive(m);
4652                 }
4653                 if (iter->snapshot && trace_empty(iter))
4654                         print_snapshot_help(m, iter);
4655                 else if (iter->trace && iter->trace->print_header)
4656                         iter->trace->print_header(m);
4657                 else
4658                         trace_default_header(m);
4659
4660         } else if (iter->leftover) {
4661                 /*
4662                  * If we filled the seq_file buffer earlier, we
4663                  * want to just show it now.
4664                  */
4665                 ret = trace_print_seq(m, &iter->seq);
4666
4667                 /* ret should this time be zero, but you never know */
4668                 iter->leftover = ret;
4669
4670         } else {
4671                 print_trace_line(iter);
4672                 ret = trace_print_seq(m, &iter->seq);
4673                 /*
4674                  * If we overflow the seq_file buffer, then it will
4675                  * ask us for this data again at start up.
4676                  * Use that instead.
4677                  *  ret is 0 if seq_file write succeeded.
4678                  *        -1 otherwise.
4679                  */
4680                 iter->leftover = ret;
4681         }
4682
4683         return 0;
4684 }
4685
4686 /*
4687  * Should be used after trace_array_get(); trace_types_lock
4688  * ensures that i_cdev was already initialized.
4689  */
4690 static inline int tracing_get_cpu(struct inode *inode)
4691 {
4692         if (inode->i_cdev) /* See trace_create_cpu_file() */
4693                 return (long)inode->i_cdev - 1;
4694         return RING_BUFFER_ALL_CPUS;
4695 }
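
/*
 * Illustrative note: the encode side (trace_create_cpu_file(), not
 * shown here) stores cpu + 1 in i_cdev so that a NULL i_cdev can mean
 * "all CPUs"; e.g. cpu 0 is stored as (void *)1 and decoded above as
 * 1 - 1 = 0.
 */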
4696
4697 static const struct seq_operations tracer_seq_ops = {
4698         .start          = s_start,
4699         .next           = s_next,
4700         .stop           = s_stop,
4701         .show           = s_show,
4702 };
4703
4704 static struct trace_iterator *
4705 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4706 {
4707         struct trace_array *tr = inode->i_private;
4708         struct trace_iterator *iter;
4709         int cpu;
4710
4711         if (tracing_disabled)
4712                 return ERR_PTR(-ENODEV);
4713
4714         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4715         if (!iter)
4716                 return ERR_PTR(-ENOMEM);
4717
4718         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4719                                     GFP_KERNEL);
4720         if (!iter->buffer_iter)
4721                 goto release;
4722
4723         /*
4724          * trace_find_next_entry() may need to save off iter->ent.
4725          * It will place it into the iter->temp buffer. As most
4726          * events are less than 128 bytes, allocate a buffer of that size.
4727          * If one is greater, then trace_find_next_entry() will
4728          * allocate a new buffer to adjust for the bigger iter->ent.
4729          * It's not critical if it fails to get allocated here.
4730          */
4731         iter->temp = kmalloc(128, GFP_KERNEL);
4732         if (iter->temp)
4733                 iter->temp_size = 128;
4734
4735         /*
4736          * trace_event_printf() may need to modify given format
4737          * string to replace %p with %px so that it shows the real address
4738          * instead of a hashed value. However, that is only needed for event
4739          * tracing; other tracers may not need it. Defer the allocation
4740          * until it is needed.
4741          */
4742         iter->fmt = NULL;
4743         iter->fmt_size = 0;
4744
4745         /*
4746          * We make a copy of the current tracer to avoid concurrent
4747          * changes on it while we are reading.
4748          */
4749         mutex_lock(&trace_types_lock);
4750         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4751         if (!iter->trace)
4752                 goto fail;
4753
4754         *iter->trace = *tr->current_trace;
4755
4756         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4757                 goto fail;
4758
4759         iter->tr = tr;
4760
4761 #ifdef CONFIG_TRACER_MAX_TRACE
4762         /* Currently only the top directory has a snapshot */
4763         if (tr->current_trace->print_max || snapshot)
4764                 iter->array_buffer = &tr->max_buffer;
4765         else
4766 #endif
4767                 iter->array_buffer = &tr->array_buffer;
4768         iter->snapshot = snapshot;
4769         iter->pos = -1;
4770         iter->cpu_file = tracing_get_cpu(inode);
4771         mutex_init(&iter->mutex);
4772
4773         /* Notify the tracer early; before we stop tracing. */
4774         if (iter->trace->open)
4775                 iter->trace->open(iter);
4776
4777         /* Annotate start of buffers if we had overruns */
4778         if (ring_buffer_overruns(iter->array_buffer->buffer))
4779                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4780
4781         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4782         if (trace_clocks[tr->clock_id].in_ns)
4783                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4784
4785         /*
4786          * If pause-on-trace is enabled, then stop the trace while
4787          * dumping, unless this is the "snapshot" file
4788          */
4789         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4790                 tracing_stop_tr(tr);
4791
4792         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4793                 for_each_tracing_cpu(cpu) {
4794                         iter->buffer_iter[cpu] =
4795                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4796                                                          cpu, GFP_KERNEL);
4797                 }
4798                 ring_buffer_read_prepare_sync();
4799                 for_each_tracing_cpu(cpu) {
4800                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4801                         tracing_iter_reset(iter, cpu);
4802                 }
4803         } else {
4804                 cpu = iter->cpu_file;
4805                 iter->buffer_iter[cpu] =
4806                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4807                                                  cpu, GFP_KERNEL);
4808                 ring_buffer_read_prepare_sync();
4809                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4810                 tracing_iter_reset(iter, cpu);
4811         }
4812
4813         mutex_unlock(&trace_types_lock);
4814
4815         return iter;
4816
4817  fail:
4818         mutex_unlock(&trace_types_lock);
4819         kfree(iter->trace);
4820         kfree(iter->temp);
4821         kfree(iter->buffer_iter);
4822 release:
4823         seq_release_private(inode, file);
4824         return ERR_PTR(-ENOMEM);
4825 }
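
/*
 * Note on iterator lifetime: the ring_buffer_read_prepare() /
 * ring_buffer_read_start() calls above are paired with
 * ring_buffer_read_finish() in tracing_release() below. A minimal
 * sketch of the pairing for a single CPU:
 *
 *	iter->buffer_iter[cpu] =
 *		ring_buffer_read_prepare(buffer, cpu, GFP_KERNEL);
 *	ring_buffer_read_prepare_sync();
 *	ring_buffer_read_start(iter->buffer_iter[cpu]);
 *	...	(seq_file iteration reads entries)
 *	ring_buffer_read_finish(iter->buffer_iter[cpu]);
 */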
4826
4827 int tracing_open_generic(struct inode *inode, struct file *filp)
4828 {
4829         int ret;
4830
4831         ret = tracing_check_open_get_tr(NULL);
4832         if (ret)
4833                 return ret;
4834
4835         filp->private_data = inode->i_private;
4836         return 0;
4837 }
4838
4839 bool tracing_is_disabled(void)
4840 {
4841         return tracing_disabled ? true : false;
4842 }
4843
4844 /*
4845  * Open and update trace_array ref count.
4846  * Must have the current trace_array passed to it.
4847  */
4848 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4849 {
4850         struct trace_array *tr = inode->i_private;
4851         int ret;
4852
4853         ret = tracing_check_open_get_tr(tr);
4854         if (ret)
4855                 return ret;
4856
4857         filp->private_data = inode->i_private;
4858
4859         return 0;
4860 }
4861
4862 static int tracing_mark_open(struct inode *inode, struct file *filp)
4863 {
4864         stream_open(inode, filp);
4865         return tracing_open_generic_tr(inode, filp);
4866 }
4867
4868 static int tracing_release(struct inode *inode, struct file *file)
4869 {
4870         struct trace_array *tr = inode->i_private;
4871         struct seq_file *m = file->private_data;
4872         struct trace_iterator *iter;
4873         int cpu;
4874
4875         if (!(file->f_mode & FMODE_READ)) {
4876                 trace_array_put(tr);
4877                 return 0;
4878         }
4879
4880         /* Writes do not use seq_file */
4881         iter = m->private;
4882         mutex_lock(&trace_types_lock);
4883
4884         for_each_tracing_cpu(cpu) {
4885                 if (iter->buffer_iter[cpu])
4886                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4887         }
4888
4889         if (iter->trace && iter->trace->close)
4890                 iter->trace->close(iter);
4891
4892         if (!iter->snapshot && tr->stop_count)
4893                 /* reenable tracing if it was previously enabled */
4894                 tracing_start_tr(tr);
4895
4896         __trace_array_put(tr);
4897
4898         mutex_unlock(&trace_types_lock);
4899
4900         mutex_destroy(&iter->mutex);
4901         free_cpumask_var(iter->started);
4902         kfree(iter->fmt);
4903         kfree(iter->temp);
4904         kfree(iter->trace);
4905         kfree(iter->buffer_iter);
4906         seq_release_private(inode, file);
4907
4908         return 0;
4909 }
4910
4911 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4912 {
4913         struct trace_array *tr = inode->i_private;
4914
4915         trace_array_put(tr);
4916         return 0;
4917 }
4918
4919 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4920 {
4921         struct trace_array *tr = inode->i_private;
4922
4923         trace_array_put(tr);
4924
4925         return single_release(inode, file);
4926 }
4927
4928 static int tracing_open(struct inode *inode, struct file *file)
4929 {
4930         struct trace_array *tr = inode->i_private;
4931         struct trace_iterator *iter;
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(tr);
4935         if (ret)
4936                 return ret;
4937
4938         /* If this file was open for write, then erase contents */
4939         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4940                 int cpu = tracing_get_cpu(inode);
4941                 struct array_buffer *trace_buf = &tr->array_buffer;
4942
4943 #ifdef CONFIG_TRACER_MAX_TRACE
4944                 if (tr->current_trace->print_max)
4945                         trace_buf = &tr->max_buffer;
4946 #endif
4947
4948                 if (cpu == RING_BUFFER_ALL_CPUS)
4949                         tracing_reset_online_cpus(trace_buf);
4950                 else
4951                         tracing_reset_cpu(trace_buf, cpu);
4952         }
4953
4954         if (file->f_mode & FMODE_READ) {
4955                 iter = __tracing_open(inode, file, false);
4956                 if (IS_ERR(iter))
4957                         ret = PTR_ERR(iter);
4958                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4959                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4960         }
4961
4962         if (ret < 0)
4963                 trace_array_put(tr);
4964
4965         return ret;
4966 }
4967
4968 /*
4969  * Some tracers are not suitable for instance buffers.
4970  * A tracer is always available for the global array (toplevel)
4971  * or if it explicitly states that it is.
4972  */
4973 static bool
4974 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4975 {
4976         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4977 }
4978
4979 /* Find the next tracer that this trace array may use */
4980 static struct tracer *
4981 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4982 {
4983         while (t && !trace_ok_for_array(t, tr))
4984                 t = t->next;
4985
4986         return t;
4987 }
4988
4989 static void *
4990 t_next(struct seq_file *m, void *v, loff_t *pos)
4991 {
4992         struct trace_array *tr = m->private;
4993         struct tracer *t = v;
4994
4995         (*pos)++;
4996
4997         if (t)
4998                 t = get_tracer_for_array(tr, t->next);
4999
5000         return t;
5001 }
5002
5003 static void *t_start(struct seq_file *m, loff_t *pos)
5004 {
5005         struct trace_array *tr = m->private;
5006         struct tracer *t;
5007         loff_t l = 0;
5008
5009         mutex_lock(&trace_types_lock);
5010
5011         t = get_tracer_for_array(tr, trace_types);
5012         for (; t && l < *pos; t = t_next(m, t, &l))
5013                 ;
5014
5015         return t;
5016 }
5017
5018 static void t_stop(struct seq_file *m, void *p)
5019 {
5020         mutex_unlock(&trace_types_lock);
5021 }
5022
5023 static int t_show(struct seq_file *m, void *v)
5024 {
5025         struct tracer *t = v;
5026
5027         if (!t)
5028                 return 0;
5029
5030         seq_puts(m, t->name);
5031         if (t->next)
5032                 seq_putc(m, ' ');
5033         else
5034                 seq_putc(m, '\n');
5035
5036         return 0;
5037 }
5038
5039 static const struct seq_operations show_traces_seq_ops = {
5040         .start          = t_start,
5041         .next           = t_next,
5042         .stop           = t_stop,
5043         .show           = t_show,
5044 };
5045
5046 static int show_traces_open(struct inode *inode, struct file *file)
5047 {
5048         struct trace_array *tr = inode->i_private;
5049         struct seq_file *m;
5050         int ret;
5051
5052         ret = tracing_check_open_get_tr(tr);
5053         if (ret)
5054                 return ret;
5055
5056         ret = seq_open(file, &show_traces_seq_ops);
5057         if (ret) {
5058                 trace_array_put(tr);
5059                 return ret;
5060         }
5061
5062         m = file->private_data;
5063         m->private = tr;
5064
5065         return 0;
5066 }
5067
5068 static int show_traces_release(struct inode *inode, struct file *file)
5069 {
5070         struct trace_array *tr = inode->i_private;
5071
5072         trace_array_put(tr);
5073         return seq_release(inode, file);
5074 }
5075
5076 static ssize_t
5077 tracing_write_stub(struct file *filp, const char __user *ubuf,
5078                    size_t count, loff_t *ppos)
5079 {
5080         return count;
5081 }
5082
5083 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5084 {
5085         int ret;
5086
5087         if (file->f_mode & FMODE_READ)
5088                 ret = seq_lseek(file, offset, whence);
5089         else
5090                 file->f_pos = ret = 0;
5091
5092         return ret;
5093 }
5094
5095 static const struct file_operations tracing_fops = {
5096         .open           = tracing_open,
5097         .read           = seq_read,
5098         .write          = tracing_write_stub,
5099         .llseek         = tracing_lseek,
5100         .release        = tracing_release,
5101 };
5102
5103 static const struct file_operations show_traces_fops = {
5104         .open           = show_traces_open,
5105         .read           = seq_read,
5106         .llseek         = seq_lseek,
5107         .release        = show_traces_release,
5108 };
5109
5110 static ssize_t
5111 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5112                      size_t count, loff_t *ppos)
5113 {
5114         struct trace_array *tr = file_inode(filp)->i_private;
5115         char *mask_str;
5116         int len;
5117
5118         len = snprintf(NULL, 0, "%*pb\n",
5119                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5120         mask_str = kmalloc(len, GFP_KERNEL);
5121         if (!mask_str)
5122                 return -ENOMEM;
5123
5124         len = snprintf(mask_str, len, "%*pb\n",
5125                        cpumask_pr_args(tr->tracing_cpumask));
5126         if (len >= count) {
5127                 count = -EINVAL;
5128                 goto out_err;
5129         }
5130         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5131
5132 out_err:
5133         kfree(mask_str);
5134
5135         return count;
5136 }
5137
5138 int tracing_set_cpumask(struct trace_array *tr,
5139                         cpumask_var_t tracing_cpumask_new)
5140 {
5141         int cpu;
5142
5143         if (!tr)
5144                 return -EINVAL;
5145
5146         local_irq_disable();
5147         arch_spin_lock(&tr->max_lock);
5148         for_each_tracing_cpu(cpu) {
5149                 /*
5150                  * Increase/decrease the disabled counter if we are
5151                  * about to flip a bit in the cpumask:
5152                  */
5153                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5154                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5155                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5156                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5157                 }
5158                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5159                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5160                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5161                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5162                 }
5163         }
5164         arch_spin_unlock(&tr->max_lock);
5165         local_irq_enable();
5166
5167         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5168
5169         return 0;
5170 }
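
/*
 * User space reaches this helper through the "tracing_cpumask" file
 * (see tracing_cpumask_write() below), which parses a hex CPU mask.
 * For illustration, assuming the default tracefs mount point:
 *
 *	# limit tracing to CPUs 0 and 1
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * CPUs cleared from the mask get their per-cpu "disabled" count bumped
 * and ring buffer recording switched off, exactly as done above.
 */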
5171
5172 static ssize_t
5173 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5174                       size_t count, loff_t *ppos)
5175 {
5176         struct trace_array *tr = file_inode(filp)->i_private;
5177         cpumask_var_t tracing_cpumask_new;
5178         int err;
5179
5180         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5181                 return -ENOMEM;
5182
5183         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5184         if (err)
5185                 goto err_free;
5186
5187         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5188         if (err)
5189                 goto err_free;
5190
5191         free_cpumask_var(tracing_cpumask_new);
5192
5193         return count;
5194
5195 err_free:
5196         free_cpumask_var(tracing_cpumask_new);
5197
5198         return err;
5199 }
5200
5201 static const struct file_operations tracing_cpumask_fops = {
5202         .open           = tracing_open_generic_tr,
5203         .read           = tracing_cpumask_read,
5204         .write          = tracing_cpumask_write,
5205         .release        = tracing_release_generic_tr,
5206         .llseek         = generic_file_llseek,
5207 };
5208
5209 static int tracing_trace_options_show(struct seq_file *m, void *v)
5210 {
5211         struct tracer_opt *trace_opts;
5212         struct trace_array *tr = m->private;
5213         u32 tracer_flags;
5214         int i;
5215
5216         mutex_lock(&trace_types_lock);
5217         tracer_flags = tr->current_trace->flags->val;
5218         trace_opts = tr->current_trace->flags->opts;
5219
5220         for (i = 0; trace_options[i]; i++) {
5221                 if (tr->trace_flags & (1 << i))
5222                         seq_printf(m, "%s\n", trace_options[i]);
5223                 else
5224                         seq_printf(m, "no%s\n", trace_options[i]);
5225         }
5226
5227         for (i = 0; trace_opts[i].name; i++) {
5228                 if (tracer_flags & trace_opts[i].bit)
5229                         seq_printf(m, "%s\n", trace_opts[i].name);
5230                 else
5231                         seq_printf(m, "no%s\n", trace_opts[i].name);
5232         }
5233         mutex_unlock(&trace_types_lock);
5234
5235         return 0;
5236 }
5237
5238 static int __set_tracer_option(struct trace_array *tr,
5239                                struct tracer_flags *tracer_flags,
5240                                struct tracer_opt *opts, int neg)
5241 {
5242         struct tracer *trace = tracer_flags->trace;
5243         int ret;
5244
5245         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5246         if (ret)
5247                 return ret;
5248
5249         if (neg)
5250                 tracer_flags->val &= ~opts->bit;
5251         else
5252                 tracer_flags->val |= opts->bit;
5253         return 0;
5254 }
5255
5256 /* Try to assign a tracer specific option */
5257 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5258 {
5259         struct tracer *trace = tr->current_trace;
5260         struct tracer_flags *tracer_flags = trace->flags;
5261         struct tracer_opt *opts = NULL;
5262         int i;
5263
5264         for (i = 0; tracer_flags->opts[i].name; i++) {
5265                 opts = &tracer_flags->opts[i];
5266
5267                 if (strcmp(cmp, opts->name) == 0)
5268                         return __set_tracer_option(tr, trace->flags, opts, neg);
5269         }
5270
5271         return -EINVAL;
5272 }
5273
5274 /* Some tracers require overwrite to stay enabled */
5275 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5276 {
5277         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5278                 return -1;
5279
5280         return 0;
5281 }
5282
5283 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5284 {
5285         int *map;
5286
5287         if ((mask == TRACE_ITER_RECORD_TGID) ||
5288             (mask == TRACE_ITER_RECORD_CMD))
5289                 lockdep_assert_held(&event_mutex);
5290
5291         /* do nothing if flag is already set */
5292         if (!!(tr->trace_flags & mask) == !!enabled)
5293                 return 0;
5294
5295         /* Give the tracer a chance to approve the change */
5296         if (tr->current_trace->flag_changed)
5297                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5298                         return -EINVAL;
5299
5300         if (enabled)
5301                 tr->trace_flags |= mask;
5302         else
5303                 tr->trace_flags &= ~mask;
5304
5305         if (mask == TRACE_ITER_RECORD_CMD)
5306                 trace_event_enable_cmd_record(enabled);
5307
5308         if (mask == TRACE_ITER_RECORD_TGID) {
5309                 if (!tgid_map) {
5310                         tgid_map_max = pid_max;
5311                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5312                                        GFP_KERNEL);
5313
5314                         /*
5315                          * Pairs with smp_load_acquire() in
5316                          * trace_find_tgid_ptr() to ensure that if it observes
5317                          * the tgid_map we just allocated then it also observes
5318                          * the corresponding tgid_map_max value.
5319                          */
5320                         smp_store_release(&tgid_map, map);
5321                 }
5322                 if (!tgid_map) {
5323                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5324                         return -ENOMEM;
5325                 }
5326
5327                 trace_event_enable_tgid_record(enabled);
5328         }
5329
5330         if (mask == TRACE_ITER_EVENT_FORK)
5331                 trace_event_follow_fork(tr, enabled);
5332
5333         if (mask == TRACE_ITER_FUNC_FORK)
5334                 ftrace_pid_follow_fork(tr, enabled);
5335
5336         if (mask == TRACE_ITER_OVERWRITE) {
5337                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5340 #endif
5341         }
5342
5343         if (mask == TRACE_ITER_PRINTK) {
5344                 trace_printk_start_stop_comm(enabled);
5345                 trace_printk_control(enabled);
5346         }
5347
5348         return 0;
5349 }
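
/*
 * The smp_store_release() of tgid_map above pairs with an
 * smp_load_acquire() on the reader side. A minimal sketch of that
 * reader (trace_find_tgid_ptr(), defined elsewhere in this file and
 * shown here only in outline):
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (unlikely(!map || pid > tgid_map_max))
 *		return NULL;
 *	return &map[pid];
 *
 * The acquire guarantees that a reader which sees the newly published
 * tgid_map also sees the tgid_map_max written before the release.
 */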
5350
5351 int trace_set_options(struct trace_array *tr, char *option)
5352 {
5353         char *cmp;
5354         int neg = 0;
5355         int ret;
5356         size_t orig_len = strlen(option);
5357         int len;
5358
5359         cmp = strstrip(option);
5360
5361         len = str_has_prefix(cmp, "no");
5362         if (len)
5363                 neg = 1;
5364
5365         cmp += len;
5366
5367         mutex_lock(&event_mutex);
5368         mutex_lock(&trace_types_lock);
5369
5370         ret = match_string(trace_options, -1, cmp);
5371         /* If the option was not found globally, try the tracer-specific options */
5372         if (ret < 0)
5373                 ret = set_tracer_option(tr, cmp, neg);
5374         else
5375                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5376
5377         mutex_unlock(&trace_types_lock);
5378         mutex_unlock(&event_mutex);
5379
5380         /*
5381          * If the first trailing whitespace is replaced with '\0' by strstrip,
5382          * turn it back into a space.
5383          */
5384         if (orig_len > strlen(option))
5385                 option[strlen(option)] = ' ';
5386
5387         return ret;
5388 }
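
/*
 * trace_set_options() accepts both the global flags in trace_options[]
 * and tracer-specific options, with a "no" prefix to clear a flag.
 * An illustrative use via the "trace_options" file, assuming the
 * default tracefs mount point:
 *
 *	echo noprint-parent > /sys/kernel/tracing/trace_options
 *	echo sym-addr > /sys/kernel/tracing/trace_options
 */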
5389
5390 static void __init apply_trace_boot_options(void)
5391 {
5392         char *buf = trace_boot_options_buf;
5393         char *option;
5394
5395         while (true) {
5396                 option = strsep(&buf, ",");
5397
5398                 if (!option)
5399                         break;
5400
5401                 if (*option)
5402                         trace_set_options(&global_trace, option);
5403
5404                 /* Put back the comma to allow this to be called again */
5405                 if (buf)
5406                         *(buf - 1) = ',';
5407         }
5408 }
5409
5410 static ssize_t
5411 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5412                         size_t cnt, loff_t *ppos)
5413 {
5414         struct seq_file *m = filp->private_data;
5415         struct trace_array *tr = m->private;
5416         char buf[64];
5417         int ret;
5418
5419         if (cnt >= sizeof(buf))
5420                 return -EINVAL;
5421
5422         if (copy_from_user(buf, ubuf, cnt))
5423                 return -EFAULT;
5424
5425         buf[cnt] = 0;
5426
5427         ret = trace_set_options(tr, buf);
5428         if (ret < 0)
5429                 return ret;
5430
5431         *ppos += cnt;
5432
5433         return cnt;
5434 }
5435
5436 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5437 {
5438         struct trace_array *tr = inode->i_private;
5439         int ret;
5440
5441         ret = tracing_check_open_get_tr(tr);
5442         if (ret)
5443                 return ret;
5444
5445         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5446         if (ret < 0)
5447                 trace_array_put(tr);
5448
5449         return ret;
5450 }
5451
5452 static const struct file_operations tracing_iter_fops = {
5453         .open           = tracing_trace_options_open,
5454         .read           = seq_read,
5455         .llseek         = seq_lseek,
5456         .release        = tracing_single_release_tr,
5457         .write          = tracing_trace_options_write,
5458 };
5459
5460 static const char readme_msg[] =
5461         "tracing mini-HOWTO:\n\n"
5462         "# echo 0 > tracing_on : quick way to disable tracing\n"
5463         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5464         " Important files:\n"
5465         "  trace\t\t\t- The static contents of the buffer\n"
5466         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5467         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5468         "  current_tracer\t- function and latency tracers\n"
5469         "  available_tracers\t- list of configured tracers for current_tracer\n"
5470         "  error_log\t- error log for failed commands (that support it)\n"
5471         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5472         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5473         "  trace_clock\t\t- change the clock used to order events\n"
5474         "       local:   Per cpu clock but may not be synced across CPUs\n"
5475         "      global:   Synced across CPUs but slows tracing down.\n"
5476         "     counter:   Not a clock, but just an increment\n"
5477         "      uptime:   Jiffy counter from time of boot\n"
5478         "        perf:   Same clock that perf events use\n"
5479 #ifdef CONFIG_X86_64
5480         "     x86-tsc:   TSC cycle counter\n"
5481 #endif
5482         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5483         "       delta:   Delta difference against a buffer-wide timestamp\n"
5484         "    absolute:   Absolute (standalone) timestamp\n"
5485         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5486         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5487         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5488         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5489         "\t\t\t  Remove sub-buffer with rmdir\n"
5490         "  trace_options\t\t- Set format or modify how tracing happens\n"
5491         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5492         "\t\t\t  option name\n"
5493         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5494 #ifdef CONFIG_DYNAMIC_FTRACE
5495         "\n  available_filter_functions - list of functions that can be filtered on\n"
5496         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5497         "\t\t\t  functions\n"
5498         "\t     accepts: func_full_name or glob-matching-pattern\n"
5499         "\t     modules: Can select a group via module\n"
5500         "\t      Format: :mod:<module-name>\n"
5501         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5502         "\t    triggers: a command to perform when function is hit\n"
5503         "\t      Format: <function>:<trigger>[:count]\n"
5504         "\t     trigger: traceon, traceoff\n"
5505         "\t\t      enable_event:<system>:<event>\n"
5506         "\t\t      disable_event:<system>:<event>\n"
5507 #ifdef CONFIG_STACKTRACE
5508         "\t\t      stacktrace\n"
5509 #endif
5510 #ifdef CONFIG_TRACER_SNAPSHOT
5511         "\t\t      snapshot\n"
5512 #endif
5513         "\t\t      dump\n"
5514         "\t\t      cpudump\n"
5515         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5516         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5517         "\t     The first one will disable tracing every time do_fault is hit\n"
5518         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5519         "\t       The first time do_trap is hit and it disables tracing, the\n"
5520         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5521         "\t       the counter will not decrement. It only decrements when the\n"
5522         "\t       trigger did work\n"
5523         "\t     To remove trigger without count:\n"
5524         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5525         "\t     To remove trigger with a count:\n"
5526         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5527         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5528         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5529         "\t    modules: Can select a group via module command :mod:\n"
5530         "\t    Does not accept triggers\n"
5531 #endif /* CONFIG_DYNAMIC_FTRACE */
5532 #ifdef CONFIG_FUNCTION_TRACER
5533         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5534         "\t\t    (function)\n"
5535         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5536         "\t\t    (function)\n"
5537 #endif
5538 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5539         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5540         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5541         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5542 #endif
5543 #ifdef CONFIG_TRACER_SNAPSHOT
5544         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5545         "\t\t\t  snapshot buffer. Read the contents for more\n"
5546         "\t\t\t  information\n"
5547 #endif
5548 #ifdef CONFIG_STACK_TRACER
5549         "  stack_trace\t\t- Shows the max stack trace when active\n"
5550         "  stack_max_size\t- Shows current max stack size that was traced\n"
5551         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5552         "\t\t\t  new trace)\n"
5553 #ifdef CONFIG_DYNAMIC_FTRACE
5554         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5555         "\t\t\t  traces\n"
5556 #endif
5557 #endif /* CONFIG_STACK_TRACER */
5558 #ifdef CONFIG_DYNAMIC_EVENTS
5559         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5560         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5561 #endif
5562 #ifdef CONFIG_KPROBE_EVENTS
5563         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5564         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5565 #endif
5566 #ifdef CONFIG_UPROBE_EVENTS
5567         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5568         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5569 #endif
5570 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571         "\t  accepts: event-definitions (one definition per line)\n"
5572         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5573         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5574 #ifdef CONFIG_HIST_TRIGGERS
5575         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5576 #endif
5577         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5578         "\t           -:[<group>/]<event>\n"
5579 #ifdef CONFIG_KPROBE_EVENTS
5580         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5581   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5582 #endif
5583 #ifdef CONFIG_UPROBE_EVENTS
5584   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5585 #endif
5586         "\t     args: <name>=fetcharg[:type]\n"
5587         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5588 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5589         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5590 #else
5591         "\t           $stack<index>, $stack, $retval, $comm,\n"
5592 #endif
5593         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5594         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5595         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5596         "\t           <type>\\[<array-size>\\]\n"
5597 #ifdef CONFIG_HIST_TRIGGERS
5598         "\t    field: <stype> <name>;\n"
5599         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5600         "\t           [unsigned] char/int/long\n"
5601 #endif
5602         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5603         "\t            of the <attached-group>/<attached-event>.\n"
5604 #endif
5605         "  events/\t\t- Directory containing all trace event subsystems:\n"
5606         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5607         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5608         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5609         "\t\t\t  events\n"
5610         "      filter\t\t- If set, only events passing filter are traced\n"
5611         "  events/<system>/<event>/\t- Directory containing control files for\n"
5612         "\t\t\t  <event>:\n"
5613         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5614         "      filter\t\t- If set, only events passing filter are traced\n"
5615         "      trigger\t\t- If set, a command to perform when event is hit\n"
5616         "\t    Format: <trigger>[:count][if <filter>]\n"
5617         "\t   trigger: traceon, traceoff\n"
5618         "\t            enable_event:<system>:<event>\n"
5619         "\t            disable_event:<system>:<event>\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621         "\t            enable_hist:<system>:<event>\n"
5622         "\t            disable_hist:<system>:<event>\n"
5623 #endif
5624 #ifdef CONFIG_STACKTRACE
5625         "\t\t    stacktrace\n"
5626 #endif
5627 #ifdef CONFIG_TRACER_SNAPSHOT
5628         "\t\t    snapshot\n"
5629 #endif
5630 #ifdef CONFIG_HIST_TRIGGERS
5631         "\t\t    hist (see below)\n"
5632 #endif
5633         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5634         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5635         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5636         "\t                  events/block/block_unplug/trigger\n"
5637         "\t   The first disables tracing every time block_unplug is hit.\n"
5638         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5639         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5640         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5641         "\t   Like function triggers, the counter is only decremented if it\n"
5642         "\t    enabled or disabled tracing.\n"
5643         "\t   To remove a trigger without a count:\n"
5644         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5645         "\t   To remove a trigger with a count:\n"
5646         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5647         "\t   Filters can be ignored when removing a trigger.\n"
5648 #ifdef CONFIG_HIST_TRIGGERS
5649         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5650         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5651         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5652         "\t            [:values=<field1[,field2,...]>]\n"
5653         "\t            [:sort=<field1[,field2,...]>]\n"
5654         "\t            [:size=#entries]\n"
5655         "\t            [:pause][:continue][:clear]\n"
5656         "\t            [:name=histname1]\n"
5657         "\t            [:<handler>.<action>]\n"
5658         "\t            [if <filter>]\n\n"
5659         "\t    Note, special fields can be used as well:\n"
5660         "\t            common_timestamp - to record current timestamp\n"
5661         "\t            common_cpu - to record the CPU the event happened on\n"
5662         "\n"
5663         "\t    A hist trigger variable can be:\n"
5664         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5665         "\t        - a reference to another variable e.g. y=$x,\n"
5666         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5667         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5668         "\n"
5669         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5670         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5671         "\t    variable reference, field or numeric literal.\n"
5672         "\n"
5673         "\t    When a matching event is hit, an entry is added to a hash\n"
5674         "\t    table using the key(s) and value(s) named, and the value of a\n"
5675         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5676         "\t    correspond to fields in the event's format description.  Keys\n"
5677         "\t    can be any field, or the special string 'stacktrace'.\n"
5678         "\t    Compound keys consisting of up to two fields can be specified\n"
5679         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5680         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5681         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5682         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5683         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5684         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5685         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5686         "\t    its histogram data will be shared with other triggers of the\n"
5687         "\t    same name, and trigger hits will update this common data.\n\n"
5688         "\t    Reading the 'hist' file for the event will dump the hash\n"
5689         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5690         "\t    triggers attached to an event, there will be a table for each\n"
5691         "\t    trigger in the output.  The table displayed for a named\n"
5692         "\t    trigger will be the same as any other instance having the\n"
5693         "\t    same name.  The default format used to display a given field\n"
5694         "\t    can be modified by appending any of the following modifiers\n"
5695         "\t    to the field name, as applicable:\n\n"
5696         "\t            .hex        display a number as a hex value\n"
5697         "\t            .sym        display an address as a symbol\n"
5698         "\t            .sym-offset display an address as a symbol and offset\n"
5699         "\t            .execname   display a common_pid as a program name\n"
5700         "\t            .syscall    display a syscall id as a syscall name\n"
5701         "\t            .log2       display log2 value rather than raw number\n"
5702         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5703         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5704         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5705         "\t    trigger or to start a hist trigger but not log any events\n"
5706         "\t    until told to do so.  'continue' can be used to start or\n"
5707         "\t    restart a paused hist trigger.\n\n"
5708         "\t    The 'clear' parameter will clear the contents of a running\n"
5709         "\t    hist trigger and leave its current paused/active state\n"
5710         "\t    unchanged.\n\n"
5711         "\t    The enable_hist and disable_hist triggers can be used to\n"
5712         "\t    have one event conditionally start and stop another event's\n"
5713         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5714         "\t    the enable_event and disable_event triggers.\n\n"
5715         "\t    Hist trigger handlers and actions are executed whenever a\n"
5716         "\t    histogram entry is added or updated.  They take the form:\n\n"
5717         "\t        <handler>.<action>\n\n"
5718         "\t    The available handlers are:\n\n"
5719         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5720         "\t        onmax(var)               - invoke if var exceeds current max\n"
5721         "\t        onchange(var)            - invoke action if var changes\n\n"
5722         "\t    The available actions are:\n\n"
5723         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5724         "\t        save(field,...)                      - save current event fields\n"
5725 #ifdef CONFIG_TRACER_SNAPSHOT
5726         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5727 #endif
5728 #ifdef CONFIG_SYNTH_EVENTS
5729         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5730         "\t  Write into this file to define/undefine new synthetic events.\n"
5731         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5732 #endif
5733 #endif
5734 ;
5735
5736 static ssize_t
5737 tracing_readme_read(struct file *filp, char __user *ubuf,
5738                        size_t cnt, loff_t *ppos)
5739 {
5740         return simple_read_from_buffer(ubuf, cnt, ppos,
5741                                         readme_msg, strlen(readme_msg));
5742 }
5743
5744 static const struct file_operations tracing_readme_fops = {
5745         .open           = tracing_open_generic,
5746         .read           = tracing_readme_read,
5747         .llseek         = generic_file_llseek,
5748 };
5749
5750 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5751 {
5752         int pid = ++(*pos);
5753
5754         return trace_find_tgid_ptr(pid);
5755 }
5756
5757 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5758 {
5759         int pid = *pos;
5760
5761         return trace_find_tgid_ptr(pid);
5762 }
5763
5764 static void saved_tgids_stop(struct seq_file *m, void *v)
5765 {
5766 }
5767
5768 static int saved_tgids_show(struct seq_file *m, void *v)
5769 {
5770         int *entry = (int *)v;
5771         int pid = entry - tgid_map;
5772         int tgid = *entry;
5773
5774         if (tgid == 0)
5775                 return SEQ_SKIP;
5776
5777         seq_printf(m, "%d %d\n", pid, tgid);
5778         return 0;
5779 }
5780
5781 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5782         .start          = saved_tgids_start,
5783         .stop           = saved_tgids_stop,
5784         .next           = saved_tgids_next,
5785         .show           = saved_tgids_show,
5786 };
5787
5788 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5789 {
5790         int ret;
5791
5792         ret = tracing_check_open_get_tr(NULL);
5793         if (ret)
5794                 return ret;
5795
5796         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5797 }
5798
5799
5800 static const struct file_operations tracing_saved_tgids_fops = {
5801         .open           = tracing_saved_tgids_open,
5802         .read           = seq_read,
5803         .llseek         = seq_lseek,
5804         .release        = seq_release,
5805 };
5806
5807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5808 {
5809         unsigned int *ptr = v;
5810
5811         if (*pos || m->count)
5812                 ptr++;
5813
5814         (*pos)++;
5815
5816         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5817              ptr++) {
5818                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5819                         continue;
5820
5821                 return ptr;
5822         }
5823
5824         return NULL;
5825 }
5826
5827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5828 {
5829         void *v;
5830         loff_t l = 0;
5831
5832         preempt_disable();
5833         arch_spin_lock(&trace_cmdline_lock);
5834
5835         v = &savedcmd->map_cmdline_to_pid[0];
5836         while (l <= *pos) {
5837                 v = saved_cmdlines_next(m, v, &l);
5838                 if (!v)
5839                         return NULL;
5840         }
5841
5842         return v;
5843 }
5844
5845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5846 {
5847         arch_spin_unlock(&trace_cmdline_lock);
5848         preempt_enable();
5849 }
5850
5851 static int saved_cmdlines_show(struct seq_file *m, void *v)
5852 {
5853         char buf[TASK_COMM_LEN];
5854         unsigned int *pid = v;
5855
5856         __trace_find_cmdline(*pid, buf);
5857         seq_printf(m, "%d %s\n", *pid, buf);
5858         return 0;
5859 }
5860
5861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5862         .start          = saved_cmdlines_start,
5863         .next           = saved_cmdlines_next,
5864         .stop           = saved_cmdlines_stop,
5865         .show           = saved_cmdlines_show,
5866 };
5867
5868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5869 {
5870         int ret;
5871
5872         ret = tracing_check_open_get_tr(NULL);
5873         if (ret)
5874                 return ret;
5875
5876         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5877 }
5878
5879 static const struct file_operations tracing_saved_cmdlines_fops = {
5880         .open           = tracing_saved_cmdlines_open,
5881         .read           = seq_read,
5882         .llseek         = seq_lseek,
5883         .release        = seq_release,
5884 };
5885
5886 static ssize_t
5887 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5888                                  size_t cnt, loff_t *ppos)
5889 {
5890         char buf[64];
5891         int r;
5892
5893         arch_spin_lock(&trace_cmdline_lock);
5894         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5895         arch_spin_unlock(&trace_cmdline_lock);
5896
5897         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5898 }
5899
5900 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5901 {
5902         kfree(s->saved_cmdlines);
5903         kfree(s->map_cmdline_to_pid);
5904         kfree(s);
5905 }
5906
5907 static int tracing_resize_saved_cmdlines(unsigned int val)
5908 {
5909         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5910
5911         s = kmalloc(sizeof(*s), GFP_KERNEL);
5912         if (!s)
5913                 return -ENOMEM;
5914
5915         if (allocate_cmdlines_buffer(val, s) < 0) {
5916                 kfree(s);
5917                 return -ENOMEM;
5918         }
5919
5920         arch_spin_lock(&trace_cmdline_lock);
5921         savedcmd_temp = savedcmd;
5922         savedcmd = s;
5923         arch_spin_unlock(&trace_cmdline_lock);
5924         free_saved_cmdlines_buffer(savedcmd_temp);
5925
5926         return 0;
5927 }
5928
5929 static ssize_t
5930 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5931                                   size_t cnt, loff_t *ppos)
5932 {
5933         unsigned long val;
5934         int ret;
5935
5936         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5937         if (ret)
5938                 return ret;
5939
5940         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5941         if (!val || val > PID_MAX_DEFAULT)
5942                 return -EINVAL;
5943
5944         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5945         if (ret < 0)
5946                 return ret;
5947
5948         *ppos += cnt;
5949
5950         return cnt;
5951 }
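
/*
 * The saved_cmdlines table holds a fixed number of entries, so comms
 * from long traces may show up as "<...>" once old entries have been
 * recycled. For illustration, the table can be grown through the
 * "saved_cmdlines_size" file handled above, assuming the default
 * tracefs mount point:
 *
 *	echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 */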
5952
5953 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5954         .open           = tracing_open_generic,
5955         .read           = tracing_saved_cmdlines_size_read,
5956         .write          = tracing_saved_cmdlines_size_write,
5957 };
5958
5959 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5960 static union trace_eval_map_item *
5961 update_eval_map(union trace_eval_map_item *ptr)
5962 {
5963         if (!ptr->map.eval_string) {
5964                 if (ptr->tail.next) {
5965                         ptr = ptr->tail.next;
5966                         /* Set ptr to the next real item (skip head) */
5967                         ptr++;
5968                 } else
5969                         return NULL;
5970         }
5971         return ptr;
5972 }
5973
5974 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5975 {
5976         union trace_eval_map_item *ptr = v;
5977
5978         /*
5979          * Paranoid! If ptr points to end, we don't want to increment past it.
5980          * This really should never happen.
5981          */
5982         (*pos)++;
5983         ptr = update_eval_map(ptr);
5984         if (WARN_ON_ONCE(!ptr))
5985                 return NULL;
5986
5987         ptr++;
5988         ptr = update_eval_map(ptr);
5989
5990         return ptr;
5991 }
5992
5993 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5994 {
5995         union trace_eval_map_item *v;
5996         loff_t l = 0;
5997
5998         mutex_lock(&trace_eval_mutex);
5999
6000         v = trace_eval_maps;
6001         if (v)
6002                 v++;
6003
6004         while (v && l < *pos) {
6005                 v = eval_map_next(m, v, &l);
6006         }
6007
6008         return v;
6009 }
6010
6011 static void eval_map_stop(struct seq_file *m, void *v)
6012 {
6013         mutex_unlock(&trace_eval_mutex);
6014 }
6015
6016 static int eval_map_show(struct seq_file *m, void *v)
6017 {
6018         union trace_eval_map_item *ptr = v;
6019
6020         seq_printf(m, "%s %ld (%s)\n",
6021                    ptr->map.eval_string, ptr->map.eval_value,
6022                    ptr->map.system);
6023
6024         return 0;
6025 }
6026
6027 static const struct seq_operations tracing_eval_map_seq_ops = {
6028         .start          = eval_map_start,
6029         .next           = eval_map_next,
6030         .stop           = eval_map_stop,
6031         .show           = eval_map_show,
6032 };
6033
6034 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6035 {
6036         int ret;
6037
6038         ret = tracing_check_open_get_tr(NULL);
6039         if (ret)
6040                 return ret;
6041
6042         return seq_open(filp, &tracing_eval_map_seq_ops);
6043 }
6044
6045 static const struct file_operations tracing_eval_map_fops = {
6046         .open           = tracing_eval_map_open,
6047         .read           = seq_read,
6048         .llseek         = seq_lseek,
6049         .release        = seq_release,
6050 };
6051
6052 static inline union trace_eval_map_item *
6053 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6054 {
6055         /* Return tail of array given the head */
6056         return ptr + ptr->head.length + 1;
6057 }
6058
6059 static void
6060 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6061                            int len)
6062 {
6063         struct trace_eval_map **stop;
6064         struct trace_eval_map **map;
6065         union trace_eval_map_item *map_array;
6066         union trace_eval_map_item *ptr;
6067
6068         stop = start + len;
6069
6070         /*
6071          * The trace_eval_maps contains the map plus a head and tail item,
6072          * where the head holds the module and length of array, and the
6073          * tail holds a pointer to the next list.
6074          */
6075         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6076         if (!map_array) {
6077                 pr_warn("Unable to allocate trace eval mapping\n");
6078                 return;
6079         }
6080
6081         mutex_lock(&trace_eval_mutex);
6082
6083         if (!trace_eval_maps)
6084                 trace_eval_maps = map_array;
6085         else {
6086                 ptr = trace_eval_maps;
6087                 for (;;) {
6088                         ptr = trace_eval_jmp_to_tail(ptr);
6089                         if (!ptr->tail.next)
6090                                 break;
6091                         ptr = ptr->tail.next;
6092
6093                 }
6094                 ptr->tail.next = map_array;
6095         }
6096         map_array->head.mod = mod;
6097         map_array->head.length = len;
6098         map_array++;
6099
6100         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6101                 map_array->map = **map;
6102                 map_array++;
6103         }
6104         memset(map_array, 0, sizeof(*map_array));
6105
6106         mutex_unlock(&trace_eval_mutex);
6107 }
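
/*
 * For reference, the layout built above for each module is (indices
 * relative to the kmalloc_array() allocation):
 *
 *	map_array[0]		head  { .mod, .length = len }
 *	map_array[1..len]	map   one trace_eval_map per entry
 *	map_array[len + 1]	tail  { .next } -> next module's array
 *
 * which is why trace_eval_jmp_to_tail() returns ptr + length + 1, and
 * why the seq_file iterators above step one slot past each head.
 */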
6108
6109 static void trace_create_eval_file(struct dentry *d_tracer)
6110 {
6111         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6112                           NULL, &tracing_eval_map_fops);
6113 }
6114
6115 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6116 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6117 static inline void trace_insert_eval_map_file(struct module *mod,
6118                               struct trace_eval_map **start, int len) { }
6119 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6120
6121 static void trace_insert_eval_map(struct module *mod,
6122                                   struct trace_eval_map **start, int len)
6123 {
6124         struct trace_eval_map **map;
6125
6126         if (len <= 0)
6127                 return;
6128
6129         map = start;
6130
6131         trace_event_eval_update(map, len);
6132
6133         trace_insert_eval_map_file(mod, start, len);
6134 }
6135
6136 static ssize_t
6137 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6138                        size_t cnt, loff_t *ppos)
6139 {
6140         struct trace_array *tr = filp->private_data;
6141         char buf[MAX_TRACER_SIZE+2];
6142         int r;
6143
6144         mutex_lock(&trace_types_lock);
6145         r = sprintf(buf, "%s\n", tr->current_trace->name);
6146         mutex_unlock(&trace_types_lock);
6147
6148         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6149 }
6150
6151 int tracer_init(struct tracer *t, struct trace_array *tr)
6152 {
6153         tracing_reset_online_cpus(&tr->array_buffer);
6154         return t->init(tr);
6155 }
6156
6157 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6158 {
6159         int cpu;
6160
6161         for_each_tracing_cpu(cpu)
6162                 per_cpu_ptr(buf->data, cpu)->entries = val;
6163 }
6164
6165 #ifdef CONFIG_TRACER_MAX_TRACE
6166 /* resize @trace_buf's per-cpu buffers to the size of @size_buf's entries */
6167 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6168                                         struct array_buffer *size_buf, int cpu_id)
6169 {
6170         int cpu, ret = 0;
6171
6172         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6173                 for_each_tracing_cpu(cpu) {
6174                         ret = ring_buffer_resize(trace_buf->buffer,
6175                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6176                         if (ret < 0)
6177                                 break;
6178                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6179                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6180                 }
6181         } else {
6182                 ret = ring_buffer_resize(trace_buf->buffer,
6183                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6184                 if (ret == 0)
6185                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6186                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6187         }
6188
6189         return ret;
6190 }
6191 #endif /* CONFIG_TRACER_MAX_TRACE */
6192
6193 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6194                                         unsigned long size, int cpu)
6195 {
6196         int ret;
6197
6198         /*
6199          * If kernel or user changes the size of the ring buffer
6200          * we use the size that was given, and we can forget about
6201          * expanding it later.
6202          */
6203         ring_buffer_expanded = true;
6204
6205         /* May be called before buffers are initialized */
6206         if (!tr->array_buffer.buffer)
6207                 return 0;
6208
6209         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6210         if (ret < 0)
6211                 return ret;
6212
6213 #ifdef CONFIG_TRACER_MAX_TRACE
6214         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6215             !tr->current_trace->use_max_tr)
6216                 goto out;
6217
6218         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6219         if (ret < 0) {
6220                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6221                                                      &tr->array_buffer, cpu);
6222                 if (r < 0) {
6223                         /*
6224                          * AARGH! We are left with a max buffer of a
6225                          * different size!!!!
6226                          * The max buffer is our "snapshot" buffer.
6227                          * When a tracer needs a snapshot (one of the
6228                          * latency tracers), it swaps the max buffer
6229                          * with the saved snapshot. We succeeded in
6230                          * updating the size of the main buffer, but failed
6231                          * to update the size of the max buffer. And when we
6232                          * tried to reset the main buffer to its original
6233                          * size, we failed there too. This is very unlikely
6234                          * to happen, but if it does, warn and kill all
6235                          * tracing.
6236                          */
6237                         WARN_ON(1);
6238                         tracing_disabled = 1;
6239                 }
6240                 return ret;
6241         }
6242
6243         if (cpu == RING_BUFFER_ALL_CPUS)
6244                 set_buffer_entries(&tr->max_buffer, size);
6245         else
6246                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6247
6248  out:
6249 #endif /* CONFIG_TRACER_MAX_TRACE */
6250
6251         if (cpu == RING_BUFFER_ALL_CPUS)
6252                 set_buffer_entries(&tr->array_buffer, size);
6253         else
6254                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6255
6256         return ret;
6257 }
6258
6259 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6260                                   unsigned long size, int cpu_id)
6261 {
6262         int ret;
6263
6264         mutex_lock(&trace_types_lock);
6265
6266         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6267                 /* make sure, this cpu is enabled in the mask */
6268                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6269                         ret = -EINVAL;
6270                         goto out;
6271                 }
6272         }
6273
6274         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6275         if (ret < 0)
6276                 ret = -ENOMEM;
6277
6278 out:
6279         mutex_unlock(&trace_types_lock);
6280
6281         return ret;
6282 }
6283
6284
6285 /**
6286  * tracing_update_buffers - used by tracing facility to expand ring buffers
6287  *
6288  * To save memory when tracing is never used on a system that has it
6289  * configured in, the ring buffers are set to a minimum size. Once a
6290  * user starts to use the tracing facility, they need to grow to their
6291  * default size.
6292  *
6293  * This function is to be called when a tracer is about to be used.
6294  */
6295 int tracing_update_buffers(void)
6296 {
6297         int ret = 0;
6298
6299         mutex_lock(&trace_types_lock);
6300         if (!ring_buffer_expanded)
6301                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6302                                                 RING_BUFFER_ALL_CPUS);
6303         mutex_unlock(&trace_types_lock);
6304
6305         return ret;
6306 }
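
/*
 * Illustrative sketch (not taken from this file): an in-kernel user that
 * is about to start generating trace data would typically expand the
 * ring buffers first, roughly like:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	// buffers are now at their default size; tracing can be enabled
 *
 * The call is a no-op once ring_buffer_expanded has been set.
 */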
6307
6308 struct trace_option_dentry;
6309
6310 static void
6311 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6312
6313 /*
6314  * Used to clear out the tracer before deletion of an instance.
6315  * Must have trace_types_lock held.
6316  */
6317 static void tracing_set_nop(struct trace_array *tr)
6318 {
6319         if (tr->current_trace == &nop_trace)
6320                 return;
6321
6322         tr->current_trace->enabled--;
6323
6324         if (tr->current_trace->reset)
6325                 tr->current_trace->reset(tr);
6326
6327         tr->current_trace = &nop_trace;
6328 }
6329
6330 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6331 {
6332         /* Only enable if the directory has been created already. */
6333         if (!tr->dir)
6334                 return;
6335
6336         create_trace_option_files(tr, t);
6337 }
6338
6339 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6340 {
6341         struct tracer *t;
6342 #ifdef CONFIG_TRACER_MAX_TRACE
6343         bool had_max_tr;
6344 #endif
6345         int ret = 0;
6346
6347         mutex_lock(&trace_types_lock);
6348
6349         if (!ring_buffer_expanded) {
6350                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6351                                                 RING_BUFFER_ALL_CPUS);
6352                 if (ret < 0)
6353                         goto out;
6354                 ret = 0;
6355         }
6356
6357         for (t = trace_types; t; t = t->next) {
6358                 if (strcmp(t->name, buf) == 0)
6359                         break;
6360         }
6361         if (!t) {
6362                 ret = -EINVAL;
6363                 goto out;
6364         }
6365         if (t == tr->current_trace)
6366                 goto out;
6367
6368 #ifdef CONFIG_TRACER_SNAPSHOT
6369         if (t->use_max_tr) {
6370                 arch_spin_lock(&tr->max_lock);
6371                 if (tr->cond_snapshot)
6372                         ret = -EBUSY;
6373                 arch_spin_unlock(&tr->max_lock);
6374                 if (ret)
6375                         goto out;
6376         }
6377 #endif
6378         /* Some tracers won't work on kernel command line */
6379         if (system_state < SYSTEM_RUNNING && t->noboot) {
6380                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6381                         t->name);
6382                 goto out;
6383         }
6384
6385         /* Some tracers are only allowed for the top level buffer */
6386         if (!trace_ok_for_array(t, tr)) {
6387                 ret = -EINVAL;
6388                 goto out;
6389         }
6390
6391         /* If trace pipe files are being read, we can't change the tracer */
6392         if (tr->trace_ref) {
6393                 ret = -EBUSY;
6394                 goto out;
6395         }
6396
6397         trace_branch_disable();
6398
6399         tr->current_trace->enabled--;
6400
6401         if (tr->current_trace->reset)
6402                 tr->current_trace->reset(tr);
6403
6404         /* Current trace needs to be nop_trace before synchronize_rcu */
6405         tr->current_trace = &nop_trace;
6406
6407 #ifdef CONFIG_TRACER_MAX_TRACE
6408         had_max_tr = tr->allocated_snapshot;
6409
6410         if (had_max_tr && !t->use_max_tr) {
6411                 /*
6412                  * We need to make sure that the update_max_tr sees that
6413                  * current_trace changed to nop_trace to keep it from
6414                  * swapping the buffers after we resize it.
6415                  * The update_max_tr is called with interrupts disabled,
6416                  * so a synchronize_rcu() is sufficient.
6417                  */
6418                 synchronize_rcu();
6419                 free_snapshot(tr);
6420         }
6421 #endif
6422
6423 #ifdef CONFIG_TRACER_MAX_TRACE
6424         if (t->use_max_tr && !had_max_tr) {
6425                 ret = tracing_alloc_snapshot_instance(tr);
6426                 if (ret < 0)
6427                         goto out;
6428         }
6429 #endif
6430
6431         if (t->init) {
6432                 ret = tracer_init(t, tr);
6433                 if (ret)
6434                         goto out;
6435         }
6436
6437         tr->current_trace = t;
6438         tr->current_trace->enabled++;
6439         trace_branch_enable(tr);
6440  out:
6441         mutex_unlock(&trace_types_lock);
6442
6443         return ret;
6444 }
6445
6446 static ssize_t
6447 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6448                         size_t cnt, loff_t *ppos)
6449 {
6450         struct trace_array *tr = filp->private_data;
6451         char buf[MAX_TRACER_SIZE+1];
6452         int i;
6453         size_t ret;
6454         int err;
6455
6456         ret = cnt;
6457
6458         if (cnt > MAX_TRACER_SIZE)
6459                 cnt = MAX_TRACER_SIZE;
6460
6461         if (copy_from_user(buf, ubuf, cnt))
6462                 return -EFAULT;
6463
6464         buf[cnt] = 0;
6465
6466         /* strip ending whitespace. */
6467         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6468                 buf[i] = 0;
6469
6470         err = tracing_set_tracer(tr, buf);
6471         if (err)
6472                 return err;
6473
6474         *ppos += ret;
6475
6476         return ret;
6477 }
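
/*
 * Usage sketch (assuming the usual tracefs mount point and that these
 * handlers back the "current_tracer" file; not part of the original
 * source): a tracer is selected from user space with e.g.:
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 *
 * Trailing whitespace is stripped before the lookup, and an unknown
 * tracer name makes the write fail with -EINVAL.
 */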
6478
6479 static ssize_t
6480 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6481                    size_t cnt, loff_t *ppos)
6482 {
6483         char buf[64];
6484         int r;
6485
6486         r = snprintf(buf, sizeof(buf), "%ld\n",
6487                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6488         if (r > sizeof(buf))
6489                 r = sizeof(buf);
6490         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6491 }
6492
6493 static ssize_t
6494 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6495                     size_t cnt, loff_t *ppos)
6496 {
6497         unsigned long val;
6498         int ret;
6499
6500         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6501         if (ret)
6502                 return ret;
6503
6504         *ptr = val * 1000;
6505
6506         return cnt;
6507 }
6508
6509 static ssize_t
6510 tracing_thresh_read(struct file *filp, char __user *ubuf,
6511                     size_t cnt, loff_t *ppos)
6512 {
6513         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6514 }
6515
6516 static ssize_t
6517 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6518                      size_t cnt, loff_t *ppos)
6519 {
6520         struct trace_array *tr = filp->private_data;
6521         int ret;
6522
6523         mutex_lock(&trace_types_lock);
6524         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6525         if (ret < 0)
6526                 goto out;
6527
6528         if (tr->current_trace->update_thresh) {
6529                 ret = tr->current_trace->update_thresh(tr);
6530                 if (ret < 0)
6531                         goto out;
6532         }
6533
6534         ret = cnt;
6535 out:
6536         mutex_unlock(&trace_types_lock);
6537
6538         return ret;
6539 }
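
/*
 * Usage note (sketch; assumes tracing_thresh_fops below is wired to the
 * "tracing_thresh" tracefs file): values are written and read in
 * microseconds, while tracing_thresh itself is kept in nanoseconds by
 * tracing_nsecs_write()/tracing_nsecs_read() above:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh	(100 usecs)
 *	# cat /sys/kernel/tracing/tracing_thresh
 *	100
 */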
6540
6541 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6542
6543 static ssize_t
6544 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6545                      size_t cnt, loff_t *ppos)
6546 {
6547         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6548 }
6549
6550 static ssize_t
6551 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6552                       size_t cnt, loff_t *ppos)
6553 {
6554         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6555 }
6556
6557 #endif
6558
6559 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6560 {
6561         struct trace_array *tr = inode->i_private;
6562         struct trace_iterator *iter;
6563         int ret;
6564
6565         ret = tracing_check_open_get_tr(tr);
6566         if (ret)
6567                 return ret;
6568
6569         mutex_lock(&trace_types_lock);
6570
6571         /* create a buffer to store the information to pass to userspace */
6572         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6573         if (!iter) {
6574                 ret = -ENOMEM;
6575                 __trace_array_put(tr);
6576                 goto out;
6577         }
6578
6579         trace_seq_init(&iter->seq);
6580         iter->trace = tr->current_trace;
6581
6582         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6583                 ret = -ENOMEM;
6584                 goto fail;
6585         }
6586
6587         /* trace pipe does not show start of buffer */
6588         cpumask_setall(iter->started);
6589
6590         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6591                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6592
6593         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6594         if (trace_clocks[tr->clock_id].in_ns)
6595                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6596
6597         iter->tr = tr;
6598         iter->array_buffer = &tr->array_buffer;
6599         iter->cpu_file = tracing_get_cpu(inode);
6600         mutex_init(&iter->mutex);
6601         filp->private_data = iter;
6602
6603         if (iter->trace->pipe_open)
6604                 iter->trace->pipe_open(iter);
6605
6606         nonseekable_open(inode, filp);
6607
6608         tr->trace_ref++;
6609 out:
6610         mutex_unlock(&trace_types_lock);
6611         return ret;
6612
6613 fail:
6614         kfree(iter);
6615         __trace_array_put(tr);
6616         mutex_unlock(&trace_types_lock);
6617         return ret;
6618 }
6619
6620 static int tracing_release_pipe(struct inode *inode, struct file *file)
6621 {
6622         struct trace_iterator *iter = file->private_data;
6623         struct trace_array *tr = inode->i_private;
6624
6625         mutex_lock(&trace_types_lock);
6626
6627         tr->trace_ref--;
6628
6629         if (iter->trace->pipe_close)
6630                 iter->trace->pipe_close(iter);
6631
6632         mutex_unlock(&trace_types_lock);
6633
6634         free_cpumask_var(iter->started);
6635         mutex_destroy(&iter->mutex);
6636         kfree(iter);
6637
6638         trace_array_put(tr);
6639
6640         return 0;
6641 }
6642
6643 static __poll_t
6644 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6645 {
6646         struct trace_array *tr = iter->tr;
6647
6648         /* Iterators are static; they should be either filled or empty */
6649         if (trace_buffer_iter(iter, iter->cpu_file))
6650                 return EPOLLIN | EPOLLRDNORM;
6651
6652         if (tr->trace_flags & TRACE_ITER_BLOCK)
6653                 /*
6654                  * Always select as readable when in blocking mode
6655                  */
6656                 return EPOLLIN | EPOLLRDNORM;
6657         else
6658                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6659                                              filp, poll_table);
6660 }
6661
6662 static __poll_t
6663 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6664 {
6665         struct trace_iterator *iter = filp->private_data;
6666
6667         return trace_poll(iter, filp, poll_table);
6668 }
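
/*
 * User-space sketch (illustrative only; path and buffer handling
 * assumed): because trace_poll() is hooked up here, a reader of
 * trace_pipe can sleep in poll() instead of spinning on read():
 *
 *	struct pollfd pfd = {
 *		.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY),
 *		.events = POLLIN,
 *	};
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		n = read(pfd.fd, buf, sizeof(buf));
 *
 * With the TRACE_ITER_BLOCK option set, the file always reports readable.
 */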
6669
6670 /* Must be called with iter->mutex held. */
6671 static int tracing_wait_pipe(struct file *filp)
6672 {
6673         struct trace_iterator *iter = filp->private_data;
6674         int ret;
6675
6676         while (trace_empty(iter)) {
6677
6678                 if ((filp->f_flags & O_NONBLOCK)) {
6679                         return -EAGAIN;
6680                 }
6681
6682                 /*
6683                  * We block until we have read something and tracing is
6684                  * disabled. We still block while tracing is disabled if we
6685                  * have not read anything yet. This allows a user to cat this
6686                  * file and then enable tracing. But after we have read
6687                  * something, we give an EOF when tracing is disabled again.
6688                  *
6689                  * iter->pos will be 0 if we haven't read anything.
6690                  */
6691                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6692                         break;
6693
6694                 mutex_unlock(&iter->mutex);
6695
6696                 ret = wait_on_pipe(iter, 0);
6697
6698                 mutex_lock(&iter->mutex);
6699
6700                 if (ret)
6701                         return ret;
6702         }
6703
6704         return 1;
6705 }
6706
6707 /*
6708  * Consumer reader.
6709  */
6710 static ssize_t
6711 tracing_read_pipe(struct file *filp, char __user *ubuf,
6712                   size_t cnt, loff_t *ppos)
6713 {
6714         struct trace_iterator *iter = filp->private_data;
6715         ssize_t sret;
6716
6717         /*
6718          * Avoid more than one consumer on a single file descriptor.
6719          * This is just a matter of trace coherency; the ring buffer itself
6720          * is protected.
6721          */
6722         mutex_lock(&iter->mutex);
6723
6724         /* return any leftover data */
6725         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6726         if (sret != -EBUSY)
6727                 goto out;
6728
6729         trace_seq_init(&iter->seq);
6730
6731         if (iter->trace->read) {
6732                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6733                 if (sret)
6734                         goto out;
6735         }
6736
6737 waitagain:
6738         sret = tracing_wait_pipe(filp);
6739         if (sret <= 0)
6740                 goto out;
6741
6742         /* stop when tracing is finished */
6743         if (trace_empty(iter)) {
6744                 sret = 0;
6745                 goto out;
6746         }
6747
6748         if (cnt >= PAGE_SIZE)
6749                 cnt = PAGE_SIZE - 1;
6750
6751         /* reset all but tr, trace, and overruns */
6752         trace_iterator_reset(iter);
6753         cpumask_clear(iter->started);
6754         trace_seq_init(&iter->seq);
6755
6756         trace_event_read_lock();
6757         trace_access_lock(iter->cpu_file);
6758         while (trace_find_next_entry_inc(iter) != NULL) {
6759                 enum print_line_t ret;
6760                 int save_len = iter->seq.seq.len;
6761
6762                 ret = print_trace_line(iter);
6763                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6764                         /* don't print partial lines */
6765                         iter->seq.seq.len = save_len;
6766                         break;
6767                 }
6768                 if (ret != TRACE_TYPE_NO_CONSUME)
6769                         trace_consume(iter);
6770
6771                 if (trace_seq_used(&iter->seq) >= cnt)
6772                         break;
6773
6774                 /*
6775                  * Setting the full flag means we reached the trace_seq buffer
6776                  * size and we should have left via the partial-output condition
6777                  * above; one of the trace_seq_* functions is not being used properly.
6778                  */
6779                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6780                           iter->ent->type);
6781         }
6782         trace_access_unlock(iter->cpu_file);
6783         trace_event_read_unlock();
6784
6785         /* Now copy what we have to the user */
6786         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6787         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6788                 trace_seq_init(&iter->seq);
6789
6790         /*
6791          * If there was nothing to send to user, in spite of consuming trace
6792          * entries, go back to wait for more entries.
6793          */
6794         if (sret == -EBUSY)
6795                 goto waitagain;
6796
6797 out:
6798         mutex_unlock(&iter->mutex);
6799
6800         return sret;
6801 }
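
/*
 * Usage note (sketch; path assumed): trace_pipe is a consuming reader --
 * events returned by read() are removed from the ring buffer, and the
 * read blocks (unless O_NONBLOCK) while the buffer is empty and tracing
 * is still enabled:
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * Concurrent reads on the same file descriptor are serialized by
 * iter->mutex above, so only one consumer at a time makes progress.
 */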
6802
6803 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6804                                      unsigned int idx)
6805 {
6806         __free_page(spd->pages[idx]);
6807 }
6808
6809 static size_t
6810 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6811 {
6812         size_t count;
6813         int save_len;
6814         int ret;
6815
6816         /* Seq buffer is page-sized, exactly what we need. */
6817         for (;;) {
6818                 save_len = iter->seq.seq.len;
6819                 ret = print_trace_line(iter);
6820
6821                 if (trace_seq_has_overflowed(&iter->seq)) {
6822                         iter->seq.seq.len = save_len;
6823                         break;
6824                 }
6825
6826                 /*
6827                  * This should not be hit, because it should only
6828                  * be set if the iter->seq overflowed. But check it
6829                  * anyway to be safe.
6830                  */
6831                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6832                         iter->seq.seq.len = save_len;
6833                         break;
6834                 }
6835
6836                 count = trace_seq_used(&iter->seq) - save_len;
6837                 if (rem < count) {
6838                         rem = 0;
6839                         iter->seq.seq.len = save_len;
6840                         break;
6841                 }
6842
6843                 if (ret != TRACE_TYPE_NO_CONSUME)
6844                         trace_consume(iter);
6845                 rem -= count;
6846                 if (!trace_find_next_entry_inc(iter))   {
6847                         rem = 0;
6848                         iter->ent = NULL;
6849                         break;
6850                 }
6851         }
6852
6853         return rem;
6854 }
6855
6856 static ssize_t tracing_splice_read_pipe(struct file *filp,
6857                                         loff_t *ppos,
6858                                         struct pipe_inode_info *pipe,
6859                                         size_t len,
6860                                         unsigned int flags)
6861 {
6862         struct page *pages_def[PIPE_DEF_BUFFERS];
6863         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6864         struct trace_iterator *iter = filp->private_data;
6865         struct splice_pipe_desc spd = {
6866                 .pages          = pages_def,
6867                 .partial        = partial_def,
6868                 .nr_pages       = 0, /* This gets updated below. */
6869                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6870                 .ops            = &default_pipe_buf_ops,
6871                 .spd_release    = tracing_spd_release_pipe,
6872         };
6873         ssize_t ret;
6874         size_t rem;
6875         unsigned int i;
6876
6877         if (splice_grow_spd(pipe, &spd))
6878                 return -ENOMEM;
6879
6880         mutex_lock(&iter->mutex);
6881
6882         if (iter->trace->splice_read) {
6883                 ret = iter->trace->splice_read(iter, filp,
6884                                                ppos, pipe, len, flags);
6885                 if (ret)
6886                         goto out_err;
6887         }
6888
6889         ret = tracing_wait_pipe(filp);
6890         if (ret <= 0)
6891                 goto out_err;
6892
6893         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6894                 ret = -EFAULT;
6895                 goto out_err;
6896         }
6897
6898         trace_event_read_lock();
6899         trace_access_lock(iter->cpu_file);
6900
6901         /* Fill as many pages as possible. */
6902         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6903                 spd.pages[i] = alloc_page(GFP_KERNEL);
6904                 if (!spd.pages[i])
6905                         break;
6906
6907                 rem = tracing_fill_pipe_page(rem, iter);
6908
6909                 /* Copy the data into the page, so we can start over. */
6910                 ret = trace_seq_to_buffer(&iter->seq,
6911                                           page_address(spd.pages[i]),
6912                                           trace_seq_used(&iter->seq));
6913                 if (ret < 0) {
6914                         __free_page(spd.pages[i]);
6915                         break;
6916                 }
6917                 spd.partial[i].offset = 0;
6918                 spd.partial[i].len = trace_seq_used(&iter->seq);
6919
6920                 trace_seq_init(&iter->seq);
6921         }
6922
6923         trace_access_unlock(iter->cpu_file);
6924         trace_event_read_unlock();
6925         mutex_unlock(&iter->mutex);
6926
6927         spd.nr_pages = i;
6928
6929         if (i)
6930                 ret = splice_to_pipe(pipe, &spd);
6931         else
6932                 ret = 0;
6933 out:
6934         splice_shrink_spd(&spd);
6935         return ret;
6936
6937 out_err:
6938         mutex_unlock(&iter->mutex);
6939         goto out;
6940 }
6941
6942 static ssize_t
6943 tracing_entries_read(struct file *filp, char __user *ubuf,
6944                      size_t cnt, loff_t *ppos)
6945 {
6946         struct inode *inode = file_inode(filp);
6947         struct trace_array *tr = inode->i_private;
6948         int cpu = tracing_get_cpu(inode);
6949         char buf[64];
6950         int r = 0;
6951         ssize_t ret;
6952
6953         mutex_lock(&trace_types_lock);
6954
6955         if (cpu == RING_BUFFER_ALL_CPUS) {
6956                 int cpu, buf_size_same;
6957                 unsigned long size;
6958
6959                 size = 0;
6960                 buf_size_same = 1;
6961                 /* check if all cpu sizes are the same */
6962                 for_each_tracing_cpu(cpu) {
6963                         /* fill in the size from first enabled cpu */
6964                         if (size == 0)
6965                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6966                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6967                                 buf_size_same = 0;
6968                                 break;
6969                         }
6970                 }
6971
6972                 if (buf_size_same) {
6973                         if (!ring_buffer_expanded)
6974                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6975                                             size >> 10,
6976                                             trace_buf_size >> 10);
6977                         else
6978                                 r = sprintf(buf, "%lu\n", size >> 10);
6979                 } else
6980                         r = sprintf(buf, "X\n");
6981         } else
6982                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6983
6984         mutex_unlock(&trace_types_lock);
6985
6986         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6987         return ret;
6988 }
6989
6990 static ssize_t
6991 tracing_entries_write(struct file *filp, const char __user *ubuf,
6992                       size_t cnt, loff_t *ppos)
6993 {
6994         struct inode *inode = file_inode(filp);
6995         struct trace_array *tr = inode->i_private;
6996         unsigned long val;
6997         int ret;
6998
6999         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7000         if (ret)
7001                 return ret;
7002
7003         /* must have at least 1 entry */
7004         if (!val)
7005                 return -EINVAL;
7006
7007         /* value is in KB */
7008         val <<= 10;
7009         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7010         if (ret < 0)
7011                 return ret;
7012
7013         *ppos += cnt;
7014
7015         return cnt;
7016 }
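
/*
 * Usage sketch (assuming these handlers back the "buffer_size_kb"
 * tracefs file): the value is a per-CPU size in kilobytes (val <<= 10
 * above converts KB to bytes), e.g.:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	4096
 *
 * Reading back prints "X" when the per-CPU buffers have different sizes.
 */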
7017
7018 static ssize_t
7019 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7020                                 size_t cnt, loff_t *ppos)
7021 {
7022         struct trace_array *tr = filp->private_data;
7023         char buf[64];
7024         int r, cpu;
7025         unsigned long size = 0, expanded_size = 0;
7026
7027         mutex_lock(&trace_types_lock);
7028         for_each_tracing_cpu(cpu) {
7029                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7030                 if (!ring_buffer_expanded)
7031                         expanded_size += trace_buf_size >> 10;
7032         }
7033         if (ring_buffer_expanded)
7034                 r = sprintf(buf, "%lu\n", size);
7035         else
7036                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7037         mutex_unlock(&trace_types_lock);
7038
7039         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7040 }
7041
7042 static ssize_t
7043 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7044                           size_t cnt, loff_t *ppos)
7045 {
7046         /*
7047          * There is no need to read what the user has written; this function
7048          * exists just so that "echo" into this file does not return an error.
7049          */
7050
7051         *ppos += cnt;
7052
7053         return cnt;
7054 }
7055
7056 static int
7057 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7058 {
7059         struct trace_array *tr = inode->i_private;
7060
7061         /* disable tracing ? */
7062         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7063                 tracer_tracing_off(tr);
7064         /* resize the ring buffer to 0 */
7065         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7066
7067         trace_array_put(tr);
7068
7069         return 0;
7070 }
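
/*
 * Behavioural note (sketch; file name "free_buffer" assumed): the write
 * handler above accepts and ignores any data; the real work happens on
 * release, where the ring buffer is resized to zero and, if
 * TRACE_ITER_STOP_ON_FREE is set, tracing is turned off first:
 *
 *	# echo > /sys/kernel/tracing/free_buffer
 */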
7071
7072 static ssize_t
7073 tracing_mark_write(struct file *filp, const char __user *ubuf,
7074                                         size_t cnt, loff_t *fpos)
7075 {
7076         struct trace_array *tr = filp->private_data;
7077         struct ring_buffer_event *event;
7078         enum event_trigger_type tt = ETT_NONE;
7079         struct trace_buffer *buffer;
7080         struct print_entry *entry;
7081         ssize_t written;
7082         int size;
7083         int len;
7084
7085 /* Used in tracing_mark_raw_write() as well */
7086 #define FAULTED_STR "<faulted>"
7087 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7088
7089         if (tracing_disabled)
7090                 return -EINVAL;
7091
7092         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7093                 return -EINVAL;
7094
7095         if (cnt > TRACE_BUF_SIZE)
7096                 cnt = TRACE_BUF_SIZE;
7097
7098         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7099
7100         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7101
7102         /* If less than "<faulted>", then make sure we can still add that */
7103         if (cnt < FAULTED_SIZE)
7104                 size += FAULTED_SIZE - cnt;
7105
7106         buffer = tr->array_buffer.buffer;
7107         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7108                                             tracing_gen_ctx());
7109         if (unlikely(!event))
7110                 /* Ring buffer disabled, return as if not open for write */
7111                 return -EBADF;
7112
7113         entry = ring_buffer_event_data(event);
7114         entry->ip = _THIS_IP_;
7115
7116         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7117         if (len) {
7118                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7119                 cnt = FAULTED_SIZE;
7120                 written = -EFAULT;
7121         } else
7122                 written = cnt;
7123
7124         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7125                 /* do not add \n before testing triggers, but add \0 */
7126                 entry->buf[cnt] = '\0';
7127                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7128         }
7129
7130         if (entry->buf[cnt - 1] != '\n') {
7131                 entry->buf[cnt] = '\n';
7132                 entry->buf[cnt + 1] = '\0';
7133         } else
7134                 entry->buf[cnt] = '\0';
7135
7136         if (static_branch_unlikely(&trace_marker_exports_enabled))
7137                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7138         __buffer_unlock_commit(buffer, event);
7139
7140         if (tt)
7141                 event_triggers_post_call(tr->trace_marker_file, tt);
7142
7143         return written;
7144 }
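
/*
 * Usage sketch (assuming tracing_mark_fops below backs the standard
 * "trace_marker" file): user space can inject annotations into the
 * trace stream with a plain write:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * Writes longer than TRACE_BUF_SIZE are truncated, and a faulted copy
 * is recorded as "<faulted>" with -EFAULT returned to the writer.
 */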
7145
7146 /* Limit it for now to 3K (including tag) */
7147 #define RAW_DATA_MAX_SIZE (1024*3)
7148
7149 static ssize_t
7150 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7151                                         size_t cnt, loff_t *fpos)
7152 {
7153         struct trace_array *tr = filp->private_data;
7154         struct ring_buffer_event *event;
7155         struct trace_buffer *buffer;
7156         struct raw_data_entry *entry;
7157         ssize_t written;
7158         int size;
7159         int len;
7160
7161 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7162
7163         if (tracing_disabled)
7164                 return -EINVAL;
7165
7166         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7167                 return -EINVAL;
7168
7169         /* The marker must at least have a tag id */
7170         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7171                 return -EINVAL;
7172
7173         if (cnt > TRACE_BUF_SIZE)
7174                 cnt = TRACE_BUF_SIZE;
7175
7176         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7177
7178         size = sizeof(*entry) + cnt;
7179         if (cnt < FAULT_SIZE_ID)
7180                 size += FAULT_SIZE_ID - cnt;
7181
7182         buffer = tr->array_buffer.buffer;
7183         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7184                                             tracing_gen_ctx());
7185         if (!event)
7186                 /* Ring buffer disabled, return as if not open for write */
7187                 return -EBADF;
7188
7189         entry = ring_buffer_event_data(event);
7190
7191         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7192         if (len) {
7193                 entry->id = -1;
7194                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7195                 written = -EFAULT;
7196         } else
7197                 written = cnt;
7198
7199         __buffer_unlock_commit(buffer, event);
7200
7201         return written;
7202 }
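
/*
 * User-space sketch (illustrative; layout inferred from the code above):
 * trace_marker_raw expects a leading integer tag id followed by the raw
 * payload, all in a single write():
 *
 *	struct { int id; char payload[8]; } rec = { .id = 42, .payload = "raw" };
 *	// fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY)
 *	write(fd, &rec, sizeof(rec));
 *
 * Writes shorter than sizeof(int) or larger than RAW_DATA_MAX_SIZE are
 * rejected with -EINVAL.
 */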
7203
7204 static int tracing_clock_show(struct seq_file *m, void *v)
7205 {
7206         struct trace_array *tr = m->private;
7207         int i;
7208
7209         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7210                 seq_printf(m,
7211                         "%s%s%s%s", i ? " " : "",
7212                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7213                         i == tr->clock_id ? "]" : "");
7214         seq_putc(m, '\n');
7215
7216         return 0;
7217 }
7218
7219 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7220 {
7221         int i;
7222
7223         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7224                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7225                         break;
7226         }
7227         if (i == ARRAY_SIZE(trace_clocks))
7228                 return -EINVAL;
7229
7230         mutex_lock(&trace_types_lock);
7231
7232         tr->clock_id = i;
7233
7234         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7235
7236         /*
7237          * New clock may not be consistent with the previous clock.
7238          * Reset the buffer so that it doesn't have incomparable timestamps.
7239          */
7240         tracing_reset_online_cpus(&tr->array_buffer);
7241
7242 #ifdef CONFIG_TRACER_MAX_TRACE
7243         if (tr->max_buffer.buffer)
7244                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7245         tracing_reset_online_cpus(&tr->max_buffer);
7246 #endif
7247
7248         mutex_unlock(&trace_types_lock);
7249
7250         return 0;
7251 }
7252
7253 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7254                                    size_t cnt, loff_t *fpos)
7255 {
7256         struct seq_file *m = filp->private_data;
7257         struct trace_array *tr = m->private;
7258         char buf[64];
7259         const char *clockstr;
7260         int ret;
7261
7262         if (cnt >= sizeof(buf))
7263                 return -EINVAL;
7264
7265         if (copy_from_user(buf, ubuf, cnt))
7266                 return -EFAULT;
7267
7268         buf[cnt] = 0;
7269
7270         clockstr = strstrip(buf);
7271
7272         ret = tracing_set_clock(tr, clockstr);
7273         if (ret)
7274                 return ret;
7275
7276         *fpos += cnt;
7277
7278         return cnt;
7279 }
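
/*
 * Usage sketch (file name "trace_clock" assumed; exact clock list varies
 * by kernel and architecture): tracing_clock_show() prints all clocks
 * with the current one in brackets, and a clock is selected by writing
 * its name:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo mono > /sys/kernel/tracing/trace_clock
 *
 * Note that tracing_set_clock() resets the per-CPU buffers, so existing
 * trace data is discarded when the clock changes.
 */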
7280
7281 static int tracing_clock_open(struct inode *inode, struct file *file)
7282 {
7283         struct trace_array *tr = inode->i_private;
7284         int ret;
7285
7286         ret = tracing_check_open_get_tr(tr);
7287         if (ret)
7288                 return ret;
7289
7290         ret = single_open(file, tracing_clock_show, inode->i_private);
7291         if (ret < 0)
7292                 trace_array_put(tr);
7293
7294         return ret;
7295 }
7296
7297 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7298 {
7299         struct trace_array *tr = m->private;
7300
7301         mutex_lock(&trace_types_lock);
7302
7303         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7304                 seq_puts(m, "delta [absolute]\n");
7305         else
7306                 seq_puts(m, "[delta] absolute\n");
7307
7308         mutex_unlock(&trace_types_lock);
7309
7310         return 0;
7311 }
7312
7313 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7314 {
7315         struct trace_array *tr = inode->i_private;
7316         int ret;
7317
7318         ret = tracing_check_open_get_tr(tr);
7319         if (ret)
7320                 return ret;
7321
7322         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7323         if (ret < 0)
7324                 trace_array_put(tr);
7325
7326         return ret;
7327 }
7328
7329 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7330 {
7331         if (rbe == this_cpu_read(trace_buffered_event))
7332                 return ring_buffer_time_stamp(buffer);
7333
7334         return ring_buffer_event_time_stamp(buffer, rbe);
7335 }
7336
7337 /*
7338  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7339  */
7340 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7341 {
7342         int ret = 0;
7343
7344         mutex_lock(&trace_types_lock);
7345
7346         if (set && tr->no_filter_buffering_ref++)
7347                 goto out;
7348
7349         if (!set) {
7350                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7351                         ret = -EINVAL;
7352                         goto out;
7353                 }
7354
7355                 --tr->no_filter_buffering_ref;
7356         }
7357  out:
7358         mutex_unlock(&trace_types_lock);
7359
7360         return ret;
7361 }
7362
7363 struct ftrace_buffer_info {
7364         struct trace_iterator   iter;
7365         void                    *spare;
7366         unsigned int            spare_cpu;
7367         unsigned int            read;
7368 };
7369
7370 #ifdef CONFIG_TRACER_SNAPSHOT
7371 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7372 {
7373         struct trace_array *tr = inode->i_private;
7374         struct trace_iterator *iter;
7375         struct seq_file *m;
7376         int ret;
7377
7378         ret = tracing_check_open_get_tr(tr);
7379         if (ret)
7380                 return ret;
7381
7382         if (file->f_mode & FMODE_READ) {
7383                 iter = __tracing_open(inode, file, true);
7384                 if (IS_ERR(iter))
7385                         ret = PTR_ERR(iter);
7386         } else {
7387                 /* Writes still need the seq_file to hold the private data */
7388                 ret = -ENOMEM;
7389                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7390                 if (!m)
7391                         goto out;
7392                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7393                 if (!iter) {
7394                         kfree(m);
7395                         goto out;
7396                 }
7397                 ret = 0;
7398
7399                 iter->tr = tr;
7400                 iter->array_buffer = &tr->max_buffer;
7401                 iter->cpu_file = tracing_get_cpu(inode);
7402                 m->private = iter;
7403                 file->private_data = m;
7404         }
7405 out:
7406         if (ret < 0)
7407                 trace_array_put(tr);
7408
7409         return ret;
7410 }
7411
7412 static ssize_t
7413 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7414                        loff_t *ppos)
7415 {
7416         struct seq_file *m = filp->private_data;
7417         struct trace_iterator *iter = m->private;
7418         struct trace_array *tr = iter->tr;
7419         unsigned long val;
7420         int ret;
7421
7422         ret = tracing_update_buffers();
7423         if (ret < 0)
7424                 return ret;
7425
7426         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7427         if (ret)
7428                 return ret;
7429
7430         mutex_lock(&trace_types_lock);
7431
7432         if (tr->current_trace->use_max_tr) {
7433                 ret = -EBUSY;
7434                 goto out;
7435         }
7436
7437         arch_spin_lock(&tr->max_lock);
7438         if (tr->cond_snapshot)
7439                 ret = -EBUSY;
7440         arch_spin_unlock(&tr->max_lock);
7441         if (ret)
7442                 goto out;
7443
7444         switch (val) {
7445         case 0:
7446                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7447                         ret = -EINVAL;
7448                         break;
7449                 }
7450                 if (tr->allocated_snapshot)
7451                         free_snapshot(tr);
7452                 break;
7453         case 1:
7454 /* Only allow per-cpu swap if the ring buffer supports it */
7455 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7456                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7457                         ret = -EINVAL;
7458                         break;
7459                 }
7460 #endif
7461                 if (tr->allocated_snapshot)
7462                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7463                                         &tr->array_buffer, iter->cpu_file);
7464                 else
7465                         ret = tracing_alloc_snapshot_instance(tr);
7466                 if (ret < 0)
7467                         break;
7468                 local_irq_disable();
7469                 /* Now, we're going to swap */
7470                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7471                         update_max_tr(tr, current, smp_processor_id(), NULL);
7472                 else
7473                         update_max_tr_single(tr, current, iter->cpu_file);
7474                 local_irq_enable();
7475                 break;
7476         default:
7477                 if (tr->allocated_snapshot) {
7478                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7479                                 tracing_reset_online_cpus(&tr->max_buffer);
7480                         else
7481                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7482                 }
7483                 break;
7484         }
7485
7486         if (ret >= 0) {
7487                 *ppos += cnt;
7488                 ret = cnt;
7489         }
7490 out:
7491         mutex_unlock(&trace_types_lock);
7492         return ret;
7493 }
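
/*
 * Usage sketch (CONFIG_TRACER_SNAPSHOT; file name "snapshot" assumed):
 * the value written selects the action taken by the switch above:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot	(allocate if needed and swap)
 *	# cat /sys/kernel/tracing/snapshot	(read the saved buffer)
 *	# echo 0 > /sys/kernel/tracing/snapshot	(free the snapshot buffer)
 *
 * Any other value clears the snapshot buffer without freeing it.
 */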
7494
7495 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7496 {
7497         struct seq_file *m = file->private_data;
7498         int ret;
7499
7500         ret = tracing_release(inode, file);
7501
7502         if (file->f_mode & FMODE_READ)
7503                 return ret;
7504
7505         /* If write only, the seq_file is just a stub */
7506         if (m)
7507                 kfree(m->private);
7508         kfree(m);
7509
7510         return 0;
7511 }
7512
7513 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7514 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7515                                     size_t count, loff_t *ppos);
7516 static int tracing_buffers_release(struct inode *inode, struct file *file);
7517 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7518                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7519
7520 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7521 {
7522         struct ftrace_buffer_info *info;
7523         int ret;
7524
7525         /* The following checks for tracefs lockdown */
7526         ret = tracing_buffers_open(inode, filp);
7527         if (ret < 0)
7528                 return ret;
7529
7530         info = filp->private_data;
7531
7532         if (info->iter.trace->use_max_tr) {
7533                 tracing_buffers_release(inode, filp);
7534                 return -EBUSY;
7535         }
7536
7537         info->iter.snapshot = true;
7538         info->iter.array_buffer = &info->iter.tr->max_buffer;
7539
7540         return ret;
7541 }
7542
7543 #endif /* CONFIG_TRACER_SNAPSHOT */
7544
7545
7546 static const struct file_operations tracing_thresh_fops = {
7547         .open           = tracing_open_generic,
7548         .read           = tracing_thresh_read,
7549         .write          = tracing_thresh_write,
7550         .llseek         = generic_file_llseek,
7551 };
7552
7553 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7554 static const struct file_operations tracing_max_lat_fops = {
7555         .open           = tracing_open_generic,
7556         .read           = tracing_max_lat_read,
7557         .write          = tracing_max_lat_write,
7558         .llseek         = generic_file_llseek,
7559 };
7560 #endif
7561
7562 static const struct file_operations set_tracer_fops = {
7563         .open           = tracing_open_generic,
7564         .read           = tracing_set_trace_read,
7565         .write          = tracing_set_trace_write,
7566         .llseek         = generic_file_llseek,
7567 };
7568
7569 static const struct file_operations tracing_pipe_fops = {
7570         .open           = tracing_open_pipe,
7571         .poll           = tracing_poll_pipe,
7572         .read           = tracing_read_pipe,
7573         .splice_read    = tracing_splice_read_pipe,
7574         .release        = tracing_release_pipe,
7575         .llseek         = no_llseek,
7576 };
7577
7578 static const struct file_operations tracing_entries_fops = {
7579         .open           = tracing_open_generic_tr,
7580         .read           = tracing_entries_read,
7581         .write          = tracing_entries_write,
7582         .llseek         = generic_file_llseek,
7583         .release        = tracing_release_generic_tr,
7584 };
7585
7586 static const struct file_operations tracing_total_entries_fops = {
7587         .open           = tracing_open_generic_tr,
7588         .read           = tracing_total_entries_read,
7589         .llseek         = generic_file_llseek,
7590         .release        = tracing_release_generic_tr,
7591 };
7592
7593 static const struct file_operations tracing_free_buffer_fops = {
7594         .open           = tracing_open_generic_tr,
7595         .write          = tracing_free_buffer_write,
7596         .release        = tracing_free_buffer_release,
7597 };
7598
7599 static const struct file_operations tracing_mark_fops = {
7600         .open           = tracing_mark_open,
7601         .write          = tracing_mark_write,
7602         .release        = tracing_release_generic_tr,
7603 };
7604
7605 static const struct file_operations tracing_mark_raw_fops = {
7606         .open           = tracing_mark_open,
7607         .write          = tracing_mark_raw_write,
7608         .release        = tracing_release_generic_tr,
7609 };
7610
7611 static const struct file_operations trace_clock_fops = {
7612         .open           = tracing_clock_open,
7613         .read           = seq_read,
7614         .llseek         = seq_lseek,
7615         .release        = tracing_single_release_tr,
7616         .write          = tracing_clock_write,
7617 };
7618
7619 static const struct file_operations trace_time_stamp_mode_fops = {
7620         .open           = tracing_time_stamp_mode_open,
7621         .read           = seq_read,
7622         .llseek         = seq_lseek,
7623         .release        = tracing_single_release_tr,
7624 };
7625
7626 #ifdef CONFIG_TRACER_SNAPSHOT
7627 static const struct file_operations snapshot_fops = {
7628         .open           = tracing_snapshot_open,
7629         .read           = seq_read,
7630         .write          = tracing_snapshot_write,
7631         .llseek         = tracing_lseek,
7632         .release        = tracing_snapshot_release,
7633 };
7634
7635 static const struct file_operations snapshot_raw_fops = {
7636         .open           = snapshot_raw_open,
7637         .read           = tracing_buffers_read,
7638         .release        = tracing_buffers_release,
7639         .splice_read    = tracing_buffers_splice_read,
7640         .llseek         = no_llseek,
7641 };
7642
7643 #endif /* CONFIG_TRACER_SNAPSHOT */
7644
7645 /*
7646  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7647  * @filp: The active open file structure
7648  * @ubuf: The user space provided buffer containing the value to write
7649  * @cnt: The number of bytes to write
7650  * @ppos: The current "file" position
7651  *
7652  * This function implements the write interface for a struct trace_min_max_param.
7653  * The filp->private_data must point to a trace_min_max_param structure that
7654  * defines where to write the value, the min and the max acceptable values,
7655  * and a lock to protect the write.
7656  */
7657 static ssize_t
7658 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7659 {
7660         struct trace_min_max_param *param = filp->private_data;
7661         u64 val;
7662         int err;
7663
7664         if (!param)
7665                 return -EFAULT;
7666
7667         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7668         if (err)
7669                 return err;
7670
7671         if (param->lock)
7672                 mutex_lock(param->lock);
7673
7674         if (param->min && val < *param->min)
7675                 err = -EINVAL;
7676
7677         if (param->max && val > *param->max)
7678                 err = -EINVAL;
7679
7680         if (!err)
7681                 *param->val = val;
7682
7683         if (param->lock)
7684                 mutex_unlock(param->lock);
7685
7686         if (err)
7687                 return err;
7688
7689         return cnt;
7690 }
7691
7692 /*
7693  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7694  * @filp: The active open file structure
7695  * @ubuf: The userspace provided buffer to read value into
7696  * @cnt: The maximum number of bytes to read
7697  * @ppos: The current "file" position
7698  *
7699  * This function implements the read interface for a struct trace_min_max_param.
7700  * The filp->private_data must point to a trace_min_max_param struct with valid
7701  * data.
7702  */
7703 static ssize_t
7704 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7705 {
7706         struct trace_min_max_param *param = filp->private_data;
7707         char buf[U64_STR_SIZE];
7708         int len;
7709         u64 val;
7710
7711         if (!param)
7712                 return -EFAULT;
7713
7714         val = *param->val;
7715
7716         if (cnt > sizeof(buf))
7717                 cnt = sizeof(buf);
7718
7719         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7720
7721         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7722 }
7723
7724 const struct file_operations trace_min_max_fops = {
7725         .open           = tracing_open_generic,
7726         .read           = trace_min_max_read,
7727         .write          = trace_min_max_write,
7728 };
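
/*
 * Illustrative sketch (hypothetical names, not from this file): a user
 * of trace_min_max_fops wires a u64 variable and optional bounds into a
 * tracefs file roughly like this:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,	// optional
 *		.val	= &my_val,
 *		.min	= &my_min,	// optional lower bound
 *		.max	= &my_max,	// optional upper bound
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */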
7729
7730 #define TRACING_LOG_ERRS_MAX    8
7731 #define TRACING_LOG_LOC_MAX     128
7732
7733 #define CMD_PREFIX "  Command: "
7734
7735 struct err_info {
7736         const char      **errs; /* ptr to loc-specific array of err strings */
7737         u8              type;   /* index into errs -> specific err string */
7738         u16             pos;    /* caret position */
7739         u64             ts;
7740 };
7741
7742 struct tracing_log_err {
7743         struct list_head        list;
7744         struct err_info         info;
7745         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7746         char                    *cmd;                     /* what caused err */
7747 };
7748
7749 static DEFINE_MUTEX(tracing_err_log_lock);
7750
7751 static struct tracing_log_err *alloc_tracing_log_err(int len)
7752 {
7753         struct tracing_log_err *err;
7754
7755         err = kzalloc(sizeof(*err), GFP_KERNEL);
7756         if (!err)
7757                 return ERR_PTR(-ENOMEM);
7758
7759         err->cmd = kzalloc(len, GFP_KERNEL);
7760         if (!err->cmd) {
7761                 kfree(err);
7762                 return ERR_PTR(-ENOMEM);
7763         }
7764
7765         return err;
7766 }
7767
7768 static void free_tracing_log_err(struct tracing_log_err *err)
7769 {
7770         kfree(err->cmd);
7771         kfree(err);
7772 }
7773
7774 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7775                                                    int len)
7776 {
7777         struct tracing_log_err *err;
7778
7779         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7780                 err = alloc_tracing_log_err(len);
7781                 if (PTR_ERR(err) != -ENOMEM)
7782                         tr->n_err_log_entries++;
7783
7784                 return err;
7785         }
7786
7787         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7788         kfree(err->cmd);
7789         err->cmd = kzalloc(len, GFP_KERNEL);
7790         if (!err->cmd)
7791                 return ERR_PTR(-ENOMEM);
7792         list_del(&err->list);
7793
7794         return err;
7795 }
7796
7797 /**
7798  * err_pos - find the position of a string within a command for error careting
7799  * @cmd: The tracing command that caused the error
7800  * @str: The string to position the caret at within @cmd
7801  *
7802  * Finds the position of the first occurrence of @str within @cmd.  The
7803  * return value can be passed to tracing_log_err() for caret placement
7804  * within @cmd.
7805  *
7806  * Returns the index within @cmd of the first occurrence of @str or 0
7807  * if @str was not found.
7808  */
7809 unsigned int err_pos(char *cmd, const char *str)
7810 {
7811         char *found;
7812
7813         if (WARN_ON(!strlen(cmd)))
7814                 return 0;
7815
7816         found = strstr(cmd, str);
7817         if (found)
7818                 return found - cmd;
7819
7820         return 0;
7821 }
7822
7823 /**
7824  * tracing_log_err - write an error to the tracing error log
7825  * @tr: The associated trace array for the error (NULL for top level array)
7826  * @loc: A string describing where the error occurred
7827  * @cmd: The tracing command that caused the error
7828  * @errs: The array of loc-specific static error strings
7829  * @type: The index into errs[], which produces the specific static err string
7830  * @pos: The position the caret should be placed in the cmd
7831  *
7832  * Writes an error into tracing/error_log of the form:
7833  *
7834  * <loc>: error: <text>
7835  *   Command: <cmd>
7836  *              ^
7837  *
7838  * tracing/error_log is a small log file containing the last
7839  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7840  * unless there has been a tracing error, and the error log can be
7841  * cleared and have its memory freed by writing the empty string in
7842  * truncation mode to it, i.e. echo > tracing/error_log.
7843  *
7844  * NOTE: the @errs array along with the @type param are used to
7845  * produce a static error string - this string is not copied and saved
7846  * when the error is logged - only a pointer to it is saved.  See
7847  * existing callers for examples of how static strings are typically
7848  * defined for use with tracing_log_err().
7849  */
7850 void tracing_log_err(struct trace_array *tr,
7851                      const char *loc, const char *cmd,
7852                      const char **errs, u8 type, u16 pos)
7853 {
7854         struct tracing_log_err *err;
7855         int len = 0;
7856
7857         if (!tr)
7858                 tr = &global_trace;
7859
7860         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7861
7862         mutex_lock(&tracing_err_log_lock);
7863         err = get_tracing_log_err(tr, len);
7864         if (PTR_ERR(err) == -ENOMEM) {
7865                 mutex_unlock(&tracing_err_log_lock);
7866                 return;
7867         }
7868
7869         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7870         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7871
7872         err->info.errs = errs;
7873         err->info.type = type;
7874         err->info.pos = pos;
7875         err->info.ts = local_clock();
7876
7877         list_add_tail(&err->list, &tr->err_log);
7878         mutex_unlock(&tracing_err_log_lock);
7879 }
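
/*
 * Illustrative sketch (not part of this file): a caller keeps a static
 * array of error strings and logs one of them by index, using err_pos()
 * to place the caret under the offending token.  The names my_cmd_errs,
 * MY_ERR_FIELD and field_name below are made up for the example:
 *
 *	static const char *my_cmd_errs[] = {
 *		"Field not found",
 *		"Too many arguments",
 *	};
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs,
 *			MY_ERR_FIELD, err_pos(cmd, field_name));
 *
 * Only a pointer to the chosen string is kept, which is why the @errs
 * strings must live in static storage.
 */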
7880
7881 static void clear_tracing_err_log(struct trace_array *tr)
7882 {
7883         struct tracing_log_err *err, *next;
7884
7885         mutex_lock(&tracing_err_log_lock);
7886         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7887                 list_del(&err->list);
7888                 free_tracing_log_err(err);
7889         }
7890
7891         tr->n_err_log_entries = 0;
7892         mutex_unlock(&tracing_err_log_lock);
7893 }
7894
7895 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7896 {
7897         struct trace_array *tr = m->private;
7898
7899         mutex_lock(&tracing_err_log_lock);
7900
7901         return seq_list_start(&tr->err_log, *pos);
7902 }
7903
7904 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7905 {
7906         struct trace_array *tr = m->private;
7907
7908         return seq_list_next(v, &tr->err_log, pos);
7909 }
7910
7911 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7912 {
7913         mutex_unlock(&tracing_err_log_lock);
7914 }
7915
7916 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7917 {
7918         u16 i;
7919
7920         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7921                 seq_putc(m, ' ');
7922         for (i = 0; i < pos; i++)
7923                 seq_putc(m, ' ');
7924         seq_puts(m, "^\n");
7925 }
7926
7927 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7928 {
7929         struct tracing_log_err *err = v;
7930
7931         if (err) {
7932                 const char *err_text = err->info.errs[err->info.type];
7933                 u64 sec = err->info.ts;
7934                 u32 nsec;
7935
7936                 nsec = do_div(sec, NSEC_PER_SEC);
7937                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7938                            err->loc, err_text);
7939                 seq_printf(m, "%s", err->cmd);
7940                 tracing_err_log_show_pos(m, err->info.pos);
7941         }
7942
7943         return 0;
7944 }
7945
7946 static const struct seq_operations tracing_err_log_seq_ops = {
7947         .start  = tracing_err_log_seq_start,
7948         .next   = tracing_err_log_seq_next,
7949         .stop   = tracing_err_log_seq_stop,
7950         .show   = tracing_err_log_seq_show
7951 };
7952
7953 static int tracing_err_log_open(struct inode *inode, struct file *file)
7954 {
7955         struct trace_array *tr = inode->i_private;
7956         int ret = 0;
7957
7958         ret = tracing_check_open_get_tr(tr);
7959         if (ret)
7960                 return ret;
7961
7962         /* If this file was opened for write, then erase contents */
7963         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7964                 clear_tracing_err_log(tr);
7965
7966         if (file->f_mode & FMODE_READ) {
7967                 ret = seq_open(file, &tracing_err_log_seq_ops);
7968                 if (!ret) {
7969                         struct seq_file *m = file->private_data;
7970                         m->private = tr;
7971                 } else {
7972                         trace_array_put(tr);
7973                 }
7974         }
7975         return ret;
7976 }
7977
7978 static ssize_t tracing_err_log_write(struct file *file,
7979                                      const char __user *buffer,
7980                                      size_t count, loff_t *ppos)
7981 {
7982         return count;
7983 }
7984
7985 static int tracing_err_log_release(struct inode *inode, struct file *file)
7986 {
7987         struct trace_array *tr = inode->i_private;
7988
7989         trace_array_put(tr);
7990
7991         if (file->f_mode & FMODE_READ)
7992                 seq_release(inode, file);
7993
7994         return 0;
7995 }
7996
7997 static const struct file_operations tracing_err_log_fops = {
7998         .open           = tracing_err_log_open,
7999         .write          = tracing_err_log_write,
8000         .read           = seq_read,
8001         .llseek         = seq_lseek,
8002         .release        = tracing_err_log_release,
8003 };
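
/*
 * From user space the error log is read and cleared through the tracefs
 * file backed by these fops, e.g. (sketch):
 *
 *	# cat /sys/kernel/tracing/error_log
 *	# echo > /sys/kernel/tracing/error_log
 *
 * Note that tracing_err_log_write() deliberately accepts and discards any
 * data; clearing happens via the O_TRUNC check in tracing_err_log_open().
 */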
8004
8005 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8006 {
8007         struct trace_array *tr = inode->i_private;
8008         struct ftrace_buffer_info *info;
8009         int ret;
8010
8011         ret = tracing_check_open_get_tr(tr);
8012         if (ret)
8013                 return ret;
8014
8015         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8016         if (!info) {
8017                 trace_array_put(tr);
8018                 return -ENOMEM;
8019         }
8020
8021         mutex_lock(&trace_types_lock);
8022
8023         info->iter.tr           = tr;
8024         info->iter.cpu_file     = tracing_get_cpu(inode);
8025         info->iter.trace        = tr->current_trace;
8026         info->iter.array_buffer = &tr->array_buffer;
8027         info->spare             = NULL;
8028         /* Force reading ring buffer for first read */
8029         info->read              = (unsigned int)-1;
8030
8031         filp->private_data = info;
8032
8033         tr->trace_ref++;
8034
8035         mutex_unlock(&trace_types_lock);
8036
8037         ret = nonseekable_open(inode, filp);
8038         if (ret < 0)
8039                 trace_array_put(tr);
8040
8041         return ret;
8042 }
8043
8044 static __poll_t
8045 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8046 {
8047         struct ftrace_buffer_info *info = filp->private_data;
8048         struct trace_iterator *iter = &info->iter;
8049
8050         return trace_poll(iter, filp, poll_table);
8051 }
8052
8053 static ssize_t
8054 tracing_buffers_read(struct file *filp, char __user *ubuf,
8055                      size_t count, loff_t *ppos)
8056 {
8057         struct ftrace_buffer_info *info = filp->private_data;
8058         struct trace_iterator *iter = &info->iter;
8059         ssize_t ret = 0;
8060         ssize_t size;
8061
8062         if (!count)
8063                 return 0;
8064
8065 #ifdef CONFIG_TRACER_MAX_TRACE
8066         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8067                 return -EBUSY;
8068 #endif
8069
8070         if (!info->spare) {
8071                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8072                                                           iter->cpu_file);
8073                 if (IS_ERR(info->spare)) {
8074                         ret = PTR_ERR(info->spare);
8075                         info->spare = NULL;
8076                 } else {
8077                         info->spare_cpu = iter->cpu_file;
8078                 }
8079         }
8080         if (!info->spare)
8081                 return ret;
8082
8083         /* Do we have previous read data to read? */
8084         if (info->read < PAGE_SIZE)
8085                 goto read;
8086
8087  again:
8088         trace_access_lock(iter->cpu_file);
8089         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8090                                     &info->spare,
8091                                     count,
8092                                     iter->cpu_file, 0);
8093         trace_access_unlock(iter->cpu_file);
8094
8095         if (ret < 0) {
8096                 if (trace_empty(iter)) {
8097                         if ((filp->f_flags & O_NONBLOCK))
8098                                 return -EAGAIN;
8099
8100                         ret = wait_on_pipe(iter, 0);
8101                         if (ret)
8102                                 return ret;
8103
8104                         goto again;
8105                 }
8106                 return 0;
8107         }
8108
8109         info->read = 0;
8110  read:
8111         size = PAGE_SIZE - info->read;
8112         if (size > count)
8113                 size = count;
8114
8115         ret = copy_to_user(ubuf, info->spare + info->read, size);
8116         if (ret == size)
8117                 return -EFAULT;
8118
8119         size -= ret;
8120
8121         *ppos += size;
8122         info->read += size;
8123
8124         return size;
8125 }
8126
8127 static int tracing_buffers_release(struct inode *inode, struct file *file)
8128 {
8129         struct ftrace_buffer_info *info = file->private_data;
8130         struct trace_iterator *iter = &info->iter;
8131
8132         mutex_lock(&trace_types_lock);
8133
8134         iter->tr->trace_ref--;
8135
8136         __trace_array_put(iter->tr);
8137
8138         if (info->spare)
8139                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8140                                            info->spare_cpu, info->spare);
8141         kvfree(info);
8142
8143         mutex_unlock(&trace_types_lock);
8144
8145         return 0;
8146 }
8147
8148 struct buffer_ref {
8149         struct trace_buffer     *buffer;
8150         void                    *page;
8151         int                     cpu;
8152         refcount_t              refcount;
8153 };
8154
8155 static void buffer_ref_release(struct buffer_ref *ref)
8156 {
8157         if (!refcount_dec_and_test(&ref->refcount))
8158                 return;
8159         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8160         kfree(ref);
8161 }
8162
8163 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8164                                     struct pipe_buffer *buf)
8165 {
8166         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8167
8168         buffer_ref_release(ref);
8169         buf->private = 0;
8170 }
8171
8172 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8173                                 struct pipe_buffer *buf)
8174 {
8175         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8176
8177         if (refcount_read(&ref->refcount) > INT_MAX/2)
8178                 return false;
8179
8180         refcount_inc(&ref->refcount);
8181         return true;
8182 }
8183
8184 /* Pipe buffer operations for a buffer. */
8185 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8186         .release                = buffer_pipe_buf_release,
8187         .get                    = buffer_pipe_buf_get,
8188 };
8189
8190 /*
8191  * Callback from splice_to_pipe(): release any pages left in the spd
8192  * if we errored out while filling the pipe.
8193  */
8194 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8195 {
8196         struct buffer_ref *ref =
8197                 (struct buffer_ref *)spd->partial[i].private;
8198
8199         buffer_ref_release(ref);
8200         spd->partial[i].private = 0;
8201 }
8202
8203 static ssize_t
8204 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8205                             struct pipe_inode_info *pipe, size_t len,
8206                             unsigned int flags)
8207 {
8208         struct ftrace_buffer_info *info = file->private_data;
8209         struct trace_iterator *iter = &info->iter;
8210         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8211         struct page *pages_def[PIPE_DEF_BUFFERS];
8212         struct splice_pipe_desc spd = {
8213                 .pages          = pages_def,
8214                 .partial        = partial_def,
8215                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8216                 .ops            = &buffer_pipe_buf_ops,
8217                 .spd_release    = buffer_spd_release,
8218         };
8219         struct buffer_ref *ref;
8220         int entries, i;
8221         ssize_t ret = 0;
8222
8223 #ifdef CONFIG_TRACER_MAX_TRACE
8224         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8225                 return -EBUSY;
8226 #endif
8227
8228         if (*ppos & (PAGE_SIZE - 1))
8229                 return -EINVAL;
8230
8231         if (len & (PAGE_SIZE - 1)) {
8232                 if (len < PAGE_SIZE)
8233                         return -EINVAL;
8234                 len &= PAGE_MASK;
8235         }
8236
8237         if (splice_grow_spd(pipe, &spd))
8238                 return -ENOMEM;
8239
8240  again:
8241         trace_access_lock(iter->cpu_file);
8242         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8243
8244         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8245                 struct page *page;
8246                 int r;
8247
8248                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8249                 if (!ref) {
8250                         ret = -ENOMEM;
8251                         break;
8252                 }
8253
8254                 refcount_set(&ref->refcount, 1);
8255                 ref->buffer = iter->array_buffer->buffer;
8256                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8257                 if (IS_ERR(ref->page)) {
8258                         ret = PTR_ERR(ref->page);
8259                         ref->page = NULL;
8260                         kfree(ref);
8261                         break;
8262                 }
8263                 ref->cpu = iter->cpu_file;
8264
8265                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8266                                           len, iter->cpu_file, 1);
8267                 if (r < 0) {
8268                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8269                                                    ref->page);
8270                         kfree(ref);
8271                         break;
8272                 }
8273
8274                 page = virt_to_page(ref->page);
8275
8276                 spd.pages[i] = page;
8277                 spd.partial[i].len = PAGE_SIZE;
8278                 spd.partial[i].offset = 0;
8279                 spd.partial[i].private = (unsigned long)ref;
8280                 spd.nr_pages++;
8281                 *ppos += PAGE_SIZE;
8282
8283                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8284         }
8285
8286         trace_access_unlock(iter->cpu_file);
8287         spd.nr_pages = i;
8288
8289         /* did we read anything? */
8290         if (!spd.nr_pages) {
8291                 if (ret)
8292                         goto out;
8293
8294                 ret = -EAGAIN;
8295                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8296                         goto out;
8297
8298                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8299                 if (ret)
8300                         goto out;
8301
8302                 goto again;
8303         }
8304
8305         ret = splice_to_pipe(pipe, &spd);
8306 out:
8307         splice_shrink_spd(&spd);
8308
8309         return ret;
8310 }
8311
8312 static const struct file_operations tracing_buffers_fops = {
8313         .open           = tracing_buffers_open,
8314         .read           = tracing_buffers_read,
8315         .poll           = tracing_buffers_poll,
8316         .release        = tracing_buffers_release,
8317         .splice_read    = tracing_buffers_splice_read,
8318         .llseek         = no_llseek,
8319 };
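
/*
 * These fops back the per-cpu trace_pipe_raw files created in
 * tracing_init_tracefs_percpu() below.  A consumer either read()s whole
 * binary ring-buffer pages or, more efficiently, splices them into a pipe
 * without copying, e.g. (sketch, error handling omitted, pipe_wfd is a
 * made-up name):
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *
 *	splice(fd, NULL, pipe_wfd, NULL, 4096, SPLICE_F_NONBLOCK);
 *
 * The splice path hands out page references (struct buffer_ref) rather
 * than copying the data into the pipe.
 */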
8320
8321 static ssize_t
8322 tracing_stats_read(struct file *filp, char __user *ubuf,
8323                    size_t count, loff_t *ppos)
8324 {
8325         struct inode *inode = file_inode(filp);
8326         struct trace_array *tr = inode->i_private;
8327         struct array_buffer *trace_buf = &tr->array_buffer;
8328         int cpu = tracing_get_cpu(inode);
8329         struct trace_seq *s;
8330         unsigned long cnt;
8331         unsigned long long t;
8332         unsigned long usec_rem;
8333
8334         s = kmalloc(sizeof(*s), GFP_KERNEL);
8335         if (!s)
8336                 return -ENOMEM;
8337
8338         trace_seq_init(s);
8339
8340         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8341         trace_seq_printf(s, "entries: %ld\n", cnt);
8342
8343         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8344         trace_seq_printf(s, "overrun: %ld\n", cnt);
8345
8346         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8347         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8348
8349         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8350         trace_seq_printf(s, "bytes: %ld\n", cnt);
8351
8352         if (trace_clocks[tr->clock_id].in_ns) {
8353                 /* local or global for trace_clock */
8354                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8355                 usec_rem = do_div(t, USEC_PER_SEC);
8356                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8357                                                                 t, usec_rem);
8358
8359                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8360                 usec_rem = do_div(t, USEC_PER_SEC);
8361                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8362         } else {
8363                 /* counter or tsc mode for trace_clock */
8364                 trace_seq_printf(s, "oldest event ts: %llu\n",
8365                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8366
8367                 trace_seq_printf(s, "now ts: %llu\n",
8368                                 ring_buffer_time_stamp(trace_buf->buffer));
8369         }
8370
8371         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8372         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8373
8374         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8375         trace_seq_printf(s, "read events: %ld\n", cnt);
8376
8377         count = simple_read_from_buffer(ubuf, count, ppos,
8378                                         s->buffer, trace_seq_used(s));
8379
8380         kfree(s);
8381
8382         return count;
8383 }
8384
8385 static const struct file_operations tracing_stats_fops = {
8386         .open           = tracing_open_generic_tr,
8387         .read           = tracing_stats_read,
8388         .llseek         = generic_file_llseek,
8389         .release        = tracing_release_generic_tr,
8390 };
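
/*
 * Example of what the per-cpu "stats" file generated above looks like
 * (values made up):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7184
 *	oldest event ts:  5401.244687
 *	now ts:  5405.917092
 *	dropped events: 0
 *	read events: 107
 */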
8391
8392 #ifdef CONFIG_DYNAMIC_FTRACE
8393
8394 static ssize_t
8395 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8396                   size_t cnt, loff_t *ppos)
8397 {
8398         ssize_t ret;
8399         char *buf;
8400         int r;
8401
8402         /* 256 should be plenty to hold the amount needed */
8403         buf = kmalloc(256, GFP_KERNEL);
8404         if (!buf)
8405                 return -ENOMEM;
8406
8407         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8408                       ftrace_update_tot_cnt,
8409                       ftrace_number_of_pages,
8410                       ftrace_number_of_groups);
8411
8412         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8413         kfree(buf);
8414         return ret;
8415 }
8416
8417 static const struct file_operations tracing_dyn_info_fops = {
8418         .open           = tracing_open_generic,
8419         .read           = tracing_read_dyn_info,
8420         .llseek         = generic_file_llseek,
8421 };
8422 #endif /* CONFIG_DYNAMIC_FTRACE */
8423
8424 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8425 static void
8426 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8427                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8428                 void *data)
8429 {
8430         tracing_snapshot_instance(tr);
8431 }
8432
8433 static void
8434 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8435                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8436                       void *data)
8437 {
8438         struct ftrace_func_mapper *mapper = data;
8439         long *count = NULL;
8440
8441         if (mapper)
8442                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8443
8444         if (count) {
8445
8446                 if (*count <= 0)
8447                         return;
8448
8449                 (*count)--;
8450         }
8451
8452         tracing_snapshot_instance(tr);
8453 }
8454
8455 static int
8456 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8457                       struct ftrace_probe_ops *ops, void *data)
8458 {
8459         struct ftrace_func_mapper *mapper = data;
8460         long *count = NULL;
8461
8462         seq_printf(m, "%ps:", (void *)ip);
8463
8464         seq_puts(m, "snapshot");
8465
8466         if (mapper)
8467                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8468
8469         if (count)
8470                 seq_printf(m, ":count=%ld\n", *count);
8471         else
8472                 seq_puts(m, ":unlimited\n");
8473
8474         return 0;
8475 }
8476
8477 static int
8478 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8479                      unsigned long ip, void *init_data, void **data)
8480 {
8481         struct ftrace_func_mapper *mapper = *data;
8482
8483         if (!mapper) {
8484                 mapper = allocate_ftrace_func_mapper();
8485                 if (!mapper)
8486                         return -ENOMEM;
8487                 *data = mapper;
8488         }
8489
8490         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8491 }
8492
8493 static void
8494 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8495                      unsigned long ip, void *data)
8496 {
8497         struct ftrace_func_mapper *mapper = data;
8498
8499         if (!ip) {
8500                 if (!mapper)
8501                         return;
8502                 free_ftrace_func_mapper(mapper, NULL);
8503                 return;
8504         }
8505
8506         ftrace_func_mapper_remove_ip(mapper, ip);
8507 }
8508
8509 static struct ftrace_probe_ops snapshot_probe_ops = {
8510         .func                   = ftrace_snapshot,
8511         .print                  = ftrace_snapshot_print,
8512 };
8513
8514 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8515         .func                   = ftrace_count_snapshot,
8516         .print                  = ftrace_snapshot_print,
8517         .init                   = ftrace_snapshot_init,
8518         .free                   = ftrace_snapshot_free,
8519 };
8520
8521 static int
8522 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8523                                char *glob, char *cmd, char *param, int enable)
8524 {
8525         struct ftrace_probe_ops *ops;
8526         void *count = (void *)-1;
8527         char *number;
8528         int ret;
8529
8530         if (!tr)
8531                 return -ENODEV;
8532
8533         /* hash funcs only work with set_ftrace_filter */
8534         if (!enable)
8535                 return -EINVAL;
8536
8537         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8538
8539         if (glob[0] == '!')
8540                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8541
8542         if (!param)
8543                 goto out_reg;
8544
8545         number = strsep(&param, ":");
8546
8547         if (!strlen(number))
8548                 goto out_reg;
8549
8550         /*
8551          * We use the callback data field (which is a pointer)
8552          * as our counter.
8553          */
8554         ret = kstrtoul(number, 0, (unsigned long *)&count);
8555         if (ret)
8556                 return ret;
8557
8558  out_reg:
8559         ret = tracing_alloc_snapshot_instance(tr);
8560         if (ret < 0)
8561                 goto out;
8562
8563         ret = register_ftrace_function_probe(glob, tr, ops, count);
8564
8565  out:
8566         return ret < 0 ? ret : 0;
8567 }
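
/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter, e.g. (sketch):
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter
 *	# echo 'schedule:snapshot:5' > set_ftrace_filter
 *	# echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The optional trailing count is parsed into @param/@count above and
 * limits how many snapshots the probe will take; the '!' form removes
 * the probe again.
 */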
8568
8569 static struct ftrace_func_command ftrace_snapshot_cmd = {
8570         .name                   = "snapshot",
8571         .func                   = ftrace_trace_snapshot_callback,
8572 };
8573
8574 static __init int register_snapshot_cmd(void)
8575 {
8576         return register_ftrace_command(&ftrace_snapshot_cmd);
8577 }
8578 #else
8579 static inline __init int register_snapshot_cmd(void) { return 0; }
8580 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8581
8582 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8583 {
8584         if (WARN_ON(!tr->dir))
8585                 return ERR_PTR(-ENODEV);
8586
8587         /* Top directory uses NULL as the parent */
8588         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8589                 return NULL;
8590
8591         /* All sub buffers have a descriptor */
8592         return tr->dir;
8593 }
8594
8595 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8596 {
8597         struct dentry *d_tracer;
8598
8599         if (tr->percpu_dir)
8600                 return tr->percpu_dir;
8601
8602         d_tracer = tracing_get_dentry(tr);
8603         if (IS_ERR(d_tracer))
8604                 return NULL;
8605
8606         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8607
8608         MEM_FAIL(!tr->percpu_dir,
8609                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8610
8611         return tr->percpu_dir;
8612 }
8613
8614 static struct dentry *
8615 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8616                       void *data, long cpu, const struct file_operations *fops)
8617 {
8618         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8619
8620         if (ret) /* See tracing_get_cpu() */
8621                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8622         return ret;
8623 }
8624
8625 static void
8626 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8627 {
8628         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8629         struct dentry *d_cpu;
8630         char cpu_dir[30]; /* 30 characters should be more than enough */
8631
8632         if (!d_percpu)
8633                 return;
8634
8635         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8636         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8637         if (!d_cpu) {
8638                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8639                 return;
8640         }
8641
8642         /* per cpu trace_pipe */
8643         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8644                                 tr, cpu, &tracing_pipe_fops);
8645
8646         /* per cpu trace */
8647         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8648                                 tr, cpu, &tracing_fops);
8649
8650         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8651                                 tr, cpu, &tracing_buffers_fops);
8652
8653         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8654                                 tr, cpu, &tracing_stats_fops);
8655
8656         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8657                                 tr, cpu, &tracing_entries_fops);
8658
8659 #ifdef CONFIG_TRACER_SNAPSHOT
8660         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8661                                 tr, cpu, &snapshot_fops);
8662
8663         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8664                                 tr, cpu, &snapshot_raw_fops);
8665 #endif
8666 }
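
/*
 * The function above gives every CPU its own directory, so each instance
 * ends up with a per-cpu layout along these lines (sketch):
 *
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu1/...
 *
 * plus snapshot and snapshot_raw when CONFIG_TRACER_SNAPSHOT is enabled.
 */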
8667
8668 #ifdef CONFIG_FTRACE_SELFTEST
8669 /* Let selftest have access to static functions in this file */
8670 #include "trace_selftest.c"
8671 #endif
8672
8673 static ssize_t
8674 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8675                         loff_t *ppos)
8676 {
8677         struct trace_option_dentry *topt = filp->private_data;
8678         char *buf;
8679
8680         if (topt->flags->val & topt->opt->bit)
8681                 buf = "1\n";
8682         else
8683                 buf = "0\n";
8684
8685         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8686 }
8687
8688 static ssize_t
8689 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8690                          loff_t *ppos)
8691 {
8692         struct trace_option_dentry *topt = filp->private_data;
8693         unsigned long val;
8694         int ret;
8695
8696         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8697         if (ret)
8698                 return ret;
8699
8700         if (val != 0 && val != 1)
8701                 return -EINVAL;
8702
8703         if (!!(topt->flags->val & topt->opt->bit) != val) {
8704                 mutex_lock(&trace_types_lock);
8705                 ret = __set_tracer_option(topt->tr, topt->flags,
8706                                           topt->opt, !val);
8707                 mutex_unlock(&trace_types_lock);
8708                 if (ret)
8709                         return ret;
8710         }
8711
8712         *ppos += cnt;
8713
8714         return cnt;
8715 }
8716
8717
8718 static const struct file_operations trace_options_fops = {
8719         .open = tracing_open_generic,
8720         .read = trace_options_read,
8721         .write = trace_options_write,
8722         .llseek = generic_file_llseek,
8723 };
8724
8725 /*
8726  * In order to pass in both the trace_array descriptor as well as the index
8727  * to the flag that the trace option file represents, the trace_array
8728  * has a character array of trace_flags_index[], which holds the index
8729  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8730  * The address of this character array is passed to the flag option file
8731  * read/write callbacks.
8732  *
8733  * In order to extract both the index and the trace_array descriptor,
8734  * get_tr_index() uses the following algorithm.
8735  *
8736  *   idx = *ptr;
8737  *
8738  * The pointer passed in is the address of one entry in the index
8739  * array, and since index[i] == i, dereferencing it yields the index.
8740  *
8741  * To get the trace_array descriptor, subtract that index from the
8742  * pointer, which lands back at the start of the index array:
8743  *
8744  *   ptr - idx == &index[0]
8745  *
8746  * Then a simple container_of() from that pointer gets us to the
8747  * trace_array descriptor.
8748  */
8749 static void get_tr_index(void *data, struct trace_array **ptr,
8750                          unsigned int *pindex)
8751 {
8752         *pindex = *(unsigned char *)data;
8753
8754         *ptr = container_of(data - *pindex, struct trace_array,
8755                             trace_flags_index);
8756 }
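
/*
 * Worked example of the arithmetic above: if the option file for flag 5
 * was created with data == &tr->trace_flags_index[5], then
 *
 *	*data    == 5
 *	data - 5 == &tr->trace_flags_index[0]
 *
 * and container_of() on that address recovers tr.
 */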
8757
8758 static ssize_t
8759 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8760                         loff_t *ppos)
8761 {
8762         void *tr_index = filp->private_data;
8763         struct trace_array *tr;
8764         unsigned int index;
8765         char *buf;
8766
8767         get_tr_index(tr_index, &tr, &index);
8768
8769         if (tr->trace_flags & (1 << index))
8770                 buf = "1\n";
8771         else
8772                 buf = "0\n";
8773
8774         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8775 }
8776
8777 static ssize_t
8778 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8779                          loff_t *ppos)
8780 {
8781         void *tr_index = filp->private_data;
8782         struct trace_array *tr;
8783         unsigned int index;
8784         unsigned long val;
8785         int ret;
8786
8787         get_tr_index(tr_index, &tr, &index);
8788
8789         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8790         if (ret)
8791                 return ret;
8792
8793         if (val != 0 && val != 1)
8794                 return -EINVAL;
8795
8796         mutex_lock(&event_mutex);
8797         mutex_lock(&trace_types_lock);
8798         ret = set_tracer_flag(tr, 1 << index, val);
8799         mutex_unlock(&trace_types_lock);
8800         mutex_unlock(&event_mutex);
8801
8802         if (ret < 0)
8803                 return ret;
8804
8805         *ppos += cnt;
8806
8807         return cnt;
8808 }
8809
8810 static const struct file_operations trace_options_core_fops = {
8811         .open = tracing_open_generic,
8812         .read = trace_options_core_read,
8813         .write = trace_options_core_write,
8814         .llseek = generic_file_llseek,
8815 };
8816
8817 struct dentry *trace_create_file(const char *name,
8818                                  umode_t mode,
8819                                  struct dentry *parent,
8820                                  void *data,
8821                                  const struct file_operations *fops)
8822 {
8823         struct dentry *ret;
8824
8825         ret = tracefs_create_file(name, mode, parent, data, fops);
8826         if (!ret)
8827                 pr_warn("Could not create tracefs '%s' entry\n", name);
8828
8829         return ret;
8830 }
8831
8832
8833 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8834 {
8835         struct dentry *d_tracer;
8836
8837         if (tr->options)
8838                 return tr->options;
8839
8840         d_tracer = tracing_get_dentry(tr);
8841         if (IS_ERR(d_tracer))
8842                 return NULL;
8843
8844         tr->options = tracefs_create_dir("options", d_tracer);
8845         if (!tr->options) {
8846                 pr_warn("Could not create tracefs directory 'options'\n");
8847                 return NULL;
8848         }
8849
8850         return tr->options;
8851 }
8852
8853 static void
8854 create_trace_option_file(struct trace_array *tr,
8855                          struct trace_option_dentry *topt,
8856                          struct tracer_flags *flags,
8857                          struct tracer_opt *opt)
8858 {
8859         struct dentry *t_options;
8860
8861         t_options = trace_options_init_dentry(tr);
8862         if (!t_options)
8863                 return;
8864
8865         topt->flags = flags;
8866         topt->opt = opt;
8867         topt->tr = tr;
8868
8869         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8870                                         t_options, topt, &trace_options_fops);
8871
8872 }
8873
8874 static void
8875 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8876 {
8877         struct trace_option_dentry *topts;
8878         struct trace_options *tr_topts;
8879         struct tracer_flags *flags;
8880         struct tracer_opt *opts;
8881         int cnt;
8882         int i;
8883
8884         if (!tracer)
8885                 return;
8886
8887         flags = tracer->flags;
8888
8889         if (!flags || !flags->opts)
8890                 return;
8891
8892         /*
8893          * If this is an instance, only create flags for tracers
8894          * the instance may have.
8895          */
8896         if (!trace_ok_for_array(tracer, tr))
8897                 return;
8898
8899         for (i = 0; i < tr->nr_topts; i++) {
8900                 /* Make sure there's no duplicate flags. */
8901                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8902                         return;
8903         }
8904
8905         opts = flags->opts;
8906
8907         for (cnt = 0; opts[cnt].name; cnt++)
8908                 ;
8909
8910         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8911         if (!topts)
8912                 return;
8913
8914         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8915                             GFP_KERNEL);
8916         if (!tr_topts) {
8917                 kfree(topts);
8918                 return;
8919         }
8920
8921         tr->topts = tr_topts;
8922         tr->topts[tr->nr_topts].tracer = tracer;
8923         tr->topts[tr->nr_topts].topts = topts;
8924         tr->nr_topts++;
8925
8926         for (cnt = 0; opts[cnt].name; cnt++) {
8927                 create_trace_option_file(tr, &topts[cnt], flags,
8928                                          &opts[cnt]);
8929                 MEM_FAIL(topts[cnt].entry == NULL,
8930                           "Failed to create trace option: %s",
8931                           opts[cnt].name);
8932         }
8933 }
8934
8935 static struct dentry *
8936 create_trace_option_core_file(struct trace_array *tr,
8937                               const char *option, long index)
8938 {
8939         struct dentry *t_options;
8940
8941         t_options = trace_options_init_dentry(tr);
8942         if (!t_options)
8943                 return NULL;
8944
8945         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8946                                  (void *)&tr->trace_flags_index[index],
8947                                  &trace_options_core_fops);
8948 }
8949
8950 static void create_trace_options_dir(struct trace_array *tr)
8951 {
8952         struct dentry *t_options;
8953         bool top_level = tr == &global_trace;
8954         int i;
8955
8956         t_options = trace_options_init_dentry(tr);
8957         if (!t_options)
8958                 return;
8959
8960         for (i = 0; trace_options[i]; i++) {
8961                 if (top_level ||
8962                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8963                         create_trace_option_core_file(tr, trace_options[i], i);
8964         }
8965 }
8966
8967 static ssize_t
8968 rb_simple_read(struct file *filp, char __user *ubuf,
8969                size_t cnt, loff_t *ppos)
8970 {
8971         struct trace_array *tr = filp->private_data;
8972         char buf[64];
8973         int r;
8974
8975         r = tracer_tracing_is_on(tr);
8976         r = sprintf(buf, "%d\n", r);
8977
8978         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8979 }
8980
8981 static ssize_t
8982 rb_simple_write(struct file *filp, const char __user *ubuf,
8983                 size_t cnt, loff_t *ppos)
8984 {
8985         struct trace_array *tr = filp->private_data;
8986         struct trace_buffer *buffer = tr->array_buffer.buffer;
8987         unsigned long val;
8988         int ret;
8989
8990         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8991         if (ret)
8992                 return ret;
8993
8994         if (buffer) {
8995                 mutex_lock(&trace_types_lock);
8996                 if (!!val == tracer_tracing_is_on(tr)) {
8997                         val = 0; /* do nothing */
8998                 } else if (val) {
8999                         tracer_tracing_on(tr);
9000                         if (tr->current_trace->start)
9001                                 tr->current_trace->start(tr);
9002                 } else {
9003                         tracer_tracing_off(tr);
9004                         if (tr->current_trace->stop)
9005                                 tr->current_trace->stop(tr);
9006                 }
9007                 mutex_unlock(&trace_types_lock);
9008         }
9009
9010         (*ppos)++;
9011
9012         return cnt;
9013 }
9014
9015 static const struct file_operations rb_simple_fops = {
9016         .open           = tracing_open_generic_tr,
9017         .read           = rb_simple_read,
9018         .write          = rb_simple_write,
9019         .release        = tracing_release_generic_tr,
9020         .llseek         = default_llseek,
9021 };
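
/*
 * These fops back the per-instance "tracing_on" file, e.g. (sketch):
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *
 * Writing 0 stops recording into the ring buffer (and calls the tracer's
 * ->stop() hook); writing 1 turns it back on.
 */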
9022
9023 static ssize_t
9024 buffer_percent_read(struct file *filp, char __user *ubuf,
9025                     size_t cnt, loff_t *ppos)
9026 {
9027         struct trace_array *tr = filp->private_data;
9028         char buf[64];
9029         int r;
9030
9031         r = tr->buffer_percent;
9032         r = sprintf(buf, "%d\n", r);
9033
9034         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9035 }
9036
9037 static ssize_t
9038 buffer_percent_write(struct file *filp, const char __user *ubuf,
9039                      size_t cnt, loff_t *ppos)
9040 {
9041         struct trace_array *tr = filp->private_data;
9042         unsigned long val;
9043         int ret;
9044
9045         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9046         if (ret)
9047                 return ret;
9048
9049         if (val > 100)
9050                 return -EINVAL;
9051
9052         if (!val)
9053                 val = 1;
9054
9055         tr->buffer_percent = val;
9056
9057         (*ppos)++;
9058
9059         return cnt;
9060 }
9061
9062 static const struct file_operations buffer_percent_fops = {
9063         .open           = tracing_open_generic_tr,
9064         .read           = buffer_percent_read,
9065         .write          = buffer_percent_write,
9066         .release        = tracing_release_generic_tr,
9067         .llseek         = default_llseek,
9068 };
9069
9070 static struct dentry *trace_instance_dir;
9071
9072 static void
9073 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9074
9075 static int
9076 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9077 {
9078         enum ring_buffer_flags rb_flags;
9079
9080         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9081
9082         buf->tr = tr;
9083
9084         buf->buffer = ring_buffer_alloc(size, rb_flags);
9085         if (!buf->buffer)
9086                 return -ENOMEM;
9087
9088         buf->data = alloc_percpu(struct trace_array_cpu);
9089         if (!buf->data) {
9090                 ring_buffer_free(buf->buffer);
9091                 buf->buffer = NULL;
9092                 return -ENOMEM;
9093         }
9094
9095         /* Allocate the first page for all buffers */
9096         set_buffer_entries(&tr->array_buffer,
9097                            ring_buffer_size(tr->array_buffer.buffer, 0));
9098
9099         return 0;
9100 }
9101
9102 static int allocate_trace_buffers(struct trace_array *tr, int size)
9103 {
9104         int ret;
9105
9106         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9107         if (ret)
9108                 return ret;
9109
9110 #ifdef CONFIG_TRACER_MAX_TRACE
9111         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9112                                     allocate_snapshot ? size : 1);
9113         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9114                 ring_buffer_free(tr->array_buffer.buffer);
9115                 tr->array_buffer.buffer = NULL;
9116                 free_percpu(tr->array_buffer.data);
9117                 tr->array_buffer.data = NULL;
9118                 return -ENOMEM;
9119         }
9120         tr->allocated_snapshot = allocate_snapshot;
9121
9122         /*
9123          * Only the top level trace array gets its snapshot allocated
9124          * from the kernel command line.
9125          */
9126         allocate_snapshot = false;
9127 #endif
9128
9129         return 0;
9130 }
9131
9132 static void free_trace_buffer(struct array_buffer *buf)
9133 {
9134         if (buf->buffer) {
9135                 ring_buffer_free(buf->buffer);
9136                 buf->buffer = NULL;
9137                 free_percpu(buf->data);
9138                 buf->data = NULL;
9139         }
9140 }
9141
9142 static void free_trace_buffers(struct trace_array *tr)
9143 {
9144         if (!tr)
9145                 return;
9146
9147         free_trace_buffer(&tr->array_buffer);
9148
9149 #ifdef CONFIG_TRACER_MAX_TRACE
9150         free_trace_buffer(&tr->max_buffer);
9151 #endif
9152 }
9153
9154 static void init_trace_flags_index(struct trace_array *tr)
9155 {
9156         int i;
9157
9158         /* Used by the trace options files */
9159         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9160                 tr->trace_flags_index[i] = i;
9161 }
9162
9163 static void __update_tracer_options(struct trace_array *tr)
9164 {
9165         struct tracer *t;
9166
9167         for (t = trace_types; t; t = t->next)
9168                 add_tracer_options(tr, t);
9169 }
9170
9171 static void update_tracer_options(struct trace_array *tr)
9172 {
9173         mutex_lock(&trace_types_lock);
9174         __update_tracer_options(tr);
9175         mutex_unlock(&trace_types_lock);
9176 }
9177
9178 /* Must have trace_types_lock held */
9179 struct trace_array *trace_array_find(const char *instance)
9180 {
9181         struct trace_array *tr, *found = NULL;
9182
9183         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9184                 if (tr->name && strcmp(tr->name, instance) == 0) {
9185                         found = tr;
9186                         break;
9187                 }
9188         }
9189
9190         return found;
9191 }
9192
9193 struct trace_array *trace_array_find_get(const char *instance)
9194 {
9195         struct trace_array *tr;
9196
9197         mutex_lock(&trace_types_lock);
9198         tr = trace_array_find(instance);
9199         if (tr)
9200                 tr->ref++;
9201         mutex_unlock(&trace_types_lock);
9202
9203         return tr;
9204 }
9205
9206 static int trace_array_create_dir(struct trace_array *tr)
9207 {
9208         int ret;
9209
9210         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9211         if (!tr->dir)
9212                 return -EINVAL;
9213
9214         ret = event_trace_add_tracer(tr->dir, tr);
9215         if (ret) {
9216                 tracefs_remove(tr->dir);
9217                 return ret;
9218         }
9219
9220         init_tracer_tracefs(tr, tr->dir);
9221         __update_tracer_options(tr);
9222
9223         return ret;
9224 }
9225
9226 static struct trace_array *trace_array_create(const char *name)
9227 {
9228         struct trace_array *tr;
9229         int ret;
9230
9231         ret = -ENOMEM;
9232         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9233         if (!tr)
9234                 return ERR_PTR(ret);
9235
9236         tr->name = kstrdup(name, GFP_KERNEL);
9237         if (!tr->name)
9238                 goto out_free_tr;
9239
9240         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9241                 goto out_free_tr;
9242
9243         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9244
9245         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9246
9247         raw_spin_lock_init(&tr->start_lock);
9248
9249         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9250
9251         tr->current_trace = &nop_trace;
9252
9253         INIT_LIST_HEAD(&tr->systems);
9254         INIT_LIST_HEAD(&tr->events);
9255         INIT_LIST_HEAD(&tr->hist_vars);
9256         INIT_LIST_HEAD(&tr->err_log);
9257
9258         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9259                 goto out_free_tr;
9260
9261         if (ftrace_allocate_ftrace_ops(tr) < 0)
9262                 goto out_free_tr;
9263
9264         ftrace_init_trace_array(tr);
9265
9266         init_trace_flags_index(tr);
9267
9268         if (trace_instance_dir) {
9269                 ret = trace_array_create_dir(tr);
9270                 if (ret)
9271                         goto out_free_tr;
9272         } else
9273                 __trace_early_add_events(tr);
9274
9275         list_add(&tr->list, &ftrace_trace_arrays);
9276
9277         tr->ref++;
9278
9279         return tr;
9280
9281  out_free_tr:
9282         ftrace_free_ftrace_ops(tr);
9283         free_trace_buffers(tr);
9284         free_cpumask_var(tr->tracing_cpumask);
9285         kfree(tr->name);
9286         kfree(tr);
9287
9288         return ERR_PTR(ret);
9289 }
9290
9291 static int instance_mkdir(const char *name)
9292 {
9293         struct trace_array *tr;
9294         int ret;
9295
9296         mutex_lock(&event_mutex);
9297         mutex_lock(&trace_types_lock);
9298
9299         ret = -EEXIST;
9300         if (trace_array_find(name))
9301                 goto out_unlock;
9302
9303         tr = trace_array_create(name);
9304
9305         ret = PTR_ERR_OR_ZERO(tr);
9306
9307 out_unlock:
9308         mutex_unlock(&trace_types_lock);
9309         mutex_unlock(&event_mutex);
9310         return ret;
9311 }
9312
9313 /**
9314  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9315  * @name: The name of the trace array to be looked up/created.
9316  *
9317  * Returns a pointer to the trace array with the given name, or NULL
9318  * if it cannot be created.
9319  *
9320  * NOTE: This function increments the reference counter associated with the
9321  * trace array returned. This makes sure it cannot be freed while in use.
9322  * Use trace_array_put() once the trace array is no longer needed.
9323  * If the trace_array is to be freed, trace_array_destroy() needs to
9324  * be called after the trace_array_put(), or simply let user space delete
9325  * it from the tracefs instances directory. But until the
9326  * trace_array_put() is called, user space cannot delete it.
9327  *
9328  */
9329 struct trace_array *trace_array_get_by_name(const char *name)
9330 {
9331         struct trace_array *tr;
9332
9333         mutex_lock(&event_mutex);
9334         mutex_lock(&trace_types_lock);
9335
9336         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9337                 if (tr->name && strcmp(tr->name, name) == 0)
9338                         goto out_unlock;
9339         }
9340
9341         tr = trace_array_create(name);
9342
9343         if (IS_ERR(tr))
9344                 tr = NULL;
9345 out_unlock:
9346         if (tr)
9347                 tr->ref++;
9348
9349         mutex_unlock(&trace_types_lock);
9350         mutex_unlock(&event_mutex);
9351         return tr;
9352 }
9353 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
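
/*
 * Illustrative sketch (not part of this file) of the typical in-kernel
 * use of this API from a module; error handling is trimmed and
 * "my-instance" is a made-up name:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	trace_array_init_printk(tr);
 *	trace_array_printk(tr, _THIS_IP_, "hello from my module\n");
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * trace_array_destroy() is only needed if the instance should actually be
 * removed; otherwise the put alone is enough.
 */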
9354
9355 static int __remove_instance(struct trace_array *tr)
9356 {
9357         int i;
9358
9359         /* Reference counter for a newly created trace array = 1. */
9360         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9361                 return -EBUSY;
9362
9363         list_del(&tr->list);
9364
9365         /* Disable all the flags that were enabled coming in */
9366         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9367                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9368                         set_tracer_flag(tr, 1 << i, 0);
9369         }
9370
9371         tracing_set_nop(tr);
9372         clear_ftrace_function_probes(tr);
9373         event_trace_del_tracer(tr);
9374         ftrace_clear_pids(tr);
9375         ftrace_destroy_function_files(tr);
9376         tracefs_remove(tr->dir);
9377         free_percpu(tr->last_func_repeats);
9378         free_trace_buffers(tr);
9379
9380         for (i = 0; i < tr->nr_topts; i++) {
9381                 kfree(tr->topts[i].topts);
9382         }
9383         kfree(tr->topts);
9384
9385         free_cpumask_var(tr->tracing_cpumask);
9386         kfree(tr->name);
9387         kfree(tr);
9388
9389         return 0;
9390 }
9391
9392 int trace_array_destroy(struct trace_array *this_tr)
9393 {
9394         struct trace_array *tr;
9395         int ret;
9396
9397         if (!this_tr)
9398                 return -EINVAL;
9399
9400         mutex_lock(&event_mutex);
9401         mutex_lock(&trace_types_lock);
9402
9403         ret = -ENODEV;
9404
9405         /* Make sure the trace array exists before destroying it. */
9406         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9407                 if (tr == this_tr) {
9408                         ret = __remove_instance(tr);
9409                         break;
9410                 }
9411         }
9412
9413         mutex_unlock(&trace_types_lock);
9414         mutex_unlock(&event_mutex);
9415
9416         return ret;
9417 }
9418 EXPORT_SYMBOL_GPL(trace_array_destroy);
9419
9420 static int instance_rmdir(const char *name)
9421 {
9422         struct trace_array *tr;
9423         int ret;
9424
9425         mutex_lock(&event_mutex);
9426         mutex_lock(&trace_types_lock);
9427
9428         ret = -ENODEV;
9429         tr = trace_array_find(name);
9430         if (tr)
9431                 ret = __remove_instance(tr);
9432
9433         mutex_unlock(&trace_types_lock);
9434         mutex_unlock(&event_mutex);
9435
9436         return ret;
9437 }
9438
9439 static __init void create_trace_instances(struct dentry *d_tracer)
9440 {
9441         struct trace_array *tr;
9442
9443         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9444                                                          instance_mkdir,
9445                                                          instance_rmdir);
9446         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9447                 return;
9448
9449         mutex_lock(&event_mutex);
9450         mutex_lock(&trace_types_lock);
9451
9452         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9453                 if (!tr->name)
9454                         continue;
9455                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9456                              "Failed to create instance directory\n"))
9457                         break;
9458         }
9459
9460         mutex_unlock(&trace_types_lock);
9461         mutex_unlock(&event_mutex);
9462 }
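
/*
 * The instances directory created above is driven from user space,
 * e.g. (sketch):
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo 1 > /sys/kernel/tracing/instances/foo/tracing_on
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * mkdir lands in instance_mkdir() and rmdir in instance_rmdir() above;
 * rmdir fails with -EBUSY while the instance is still referenced.
 */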
9463
9464 static void
9465 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9466 {
9467         struct trace_event_file *file;
9468         int cpu;
9469
9470         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9471                         tr, &show_traces_fops);
9472
9473         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9474                         tr, &set_tracer_fops);
9475
9476         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9477                           tr, &tracing_cpumask_fops);
9478
9479         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9480                           tr, &tracing_iter_fops);
9481
9482         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9483                           tr, &tracing_fops);
9484
9485         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9486                           tr, &tracing_pipe_fops);
9487
9488         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9489                           tr, &tracing_entries_fops);
9490
9491         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9492                           tr, &tracing_total_entries_fops);
9493
9494         trace_create_file("free_buffer", 0200, d_tracer,
9495                           tr, &tracing_free_buffer_fops);
9496
9497         trace_create_file("trace_marker", 0220, d_tracer,
9498                           tr, &tracing_mark_fops);
9499
9500         file = __find_event_file(tr, "ftrace", "print");
9501         if (file && file->dir)
9502                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9503                                   file, &event_trigger_fops);
9504         tr->trace_marker_file = file;
9505
9506         trace_create_file("trace_marker_raw", 0220, d_tracer,
9507                           tr, &tracing_mark_raw_fops);
9508
9509         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9510                           &trace_clock_fops);
9511
9512         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9513                           tr, &rb_simple_fops);
9514
9515         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9516                           &trace_time_stamp_mode_fops);
9517
9518         tr->buffer_percent = 50;
9519
9520         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9521                         tr, &buffer_percent_fops);
9522
9523         create_trace_options_dir(tr);
9524
9525         trace_create_maxlat_file(tr, d_tracer);
9526
9527         if (ftrace_create_function_files(tr, d_tracer))
9528                 MEM_FAIL(1, "Could not allocate function filter files");
9529
9530 #ifdef CONFIG_TRACER_SNAPSHOT
9531         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9532                           tr, &snapshot_fops);
9533 #endif
9534
9535         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9536                           tr, &tracing_err_log_fops);
9537
9538         for_each_tracing_cpu(cpu)
9539                 tracing_init_tracefs_percpu(tr, cpu);
9540
9541         ftrace_init_tracefs(tr, d_tracer);
9542 }
9543
9544 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9545 {
9546         struct vfsmount *mnt;
9547         struct file_system_type *type;
9548
9549         /*
9550          * To maintain backward compatibility for tools that mount
9551          * debugfs to get to the tracing facility, tracefs is automatically
9552          * mounted to the debugfs/tracing directory.
9553          */
9554         type = get_fs_type("tracefs");
9555         if (!type)
9556                 return NULL;
9557         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9558         put_filesystem(type);
9559         if (IS_ERR(mnt))
9560                 return NULL;
9561         mntget(mnt);
9562
9563         return mnt;
9564 }
9565
9566 /**
9567  * tracing_init_dentry - initialize top level trace array
9568  *
9569  * This is called when creating files or directories in the tracing
9570  * directory. It is called via fs_initcall() by any of the boot up code
9571  * and returns zero once the top level tracing directory has been set up.
9572  */
9573 int tracing_init_dentry(void)
9574 {
9575         struct trace_array *tr = &global_trace;
9576
9577         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9578                 pr_warn("Tracing disabled due to lockdown\n");
9579                 return -EPERM;
9580         }
9581
9582         /* The top level trace array uses NULL as parent */
9583         if (tr->dir)
9584                 return 0;
9585
9586         if (WARN_ON(!tracefs_initialized()))
9587                 return -ENODEV;
9588
9589         /*
9590          * As there may still be users that expect the tracing
9591          * files to exist in debugfs/tracing, we must automount
9592          * the tracefs file system there, so older tools still
9593          * work with the newer kernel.
9594          */
9595         tr->dir = debugfs_create_automount("tracing", NULL,
9596                                            trace_automount, NULL);
9597
9598         return 0;
9599 }
9600
9601 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9602 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9603
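/*
 * The built-in eval (enum) maps are inserted from a workqueue at boot;
 * trace_eval_init() falls back to doing the work synchronously if the
 * workqueue cannot be allocated, and trace_eval_sync() waits for it to
 * finish at late_initcall time by destroying the workqueue.
 */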
9604 static struct workqueue_struct *eval_map_wq __initdata;
9605 static struct work_struct eval_map_work __initdata;
9606
9607 static void __init eval_map_work_func(struct work_struct *work)
9608 {
9609         int len;
9610
9611         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9612         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9613 }
9614
9615 static int __init trace_eval_init(void)
9616 {
9617         INIT_WORK(&eval_map_work, eval_map_work_func);
9618
9619         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9620         if (!eval_map_wq) {
9621                 pr_err("Unable to allocate eval_map_wq\n");
9622                 /* Do work here */
9623                 eval_map_work_func(&eval_map_work);
9624                 return -ENOMEM;
9625         }
9626
9627         queue_work(eval_map_wq, &eval_map_work);
9628         return 0;
9629 }
9630
9631 static int __init trace_eval_sync(void)
9632 {
9633         /* Make sure the eval map updates are finished */
9634         if (eval_map_wq)
9635                 destroy_workqueue(eval_map_wq);
9636         return 0;
9637 }
9638
9639 late_initcall_sync(trace_eval_sync);
9640
9641
9642 #ifdef CONFIG_MODULES
9643 static void trace_module_add_evals(struct module *mod)
9644 {
9645         if (!mod->num_trace_evals)
9646                 return;
9647
9648         /*
9649          * Modules with bad taint do not have events created, do
9650          * Modules with bad taint do not have events created; do
9651          */
9652         if (trace_module_has_bad_taint(mod))
9653                 return;
9654
9655         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9656 }
9657
9658 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9659 static void trace_module_remove_evals(struct module *mod)
9660 {
9661         union trace_eval_map_item *map;
9662         union trace_eval_map_item **last = &trace_eval_maps;
9663
9664         if (!mod->num_trace_evals)
9665                 return;
9666
9667         mutex_lock(&trace_eval_mutex);
9668
9669         map = trace_eval_maps;
9670
9671         while (map) {
9672                 if (map->head.mod == mod)
9673                         break;
9674                 map = trace_eval_jmp_to_tail(map);
9675                 last = &map->tail.next;
9676                 map = map->tail.next;
9677         }
9678         if (!map)
9679                 goto out;
9680
9681         *last = trace_eval_jmp_to_tail(map)->tail.next;
9682         kfree(map);
9683  out:
9684         mutex_unlock(&trace_eval_mutex);
9685 }
9686 #else
9687 static inline void trace_module_remove_evals(struct module *mod) { }
9688 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9689
9690 static int trace_module_notify(struct notifier_block *self,
9691                                unsigned long val, void *data)
9692 {
9693         struct module *mod = data;
9694
9695         switch (val) {
9696         case MODULE_STATE_COMING:
9697                 trace_module_add_evals(mod);
9698                 break;
9699         case MODULE_STATE_GOING:
9700                 trace_module_remove_evals(mod);
9701                 break;
9702         }
9703
9704         return NOTIFY_OK;
9705 }
9706
9707 static struct notifier_block trace_module_nb = {
9708         .notifier_call = trace_module_notify,
9709         .priority = 0,
9710 };
9711 #endif /* CONFIG_MODULES */
9712
9713 static __init int tracer_init_tracefs(void)
9714 {
9715         int ret;
9716
9717         trace_access_lock_init();
9718
9719         ret = tracing_init_dentry();
9720         if (ret)
9721                 return 0;
9722
9723         event_trace_init();
9724
9725         init_tracer_tracefs(&global_trace, NULL);
9726         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9727
9728         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9729                         &global_trace, &tracing_thresh_fops);
9730
9731         trace_create_file("README", TRACE_MODE_READ, NULL,
9732                         NULL, &tracing_readme_fops);
9733
9734         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9735                         NULL, &tracing_saved_cmdlines_fops);
9736
9737         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9738                           NULL, &tracing_saved_cmdlines_size_fops);
9739
9740         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9741                         NULL, &tracing_saved_tgids_fops);
9742
9743         trace_eval_init();
9744
9745         trace_create_eval_file(NULL);
9746
9747 #ifdef CONFIG_MODULES
9748         register_module_notifier(&trace_module_nb);
9749 #endif
9750
9751 #ifdef CONFIG_DYNAMIC_FTRACE
9752         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9753                         NULL, &tracing_dyn_info_fops);
9754 #endif
9755
9756         create_trace_instances(NULL);
9757
9758         update_tracer_options(&global_trace);
9759
9760         return 0;
9761 }
9762
9763 fs_initcall(tracer_init_tracefs);
9764
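/*
 * When ftrace_dump_on_oops is set, dump the ring buffer to the console
 * on panic (and on oops, via the die notifier below).
 */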
9765 static int trace_panic_handler(struct notifier_block *this,
9766                                unsigned long event, void *unused)
9767 {
9768         if (ftrace_dump_on_oops)
9769                 ftrace_dump(ftrace_dump_on_oops);
9770         return NOTIFY_OK;
9771 }
9772
9773 static struct notifier_block trace_panic_notifier = {
9774         .notifier_call  = trace_panic_handler,
9775         .next           = NULL,
9776         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9777 };
9778
9779 static int trace_die_handler(struct notifier_block *self,
9780                              unsigned long val,
9781                              void *data)
9782 {
9783         switch (val) {
9784         case DIE_OOPS:
9785                 if (ftrace_dump_on_oops)
9786                         ftrace_dump(ftrace_dump_on_oops);
9787                 break;
9788         default:
9789                 break;
9790         }
9791         return NOTIFY_OK;
9792 }
9793
9794 static struct notifier_block trace_die_notifier = {
9795         .notifier_call = trace_die_handler,
9796         .priority = 200
9797 };
9798
9799 /*
9800  * printk is set to max of 1024, we really don't need it that big.
9801  * printk allows at most 1024 characters; we really don't need it that big.
9802  */
9803 #define TRACE_MAX_PRINT         1000
9804
9805 /*
9806  * Define here KERN_TRACE so that we have one place to modify
9807  * it if we decide to change what log level the ftrace dump
9808  * should be at.
9809  */
9810 #define KERN_TRACE              KERN_EMERG
9811
9812 void
9813 trace_printk_seq(struct trace_seq *s)
9814 {
9815         /* Probably should print a warning here. */
9816         if (s->seq.len >= TRACE_MAX_PRINT)
9817                 s->seq.len = TRACE_MAX_PRINT;
9818
9819         /*
9820          * More paranoid code. Although the buffer size is set to
9821          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9822          * an extra layer of protection.
9823          */
9824         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9825                 s->seq.len = s->seq.size - 1;
9826
9827         /* Should be NUL terminated, but we are paranoid. */
9828         s->buffer[s->seq.len] = 0;
9829
9830         printk(KERN_TRACE "%s", s->buffer);
9831
9832         trace_seq_init(s);
9833 }
9834
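/*
 * Initialize an iterator over the global trace buffer for in-kernel
 * dumping (see ftrace_dump() below).
 */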
9835 void trace_init_global_iter(struct trace_iterator *iter)
9836 {
9837         iter->tr = &global_trace;
9838         iter->trace = iter->tr->current_trace;
9839         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9840         iter->array_buffer = &global_trace.array_buffer;
9841
9842         if (iter->trace && iter->trace->open)
9843                 iter->trace->open(iter);
9844
9845         /* Annotate start of buffers if we had overruns */
9846         if (ring_buffer_overruns(iter->array_buffer->buffer))
9847                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9848
9849         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9850         if (trace_clocks[iter->tr->clock_id].in_ns)
9851                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9852 }
9853
9854 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9855 {
9856         /* use static because iter can be a bit big for the stack */
9857         static struct trace_iterator iter;
9858         static atomic_t dump_running;
9859         struct trace_array *tr = &global_trace;
9860         unsigned int old_userobj;
9861         unsigned long flags;
9862         int cnt = 0, cpu;
9863
9864         /* Only allow one dump user at a time. */
9865         if (atomic_inc_return(&dump_running) != 1) {
9866                 atomic_dec(&dump_running);
9867                 return;
9868         }
9869
9870         /*
9871          * Always turn off tracing when we dump.
9872          * We don't need to show trace output of what happens
9873          * between multiple crashes.
9874          *
9875          * If the user does a sysrq-z, then they can re-enable
9876          * tracing with echo 1 > tracing_on.
9877          */
9878         tracing_off();
9879
9880         local_irq_save(flags);
9881
9882         /* Simulate the iterator */
9883         trace_init_global_iter(&iter);
9884         /* Cannot use kmalloc for iter.temp and iter.fmt */
9885         iter.temp = static_temp_buf;
9886         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9887         iter.fmt = static_fmt_buf;
9888         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9889
9890         for_each_tracing_cpu(cpu) {
9891                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9892         }
9893
9894         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9895
9896         /* don't look at user memory in panic mode */
9897         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9898
9899         switch (oops_dump_mode) {
9900         case DUMP_ALL:
9901                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9902                 break;
9903         case DUMP_ORIG:
9904                 iter.cpu_file = raw_smp_processor_id();
9905                 break;
9906         case DUMP_NONE:
9907                 goto out_enable;
9908         default:
9909                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9910                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9911         }
9912
9913         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9914
9915         /* Did function tracer already get disabled? */
9916         if (ftrace_is_dead()) {
9917                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9918                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9919         }
9920
9921         /*
9922          * We need to stop all tracing on all CPUs to read
9923          * the next buffer. This is a bit expensive, but is
9924          * not done often. We read all that we can,
9925          * and then release the locks again.
9926          */
9927
9928         while (!trace_empty(&iter)) {
9929
9930                 if (!cnt)
9931                         printk(KERN_TRACE "---------------------------------\n");
9932
9933                 cnt++;
9934
9935                 trace_iterator_reset(&iter);
9936                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9937
9938                 if (trace_find_next_entry_inc(&iter) != NULL) {
9939                         int ret;
9940
9941                         ret = print_trace_line(&iter);
9942                         if (ret != TRACE_TYPE_NO_CONSUME)
9943                                 trace_consume(&iter);
9944                 }
9945                 touch_nmi_watchdog();
9946
9947                 trace_printk_seq(&iter.seq);
9948         }
9949
9950         if (!cnt)
9951                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9952         else
9953                 printk(KERN_TRACE "---------------------------------\n");
9954
9955  out_enable:
9956         tr->trace_flags |= old_userobj;
9957
9958         for_each_tracing_cpu(cpu) {
9959                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9960         }
9961         atomic_dec(&dump_running);
9962         local_irq_restore(flags);
9963 }
9964 EXPORT_SYMBOL_GPL(ftrace_dump);
9965
9966 #define WRITE_BUFSIZE  4096
9967
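/*
 * Copy a user-supplied buffer in chunks of up to WRITE_BUFSIZE, split it
 * into newline-terminated commands, strip '#' comments and pass each
 * command to createfn(). Returns the number of bytes consumed or a
 * negative error code.
 */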
9968 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9969                                 size_t count, loff_t *ppos,
9970                                 int (*createfn)(const char *))
9971 {
9972         char *kbuf, *buf, *tmp;
9973         int ret = 0;
9974         size_t done = 0;
9975         size_t size;
9976
9977         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9978         if (!kbuf)
9979                 return -ENOMEM;
9980
9981         while (done < count) {
9982                 size = count - done;
9983
9984                 if (size >= WRITE_BUFSIZE)
9985                         size = WRITE_BUFSIZE - 1;
9986
9987                 if (copy_from_user(kbuf, buffer + done, size)) {
9988                         ret = -EFAULT;
9989                         goto out;
9990                 }
9991                 kbuf[size] = '\0';
9992                 buf = kbuf;
9993                 do {
9994                         tmp = strchr(buf, '\n');
9995                         if (tmp) {
9996                                 *tmp = '\0';
9997                                 size = tmp - buf + 1;
9998                         } else {
9999                                 size = strlen(buf);
10000                                 if (done + size < count) {
10001                                         if (buf != kbuf)
10002                                                 break;
10003                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10004                                         pr_warn("Line length is too long: Should be less than %d\n",
10005                                                 WRITE_BUFSIZE - 2);
10006                                         ret = -EINVAL;
10007                                         goto out;
10008                                 }
10009                         }
10010                         done += size;
10011
10012                         /* Remove comments */
10013                         tmp = strchr(buf, '#');
10014
10015                         if (tmp)
10016                                 *tmp = '\0';
10017
10018                         ret = createfn(buf);
10019                         if (ret)
10020                                 goto out;
10021                         buf += size;
10022
10023                 } while (done < count);
10024         }
10025         ret = done;
10026
10027 out:
10028         kfree(kbuf);
10029
10030         return ret;
10031 }
10032
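/*
 * Allocate the global trace buffers and set up the core tracing state.
 * Called at boot from early_trace_init() below.
 */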
10033 __init static int tracer_alloc_buffers(void)
10034 {
10035         int ring_buf_size;
10036         int ret = -ENOMEM;
10037
10038
10039         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10040                 pr_warn("Tracing disabled due to lockdown\n");
10041                 return -EPERM;
10042         }
10043
10044         /*
10045          * Make sure we don't accidentally add more trace options
10046          * than we have bits for.
10047          */
10048         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10049
10050         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10051                 goto out;
10052
10053         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10054                 goto out_free_buffer_mask;
10055
10056         /* Only allocate trace_printk buffers if a trace_printk exists */
10057         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10058                 /* Must be called before global_trace.buffer is allocated */
10059                 trace_printk_init_buffers();
10060
10061         /* To save memory, keep the ring buffer size to its minimum */
10062         if (ring_buffer_expanded)
10063                 ring_buf_size = trace_buf_size;
10064         else
10065                 ring_buf_size = 1;
10066
10067         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10068         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10069
10070         raw_spin_lock_init(&global_trace.start_lock);
10071
10072         /*
10073          * The prepare callback allocates some memory for the ring buffer. We
10074          * don't free the buffer if the CPU goes down. If we were to free
10075          * the buffer, then the user would lose any trace that was in the
10076          * buffer. The memory will be removed once the "instance" is removed.
10077          */
10078         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10079                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10080                                       NULL);
10081         if (ret < 0)
10082                 goto out_free_cpumask;
10083         /* Used for event triggers */
10084         ret = -ENOMEM;
10085         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10086         if (!temp_buffer)
10087                 goto out_rm_hp_state;
10088
10089         if (trace_create_savedcmd() < 0)
10090                 goto out_free_temp_buffer;
10091
10092         /* TODO: make the number of buffers hot pluggable with CPUS */
10093         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10094                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10095                 goto out_free_savedcmd;
10096         }
10097
10098         if (global_trace.buffer_disabled)
10099                 tracing_off();
10100
10101         if (trace_boot_clock) {
10102                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10103                 if (ret < 0)
10104                         pr_warn("Trace clock %s not defined, going back to default\n",
10105                                 trace_boot_clock);
10106         }
10107
10108         /*
10109          * register_tracer() might reference current_trace, so it
10110          * needs to be set before we register anything. This is
10111          * just a bootstrap of current_trace anyway.
10112          */
10113         global_trace.current_trace = &nop_trace;
10114
10115         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10116
10117         ftrace_init_global_array_ops(&global_trace);
10118
10119         init_trace_flags_index(&global_trace);
10120
10121         register_tracer(&nop_trace);
10122
10123         /* Function tracing may start here (via kernel command line) */
10124         init_function_trace();
10125
10126         /* All seems OK, enable tracing */
10127         tracing_disabled = 0;
10128
10129         atomic_notifier_chain_register(&panic_notifier_list,
10130                                        &trace_panic_notifier);
10131
10132         register_die_notifier(&trace_die_notifier);
10133
10134         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10135
10136         INIT_LIST_HEAD(&global_trace.systems);
10137         INIT_LIST_HEAD(&global_trace.events);
10138         INIT_LIST_HEAD(&global_trace.hist_vars);
10139         INIT_LIST_HEAD(&global_trace.err_log);
10140         list_add(&global_trace.list, &ftrace_trace_arrays);
10141
10142         apply_trace_boot_options();
10143
10144         register_snapshot_cmd();
10145
10146         test_can_verify();
10147
10148         return 0;
10149
10150 out_free_savedcmd:
10151         free_saved_cmdlines_buffer(savedcmd);
10152 out_free_temp_buffer:
10153         ring_buffer_free(temp_buffer);
10154 out_rm_hp_state:
10155         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10156 out_free_cpumask:
10157         free_cpumask_var(global_trace.tracing_cpumask);
10158 out_free_buffer_mask:
10159         free_cpumask_var(tracing_buffer_mask);
10160 out:
10161         return ret;
10162 }
10163
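/* Take a tracing snapshot during boot when snapshot_at_boot is set. */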
10164 void __init ftrace_boot_snapshot(void)
10165 {
10166         if (snapshot_at_boot) {
10167                 tracing_snapshot();
10168                 internal_trace_puts("** Boot snapshot taken **\n");
10169         }
10170 }
10171
10172 void __init early_trace_init(void)
10173 {
10174         if (tracepoint_printk) {
10175                 tracepoint_print_iter =
10176                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10177                 if (MEM_FAIL(!tracepoint_print_iter,
10178                              "Failed to allocate trace iterator\n"))
10179                         tracepoint_printk = 0;
10180                 else
10181                         static_key_enable(&tracepoint_printk_key.key);
10182         }
10183         tracer_alloc_buffers();
10184 }
10185
10186 void __init trace_init(void)
10187 {
10188         trace_event_init();
10189 }
10190
10191 __init static void clear_boot_tracer(void)
10192 {
10193         /*
10194          * The default bootup tracer name is stored in an init section
10195          * buffer. This function is called at late_initcall time. If we did not
10196          * find the boot tracer, then clear it out, to prevent
10197          * later registration from accessing the buffer that is
10198          * about to be freed.
10199          */
10200         if (!default_bootup_tracer)
10201                 return;
10202
10203         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10204                default_bootup_tracer);
10205         default_bootup_tracer = NULL;
10206 }
10207
10208 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10209 __init static void tracing_set_default_clock(void)
10210 {
10211         /* sched_clock_stable() is determined in late_initcall */
10212         if (!trace_boot_clock && !sched_clock_stable()) {
10213                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10214                         pr_warn("Can not set tracing clock due to lockdown\n");
10215                         return;
10216                 }
10217
10218                 printk(KERN_WARNING
10219                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10220                        "If you want to keep using the local clock, then add:\n"
10221                        "  \"trace_clock=local\"\n"
10222                        "on the kernel command line\n");
10223                 tracing_set_clock(&global_trace, "global");
10224         }
10225 }
10226 #else
10227 static inline void tracing_set_default_clock(void) { }
10228 #endif
10229
10230 __init static int late_trace_init(void)
10231 {
10232         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10233                 static_key_disable(&tracepoint_printk_key.key);
10234                 tracepoint_printk = 0;
10235         }
10236
10237         tracing_set_default_clock();
10238         clear_boot_tracer();
10239         return 0;
10240 }
10241
10242 late_initcall_sync(late_trace_init);