kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will look into the ring-buffer to count the
62  * entries inserted during the selftest, although concurrent
63  * insertions into the ring-buffer, such as trace_printk(), could occur
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73
74 void __init disable_tracing_selftest(const char *reason)
75 {
76         if (!tracing_selftest_disabled) {
77                 tracing_selftest_disabled = true;
78                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
79         }
80 }
81 #else
82 #define tracing_selftest_running        0
83 #define tracing_selftest_disabled       0
84 #endif
85
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 and will be set to zero if the initialization
113  * of the tracer is successful. That is the only place that sets
114  * it back to zero.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
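/*
 * Example usage (illustrative sketch, not part of this file), based on the
 * values documented above and the "ftrace_dump_on_oops" parameter parsing
 * below:
 *
 *	ftrace_dump_on_oops			# kernel command line, dump all CPUs
 *	ftrace_dump_on_oops=orig_cpu		# dump only the CPU that oopsed
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	# run time, all CPUs
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops	# run time, oops CPU only
 */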
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187 static bool snapshot_at_boot;
188
189 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
190 static int boot_instance_index;
191
192 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_snapshot_index;
194
195 static int __init set_cmdline_ftrace(char *str)
196 {
197         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
198         default_bootup_tracer = bootup_tracer_buf;
199         /* We are using ftrace early, expand it */
200         trace_set_ring_buffer_expanded(NULL);
201         return 1;
202 }
203 __setup("ftrace=", set_cmdline_ftrace);
204
205 static int __init set_ftrace_dump_on_oops(char *str)
206 {
207         if (*str++ != '=' || !*str || !strcmp("1", str)) {
208                 ftrace_dump_on_oops = DUMP_ALL;
209                 return 1;
210         }
211
212         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
213                 ftrace_dump_on_oops = DUMP_ORIG;
214                 return 1;
215         }
216
217         return 0;
218 }
219 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
220
221 static int __init stop_trace_on_warning(char *str)
222 {
223         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
224                 __disable_trace_on_warning = 1;
225         return 1;
226 }
227 __setup("traceoff_on_warning", stop_trace_on_warning);
228
229 static int __init boot_alloc_snapshot(char *str)
230 {
231         char *slot = boot_snapshot_info + boot_snapshot_index;
232         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
233         int ret;
234
235         if (str[0] == '=') {
236                 str++;
237                 if (strlen(str) >= left)
238                         return -1;
239
240                 ret = snprintf(slot, left, "%s\t", str);
241                 boot_snapshot_index += ret;
242         } else {
243                 allocate_snapshot = true;
244                 /* We also need the main ring buffer expanded */
245                 trace_set_ring_buffer_expanded(NULL);
246         }
247         return 1;
248 }
249 __setup("alloc_snapshot", boot_alloc_snapshot);
250
251
252 static int __init boot_snapshot(char *str)
253 {
254         snapshot_at_boot = true;
255         boot_alloc_snapshot(str);
256         return 1;
257 }
258 __setup("ftrace_boot_snapshot", boot_snapshot);
259
260
261 static int __init boot_instance(char *str)
262 {
263         char *slot = boot_instance_info + boot_instance_index;
264         int left = sizeof(boot_instance_info) - boot_instance_index;
265         int ret;
266
267         if (strlen(str) >= left)
268                 return -1;
269
270         ret = snprintf(slot, left, "%s\t", str);
271         boot_instance_index += ret;
272
273         return 1;
274 }
275 __setup("trace_instance=", boot_instance);
276
277
278 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
279
280 static int __init set_trace_boot_options(char *str)
281 {
282         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
283         return 1;
284 }
285 __setup("trace_options=", set_trace_boot_options);
286
287 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
288 static char *trace_boot_clock __initdata;
289
290 static int __init set_trace_boot_clock(char *str)
291 {
292         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
293         trace_boot_clock = trace_boot_clock_buf;
294         return 1;
295 }
296 __setup("trace_clock=", set_trace_boot_clock);
297
298 static int __init set_tracepoint_printk(char *str)
299 {
300         /* Ignore the "tp_printk_stop_on_boot" param */
301         if (*str == '_')
302                 return 0;
303
304         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
305                 tracepoint_printk = 1;
306         return 1;
307 }
308 __setup("tp_printk", set_tracepoint_printk);
309
310 static int __init set_tracepoint_printk_stop(char *str)
311 {
312         tracepoint_printk_stop_on_boot = true;
313         return 1;
314 }
315 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
316
317 unsigned long long ns2usecs(u64 nsec)
318 {
319         nsec += 500;
320         do_div(nsec, 1000);
321         return nsec;
322 }
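/*
 * e.g. ns2usecs(1499) == 1 and ns2usecs(1500) == 2: the +500 makes the
 * integer division round to the nearest microsecond.
 */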
323
324 static void
325 trace_process_export(struct trace_export *export,
326                struct ring_buffer_event *event, int flag)
327 {
328         struct trace_entry *entry;
329         unsigned int size = 0;
330
331         if (export->flags & flag) {
332                 entry = ring_buffer_event_data(event);
333                 size = ring_buffer_event_length(event);
334                 export->write(export, entry, size);
335         }
336 }
337
338 static DEFINE_MUTEX(ftrace_export_lock);
339
340 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
341
342 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
344 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
345
346 static inline void ftrace_exports_enable(struct trace_export *export)
347 {
348         if (export->flags & TRACE_EXPORT_FUNCTION)
349                 static_branch_inc(&trace_function_exports_enabled);
350
351         if (export->flags & TRACE_EXPORT_EVENT)
352                 static_branch_inc(&trace_event_exports_enabled);
353
354         if (export->flags & TRACE_EXPORT_MARKER)
355                 static_branch_inc(&trace_marker_exports_enabled);
356 }
357
358 static inline void ftrace_exports_disable(struct trace_export *export)
359 {
360         if (export->flags & TRACE_EXPORT_FUNCTION)
361                 static_branch_dec(&trace_function_exports_enabled);
362
363         if (export->flags & TRACE_EXPORT_EVENT)
364                 static_branch_dec(&trace_event_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_MARKER)
367                 static_branch_dec(&trace_marker_exports_enabled);
368 }
369
370 static void ftrace_exports(struct ring_buffer_event *event, int flag)
371 {
372         struct trace_export *export;
373
374         preempt_disable_notrace();
375
376         export = rcu_dereference_raw_check(ftrace_exports_list);
377         while (export) {
378                 trace_process_export(export, event, flag);
379                 export = rcu_dereference_raw_check(export->next);
380         }
381
382         preempt_enable_notrace();
383 }
384
385 static inline void
386 add_trace_export(struct trace_export **list, struct trace_export *export)
387 {
388         rcu_assign_pointer(export->next, *list);
389         /*
390          * We are entering export into the list but another
391          * CPU might be walking that list. We need to make sure
392          * the export->next pointer is valid before another CPU sees
393          * the export pointer included into the list.
394          */
395         rcu_assign_pointer(*list, export);
396 }
397
398 static inline int
399 rm_trace_export(struct trace_export **list, struct trace_export *export)
400 {
401         struct trace_export **p;
402
403         for (p = list; *p != NULL; p = &(*p)->next)
404                 if (*p == export)
405                         break;
406
407         if (*p != export)
408                 return -1;
409
410         rcu_assign_pointer(*p, (*p)->next);
411
412         return 0;
413 }
414
415 static inline void
416 add_ftrace_export(struct trace_export **list, struct trace_export *export)
417 {
418         ftrace_exports_enable(export);
419
420         add_trace_export(list, export);
421 }
422
423 static inline int
424 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
425 {
426         int ret;
427
428         ret = rm_trace_export(list, export);
429         ftrace_exports_disable(export);
430
431         return ret;
432 }
433
434 int register_ftrace_export(struct trace_export *export)
435 {
436         if (WARN_ON_ONCE(!export->write))
437                 return -1;
438
439         mutex_lock(&ftrace_export_lock);
440
441         add_ftrace_export(&ftrace_exports_list, export);
442
443         mutex_unlock(&ftrace_export_lock);
444
445         return 0;
446 }
447 EXPORT_SYMBOL_GPL(register_ftrace_export);
448
449 int unregister_ftrace_export(struct trace_export *export)
450 {
451         int ret;
452
453         mutex_lock(&ftrace_export_lock);
454
455         ret = rm_ftrace_export(&ftrace_exports_list, export);
456
457         mutex_unlock(&ftrace_export_lock);
458
459         return ret;
460 }
461 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
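/*
 * Hypothetical usage sketch (not part of this file): a minimal trace_export
 * consumer that mirrors raw trace event data somewhere else.  This assumes
 * the struct trace_export layout from <linux/trace.h> (a ->write() callback
 * plus ->flags); the "example_*" names are illustrative only.
 */
static void example_export_write(struct trace_export *export,
				 const void *buf, unsigned int len)
{
	/* forward @len bytes of raw trace event data, e.g. to a device or log */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * A caller would then pair:
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */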
462
463 /* trace_flags holds trace_options default values */
464 #define TRACE_DEFAULT_FLAGS                                             \
465         (FUNCTION_DEFAULT_FLAGS |                                       \
466          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
467          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
468          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
469          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
470          TRACE_ITER_HASH_PTR)
471
472 /* trace_options that are only supported by global_trace */
473 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
474                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
475
476 /* trace_flags that are default zero for instances */
477 #define ZEROED_TRACE_FLAGS \
478         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
479
480 /*
481  * The global_trace is the descriptor that holds the top-level tracing
482  * buffers for the live tracing.
483  */
484 static struct trace_array global_trace = {
485         .trace_flags = TRACE_DEFAULT_FLAGS,
486 };
487
488 void trace_set_ring_buffer_expanded(struct trace_array *tr)
489 {
490         if (!tr)
491                 tr = &global_trace;
492         tr->ring_buffer_expanded = true;
493 }
494
495 LIST_HEAD(ftrace_trace_arrays);
496
497 int trace_array_get(struct trace_array *this_tr)
498 {
499         struct trace_array *tr;
500         int ret = -ENODEV;
501
502         mutex_lock(&trace_types_lock);
503         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
504                 if (tr == this_tr) {
505                         tr->ref++;
506                         ret = 0;
507                         break;
508                 }
509         }
510         mutex_unlock(&trace_types_lock);
511
512         return ret;
513 }
514
515 static void __trace_array_put(struct trace_array *this_tr)
516 {
517         WARN_ON(!this_tr->ref);
518         this_tr->ref--;
519 }
520
521 /**
522  * trace_array_put - Decrement the reference counter for this trace array.
523  * @this_tr : pointer to the trace array
524  *
525  * NOTE: Use this when we no longer need the trace array returned by
526  * trace_array_get_by_name(). This ensures the trace array can be later
527  * destroyed.
528  *
529  */
530 void trace_array_put(struct trace_array *this_tr)
531 {
532         if (!this_tr)
533                 return;
534
535         mutex_lock(&trace_types_lock);
536         __trace_array_put(this_tr);
537         mutex_unlock(&trace_types_lock);
538 }
539 EXPORT_SYMBOL_GPL(trace_array_put);
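/*
 * Hypothetical usage sketch (not part of this file): pin a trace_array
 * while using it and drop the reference afterwards.  @tr is assumed to
 * have been obtained elsewhere (for example via trace_array_get_by_name());
 * the function name is illustrative only.
 */
static void example_use_instance(struct trace_array *tr)
{
	if (trace_array_get(tr) < 0)
		return;			/* the instance no longer exists */

	/* ... @tr is safe to use here; it cannot be destroyed ... */

	trace_array_put(tr);		/* allow the instance to be destroyed again */
}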
540
541 int tracing_check_open_get_tr(struct trace_array *tr)
542 {
543         int ret;
544
545         ret = security_locked_down(LOCKDOWN_TRACEFS);
546         if (ret)
547                 return ret;
548
549         if (tracing_disabled)
550                 return -ENODEV;
551
552         if (tr && trace_array_get(tr) < 0)
553                 return -ENODEV;
554
555         return 0;
556 }
557
558 int call_filter_check_discard(struct trace_event_call *call, void *rec,
559                               struct trace_buffer *buffer,
560                               struct ring_buffer_event *event)
561 {
562         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
563             !filter_match_preds(call->filter, rec)) {
564                 __trace_event_discard_commit(buffer, event);
565                 return 1;
566         }
567
568         return 0;
569 }
570
571 /**
572  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
573  * @filtered_pids: The list of pids to check
574  * @search_pid: The PID to find in @filtered_pids
575  *
576  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577  */
578 bool
579 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
580 {
581         return trace_pid_list_is_set(filtered_pids, search_pid);
582 }
583
584 /**
585  * trace_ignore_this_task - should a task be ignored for tracing
586  * @filtered_pids: The list of pids to check
587  * @filtered_no_pids: The list of pids not to be traced
588  * @task: The task that should be ignored if not filtered
589  *
590  * Checks if @task should be traced or not from @filtered_pids.
591  * Returns true if @task should *NOT* be traced.
592  * Returns false if @task should be traced.
593  */
594 bool
595 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
596                        struct trace_pid_list *filtered_no_pids,
597                        struct task_struct *task)
598 {
599         /*
600          * If filtered_no_pids is not empty, and the task's pid is listed
601          * in filtered_no_pids, then return true.
602          * Otherwise, if filtered_pids is empty, that means we can
603          * trace all tasks. If it has content, then only trace pids
604          * within filtered_pids.
605          */
606
607         return (filtered_pids &&
608                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
609                 (filtered_no_pids &&
610                  trace_find_filtered_pid(filtered_no_pids, task->pid));
611 }
612
613 /**
614  * trace_filter_add_remove_task - Add or remove a task from a pid_list
615  * @pid_list: The list to modify
616  * @self: The current task for fork or NULL for exit
617  * @task: The task to add or remove
618  *
619  * If adding a task, if @self is defined, the task is only added if @self
620  * is also included in @pid_list. This happens on fork and tasks should
621  * only be added when the parent is listed. If @self is NULL, then the
622  * @task pid will be removed from the list, which would happen on exit
623  * of a task.
624  */
625 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
626                                   struct task_struct *self,
627                                   struct task_struct *task)
628 {
629         if (!pid_list)
630                 return;
631
632         /* For forks, we only add if the forking task is listed */
633         if (self) {
634                 if (!trace_find_filtered_pid(pid_list, self->pid))
635                         return;
636         }
637
638         /* "self" is set for forks, and NULL for exits */
639         if (self)
640                 trace_pid_list_set(pid_list, task->pid);
641         else
642                 trace_pid_list_clear(pid_list, task->pid);
643 }
644
645 /**
646  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
647  * @pid_list: The pid list to show
648  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
649  * @pos: The position of the file
650  *
651  * This is used by the seq_file "next" operation to iterate the pids
652  * listed in a trace_pid_list structure.
653  *
654  * Returns the pid+1 as we want to display pid of zero, but NULL would
655  * stop the iteration.
656  */
657 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
658 {
659         long pid = (unsigned long)v;
660         unsigned int next;
661
662         (*pos)++;
663
664         /* pid already is +1 of the actual previous bit */
665         if (trace_pid_list_next(pid_list, pid, &next) < 0)
666                 return NULL;
667
668         pid = next;
669
670         /* Return pid + 1 to allow zero to be represented */
671         return (void *)(pid + 1);
672 }
673
674 /**
675  * trace_pid_start - Used for seq_file to start reading pid lists
676  * @pid_list: The pid list to show
677  * @pos: The position of the file
678  *
679  * This is used by seq_file "start" operation to start the iteration
680  * of listing pids.
681  *
682  * Returns the pid+1 as we want to display pid of zero, but NULL would
683  * stop the iteration.
684  */
685 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
686 {
687         unsigned long pid;
688         unsigned int first;
689         loff_t l = 0;
690
691         if (trace_pid_list_first(pid_list, &first) < 0)
692                 return NULL;
693
694         pid = first;
695
696         /* Return pid + 1 so that zero can be the exit value */
697         for (pid++; pid && l < *pos;
698              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
699                 ;
700         return (void *)pid;
701 }
702
703 /**
704  * trace_pid_show - show the current pid in seq_file processing
705  * @m: The seq_file structure to write into
706  * @v: A void pointer of the pid (+1) value to display
707  *
708  * Can be directly used by seq_file operations to display the current
709  * pid value.
710  */
711 int trace_pid_show(struct seq_file *m, void *v)
712 {
713         unsigned long pid = (unsigned long)v - 1;
714
715         seq_printf(m, "%lu\n", pid);
716         return 0;
717 }
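/*
 * Hypothetical sketch (not part of this file): how the three helpers above
 * are typically wired into a seq_file interface for a pid list, similar to
 * what the event pid filter files do elsewhere in the tracing code.
 * "example_pid_list" and the other example_* names are illustrative only.
 */
static struct trace_pid_list *example_pid_list;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};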
718
719 /* 128 should be much more than enough */
720 #define PID_BUF_SIZE            127
721
722 int trace_pid_write(struct trace_pid_list *filtered_pids,
723                     struct trace_pid_list **new_pid_list,
724                     const char __user *ubuf, size_t cnt)
725 {
726         struct trace_pid_list *pid_list;
727         struct trace_parser parser;
728         unsigned long val;
729         int nr_pids = 0;
730         ssize_t read = 0;
731         ssize_t ret;
732         loff_t pos;
733         pid_t pid;
734
735         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
736                 return -ENOMEM;
737
738         /*
739          * Always create a new array. The write is an all-or-nothing
740          * operation: a new array is built whenever the user adds new
741          * pids, and if the operation fails, the current list is
742          * not modified.
743          */
744         pid_list = trace_pid_list_alloc();
745         if (!pid_list) {
746                 trace_parser_put(&parser);
747                 return -ENOMEM;
748         }
749
750         if (filtered_pids) {
751                 /* copy the current bits to the new max */
752                 ret = trace_pid_list_first(filtered_pids, &pid);
753                 while (!ret) {
754                         trace_pid_list_set(pid_list, pid);
755                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
756                         nr_pids++;
757                 }
758         }
759
760         ret = 0;
761         while (cnt > 0) {
762
763                 pos = 0;
764
765                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
766                 if (ret < 0)
767                         break;
768
769                 read += ret;
770                 ubuf += ret;
771                 cnt -= ret;
772
773                 if (!trace_parser_loaded(&parser))
774                         break;
775
776                 ret = -EINVAL;
777                 if (kstrtoul(parser.buffer, 0, &val))
778                         break;
779
780                 pid = (pid_t)val;
781
782                 if (trace_pid_list_set(pid_list, pid) < 0) {
783                         ret = -1;
784                         break;
785                 }
786                 nr_pids++;
787
788                 trace_parser_clear(&parser);
789                 ret = 0;
790         }
791         trace_parser_put(&parser);
792
793         if (ret < 0) {
794                 trace_pid_list_free(pid_list);
795                 return ret;
796         }
797
798         if (!nr_pids) {
799                 /* Cleared the list of pids */
800                 trace_pid_list_free(pid_list);
801                 pid_list = NULL;
802         }
803
804         *new_pid_list = pid_list;
805
806         return read;
807 }
808
809 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
810 {
811         u64 ts;
812
813         /* Early boot up does not have a buffer yet */
814         if (!buf->buffer)
815                 return trace_clock_local();
816
817         ts = ring_buffer_time_stamp(buf->buffer);
818         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
819
820         return ts;
821 }
822
823 u64 ftrace_now(int cpu)
824 {
825         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
826 }
827
828 /**
829  * tracing_is_enabled - Show if global_trace has been enabled
830  *
831  * Shows if the global trace has been enabled or not. It uses the
832  * mirror flag "buffer_disabled", which is meant for fast paths such as
833  * the irqsoff tracer. But it may be inaccurate due to races. If you
834  * need to know the accurate state, use tracing_is_on() which is a little
835  * slower, but accurate.
836  */
837 int tracing_is_enabled(void)
838 {
839         /*
840          * For quick access (irqsoff uses this in fast path), just
841          * return the mirror variable of the state of the ring buffer.
842          * It's a little racy, but we don't really care.
843          */
844         smp_rmb();
845         return !global_trace.buffer_disabled;
846 }
847
848 /*
849  * trace_buf_size is the size in bytes that is allocated
850  * for a buffer. Note, the number of bytes is always rounded
851  * to page size.
852  *
853  * This number is purposely set to a low value of 16384.
854  * If a dump on oops happens, it is much nicer not to have
855  * to wait for all that output. In any case, this is
856  * configurable at both boot time and run time.
857  */
858 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
859
860 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
861
862 /* trace_types holds a link list of available tracers. */
863 static struct tracer            *trace_types __read_mostly;
864
865 /*
866  * trace_types_lock is used to protect the trace_types list.
867  */
868 DEFINE_MUTEX(trace_types_lock);
869
870 /*
871  * Serialize access to the ring buffer.
872  *
873  * The ring buffer serializes readers, but that is only low-level protection.
874  * The validity of the events (returned by ring_buffer_peek() etc.)
875  * is not protected by the ring buffer.
876  *
877  * The content of events may become garbage if other processes are allowed
878  * to consume these events concurrently:
879  *   A) the page of the consumed events may become a normal page
880  *      (not a reader page) in the ring buffer, and this page will be rewritten
881  *      by the event producer.
882  *   B) The page of the consumed events may become a page for splice_read,
883  *      and this page will be returned to the system.
884  *
885  * These primitives allow multiple processes to access different cpu ring
886  * buffers concurrently.
887  *
888  * These primitives don't distinguish read-only and read-consume access.
889  * Multiple read-only accesses are also serialized.
890  */
891
892 #ifdef CONFIG_SMP
893 static DECLARE_RWSEM(all_cpu_access_lock);
894 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
895
896 static inline void trace_access_lock(int cpu)
897 {
898         if (cpu == RING_BUFFER_ALL_CPUS) {
899                 /* gain it for accessing the whole ring buffer. */
900                 down_write(&all_cpu_access_lock);
901         } else {
902                 /* gain it for accessing a cpu ring buffer. */
903
904                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
905                 down_read(&all_cpu_access_lock);
906
907                 /* Secondly block other access to this @cpu ring buffer. */
908                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
909         }
910 }
911
912 static inline void trace_access_unlock(int cpu)
913 {
914         if (cpu == RING_BUFFER_ALL_CPUS) {
915                 up_write(&all_cpu_access_lock);
916         } else {
917                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
918                 up_read(&all_cpu_access_lock);
919         }
920 }
921
922 static inline void trace_access_lock_init(void)
923 {
924         int cpu;
925
926         for_each_possible_cpu(cpu)
927                 mutex_init(&per_cpu(cpu_access_lock, cpu));
928 }
929
930 #else
931
932 static DEFINE_MUTEX(access_lock);
933
934 static inline void trace_access_lock(int cpu)
935 {
936         (void)cpu;
937         mutex_lock(&access_lock);
938 }
939
940 static inline void trace_access_unlock(int cpu)
941 {
942         (void)cpu;
943         mutex_unlock(&access_lock);
944 }
945
946 static inline void trace_access_lock_init(void)
947 {
948 }
949
950 #endif
951
952 #ifdef CONFIG_STACKTRACE
953 static void __ftrace_trace_stack(struct trace_buffer *buffer,
954                                  unsigned int trace_ctx,
955                                  int skip, struct pt_regs *regs);
956 static inline void ftrace_trace_stack(struct trace_array *tr,
957                                       struct trace_buffer *buffer,
958                                       unsigned int trace_ctx,
959                                       int skip, struct pt_regs *regs);
960
961 #else
962 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
963                                         unsigned int trace_ctx,
964                                         int skip, struct pt_regs *regs)
965 {
966 }
967 static inline void ftrace_trace_stack(struct trace_array *tr,
968                                       struct trace_buffer *buffer,
969                                       unsigned long trace_ctx,
970                                       int skip, struct pt_regs *regs)
971 {
972 }
973
974 #endif
975
976 static __always_inline void
977 trace_event_setup(struct ring_buffer_event *event,
978                   int type, unsigned int trace_ctx)
979 {
980         struct trace_entry *ent = ring_buffer_event_data(event);
981
982         tracing_generic_entry_update(ent, type, trace_ctx);
983 }
984
985 static __always_inline struct ring_buffer_event *
986 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987                           int type,
988                           unsigned long len,
989                           unsigned int trace_ctx)
990 {
991         struct ring_buffer_event *event;
992
993         event = ring_buffer_lock_reserve(buffer, len);
994         if (event != NULL)
995                 trace_event_setup(event, type, trace_ctx);
996
997         return event;
998 }
999
1000 void tracer_tracing_on(struct trace_array *tr)
1001 {
1002         if (tr->array_buffer.buffer)
1003                 ring_buffer_record_on(tr->array_buffer.buffer);
1004         /*
1005          * This flag is looked at when buffers haven't been allocated
1006          * yet, or by some tracers (like irqsoff), that just want to
1007          * know if the ring buffer has been disabled, but it can handle
1008          * races of where it gets disabled but we still do a record.
1009          * As the check is in the fast path of the tracers, it is more
1010          * important to be fast than accurate.
1011          */
1012         tr->buffer_disabled = 0;
1013         /* Make the flag seen by readers */
1014         smp_wmb();
1015 }
1016
1017 /**
1018  * tracing_on - enable tracing buffers
1019  *
1020  * This function enables tracing buffers that may have been
1021  * disabled with tracing_off.
1022  */
1023 void tracing_on(void)
1024 {
1025         tracer_tracing_on(&global_trace);
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_on);
1028
1029
1030 static __always_inline void
1031 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1032 {
1033         __this_cpu_write(trace_taskinfo_save, true);
1034
1035         /* If this is the temp buffer, we need to commit fully */
1036         if (this_cpu_read(trace_buffered_event) == event) {
1037                 /* Length is in event->array[0] */
1038                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1039                 /* Release the temp buffer */
1040                 this_cpu_dec(trace_buffered_event_cnt);
1041                 /* ring_buffer_unlock_commit() enables preemption */
1042                 preempt_enable_notrace();
1043         } else
1044                 ring_buffer_unlock_commit(buffer);
1045 }
1046
1047 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1048                        const char *str, int size)
1049 {
1050         struct ring_buffer_event *event;
1051         struct trace_buffer *buffer;
1052         struct print_entry *entry;
1053         unsigned int trace_ctx;
1054         int alloc;
1055
1056         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057                 return 0;
1058
1059         if (unlikely(tracing_selftest_running && tr == &global_trace))
1060                 return 0;
1061
1062         if (unlikely(tracing_disabled))
1063                 return 0;
1064
1065         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1066
1067         trace_ctx = tracing_gen_ctx();
1068         buffer = tr->array_buffer.buffer;
1069         ring_buffer_nest_start(buffer);
1070         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1071                                             trace_ctx);
1072         if (!event) {
1073                 size = 0;
1074                 goto out;
1075         }
1076
1077         entry = ring_buffer_event_data(event);
1078         entry->ip = ip;
1079
1080         memcpy(&entry->buf, str, size);
1081
1082         /* Add a newline if necessary */
1083         if (entry->buf[size - 1] != '\n') {
1084                 entry->buf[size] = '\n';
1085                 entry->buf[size + 1] = '\0';
1086         } else
1087                 entry->buf[size] = '\0';
1088
1089         __buffer_unlock_commit(buffer, event);
1090         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return size;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_array_puts);
1096
1097 /**
1098  * __trace_puts - write a constant string into the trace buffer.
1099  * @ip:    The address of the caller
1100  * @str:   The constant string to write
1101  * @size:  The size of the string.
1102  */
1103 int __trace_puts(unsigned long ip, const char *str, int size)
1104 {
1105         return __trace_array_puts(&global_trace, ip, str, size);
1106 }
1107 EXPORT_SYMBOL_GPL(__trace_puts);
1108
1109 /**
1110  * __trace_bputs - write the pointer to a constant string into trace buffer
1111  * @ip:    The address of the caller
1112  * @str:   The constant string to write to the buffer to
1113  */
1114 int __trace_bputs(unsigned long ip, const char *str)
1115 {
1116         struct ring_buffer_event *event;
1117         struct trace_buffer *buffer;
1118         struct bputs_entry *entry;
1119         unsigned int trace_ctx;
1120         int size = sizeof(struct bputs_entry);
1121         int ret = 0;
1122
1123         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1124                 return 0;
1125
1126         if (unlikely(tracing_selftest_running || tracing_disabled))
1127                 return 0;
1128
1129         trace_ctx = tracing_gen_ctx();
1130         buffer = global_trace.array_buffer.buffer;
1131
1132         ring_buffer_nest_start(buffer);
1133         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1134                                             trace_ctx);
1135         if (!event)
1136                 goto out;
1137
1138         entry = ring_buffer_event_data(event);
1139         entry->ip                       = ip;
1140         entry->str                      = str;
1141
1142         __buffer_unlock_commit(buffer, event);
1143         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1144
1145         ret = 1;
1146  out:
1147         ring_buffer_nest_end(buffer);
1148         return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(__trace_bputs);
1151
1152 #ifdef CONFIG_TRACER_SNAPSHOT
1153 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1154                                            void *cond_data)
1155 {
1156         struct tracer *tracer = tr->current_trace;
1157         unsigned long flags;
1158
1159         if (in_nmi()) {
1160                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1161                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1162                 return;
1163         }
1164
1165         if (!tr->allocated_snapshot) {
1166                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1167                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1168                 tracer_tracing_off(tr);
1169                 return;
1170         }
1171
1172         /* Note, snapshot can not be used when the tracer uses it */
1173         if (tracer->use_max_tr) {
1174                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1175                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1176                 return;
1177         }
1178
1179         local_irq_save(flags);
1180         update_max_tr(tr, current, smp_processor_id(), cond_data);
1181         local_irq_restore(flags);
1182 }
1183
1184 void tracing_snapshot_instance(struct trace_array *tr)
1185 {
1186         tracing_snapshot_instance_cond(tr, NULL);
1187 }
1188
1189 /**
1190  * tracing_snapshot - take a snapshot of the current buffer.
1191  *
1192  * This causes a swap between the snapshot buffer and the current live
1193  * tracing buffer. You can use this to take snapshots of the live
1194  * trace when some condition is triggered, but continue to trace.
1195  *
1196  * Note, make sure to allocate the snapshot with either
1197  * a tracing_snapshot_alloc(), or by doing it manually
1198  * with: echo 1 > /sys/kernel/tracing/snapshot
1199  *
1200  * If the snapshot buffer is not allocated, it will stop tracing.
1201  * Basically making a permanent snapshot.
1202  */
1203 void tracing_snapshot(void)
1204 {
1205         struct trace_array *tr = &global_trace;
1206
1207         tracing_snapshot_instance(tr);
1208 }
1209 EXPORT_SYMBOL_GPL(tracing_snapshot);
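/*
 * Hypothetical usage sketch (not part of this file): capture the live
 * buffer when a rare condition is hit, then keep on tracing.  This assumes
 * the snapshot buffer was allocated beforehand, either with
 * tracing_snapshot_alloc() or via "echo 1 > /sys/kernel/tracing/snapshot";
 * the function name is illustrative only.
 */
static void example_rare_condition_hit(void)
{
	trace_printk("rare condition hit, taking a snapshot\n");
	tracing_snapshot();	/* swap the live buffer into the snapshot buffer */
}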
1210
1211 /**
1212  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1213  * @tr:         The tracing instance to snapshot
1214  * @cond_data:  The data to be tested conditionally, and possibly saved
1215  *
1216  * This is the same as tracing_snapshot() except that the snapshot is
1217  * conditional - the snapshot will only happen if the
1218  * cond_snapshot.update() implementation receiving the cond_data
1219  * returns true, which means that the trace array's cond_snapshot
1220  * update() operation used the cond_data to determine whether the
1221  * snapshot should be taken, and if it was, presumably saved it along
1222  * with the snapshot.
1223  */
1224 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1225 {
1226         tracing_snapshot_instance_cond(tr, cond_data);
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1229
1230 /**
1231  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1232  * @tr:         The tracing instance
1233  *
1234  * When the user enables a conditional snapshot using
1235  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1236  * with the snapshot.  This accessor is used to retrieve it.
1237  *
1238  * Should not be called from cond_snapshot.update(), since it takes
1239  * the tr->max_lock lock, which the code calling
1240  * cond_snapshot.update() has already done.
1241  *
1242  * Returns the cond_data associated with the trace array's snapshot.
1243  */
1244 void *tracing_cond_snapshot_data(struct trace_array *tr)
1245 {
1246         void *cond_data = NULL;
1247
1248         local_irq_disable();
1249         arch_spin_lock(&tr->max_lock);
1250
1251         if (tr->cond_snapshot)
1252                 cond_data = tr->cond_snapshot->cond_data;
1253
1254         arch_spin_unlock(&tr->max_lock);
1255         local_irq_enable();
1256
1257         return cond_data;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1260
1261 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1262                                         struct array_buffer *size_buf, int cpu_id);
1263 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1264
1265 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1266 {
1267         int order;
1268         int ret;
1269
1270         if (!tr->allocated_snapshot) {
1271
1272                 /* Make the snapshot buffer have the same order as main buffer */
1273                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1274                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1275                 if (ret < 0)
1276                         return ret;
1277
1278                 /* allocate spare buffer */
1279                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1280                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1281                 if (ret < 0)
1282                         return ret;
1283
1284                 tr->allocated_snapshot = true;
1285         }
1286
1287         return 0;
1288 }
1289
1290 static void free_snapshot(struct trace_array *tr)
1291 {
1292         /*
1293          * We don't free the ring buffer; instead, we resize it because
1294          * the max_tr ring buffer has some state (e.g. ring->clock) and
1295          * we want to preserve it.
1296          */
1297         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1298         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1299         set_buffer_entries(&tr->max_buffer, 1);
1300         tracing_reset_online_cpus(&tr->max_buffer);
1301         tr->allocated_snapshot = false;
1302 }
1303
1304 /**
1305  * tracing_alloc_snapshot - allocate snapshot buffer.
1306  *
1307  * This only allocates the snapshot buffer if it isn't already
1308  * allocated - it doesn't also take a snapshot.
1309  *
1310  * This is meant to be used in cases where the snapshot buffer needs
1311  * to be set up for events that can't sleep but need to be able to
1312  * trigger a snapshot.
1313  */
1314 int tracing_alloc_snapshot(void)
1315 {
1316         struct trace_array *tr = &global_trace;
1317         int ret;
1318
1319         ret = tracing_alloc_snapshot_instance(tr);
1320         WARN_ON(ret < 0);
1321
1322         return ret;
1323 }
1324 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1325
1326 /**
1327  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1328  *
1329  * This is similar to tracing_snapshot(), but it will allocate the
1330  * snapshot buffer if it isn't already allocated. Use this only
1331  * where it is safe to sleep, as the allocation may sleep.
1332  *
1333  * This causes a swap between the snapshot buffer and the current live
1334  * tracing buffer. You can use this to take snapshots of the live
1335  * trace when some condition is triggered, but continue to trace.
1336  */
1337 void tracing_snapshot_alloc(void)
1338 {
1339         int ret;
1340
1341         ret = tracing_alloc_snapshot();
1342         if (ret < 0)
1343                 return;
1344
1345         tracing_snapshot();
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1348
1349 /**
1350  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  * @cond_data:  User data to associate with the snapshot
1353  * @update:     Implementation of the cond_snapshot update function
1354  *
1355  * Check whether the conditional snapshot for the given instance has
1356  * already been enabled, or if the current tracer is already using a
1357  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1358  * save the cond_data and update function inside.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1363                                  cond_update_fn_t update)
1364 {
1365         struct cond_snapshot *cond_snapshot;
1366         int ret = 0;
1367
1368         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1369         if (!cond_snapshot)
1370                 return -ENOMEM;
1371
1372         cond_snapshot->cond_data = cond_data;
1373         cond_snapshot->update = update;
1374
1375         mutex_lock(&trace_types_lock);
1376
1377         ret = tracing_alloc_snapshot_instance(tr);
1378         if (ret)
1379                 goto fail_unlock;
1380
1381         if (tr->current_trace->use_max_tr) {
1382                 ret = -EBUSY;
1383                 goto fail_unlock;
1384         }
1385
1386         /*
1387          * The cond_snapshot can only change to NULL without the
1388          * trace_types_lock. We don't care if we race with it going
1389          * to NULL, but we want to make sure that it's not set to
1390          * something other than NULL when we get here, which we can
1391          * do safely with only holding the trace_types_lock and not
1392          * having to take the max_lock.
1393          */
1394         if (tr->cond_snapshot) {
1395                 ret = -EBUSY;
1396                 goto fail_unlock;
1397         }
1398
1399         local_irq_disable();
1400         arch_spin_lock(&tr->max_lock);
1401         tr->cond_snapshot = cond_snapshot;
1402         arch_spin_unlock(&tr->max_lock);
1403         local_irq_enable();
1404
1405         mutex_unlock(&trace_types_lock);
1406
1407         return ret;
1408
1409  fail_unlock:
1410         mutex_unlock(&trace_types_lock);
1411         kfree(cond_snapshot);
1412         return ret;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
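/*
 * Hypothetical sketch (not part of this file): a cond_snapshot update()
 * callback and how it would be wired up.  This assumes cond_update_fn_t
 * is bool (*)(struct trace_array *tr, void *cond_data) as declared in
 * kernel/trace/trace.h; @cond_data here is whatever a later
 * tracing_snapshot_cond() call passes in, and the "latency threshold"
 * interpretation is purely illustrative.
 */
static unsigned long example_threshold = 1000;

static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *latency = cond_data;

	/* swap the buffers only when the reported latency is large enough */
	return latency && *latency > example_threshold;
}

/*
 * Enable:   tracing_snapshot_cond_enable(tr, NULL, example_cond_update);
 * Trigger:  tracing_snapshot_cond(tr, &measured_latency);
 * Disable:  tracing_snapshot_cond_disable(tr);
 */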
1415
1416 /**
1417  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1418  * @tr:         The tracing instance
1419  *
1420  * Check whether the conditional snapshot for the given instance is
1421  * enabled; if so, free the cond_snapshot associated with it,
1422  * otherwise return -EINVAL.
1423  *
1424  * Returns 0 if successful, error otherwise.
1425  */
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         int ret = 0;
1429
1430         local_irq_disable();
1431         arch_spin_lock(&tr->max_lock);
1432
1433         if (!tr->cond_snapshot)
1434                 ret = -EINVAL;
1435         else {
1436                 kfree(tr->cond_snapshot);
1437                 tr->cond_snapshot = NULL;
1438         }
1439
1440         arch_spin_unlock(&tr->max_lock);
1441         local_irq_enable();
1442
1443         return ret;
1444 }
1445 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1446 #else
1447 void tracing_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_snapshot);
1452 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1453 {
1454         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1455 }
1456 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1457 int tracing_alloc_snapshot(void)
1458 {
1459         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1460         return -ENODEV;
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1463 void tracing_snapshot_alloc(void)
1464 {
1465         /* Give warning */
1466         tracing_snapshot();
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1469 void *tracing_cond_snapshot_data(struct trace_array *tr)
1470 {
1471         return NULL;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1474 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1475 {
1476         return -ENODEV;
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1479 int tracing_snapshot_cond_disable(struct trace_array *tr)
1480 {
1481         return false;
1482 }
1483 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1484 #define free_snapshot(tr)       do { } while (0)
1485 #endif /* CONFIG_TRACER_SNAPSHOT */
1486
1487 void tracer_tracing_off(struct trace_array *tr)
1488 {
1489         if (tr->array_buffer.buffer)
1490                 ring_buffer_record_off(tr->array_buffer.buffer);
1491         /*
1492          * This flag is looked at when buffers haven't been allocated
1493          * yet, or by some tracers (like irqsoff), that just want to
1494          * know if the ring buffer has been disabled, but it can handle
1495          * races of where it gets disabled but we still do a record.
1496          * As the check is in the fast path of the tracers, it is more
1497          * important to be fast than accurate.
1498          */
1499         tr->buffer_disabled = 1;
1500         /* Make the flag seen by readers */
1501         smp_wmb();
1502 }
1503
1504 /**
1505  * tracing_off - turn off tracing buffers
1506  *
1507  * This function stops the tracing buffers from recording data.
1508  * It does not disable any overhead the tracers themselves may
1509  * be causing. This function simply causes all recording to
1510  * the ring buffers to fail.
1511  */
1512 void tracing_off(void)
1513 {
1514         tracer_tracing_off(&global_trace);
1515 }
1516 EXPORT_SYMBOL_GPL(tracing_off);
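/*
 * Hypothetical usage sketch (not part of this file): freeze the trace
 * buffers when an unexpected state is detected, so that the events leading
 * up to it are preserved for inspection in /sys/kernel/tracing/trace.
 * The function name is illustrative only.
 */
static void example_check_state(bool broken)
{
	if (broken) {
		trace_printk("unexpected state, freezing the trace buffers\n");
		tracing_off();
	}
}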
1517
1518 void disable_trace_on_warning(void)
1519 {
1520         if (__disable_trace_on_warning) {
1521                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1522                         "Disabling tracing due to warning\n");
1523                 tracing_off();
1524         }
1525 }
1526
1527 /**
1528  * tracer_tracing_is_on - show real state of ring buffer enabled
1529  * @tr : the trace array to know if ring buffer is enabled
1530  *
1531  * Shows real state of the ring buffer if it is enabled or not.
1532  */
1533 bool tracer_tracing_is_on(struct trace_array *tr)
1534 {
1535         if (tr->array_buffer.buffer)
1536                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1537         return !tr->buffer_disabled;
1538 }
1539
1540 /**
1541  * tracing_is_on - show state of ring buffers enabled
1542  */
1543 int tracing_is_on(void)
1544 {
1545         return tracer_tracing_is_on(&global_trace);
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_is_on);
1548
1549 static int __init set_buf_size(char *str)
1550 {
1551         unsigned long buf_size;
1552
1553         if (!str)
1554                 return 0;
1555         buf_size = memparse(str, &str);
1556         /*
1557          * nr_entries can not be zero and the startup
1558          * tests require some buffer space. Therefore
1559          * ensure we have at least 4096 bytes of buffer.
1560          */
1561         trace_buf_size = max(4096UL, buf_size);
1562         return 1;
1563 }
1564 __setup("trace_buf_size=", set_buf_size);
1565
1566 static int __init set_tracing_thresh(char *str)
1567 {
1568         unsigned long threshold;
1569         int ret;
1570
1571         if (!str)
1572                 return 0;
1573         ret = kstrtoul(str, 0, &threshold);
1574         if (ret < 0)
1575                 return 0;
1576         tracing_thresh = threshold * 1000;
1577         return 1;
1578 }
1579 __setup("tracing_thresh=", set_tracing_thresh);
1580
1581 unsigned long nsecs_to_usecs(unsigned long nsecs)
1582 {
1583         return nsecs / 1000;
1584 }
1585
1586 /*
1587  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1588  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1589  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1590  * of strings in the order that the evals (enum) were defined.
1591  */
1592 #undef C
1593 #define C(a, b) b
1594
1595 /* These must match the bit positions in trace_iterator_flags */
1596 static const char *trace_options[] = {
1597         TRACE_FLAGS
1598         NULL
1599 };
1600
1601 static struct {
1602         u64 (*func)(void);
1603         const char *name;
1604         int in_ns;              /* is this clock in nanoseconds? */
1605 } trace_clocks[] = {
1606         { trace_clock_local,            "local",        1 },
1607         { trace_clock_global,           "global",       1 },
1608         { trace_clock_counter,          "counter",      0 },
1609         { trace_clock_jiffies,          "uptime",       0 },
1610         { trace_clock,                  "perf",         1 },
1611         { ktime_get_mono_fast_ns,       "mono",         1 },
1612         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1613         { ktime_get_boot_fast_ns,       "boot",         1 },
1614         { ktime_get_tai_fast_ns,        "tai",          1 },
1615         ARCH_TRACE_CLOCKS
1616 };
1617
1618 bool trace_clock_in_ns(struct trace_array *tr)
1619 {
1620         if (trace_clocks[tr->clock_id].in_ns)
1621                 return true;
1622
1623         return false;
1624 }
1625
1626 /*
1627  * trace_parser_get_init - gets the buffer for trace parser
1628  */
1629 int trace_parser_get_init(struct trace_parser *parser, int size)
1630 {
1631         memset(parser, 0, sizeof(*parser));
1632
1633         parser->buffer = kmalloc(size, GFP_KERNEL);
1634         if (!parser->buffer)
1635                 return 1;
1636
1637         parser->size = size;
1638         return 0;
1639 }
1640
1641 /*
1642  * trace_parser_put - frees the buffer for trace parser
1643  */
1644 void trace_parser_put(struct trace_parser *parser)
1645 {
1646         kfree(parser->buffer);
1647         parser->buffer = NULL;
1648 }
1649
1650 /*
1651  * trace_get_user - reads the user input string separated by space
1652  * (matched by isspace(ch))
1653  *
1654  * For each string found the 'struct trace_parser' is updated,
1655  * and the function returns.
1656  *
1657  * Returns number of bytes read.
1658  *
1659  * See kernel/trace/trace.h for 'struct trace_parser' details.
1660  */
1661 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1662         size_t cnt, loff_t *ppos)
1663 {
1664         char ch;
1665         size_t read = 0;
1666         ssize_t ret;
1667
1668         if (!*ppos)
1669                 trace_parser_clear(parser);
1670
1671         ret = get_user(ch, ubuf++);
1672         if (ret)
1673                 goto out;
1674
1675         read++;
1676         cnt--;
1677
1678         /*
1679          * If the parser did not finish with the last write,
1680          * continue reading the user input without skipping spaces.
1681          */
1682         if (!parser->cont) {
1683                 /* skip white space */
1684                 while (cnt && isspace(ch)) {
1685                         ret = get_user(ch, ubuf++);
1686                         if (ret)
1687                                 goto out;
1688                         read++;
1689                         cnt--;
1690                 }
1691
1692                 parser->idx = 0;
1693
1694                 /* only spaces were written */
1695                 if (isspace(ch) || !ch) {
1696                         *ppos += read;
1697                         ret = read;
1698                         goto out;
1699                 }
1700         }
1701
1702         /* read the non-space input */
1703         while (cnt && !isspace(ch) && ch) {
1704                 if (parser->idx < parser->size - 1)
1705                         parser->buffer[parser->idx++] = ch;
1706                 else {
1707                         ret = -EINVAL;
1708                         goto out;
1709                 }
1710                 ret = get_user(ch, ubuf++);
1711                 if (ret)
1712                         goto out;
1713                 read++;
1714                 cnt--;
1715         }
1716
1717         /* We either got finished input or we have to wait for another call. */
1718         if (isspace(ch) || !ch) {
1719                 parser->buffer[parser->idx] = 0;
1720                 parser->cont = false;
1721         } else if (parser->idx < parser->size - 1) {
1722                 parser->cont = true;
1723                 parser->buffer[parser->idx++] = ch;
1724                 /* Make sure the parsed string always terminates with '\0'. */
1725                 parser->buffer[parser->idx] = 0;
1726         } else {
1727                 ret = -EINVAL;
1728                 goto out;
1729         }
1730
1731         *ppos += read;
1732         ret = read;
1733
1734 out:
1735         return ret;
1736 }
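/*
 * Rough usage sketch (my_handle_token() is illustrative only): a write()
 * handler for a tracefs control file typically drives the parser like:
 *
 *     struct trace_parser parser;
 *     ssize_t read;
 *
 *     if (trace_parser_get_init(&parser, PAGE_SIZE))
 *             return -ENOMEM;
 *     read = trace_get_user(&parser, ubuf, cnt, ppos);
 *     if (read >= 0 && trace_parser_loaded(&parser))
 *             my_handle_token(parser.buffer); // one space-separated token
 *     trace_parser_put(&parser);
 *     return read;
 */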
1737
1738 /* TODO add a seq_buf_to_buffer() */
1739 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1740 {
1741         int len;
1742
1743         if (trace_seq_used(s) <= s->readpos)
1744                 return -EBUSY;
1745
1746         len = trace_seq_used(s) - s->readpos;
1747         if (cnt > len)
1748                 cnt = len;
1749         memcpy(buf, s->buffer + s->readpos, cnt);
1750
1751         s->readpos += cnt;
1752         return cnt;
1753 }
1754
1755 unsigned long __read_mostly     tracing_thresh;
1756
1757 #ifdef CONFIG_TRACER_MAX_TRACE
1758 static const struct file_operations tracing_max_lat_fops;
1759
1760 #ifdef LATENCY_FS_NOTIFY
1761
1762 static struct workqueue_struct *fsnotify_wq;
1763
1764 static void latency_fsnotify_workfn(struct work_struct *work)
1765 {
1766         struct trace_array *tr = container_of(work, struct trace_array,
1767                                               fsnotify_work);
1768         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1769 }
1770
1771 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1772 {
1773         struct trace_array *tr = container_of(iwork, struct trace_array,
1774                                               fsnotify_irqwork);
1775         queue_work(fsnotify_wq, &tr->fsnotify_work);
1776 }
1777
1778 static void trace_create_maxlat_file(struct trace_array *tr,
1779                                      struct dentry *d_tracer)
1780 {
1781         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1782         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1783         tr->d_max_latency = trace_create_file("tracing_max_latency",
1784                                               TRACE_MODE_WRITE,
1785                                               d_tracer, tr,
1786                                               &tracing_max_lat_fops);
1787 }
1788
1789 __init static int latency_fsnotify_init(void)
1790 {
1791         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1792                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1793         if (!fsnotify_wq) {
1794                 pr_err("Unable to allocate tr_max_lat_wq\n");
1795                 return -ENOMEM;
1796         }
1797         return 0;
1798 }
1799
1800 late_initcall_sync(latency_fsnotify_init);
1801
1802 void latency_fsnotify(struct trace_array *tr)
1803 {
1804         if (!fsnotify_wq)
1805                 return;
1806         /*
1807          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1808          * possible that we are called from __schedule() or do_idle(), which
1809          * could cause a deadlock.
1810          */
1811         irq_work_queue(&tr->fsnotify_irqwork);
1812 }
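/*
 * To summarize the indirection above: latency_fsnotify() only queues an
 * irq_work; the irq_work handler queues fsnotify_work on fsnotify_wq; and
 * the workqueue callback finally calls fsnotify_inode() on the
 * tracing_max_latency dentry. Each hop moves the FS_MODIFY notification
 * further away from the scheduler/idle context the caller may be in.
 */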
1813
1814 #else /* !LATENCY_FS_NOTIFY */
1815
1816 #define trace_create_maxlat_file(tr, d_tracer)                          \
1817         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1818                           d_tracer, tr, &tracing_max_lat_fops)
1819
1820 #endif
1821
1822 /*
1823  * Copy the new maximum trace into the separate maximum-trace
1824  * structure. (This way the maximum trace is permanently saved
1825  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1826  */
1827 static void
1828 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1829 {
1830         struct array_buffer *trace_buf = &tr->array_buffer;
1831         struct array_buffer *max_buf = &tr->max_buffer;
1832         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1833         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1834
1835         max_buf->cpu = cpu;
1836         max_buf->time_start = data->preempt_timestamp;
1837
1838         max_data->saved_latency = tr->max_latency;
1839         max_data->critical_start = data->critical_start;
1840         max_data->critical_end = data->critical_end;
1841
1842         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1843         max_data->pid = tsk->pid;
1844         /*
1845          * If tsk == current, then use current_uid(), as that does not use
1846          * RCU. The irq tracer can be called out of RCU scope.
1847          */
1848         if (tsk == current)
1849                 max_data->uid = current_uid();
1850         else
1851                 max_data->uid = task_uid(tsk);
1852
1853         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1854         max_data->policy = tsk->policy;
1855         max_data->rt_priority = tsk->rt_priority;
1856
1857         /* record this task's comm */
1858         tracing_record_cmdline(tsk);
1859         latency_fsnotify(tr);
1860 }
1861
1862 /**
1863  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1864  * @tr: trace array to snapshot
1865  * @tsk: the task with the latency
1866  * @cpu: The cpu that initiated the trace.
1867  * @cond_data: User data associated with a conditional snapshot
1868  *
1869  * Flip the buffers between the @tr and the max_tr and record information
1870  * about which task was the cause of this latency.
1871  */
1872 void
1873 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1874               void *cond_data)
1875 {
1876         if (tr->stop_count)
1877                 return;
1878
1879         WARN_ON_ONCE(!irqs_disabled());
1880
1881         if (!tr->allocated_snapshot) {
1882                 /* Only the nop tracer should hit this when disabling */
1883                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1884                 return;
1885         }
1886
1887         arch_spin_lock(&tr->max_lock);
1888
1889         /* Inherit the recordable setting from array_buffer */
1890         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1891                 ring_buffer_record_on(tr->max_buffer.buffer);
1892         else
1893                 ring_buffer_record_off(tr->max_buffer.buffer);
1894
1895 #ifdef CONFIG_TRACER_SNAPSHOT
1896         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1897                 arch_spin_unlock(&tr->max_lock);
1898                 return;
1899         }
1900 #endif
1901         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1902
1903         __update_max_tr(tr, tsk, cpu);
1904
1905         arch_spin_unlock(&tr->max_lock);
1906
1907         /* Any waiters on the old snapshot buffer need to wake up */
1908         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1909 }
1910
1911 /**
1912  * update_max_tr_single - only copy one trace over, and reset the rest
1913  * @tr: tracer
1914  * @tr: trace array to snapshot
1915  * @cpu: the cpu of the buffer to copy.
1916  *
1917  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1918  */
1919 void
1920 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1921 {
1922         int ret;
1923
1924         if (tr->stop_count)
1925                 return;
1926
1927         WARN_ON_ONCE(!irqs_disabled());
1928         if (!tr->allocated_snapshot) {
1929                 /* Only the nop tracer should hit this when disabling */
1930                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1931                 return;
1932         }
1933
1934         arch_spin_lock(&tr->max_lock);
1935
1936         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1937
1938         if (ret == -EBUSY) {
1939                 /*
1940                  * We failed to swap the buffer because a commit was
1941                  * taking place on this CPU, or because a resize is in
1942                  * progress. We fail to record, but we reset the max
1943                  * trace buffer (no one writes directly to it) and flag
1944                  * that it failed.
1945                  */
1946                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1947                         "Failed to swap buffers due to commit or resize in progress\n");
1948         }
1949
1950         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1951
1952         __update_max_tr(tr, tsk, cpu);
1953         arch_spin_unlock(&tr->max_lock);
1954 }
1955
1956 #endif /* CONFIG_TRACER_MAX_TRACE */
1957
1958 static int wait_on_pipe(struct trace_iterator *iter, int full)
1959 {
1960         int ret;
1961
1962         /* Iterators are static, they should be filled or empty */
1963         if (trace_buffer_iter(iter, iter->cpu_file))
1964                 return 0;
1965
1966         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1967
1968 #ifdef CONFIG_TRACER_MAX_TRACE
1969         /*
1970          * Make sure this is still the snapshot buffer: if a snapshot happened
1971          * while waiting, this buffer would now be the main buffer.
1972          */
1973         if (iter->snapshot)
1974                 iter->array_buffer = &iter->tr->max_buffer;
1975 #endif
1976         return ret;
1977 }
1978
1979 #ifdef CONFIG_FTRACE_STARTUP_TEST
1980 static bool selftests_can_run;
1981
1982 struct trace_selftests {
1983         struct list_head                list;
1984         struct tracer                   *type;
1985 };
1986
1987 static LIST_HEAD(postponed_selftests);
1988
1989 static int save_selftest(struct tracer *type)
1990 {
1991         struct trace_selftests *selftest;
1992
1993         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1994         if (!selftest)
1995                 return -ENOMEM;
1996
1997         selftest->type = type;
1998         list_add(&selftest->list, &postponed_selftests);
1999         return 0;
2000 }
2001
2002 static int run_tracer_selftest(struct tracer *type)
2003 {
2004         struct trace_array *tr = &global_trace;
2005         struct tracer *saved_tracer = tr->current_trace;
2006         int ret;
2007
2008         if (!type->selftest || tracing_selftest_disabled)
2009                 return 0;
2010
2011         /*
2012          * If a tracer registers early in boot up (before scheduling is
2013          * initialized and such), then do not run its selftests yet.
2014          * Instead, run it a little later in the boot process.
2015          */
2016         if (!selftests_can_run)
2017                 return save_selftest(type);
2018
2019         if (!tracing_is_on()) {
2020                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2021                         type->name);
2022                 return 0;
2023         }
2024
2025         /*
2026          * Run a selftest on this tracer.
2027          * Here we reset the trace buffer, and set the current
2028          * tracer to be this tracer. The tracer can then run some
2029          * internal tracing to verify that everything is in order.
2030          * If we fail, we do not register this tracer.
2031          */
2032         tracing_reset_online_cpus(&tr->array_buffer);
2033
2034         tr->current_trace = type;
2035
2036 #ifdef CONFIG_TRACER_MAX_TRACE
2037         if (type->use_max_tr) {
2038                 /* If we expanded the buffers, make sure the max is expanded too */
2039                 if (tr->ring_buffer_expanded)
2040                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2041                                            RING_BUFFER_ALL_CPUS);
2042                 tr->allocated_snapshot = true;
2043         }
2044 #endif
2045
2046         /* the test is responsible for initializing and enabling */
2047         pr_info("Testing tracer %s: ", type->name);
2048         ret = type->selftest(type, tr);
2049         /* the test is responsible for resetting too */
2050         tr->current_trace = saved_tracer;
2051         if (ret) {
2052                 printk(KERN_CONT "FAILED!\n");
2053                 /* Add the warning after printing 'FAILED' */
2054                 WARN_ON(1);
2055                 return -1;
2056         }
2057         /* Only reset on passing, to avoid touching corrupted buffers */
2058         tracing_reset_online_cpus(&tr->array_buffer);
2059
2060 #ifdef CONFIG_TRACER_MAX_TRACE
2061         if (type->use_max_tr) {
2062                 tr->allocated_snapshot = false;
2063
2064                 /* Shrink the max buffer again */
2065                 if (tr->ring_buffer_expanded)
2066                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2067                                            RING_BUFFER_ALL_CPUS);
2068         }
2069 #endif
2070
2071         printk(KERN_CONT "PASSED\n");
2072         return 0;
2073 }
2074
2075 static int do_run_tracer_selftest(struct tracer *type)
2076 {
2077         int ret;
2078
2079         /*
2080          * Tests can take a long time, especially if they are run one after the
2081          * other, as happens during bootup when all the tracers are
2082          * registered. This could cause the soft lockup watchdog to trigger.
2083          */
2084         cond_resched();
2085
2086         tracing_selftest_running = true;
2087         ret = run_tracer_selftest(type);
2088         tracing_selftest_running = false;
2089
2090         return ret;
2091 }
2092
2093 static __init int init_trace_selftests(void)
2094 {
2095         struct trace_selftests *p, *n;
2096         struct tracer *t, **last;
2097         int ret;
2098
2099         selftests_can_run = true;
2100
2101         mutex_lock(&trace_types_lock);
2102
2103         if (list_empty(&postponed_selftests))
2104                 goto out;
2105
2106         pr_info("Running postponed tracer tests:\n");
2107
2108         tracing_selftest_running = true;
2109         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2110                 /* This loop can take minutes when sanitizers are enabled, so
2111                  * let's make sure we allow RCU processing.
2112                  */
2113                 cond_resched();
2114                 ret = run_tracer_selftest(p->type);
2115                 /* If the test fails, then warn and remove from available_tracers */
2116                 if (ret < 0) {
2117                         WARN(1, "tracer: %s failed selftest, disabling\n",
2118                              p->type->name);
2119                         last = &trace_types;
2120                         for (t = trace_types; t; t = t->next) {
2121                                 if (t == p->type) {
2122                                         *last = t->next;
2123                                         break;
2124                                 }
2125                                 last = &t->next;
2126                         }
2127                 }
2128                 list_del(&p->list);
2129                 kfree(p);
2130         }
2131         tracing_selftest_running = false;
2132
2133  out:
2134         mutex_unlock(&trace_types_lock);
2135
2136         return 0;
2137 }
2138 core_initcall(init_trace_selftests);
2139 #else
2140 static inline int run_tracer_selftest(struct tracer *type)
2141 {
2142         return 0;
2143 }
2144 static inline int do_run_tracer_selftest(struct tracer *type)
2145 {
2146         return 0;
2147 }
2148 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2149
2150 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2151
2152 static void __init apply_trace_boot_options(void);
2153
2154 /**
2155  * register_tracer - register a tracer with the ftrace system.
2156  * @type: the plugin for the tracer
2157  *
2158  * Register a new plugin tracer.
2159  */
2160 int __init register_tracer(struct tracer *type)
2161 {
2162         struct tracer *t;
2163         int ret = 0;
2164
2165         if (!type->name) {
2166                 pr_info("Tracer must have a name\n");
2167                 return -1;
2168         }
2169
2170         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2171                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2172                 return -1;
2173         }
2174
2175         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2176                 pr_warn("Can not register tracer %s due to lockdown\n",
2177                            type->name);
2178                 return -EPERM;
2179         }
2180
2181         mutex_lock(&trace_types_lock);
2182
2183         for (t = trace_types; t; t = t->next) {
2184                 if (strcmp(type->name, t->name) == 0) {
2185                         /* already found */
2186                         pr_info("Tracer %s already registered\n",
2187                                 type->name);
2188                         ret = -1;
2189                         goto out;
2190                 }
2191         }
2192
2193         if (!type->set_flag)
2194                 type->set_flag = &dummy_set_flag;
2195         if (!type->flags) {
2196                 /* allocate a dummy tracer_flags */
2197                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2198                 if (!type->flags) {
2199                         ret = -ENOMEM;
2200                         goto out;
2201                 }
2202                 type->flags->val = 0;
2203                 type->flags->opts = dummy_tracer_opt;
2204         } else if (!type->flags->opts) {
2205                 type->flags->opts = dummy_tracer_opt;
2206         }
2207
2208         /* store the tracer for __set_tracer_option */
2209         type->flags->trace = type;
2210
2211         ret = do_run_tracer_selftest(type);
2212         if (ret < 0)
2213                 goto out;
2214
2215         type->next = trace_types;
2216         trace_types = type;
2217         add_tracer_options(&global_trace, type);
2218
2219  out:
2220         mutex_unlock(&trace_types_lock);
2221
2222         if (ret || !default_bootup_tracer)
2223                 goto out_unlock;
2224
2225         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2226                 goto out_unlock;
2227
2228         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2229         /* Do we want this tracer to start on bootup? */
2230         tracing_set_tracer(&global_trace, type->name);
2231         default_bootup_tracer = NULL;
2232
2233         apply_trace_boot_options();
2234
2235         /* disable other selftests, since running this tracer will break them. */
2236         disable_tracing_selftest("running a tracer");
2237
2238  out_unlock:
2239         return ret;
2240 }
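/*
 * Minimal registration sketch (the "example" tracer is hypothetical): a
 * built-in tracer fills in a struct tracer and registers it from an
 * __init function, roughly:
 *
 *     static struct tracer example_tracer __read_mostly = {
 *             .name   = "example",
 *             .init   = example_tracer_init,
 *             .reset  = example_tracer_reset,
 *     };
 *
 *     static __init int init_example_tracer(void)
 *     {
 *             return register_tracer(&example_tracer);
 *     }
 *     core_initcall(init_example_tracer);
 */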
2241
2242 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2243 {
2244         struct trace_buffer *buffer = buf->buffer;
2245
2246         if (!buffer)
2247                 return;
2248
2249         ring_buffer_record_disable(buffer);
2250
2251         /* Make sure all commits have finished */
2252         synchronize_rcu();
2253         ring_buffer_reset_cpu(buffer, cpu);
2254
2255         ring_buffer_record_enable(buffer);
2256 }
2257
2258 void tracing_reset_online_cpus(struct array_buffer *buf)
2259 {
2260         struct trace_buffer *buffer = buf->buffer;
2261
2262         if (!buffer)
2263                 return;
2264
2265         ring_buffer_record_disable(buffer);
2266
2267         /* Make sure all commits have finished */
2268         synchronize_rcu();
2269
2270         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2271
2272         ring_buffer_reset_online_cpus(buffer);
2273
2274         ring_buffer_record_enable(buffer);
2275 }
2276
2277 /* Must have trace_types_lock held */
2278 void tracing_reset_all_online_cpus_unlocked(void)
2279 {
2280         struct trace_array *tr;
2281
2282         lockdep_assert_held(&trace_types_lock);
2283
2284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2285                 if (!tr->clear_trace)
2286                         continue;
2287                 tr->clear_trace = false;
2288                 tracing_reset_online_cpus(&tr->array_buffer);
2289 #ifdef CONFIG_TRACER_MAX_TRACE
2290                 tracing_reset_online_cpus(&tr->max_buffer);
2291 #endif
2292         }
2293 }
2294
2295 void tracing_reset_all_online_cpus(void)
2296 {
2297         mutex_lock(&trace_types_lock);
2298         tracing_reset_all_online_cpus_unlocked();
2299         mutex_unlock(&trace_types_lock);
2300 }
2301
2302 /*
2303  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2304  * is the tgid last observed corresponding to pid=i.
2305  */
2306 static int *tgid_map;
2307
2308 /* The maximum valid index into tgid_map. */
2309 static size_t tgid_map_max;
2310
2311 #define SAVED_CMDLINES_DEFAULT 128
2312 #define NO_CMDLINE_MAP UINT_MAX
2313 /*
2314  * Preemption must be disabled before acquiring trace_cmdline_lock.
2315  * The various trace_arrays' max_lock must be acquired in a context
2316  * where interrupt is disabled.
2317  */
2318 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2319 struct saved_cmdlines_buffer {
2320         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2321         unsigned *map_cmdline_to_pid;
2322         unsigned cmdline_num;
2323         int cmdline_idx;
2324         char saved_cmdlines[];
2325 };
2326 static struct saved_cmdlines_buffer *savedcmd;
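/*
 * To illustrate the mapping: trace_save_cmdline() below hashes the pid
 * (pid & (PID_MAX_DEFAULT - 1)) into map_pid_to_cmdline[], whose value is
 * a slot in the circular saved_cmdlines[] array holding the comm;
 * map_cmdline_to_pid[] remembers which pid currently owns each slot so
 * that __trace_find_cmdline() can detect a recycled (stale) entry. Slots
 * are handed out round-robin via cmdline_idx once they are all in use.
 */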
2327
2328 static inline char *get_saved_cmdlines(int idx)
2329 {
2330         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2331 }
2332
2333 static inline void set_cmdline(int idx, const char *cmdline)
2334 {
2335         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2336 }
2337
2338 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2339 {
2340         int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2341
2342         kfree(s->map_cmdline_to_pid);
2343         kmemleak_free(s);
2344         free_pages((unsigned long)s, order);
2345 }
2346
2347 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2348 {
2349         struct saved_cmdlines_buffer *s;
2350         struct page *page;
2351         int orig_size, size;
2352         int order;
2353
2354         /* Figure out how much is needed to hold the given number of cmdlines */
2355         orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2356         order = get_order(orig_size);
2357         size = 1 << (order + PAGE_SHIFT);
2358         page = alloc_pages(GFP_KERNEL, order);
2359         if (!page)
2360                 return NULL;
2361
2362         s = page_address(page);
2363         kmemleak_alloc(s, size, 1, GFP_KERNEL);
2364         memset(s, 0, sizeof(*s));
2365
2366         /* Round up to actual allocation */
2367         val = (size - sizeof(*s)) / TASK_COMM_LEN;
2368         s->cmdline_num = val;
2369
2370         s->map_cmdline_to_pid = kmalloc_array(val,
2371                                               sizeof(*s->map_cmdline_to_pid),
2372                                               GFP_KERNEL);
2373         if (!s->map_cmdline_to_pid) {
2374                 free_saved_cmdlines_buffer(s);
2375                 return NULL;
2376         }
2377
2378         s->cmdline_idx = 0;
2379         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2380                sizeof(s->map_pid_to_cmdline));
2381         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2382                val * sizeof(*s->map_cmdline_to_pid));
2383
2384         return s;
2385 }
2386
2387 static int trace_create_savedcmd(void)
2388 {
2389         savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2390
2391         return savedcmd ? 0 : -ENOMEM;
2392 }
2393
2394 int is_tracing_stopped(void)
2395 {
2396         return global_trace.stop_count;
2397 }
2398
2399 static void tracing_start_tr(struct trace_array *tr)
2400 {
2401         struct trace_buffer *buffer;
2402         unsigned long flags;
2403
2404         if (tracing_disabled)
2405                 return;
2406
2407         raw_spin_lock_irqsave(&tr->start_lock, flags);
2408         if (--tr->stop_count) {
2409                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2410                         /* Someone screwed up their debugging */
2411                         tr->stop_count = 0;
2412                 }
2413                 goto out;
2414         }
2415
2416         /* Prevent the buffers from switching */
2417         arch_spin_lock(&tr->max_lock);
2418
2419         buffer = tr->array_buffer.buffer;
2420         if (buffer)
2421                 ring_buffer_record_enable(buffer);
2422
2423 #ifdef CONFIG_TRACER_MAX_TRACE
2424         buffer = tr->max_buffer.buffer;
2425         if (buffer)
2426                 ring_buffer_record_enable(buffer);
2427 #endif
2428
2429         arch_spin_unlock(&tr->max_lock);
2430
2431  out:
2432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 }
2434
2435 /**
2436  * tracing_start - quick start of the tracer
2437  *
2438  * If tracing is enabled but was stopped by tracing_stop,
2439  * this will start the tracer back up.
2440  */
2441 void tracing_start(void)
2442 {
2444         return tracing_start_tr(&global_trace);
2445 }
2446
2447 static void tracing_stop_tr(struct trace_array *tr)
2448 {
2449         struct trace_buffer *buffer;
2450         unsigned long flags;
2451
2452         raw_spin_lock_irqsave(&tr->start_lock, flags);
2453         if (tr->stop_count++)
2454                 goto out;
2455
2456         /* Prevent the buffers from switching */
2457         arch_spin_lock(&tr->max_lock);
2458
2459         buffer = tr->array_buffer.buffer;
2460         if (buffer)
2461                 ring_buffer_record_disable(buffer);
2462
2463 #ifdef CONFIG_TRACER_MAX_TRACE
2464         buffer = tr->max_buffer.buffer;
2465         if (buffer)
2466                 ring_buffer_record_disable(buffer);
2467 #endif
2468
2469         arch_spin_unlock(&tr->max_lock);
2470
2471  out:
2472         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2473 }
2474
2475 /**
2476  * tracing_stop - quick stop of the tracer
2477  *
2478  * Light weight way to stop tracing. Use in conjunction with
2479  * tracing_start.
2480  */
2481 void tracing_stop(void)
2482 {
2483         return tracing_stop_tr(&global_trace);
2484 }
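/*
 * Typical pairing (illustrative): code that wants to freeze the trace
 * around an interesting event does something like
 *
 *     tracing_stop();
 *     ... inspect or dump the buffers ...
 *     tracing_start();
 *
 * The stop_count handling in tracing_stop_tr()/tracing_start_tr() lets
 * such pairs nest correctly if several callers overlap.
 */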
2485
2486 static int trace_save_cmdline(struct task_struct *tsk)
2487 {
2488         unsigned tpid, idx;
2489
2490         /* treat recording of idle task as a success */
2491         if (!tsk->pid)
2492                 return 1;
2493
2494         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2495
2496         /*
2497          * It's not the end of the world if we don't get
2498          * the lock, but we also don't want to spin
2499          * nor do we want to disable interrupts,
2500          * so if we miss here, then better luck next time.
2501          *
2502          * This is called from within the scheduler and from wake up paths,
2503          * so interrupts had better be disabled and the run queue lock held.
2504          */
2505         lockdep_assert_preemption_disabled();
2506         if (!arch_spin_trylock(&trace_cmdline_lock))
2507                 return 0;
2508
2509         idx = savedcmd->map_pid_to_cmdline[tpid];
2510         if (idx == NO_CMDLINE_MAP) {
2511                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2512
2513                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2514                 savedcmd->cmdline_idx = idx;
2515         }
2516
2517         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2518         set_cmdline(idx, tsk->comm);
2519
2520         arch_spin_unlock(&trace_cmdline_lock);
2521
2522         return 1;
2523 }
2524
2525 static void __trace_find_cmdline(int pid, char comm[])
2526 {
2527         unsigned map;
2528         int tpid;
2529
2530         if (!pid) {
2531                 strcpy(comm, "<idle>");
2532                 return;
2533         }
2534
2535         if (WARN_ON_ONCE(pid < 0)) {
2536                 strcpy(comm, "<XXX>");
2537                 return;
2538         }
2539
2540         tpid = pid & (PID_MAX_DEFAULT - 1);
2541         map = savedcmd->map_pid_to_cmdline[tpid];
2542         if (map != NO_CMDLINE_MAP) {
2543                 tpid = savedcmd->map_cmdline_to_pid[map];
2544                 if (tpid == pid) {
2545                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2546                         return;
2547                 }
2548         }
2549         strcpy(comm, "<...>");
2550 }
2551
2552 void trace_find_cmdline(int pid, char comm[])
2553 {
2554         preempt_disable();
2555         arch_spin_lock(&trace_cmdline_lock);
2556
2557         __trace_find_cmdline(pid, comm);
2558
2559         arch_spin_unlock(&trace_cmdline_lock);
2560         preempt_enable();
2561 }
2562
2563 static int *trace_find_tgid_ptr(int pid)
2564 {
2565         /*
2566          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2567          * if we observe a non-NULL tgid_map then we also observe the correct
2568          * tgid_map_max.
2569          */
2570         int *map = smp_load_acquire(&tgid_map);
2571
2572         if (unlikely(!map || pid > tgid_map_max))
2573                 return NULL;
2574
2575         return &map[pid];
2576 }
2577
2578 int trace_find_tgid(int pid)
2579 {
2580         int *ptr = trace_find_tgid_ptr(pid);
2581
2582         return ptr ? *ptr : 0;
2583 }
2584
2585 static int trace_save_tgid(struct task_struct *tsk)
2586 {
2587         int *ptr;
2588
2589         /* treat recording of idle task as a success */
2590         if (!tsk->pid)
2591                 return 1;
2592
2593         ptr = trace_find_tgid_ptr(tsk->pid);
2594         if (!ptr)
2595                 return 0;
2596
2597         *ptr = tsk->tgid;
2598         return 1;
2599 }
2600
2601 static bool tracing_record_taskinfo_skip(int flags)
2602 {
2603         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2604                 return true;
2605         if (!__this_cpu_read(trace_taskinfo_save))
2606                 return true;
2607         return false;
2608 }
2609
2610 /**
2611  * tracing_record_taskinfo - record the task info of a task
2612  *
2613  * @task:  task to record
2614  * @flags: TRACE_RECORD_CMDLINE for recording comm
2615  *         TRACE_RECORD_TGID for recording tgid
2616  */
2617 void tracing_record_taskinfo(struct task_struct *task, int flags)
2618 {
2619         bool done;
2620
2621         if (tracing_record_taskinfo_skip(flags))
2622                 return;
2623
2624         /*
2625          * Record as much task information as possible. If some fail, continue
2626          * to try to record the others.
2627          */
2628         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2629         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2630
2631         /* If recording any information failed, retry again soon. */
2632         if (!done)
2633                 return;
2634
2635         __this_cpu_write(trace_taskinfo_save, false);
2636 }
2637
2638 /**
2639  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2640  *
2641  * @prev: previous task during sched_switch
2642  * @next: next task during sched_switch
2643  * @flags: TRACE_RECORD_CMDLINE for recording comm
2644  *         TRACE_RECORD_TGID for recording tgid
2645  */
2646 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2647                                           struct task_struct *next, int flags)
2648 {
2649         bool done;
2650
2651         if (tracing_record_taskinfo_skip(flags))
2652                 return;
2653
2654         /*
2655          * Record as much task information as possible. If some fail, continue
2656          * to try to record the others.
2657          */
2658         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2659         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2660         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2661         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2662
2663         /* If recording any information failed, retry again soon. */
2664         if (!done)
2665                 return;
2666
2667         __this_cpu_write(trace_taskinfo_save, false);
2668 }
2669
2670 /* Helpers to record a specific task information */
2671 void tracing_record_cmdline(struct task_struct *task)
2672 {
2673         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2674 }
2675
2676 void tracing_record_tgid(struct task_struct *task)
2677 {
2678         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2679 }
2680
2681 /*
2682  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2683  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2684  * simplifies those functions and keeps them in sync.
2685  */
2686 enum print_line_t trace_handle_return(struct trace_seq *s)
2687 {
2688         return trace_seq_has_overflowed(s) ?
2689                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2690 }
2691 EXPORT_SYMBOL_GPL(trace_handle_return);
2692
2693 static unsigned short migration_disable_value(void)
2694 {
2695 #if defined(CONFIG_SMP)
2696         return current->migration_disabled;
2697 #else
2698         return 0;
2699 #endif
2700 }
2701
2702 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2703 {
2704         unsigned int trace_flags = irqs_status;
2705         unsigned int pc;
2706
2707         pc = preempt_count();
2708
2709         if (pc & NMI_MASK)
2710                 trace_flags |= TRACE_FLAG_NMI;
2711         if (pc & HARDIRQ_MASK)
2712                 trace_flags |= TRACE_FLAG_HARDIRQ;
2713         if (in_serving_softirq())
2714                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2715         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2716                 trace_flags |= TRACE_FLAG_BH_OFF;
2717
2718         if (tif_need_resched())
2719                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2720         if (test_preempt_need_resched())
2721                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2722         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2723                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2724 }
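/*
 * Layout of the trace_ctx word returned above:
 *
 *   bits  0- 3: preempt_count(), clamped to 15
 *   bits  4- 7: migration-disable depth, clamped to 15
 *   bits  8-15: unused
 *   bits 16-23: TRACE_FLAG_* bits (irqs-off, NMI/hardirq/softirq,
 *               need-resched, BH-off)
 *
 * tracing_generic_entry_update() later unpacks this into the trace
 * entry's preempt_count and flags fields.
 */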
2725
2726 struct ring_buffer_event *
2727 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2728                           int type,
2729                           unsigned long len,
2730                           unsigned int trace_ctx)
2731 {
2732         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2733 }
2734
2735 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2736 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2737 static int trace_buffered_event_ref;
2738
2739 /**
2740  * trace_buffered_event_enable - enable buffering events
2741  *
2742  * When events are being filtered, it is quicker to use a temporary
2743  * buffer to write the event data into if there's a likely chance
2744  * that it will not be committed. Discarding an event from the ring
2745  * buffer is not as fast as committing one, and is much slower than
2746  * copying the data from a temporary buffer and then committing that.
2747  *
2748  * When an event is to be filtered, allocate per cpu buffers to
2749  * write the event data into, and if the event is filtered and discarded
2750  * it is simply dropped, otherwise, the entire data is to be committed
2751  * in one shot.
2752  */
2753 void trace_buffered_event_enable(void)
2754 {
2755         struct ring_buffer_event *event;
2756         struct page *page;
2757         int cpu;
2758
2759         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2760
2761         if (trace_buffered_event_ref++)
2762                 return;
2763
2764         for_each_tracing_cpu(cpu) {
2765                 page = alloc_pages_node(cpu_to_node(cpu),
2766                                         GFP_KERNEL | __GFP_NORETRY, 0);
2767                 /* This is just an optimization and can handle failures */
2768                 if (!page) {
2769                         pr_err("Failed to allocate event buffer\n");
2770                         break;
2771                 }
2772
2773                 event = page_address(page);
2774                 memset(event, 0, sizeof(*event));
2775
2776                 per_cpu(trace_buffered_event, cpu) = event;
2777
2778                 preempt_disable();
2779                 if (cpu == smp_processor_id() &&
2780                     __this_cpu_read(trace_buffered_event) !=
2781                     per_cpu(trace_buffered_event, cpu))
2782                         WARN_ON_ONCE(1);
2783                 preempt_enable();
2784         }
2785 }
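/*
 * Pairing note: as the kernel-doc above and below describes, buffering is
 * reference counted. Each user that starts filtering events calls
 * trace_buffered_event_enable(), and calls trace_buffered_event_disable()
 * when its filter is removed, so the per-CPU pages only exist while at
 * least one filter can benefit from them.
 */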
2786
2787 static void enable_trace_buffered_event(void *data)
2788 {
2789         /* Probably not needed, but do it anyway */
2790         smp_rmb();
2791         this_cpu_dec(trace_buffered_event_cnt);
2792 }
2793
2794 static void disable_trace_buffered_event(void *data)
2795 {
2796         this_cpu_inc(trace_buffered_event_cnt);
2797 }
2798
2799 /**
2800  * trace_buffered_event_disable - disable buffering events
2801  *
2802  * When a filter is removed, it is faster to not use the buffered
2803  * events, and to commit directly into the ring buffer. Free up
2804  * the temp buffers when there are no more users. This requires
2805  * special synchronization with current events.
2806  */
2807 void trace_buffered_event_disable(void)
2808 {
2809         int cpu;
2810
2811         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2812
2813         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2814                 return;
2815
2816         if (--trace_buffered_event_ref)
2817                 return;
2818
2819         /* For each CPU, set the buffer as used. */
2820         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2821                          NULL, true);
2822
2823         /* Wait for all current users to finish */
2824         synchronize_rcu();
2825
2826         for_each_tracing_cpu(cpu) {
2827                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2828                 per_cpu(trace_buffered_event, cpu) = NULL;
2829         }
2830
2831         /*
2832          * Wait for all CPUs that potentially started checking if they can use
2833          * their event buffer only after the previous synchronize_rcu() call and
2834          * they still read a valid pointer from trace_buffered_event. It must be
2835          * ensured they don't see cleared trace_buffered_event_cnt else they
2836          * could wrongly decide to use the pointed-to buffer which is now freed.
2837          */
2838         synchronize_rcu();
2839
2840         /* For each CPU, relinquish the buffer */
2841         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2842                          true);
2843 }
2844
2845 static struct trace_buffer *temp_buffer;
2846
2847 struct ring_buffer_event *
2848 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2849                           struct trace_event_file *trace_file,
2850                           int type, unsigned long len,
2851                           unsigned int trace_ctx)
2852 {
2853         struct ring_buffer_event *entry;
2854         struct trace_array *tr = trace_file->tr;
2855         int val;
2856
2857         *current_rb = tr->array_buffer.buffer;
2858
2859         if (!tr->no_filter_buffering_ref &&
2860             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2861                 preempt_disable_notrace();
2862                 /*
2863                  * Filtering is on, so try to use the per cpu buffer first.
2864                  * This buffer will simulate a ring_buffer_event,
2865                  * where the type_len is zero and the array[0] will
2866                  * hold the full length.
2867                  * (see include/linux/ring_buffer.h for details on
2868                  *  how the ring_buffer_event is structured).
2869                  *
2870                  * Using a temp buffer during filtering and copying it
2871                  * on a matched filter is quicker than writing directly
2872                  * into the ring buffer and then discarding it when
2873                  * it doesn't match. That is because the discard
2874                  * requires several atomic operations to get right.
2875                  * Copying on a match and doing nothing on a failed match
2876                  * is still quicker than skipping the copy but having to
2877                  * discard the event from the ring buffer on a failed match.
2878                  */
2879                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2880                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2881
2882                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2883
2884                         /*
2885                          * Preemption is disabled, but interrupts and NMIs
2886                          * can still come in now. If that happens after
2887                          * the above increment, then it will have to go
2888                          * back to the old method of allocating the event
2889                          * on the ring buffer, and if the filter fails, it
2890                          * will have to call ring_buffer_discard_commit()
2891                          * to remove it.
2892                          *
2893                          * Need to also check the unlikely case that the
2894                          * length is bigger than the temp buffer size.
2895                          * If that happens, then the reserve is pretty much
2896                          * guaranteed to fail, as the ring buffer currently
2897                          * only allows events less than a page. But that may
2898                          * change in the future, so let the ring buffer reserve
2899                          * handle the failure in that case.
2900                          */
2901                         if (val == 1 && likely(len <= max_len)) {
2902                                 trace_event_setup(entry, type, trace_ctx);
2903                                 entry->array[0] = len;
2904                                 /* Return with preemption disabled */
2905                                 return entry;
2906                         }
2907                         this_cpu_dec(trace_buffered_event_cnt);
2908                 }
2909                 /* __trace_buffer_lock_reserve() disables preemption */
2910                 preempt_enable_notrace();
2911         }
2912
2913         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2914                                             trace_ctx);
2915         /*
2916          * If tracing is off, but we have triggers enabled,
2917          * we still need to look at the event data. Use the temp_buffer
2918          * to store the trace event for the trigger to use. It's recursion
2919          * safe and will not be recorded anywhere.
2920          */
2921         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2922                 *current_rb = temp_buffer;
2923                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2924                                                     trace_ctx);
2925         }
2926         return entry;
2927 }
2928 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
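/*
 * Rough usage sketch for the reserve/commit pair (error handling and the
 * event-specific fields are elided; my_entry is illustrative):
 *
 *     fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer,
 *                             trace_file, event_type, sizeof(*my_entry),
 *                             trace_ctx);
 *     if (!fbuffer->event)
 *             return;
 *     my_entry = ring_buffer_event_data(fbuffer->event);
 *     ... fill in my_entry fields ...
 *     trace_event_buffer_commit(fbuffer);
 *
 * which is essentially what the generated trace_event_raw_event_*()
 * functions do via trace_event_buffer_reserve() and
 * trace_event_buffer_commit().
 */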
2929
2930 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2931 static DEFINE_MUTEX(tracepoint_printk_mutex);
2932
2933 static void output_printk(struct trace_event_buffer *fbuffer)
2934 {
2935         struct trace_event_call *event_call;
2936         struct trace_event_file *file;
2937         struct trace_event *event;
2938         unsigned long flags;
2939         struct trace_iterator *iter = tracepoint_print_iter;
2940
2941         /* We should never get here if iter is NULL */
2942         if (WARN_ON_ONCE(!iter))
2943                 return;
2944
2945         event_call = fbuffer->trace_file->event_call;
2946         if (!event_call || !event_call->event.funcs ||
2947             !event_call->event.funcs->trace)
2948                 return;
2949
2950         file = fbuffer->trace_file;
2951         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2952             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2953              !filter_match_preds(file->filter, fbuffer->entry)))
2954                 return;
2955
2956         event = &fbuffer->trace_file->event_call->event;
2957
2958         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2959         trace_seq_init(&iter->seq);
2960         iter->ent = fbuffer->entry;
2961         event_call->event.funcs->trace(iter, 0, event);
2962         trace_seq_putc(&iter->seq, 0);
2963         printk("%s", iter->seq.buffer);
2964
2965         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2966 }
2967
2968 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2969                              void *buffer, size_t *lenp,
2970                              loff_t *ppos)
2971 {
2972         int save_tracepoint_printk;
2973         int ret;
2974
2975         mutex_lock(&tracepoint_printk_mutex);
2976         save_tracepoint_printk = tracepoint_printk;
2977
2978         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2979
2980         /*
2981          * This will force exiting early, as tracepoint_printk
2982          * is always zero when tracepoint_print_iter is not allocated.
2983          */
2984         if (!tracepoint_print_iter)
2985                 tracepoint_printk = 0;
2986
2987         if (save_tracepoint_printk == tracepoint_printk)
2988                 goto out;
2989
2990         if (tracepoint_printk)
2991                 static_key_enable(&tracepoint_printk_key.key);
2992         else
2993                 static_key_disable(&tracepoint_printk_key.key);
2994
2995  out:
2996         mutex_unlock(&tracepoint_printk_mutex);
2997
2998         return ret;
2999 }
3000
3001 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3002 {
3003         enum event_trigger_type tt = ETT_NONE;
3004         struct trace_event_file *file = fbuffer->trace_file;
3005
3006         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3007                         fbuffer->entry, &tt))
3008                 goto discard;
3009
3010         if (static_key_false(&tracepoint_printk_key.key))
3011                 output_printk(fbuffer);
3012
3013         if (static_branch_unlikely(&trace_event_exports_enabled))
3014                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3015
3016         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3017                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3018
3019 discard:
3020         if (tt)
3021                 event_triggers_post_call(file, tt);
3022
3024 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3025
3026 /*
3027  * Skip 3:
3028  *
3029  *   trace_buffer_unlock_commit_regs()
3030  *   trace_event_buffer_commit()
3031  *   trace_event_raw_event_xxx()
3032  */
3033 # define STACK_SKIP 3
3034
3035 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3036                                      struct trace_buffer *buffer,
3037                                      struct ring_buffer_event *event,
3038                                      unsigned int trace_ctx,
3039                                      struct pt_regs *regs)
3040 {
3041         __buffer_unlock_commit(buffer, event);
3042
3043         /*
3044          * If regs is not set, then skip the necessary functions.
3045          * Note, we can still get here via blktrace, wakeup tracer
3046          * and mmiotrace, but that's ok if they lose a function or
3047          * two. They are not that meaningful.
3048          */
3049         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3050         ftrace_trace_userstack(tr, buffer, trace_ctx);
3051 }
3052
3053 /*
3054  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3055  */
3056 void
3057 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3058                                    struct ring_buffer_event *event)
3059 {
3060         __buffer_unlock_commit(buffer, event);
3061 }
3062
3063 void
3064 trace_function(struct trace_array *tr, unsigned long ip,
3065                unsigned long parent_ip, unsigned int trace_ctx)
3066 {
3067         struct trace_event_call *call = &event_function;
3068         struct trace_buffer *buffer = tr->array_buffer.buffer;
3069         struct ring_buffer_event *event;
3070         struct ftrace_entry *entry;
3071
3072         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3073                                             trace_ctx);
3074         if (!event)
3075                 return;
3076         entry   = ring_buffer_event_data(event);
3077         entry->ip                       = ip;
3078         entry->parent_ip                = parent_ip;
3079
3080         if (!call_filter_check_discard(call, entry, buffer, event)) {
3081                 if (static_branch_unlikely(&trace_function_exports_enabled))
3082                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3083                 __buffer_unlock_commit(buffer, event);
3084         }
3085 }
3086
3087 #ifdef CONFIG_STACKTRACE
3088
3089 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3090 #define FTRACE_KSTACK_NESTING   4
3091
3092 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3093
3094 struct ftrace_stack {
3095         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3096 };
3097
3099 struct ftrace_stacks {
3100         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3101 };
3102
3103 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3104 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3105
3106 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3107                                  unsigned int trace_ctx,
3108                                  int skip, struct pt_regs *regs)
3109 {
3110         struct trace_event_call *call = &event_kernel_stack;
3111         struct ring_buffer_event *event;
3112         unsigned int size, nr_entries;
3113         struct ftrace_stack *fstack;
3114         struct stack_entry *entry;
3115         int stackidx;
3116
3117         /*
3118          * Add one, for this function and the call to stack_trace_save().
3119          * If regs is set, then these functions will not be in the way.
3120          */
3121 #ifndef CONFIG_UNWINDER_ORC
3122         if (!regs)
3123                 skip++;
3124 #endif
3125
3126         preempt_disable_notrace();
3127
3128         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3129
3130         /* This should never happen. If it does, yell once and skip */
3131         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3132                 goto out;
3133
3134         /*
3135          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3136          * interrupt will either see the value pre-increment or post-increment.
3137          * If the interrupt happens pre-increment, it will have restored the
3138          * counter when it returns. We just need a barrier to keep gcc from
3139          * moving things around.
3140          */
3141         barrier();
3142
3143         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3144         size = ARRAY_SIZE(fstack->calls);
3145
3146         if (regs) {
3147                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3148                                                    size, skip);
3149         } else {
3150                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3151         }
3152
3153         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3154                                     struct_size(entry, caller, nr_entries),
3155                                     trace_ctx);
3156         if (!event)
3157                 goto out;
3158         entry = ring_buffer_event_data(event);
3159
3160         entry->size = nr_entries;
3161         memcpy(&entry->caller, fstack->calls,
3162                flex_array_size(entry, caller, nr_entries));
3163
3164         if (!call_filter_check_discard(call, entry, buffer, event))
3165                 __buffer_unlock_commit(buffer, event);
3166
3167  out:
3168         /* Again, don't let gcc optimize things here */
3169         barrier();
3170         __this_cpu_dec(ftrace_stack_reserve);
3171         preempt_enable_notrace();
3172
3173 }
3174
3175 static inline void ftrace_trace_stack(struct trace_array *tr,
3176                                       struct trace_buffer *buffer,
3177                                       unsigned int trace_ctx,
3178                                       int skip, struct pt_regs *regs)
3179 {
3180         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3181                 return;
3182
3183         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3184 }
3185
3186 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3187                    int skip)
3188 {
3189         struct trace_buffer *buffer = tr->array_buffer.buffer;
3190
3191         if (rcu_is_watching()) {
3192                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3193                 return;
3194         }
3195
3196         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3197                 return;
3198
3199         /*
3200          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3201          * but if the above rcu_is_watching() failed, then the NMI
3202          * triggered someplace critical, and ct_irq_enter() should
3203          * not be called from NMI.
3204          */
3205         if (unlikely(in_nmi()))
3206                 return;
3207
3208         ct_irq_enter_irqson();
3209         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3210         ct_irq_exit_irqson();
3211 }
3212
3213 /**
3214  * trace_dump_stack - record a stack back trace in the trace buffer
3215  * @skip: Number of functions to skip (helper handlers)
3216  */
3217 void trace_dump_stack(int skip)
3218 {
3219         if (tracing_disabled || tracing_selftest_running)
3220                 return;
3221
3222 #ifndef CONFIG_UNWINDER_ORC
3223         /* Skip 1 to skip this function. */
3224         skip++;
3225 #endif
3226         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3227                              tracing_gen_ctx(), skip, NULL);
3228 }
3229 EXPORT_SYMBOL_GPL(trace_dump_stack);
3230
3231 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3232 static DEFINE_PER_CPU(int, user_stack_count);
3233
3234 static void
3235 ftrace_trace_userstack(struct trace_array *tr,
3236                        struct trace_buffer *buffer, unsigned int trace_ctx)
3237 {
3238         struct trace_event_call *call = &event_user_stack;
3239         struct ring_buffer_event *event;
3240         struct userstack_entry *entry;
3241
3242         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3243                 return;
3244
3245         /*
3246          * NMIs cannot handle page faults, even with fixups.
3247          * Saving the user stack can (and often does) fault.
3248          */
3249         if (unlikely(in_nmi()))
3250                 return;
3251
3252         /*
3253          * prevent recursion, since the user stack tracing may
3254          * trigger other kernel events.
3255          */
3256         preempt_disable();
3257         if (__this_cpu_read(user_stack_count))
3258                 goto out;
3259
3260         __this_cpu_inc(user_stack_count);
3261
3262         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3263                                             sizeof(*entry), trace_ctx);
3264         if (!event)
3265                 goto out_drop_count;
3266         entry   = ring_buffer_event_data(event);
3267
3268         entry->tgid             = current->tgid;
3269         memset(&entry->caller, 0, sizeof(entry->caller));
3270
3271         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3272         if (!call_filter_check_discard(call, entry, buffer, event))
3273                 __buffer_unlock_commit(buffer, event);
3274
3275  out_drop_count:
3276         __this_cpu_dec(user_stack_count);
3277  out:
3278         preempt_enable();
3279 }
3280 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3281 static void ftrace_trace_userstack(struct trace_array *tr,
3282                                    struct trace_buffer *buffer,
3283                                    unsigned int trace_ctx)
3284 {
3285 }
3286 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3287
3288 #endif /* CONFIG_STACKTRACE */
3289
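/* Split the 64-bit delta into the 32-bit bottom/top halves stored in the entry */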
3290 static inline void
3291 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3292                           unsigned long long delta)
3293 {
3294         entry->bottom_delta_ts = delta & U32_MAX;
3295         entry->top_delta_ts = (delta >> 32);
3296 }
3297
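/*
 * Write a TRACE_FUNC_REPEATS event summarizing the last repeated function
 * call (ip, parent_ip, count) together with the time delta since that
 * last call.
 */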
3298 void trace_last_func_repeats(struct trace_array *tr,
3299                              struct trace_func_repeats *last_info,
3300                              unsigned int trace_ctx)
3301 {
3302         struct trace_buffer *buffer = tr->array_buffer.buffer;
3303         struct func_repeats_entry *entry;
3304         struct ring_buffer_event *event;
3305         u64 delta;
3306
3307         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3308                                             sizeof(*entry), trace_ctx);
3309         if (!event)
3310                 return;
3311
3312         delta = ring_buffer_event_time_stamp(buffer, event) -
3313                 last_info->ts_last_call;
3314
3315         entry = ring_buffer_event_data(event);
3316         entry->ip = last_info->ip;
3317         entry->parent_ip = last_info->parent_ip;
3318         entry->count = last_info->count;
3319         func_repeats_set_delta_ts(entry, delta);
3320
3321         __buffer_unlock_commit(buffer, event);
3322 }
3323
3324 /* created for use with alloc_percpu */
3325 struct trace_buffer_struct {
3326         int nesting;
3327         char buffer[4][TRACE_BUF_SIZE];
3328 };
3329
3330 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3331
3332 /*
3333  * This allows for lockless recording.  If we're nested too deeply, then
3334  * this returns NULL.
3335  */
3336 static char *get_trace_buf(void)
3337 {
3338         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3339
3340         if (!trace_percpu_buffer || buffer->nesting >= 4)
3341                 return NULL;
3342
3343         buffer->nesting++;
3344
3345         /* Interrupts must see nesting incremented before we use the buffer */
3346         barrier();
3347         return &buffer->buffer[buffer->nesting - 1][0];
3348 }
3349
3350 static void put_trace_buf(void)
3351 {
3352         /* Don't let the decrement of nesting leak before this */
3353         barrier();
3354         this_cpu_dec(trace_percpu_buffer->nesting);
3355 }
3356
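/*
 * Allocate the per-CPU trace_printk() buffers (done only once). Returns 0
 * if the buffers already exist or were allocated, -ENOMEM on failure.
 */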
3357 static int alloc_percpu_trace_buffer(void)
3358 {
3359         struct trace_buffer_struct __percpu *buffers;
3360
3361         if (trace_percpu_buffer)
3362                 return 0;
3363
3364         buffers = alloc_percpu(struct trace_buffer_struct);
3365         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3366                 return -ENOMEM;
3367
3368         trace_percpu_buffer = buffers;
3369         return 0;
3370 }
3371
3372 static int buffers_allocated;
3373
3374 void trace_printk_init_buffers(void)
3375 {
3376         if (buffers_allocated)
3377                 return;
3378
3379         if (alloc_percpu_trace_buffer())
3380                 return;
3381
3382         /* trace_printk() is for debug use only. Don't use it in production. */
3383
3384         pr_warn("\n");
3385         pr_warn("**********************************************************\n");
3386         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3387         pr_warn("**                                                      **\n");
3388         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3389         pr_warn("**                                                      **\n");
3390         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3391         pr_warn("** unsafe for production use.                           **\n");
3392         pr_warn("**                                                      **\n");
3393         pr_warn("** If you see this message and you are not debugging    **\n");
3394         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3395         pr_warn("**                                                      **\n");
3396         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3397         pr_warn("**********************************************************\n");
3398
3399         /* Expand the buffers to set size */
3400         tracing_update_buffers(&global_trace);
3401
3402         buffers_allocated = 1;
3403
3404         /*
3405          * trace_printk_init_buffers() can be called by modules.
3406          * If that happens, then we need to start cmdline recording
3407          * directly here. If global_trace.array_buffer.buffer is already
3408          * allocated here, then this was called by module code.
3409          */
3410         if (global_trace.array_buffer.buffer)
3411                 tracing_start_cmdline_record();
3412 }
3413 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3414
3415 void trace_printk_start_comm(void)
3416 {
3417         /* Start tracing comms if trace printk is set */
3418         if (!buffers_allocated)
3419                 return;
3420         tracing_start_cmdline_record();
3421 }
3422
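/* Start or stop cmdline (comm) recording, but only if the trace_printk() buffers exist */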
3423 static void trace_printk_start_stop_comm(int enabled)
3424 {
3425         if (!buffers_allocated)
3426                 return;
3427
3428         if (enabled)
3429                 tracing_start_cmdline_record();
3430         else
3431                 tracing_stop_cmdline_record();
3432 }
3433
3434 /**
3435  * trace_vbprintk - write binary msg to tracing buffer
3436  * @ip:    The address of the caller
3437  * @fmt:   The string format to write to the buffer
3438  * @args:  Arguments for @fmt
3439  */
3440 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3441 {
3442         struct trace_event_call *call = &event_bprint;
3443         struct ring_buffer_event *event;
3444         struct trace_buffer *buffer;
3445         struct trace_array *tr = &global_trace;
3446         struct bprint_entry *entry;
3447         unsigned int trace_ctx;
3448         char *tbuffer;
3449         int len = 0, size;
3450
3451         if (unlikely(tracing_selftest_running || tracing_disabled))
3452                 return 0;
3453
3454         /* Don't pollute graph traces with trace_vprintk internals */
3455         pause_graph_tracing();
3456
3457         trace_ctx = tracing_gen_ctx();
3458         preempt_disable_notrace();
3459
3460         tbuffer = get_trace_buf();
3461         if (!tbuffer) {
3462                 len = 0;
3463                 goto out_nobuffer;
3464         }
3465
3466         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3467
3468         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3469                 goto out_put;
3470
3471         size = sizeof(*entry) + sizeof(u32) * len;
3472         buffer = tr->array_buffer.buffer;
3473         ring_buffer_nest_start(buffer);
3474         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3475                                             trace_ctx);
3476         if (!event)
3477                 goto out;
3478         entry = ring_buffer_event_data(event);
3479         entry->ip                       = ip;
3480         entry->fmt                      = fmt;
3481
3482         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3483         if (!call_filter_check_discard(call, entry, buffer, event)) {
3484                 __buffer_unlock_commit(buffer, event);
3485                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3486         }
3487
3488 out:
3489         ring_buffer_nest_end(buffer);
3490 out_put:
3491         put_trace_buf();
3492
3493 out_nobuffer:
3494         preempt_enable_notrace();
3495         unpause_graph_tracing();
3496
3497         return len;
3498 }
3499 EXPORT_SYMBOL_GPL(trace_vbprintk);
3500
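/*
 * Write a formatted TRACE_PRINT entry directly into @buffer. The string is
 * rendered into a per-CPU scratch buffer first so that its length is known
 * before reserving space in the ring buffer.
 */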
3501 __printf(3, 0)
3502 static int
3503 __trace_array_vprintk(struct trace_buffer *buffer,
3504                       unsigned long ip, const char *fmt, va_list args)
3505 {
3506         struct trace_event_call *call = &event_print;
3507         struct ring_buffer_event *event;
3508         int len = 0, size;
3509         struct print_entry *entry;
3510         unsigned int trace_ctx;
3511         char *tbuffer;
3512
3513         if (tracing_disabled)
3514                 return 0;
3515
3516         /* Don't pollute graph traces with trace_vprintk internals */
3517         pause_graph_tracing();
3518
3519         trace_ctx = tracing_gen_ctx();
3520         preempt_disable_notrace();
3521
3522
3523         tbuffer = get_trace_buf();
3524         if (!tbuffer) {
3525                 len = 0;
3526                 goto out_nobuffer;
3527         }
3528
3529         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3530
3531         size = sizeof(*entry) + len + 1;
3532         ring_buffer_nest_start(buffer);
3533         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3534                                             trace_ctx);
3535         if (!event)
3536                 goto out;
3537         entry = ring_buffer_event_data(event);
3538         entry->ip = ip;
3539
3540         memcpy(&entry->buf, tbuffer, len + 1);
3541         if (!call_filter_check_discard(call, entry, buffer, event)) {
3542                 __buffer_unlock_commit(buffer, event);
3543                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3544         }
3545
3546 out:
3547         ring_buffer_nest_end(buffer);
3548         put_trace_buf();
3549
3550 out_nobuffer:
3551         preempt_enable_notrace();
3552         unpause_graph_tracing();
3553
3554         return len;
3555 }
3556
3557 __printf(3, 0)
3558 int trace_array_vprintk(struct trace_array *tr,
3559                         unsigned long ip, const char *fmt, va_list args)
3560 {
3561         if (tracing_selftest_running && tr == &global_trace)
3562                 return 0;
3563
3564         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3565 }
3566
3567 /**
3568  * trace_array_printk - Print a message to a specific instance
3569  * @tr: The instance trace_array descriptor
3570  * @ip: The instruction pointer that this is called from.
3571  * @fmt: The format to print (printf format)
3572  *
3573  * If a subsystem sets up its own instance, it has the right to
3574  * printk strings into its tracing instance buffer using this
3575  * function. Note, this function will not write into the top level
3576  * buffer (use trace_printk() for that), as the top level buffer
3577  * should only contain events that can be individually disabled.
3578  * trace_printk() is only used for debugging a kernel, and should
3579  * never be incorporated into normal use.
3580  *
3581  * trace_array_printk() can be used, as it will not add noise to the
3582  * top level tracing buffer.
3583  *
3584  * Note, trace_array_init_printk() must be called on @tr before this
3585  * can be used.
3586  */
3587 __printf(3, 0)
3588 int trace_array_printk(struct trace_array *tr,
3589                        unsigned long ip, const char *fmt, ...)
3590 {
3591         int ret;
3592         va_list ap;
3593
3594         if (!tr)
3595                 return -ENOENT;
3596
3597         /* This is only allowed for created instances */
3598         if (tr == &global_trace)
3599                 return 0;
3600
3601         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3602                 return 0;
3603
3604         va_start(ap, fmt);
3605         ret = trace_array_vprintk(tr, ip, fmt, ap);
3606         va_end(ap);
3607         return ret;
3608 }
3609 EXPORT_SYMBOL_GPL(trace_array_printk);
3610
3611 /**
3612  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3613  * @tr: The trace array to initialize the buffers for
3614  *
3615  * As trace_array_printk() only writes into instances, calls to it are
3616  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3617  * before trace_array_printk() can be used on a trace_array.
3618  */
3619 int trace_array_init_printk(struct trace_array *tr)
3620 {
3621         if (!tr)
3622                 return -ENOENT;
3623
3624         /* This is only allowed for created instances */
3625         if (tr == &global_trace)
3626                 return -EINVAL;
3627
3628         return alloc_percpu_trace_buffer();
3629 }
3630 EXPORT_SYMBOL_GPL(trace_array_init_printk);
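
/*
 * Example (a sketch, not taken from the original sources): a subsystem
 * that has created its own instance "tr" could do something like
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "my event: %d\n", val);
 *
 * where "tr" and "val" stand in for the subsystem's own trace_array
 * and data.
 */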
3631
3632 __printf(3, 4)
3633 int trace_array_printk_buf(struct trace_buffer *buffer,
3634                            unsigned long ip, const char *fmt, ...)
3635 {
3636         int ret;
3637         va_list ap;
3638
3639         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3640                 return 0;
3641
3642         va_start(ap, fmt);
3643         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3644         va_end(ap);
3645         return ret;
3646 }
3647
3648 __printf(2, 0)
3649 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3650 {
3651         return trace_array_vprintk(&global_trace, ip, fmt, args);
3652 }
3653 EXPORT_SYMBOL_GPL(trace_vprintk);
3654
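/* Advance the iterator index and, if present, the per-CPU ring buffer iterator */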
3655 static void trace_iterator_increment(struct trace_iterator *iter)
3656 {
3657         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3658
3659         iter->idx++;
3660         if (buf_iter)
3661                 ring_buffer_iter_advance(buf_iter);
3662 }
3663
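/*
 * Peek at the next entry on @cpu without consuming it. Fills in *ts and,
 * if requested, *lost_events, and updates iter->ent_size.
 */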
3664 static struct trace_entry *
3665 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3666                 unsigned long *lost_events)
3667 {
3668         struct ring_buffer_event *event;
3669         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3670
3671         if (buf_iter) {
3672                 event = ring_buffer_iter_peek(buf_iter, ts);
3673                 if (lost_events)
3674                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3675                                 (unsigned long)-1 : 0;
3676         } else {
3677                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3678                                          lost_events);
3679         }
3680
3681         if (event) {
3682                 iter->ent_size = ring_buffer_event_length(event);
3683                 return ring_buffer_event_data(event);
3684         }
3685         iter->ent_size = 0;
3686         return NULL;
3687 }
3688
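/*
 * Return the trace entry with the oldest timestamp across all tracing CPUs
 * (or just the selected CPU for a per_cpu trace file), along with its CPU,
 * timestamp and lost-event count.
 */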
3689 static struct trace_entry *
3690 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3691                   unsigned long *missing_events, u64 *ent_ts)
3692 {
3693         struct trace_buffer *buffer = iter->array_buffer->buffer;
3694         struct trace_entry *ent, *next = NULL;
3695         unsigned long lost_events = 0, next_lost = 0;
3696         int cpu_file = iter->cpu_file;
3697         u64 next_ts = 0, ts;
3698         int next_cpu = -1;
3699         int next_size = 0;
3700         int cpu;
3701
3702         /*
3703          * If we are in a per_cpu trace file, don't bother iterating over
3704          * all CPUs; just peek directly at that one.
3705          */
3706         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3707                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3708                         return NULL;
3709                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3710                 if (ent_cpu)
3711                         *ent_cpu = cpu_file;
3712
3713                 return ent;
3714         }
3715
3716         for_each_tracing_cpu(cpu) {
3717
3718                 if (ring_buffer_empty_cpu(buffer, cpu))
3719                         continue;
3720
3721                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3722
3723                 /*
3724                  * Pick the entry with the smallest timestamp:
3725                  */
3726                 if (ent && (!next || ts < next_ts)) {
3727                         next = ent;
3728                         next_cpu = cpu;
3729                         next_ts = ts;
3730                         next_lost = lost_events;
3731                         next_size = iter->ent_size;
3732                 }
3733         }
3734
3735         iter->ent_size = next_size;
3736
3737         if (ent_cpu)
3738                 *ent_cpu = next_cpu;
3739
3740         if (ent_ts)
3741                 *ent_ts = next_ts;
3742
3743         if (missing_events)
3744                 *missing_events = next_lost;
3745
3746         return next;
3747 }
3748
3749 #define STATIC_FMT_BUF_SIZE     128
3750 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3751
3752 char *trace_iter_expand_format(struct trace_iterator *iter)
3753 {
3754         char *tmp;
3755
3756         /*
3757          * iter->tr is NULL when used with tp_printk, which means this
3758          * can get called where it is not safe to call krealloc().
3759          */
3760         if (!iter->tr || iter->fmt == static_fmt_buf)
3761                 return NULL;
3762
3763         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3764                        GFP_KERNEL);
3765         if (tmp) {
3766                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3767                 iter->fmt = tmp;
3768         }
3769
3770         return tmp;
3771 }
3772
3773 /* Returns true if the string is safe to dereference from an event */
3774 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3775                            bool star, int len)
3776 {
3777         unsigned long addr = (unsigned long)str;
3778         struct trace_event *trace_event;
3779         struct trace_event_call *event;
3780
3781         /* Ignore strings with no length */
3782         if (star && !len)
3783                 return true;
3784
3785         /* OK if part of the event data */
3786         if ((addr >= (unsigned long)iter->ent) &&
3787             (addr < (unsigned long)iter->ent + iter->ent_size))
3788                 return true;
3789
3790         /* OK if part of the temp seq buffer */
3791         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3792             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3793                 return true;
3794
3795         /* Core rodata can not be freed */
3796         if (is_kernel_rodata(addr))
3797                 return true;
3798
3799         if (trace_is_tracepoint_string(str))
3800                 return true;
3801
3802         /*
3803          * Now this could be a module event, referencing core module
3804          * data, which is OK.
3805          */
3806         if (!iter->ent)
3807                 return false;
3808
3809         trace_event = ftrace_find_event(iter->ent->type);
3810         if (!trace_event)
3811                 return false;
3812
3813         event = container_of(trace_event, struct trace_event_call, event);
3814         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3815                 return false;
3816
3817         /* Would rather have rodata, but this will suffice */
3818         if (within_module_core(addr, event->module))
3819                 return true;
3820
3821         return false;
3822 }
3823
3824 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3825
3826 static int test_can_verify_check(const char *fmt, ...)
3827 {
3828         char buf[16];
3829         va_list ap;
3830         int ret;
3831
3832         /*
3833          * The verifier depends on vsnprintf() modifying the va_list passed
3834          * to it, i.e. the va_list being passed by reference. Some
3835          * architectures (like x86_32) pass it by value, which means that
3836          * vsnprintf() does not modify the caller's va_list, and the verifier
3837          * would then need to be able to understand every conversion that
3838          * vsnprintf() can perform. If the va_list is passed by value, the
3839          * verifier is disabled.
3840          */
3841         va_start(ap, fmt);
3842         vsnprintf(buf, 16, "%d", ap);
3843         ret = va_arg(ap, int);
3844         va_end(ap);
3845
3846         return ret;
3847 }
3848
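/*
 * Check whether vsnprintf() advances the va_list passed to it (i.e. the
 * va_list is passed by reference); if it does not, disable the trace
 * event string verifier.
 */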
3849 static void test_can_verify(void)
3850 {
3851         if (!test_can_verify_check("%d %d", 0, 1)) {
3852                 pr_info("trace event string verifier disabled\n");
3853                 static_branch_inc(&trace_no_verify);
3854         }
3855 }
3856
3857 /**
3858  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3859  * @iter: The iterator that holds the seq buffer and the event being printed
3860  * @fmt: The format used to print the event
3861  * @ap: The va_list holding the data to print from @fmt.
3862  *
3863  * This writes the data into the @iter->seq buffer using the data from
3864  * @fmt and @ap. If the format has a %s, then the source of the string
3865  * is examined to make sure it is safe to print, otherwise it will
3866  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3867  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3868  */
3869 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3870                          va_list ap)
3871 {
3872         const char *p = fmt;
3873         const char *str;
3874         int i, j;
3875
3876         if (WARN_ON_ONCE(!fmt))
3877                 return;
3878
3879         if (static_branch_unlikely(&trace_no_verify))
3880                 goto print;
3881
3882         /* Don't bother checking when doing a ftrace_dump() */
3883         if (iter->fmt == static_fmt_buf)
3884                 goto print;
3885
3886         while (*p) {
3887                 bool star = false;
3888                 int len = 0;
3889
3890                 j = 0;
3891
3892                 /* We only care about %s and variants */
3893                 for (i = 0; p[i]; i++) {
3894                         if (i + 1 >= iter->fmt_size) {
3895                                 /*
3896                                  * If we can't expand the copy buffer,
3897                                  * just print it.
3898                                  */
3899                                 if (!trace_iter_expand_format(iter))
3900                                         goto print;
3901                         }
3902
3903                         if (p[i] == '\\' && p[i+1]) {
3904                                 i++;
3905                                 continue;
3906                         }
3907                         if (p[i] == '%') {
3908                                 /* Need to test cases like %08.*s */
3909                                 for (j = 1; p[i+j]; j++) {
3910                                         if (isdigit(p[i+j]) ||
3911                                             p[i+j] == '.')
3912                                                 continue;
3913                                         if (p[i+j] == '*') {
3914                                                 star = true;
3915                                                 continue;
3916                                         }
3917                                         break;
3918                                 }
3919                                 if (p[i+j] == 's')
3920                                         break;
3921                                 star = false;
3922                         }
3923                         j = 0;
3924                 }
3925                 /* If no %s found then just print normally */
3926                 if (!p[i])
3927                         break;
3928
3929                 /* Copy up to the %s, and print that */
3930                 strncpy(iter->fmt, p, i);
3931                 iter->fmt[i] = '\0';
3932                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3933
3934                 /*
3935                  * If iter->seq is full, the above call no longer guarantees
3936                  * that ap is in sync with fmt processing, and further calls
3937                  * to va_arg() can return wrong positional arguments.
3938                  *
3939                  * Ensure that ap is no longer used in this case.
3940                  */
3941                 if (iter->seq.full) {
3942                         p = "";
3943                         break;
3944                 }
3945
3946                 if (star)
3947                         len = va_arg(ap, int);
3948
3949                 /* The ap now points to the string data of the %s */
3950                 str = va_arg(ap, const char *);
3951
3952                 /*
3953                  * If you hit this warning, it is likely that the
3954                  * trace event in question used %s on a string that
3955                  * was saved at the time of the event, but may not be
3956                  * around when the trace is read. Use __string(),
3957                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3958                  * instead. See samples/trace_events/trace-events-sample.h
3959                  * for reference.
3960                  */
3961                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3962                               "fmt: '%s' current_buffer: '%s'",
3963                               fmt, seq_buf_str(&iter->seq.seq))) {
3964                         int ret;
3965
3966                         /* Try to safely read the string */
3967                         if (star) {
3968                                 if (len + 1 > iter->fmt_size)
3969                                         len = iter->fmt_size - 1;
3970                                 if (len < 0)
3971                                         len = 0;
3972                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3973                                 iter->fmt[len] = 0;
3974                                 star = false;
3975                         } else {
3976                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3977                                                                   iter->fmt_size);
3978                         }
3979                         if (ret < 0)
3980                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3981                         else
3982                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3983                                                  str, iter->fmt);
3984                         str = "[UNSAFE-MEMORY]";
3985                         strcpy(iter->fmt, "%s");
3986                 } else {
3987                         strncpy(iter->fmt, p + i, j + 1);
3988                         iter->fmt[j+1] = '\0';
3989                 }
3990                 if (star)
3991                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3992                 else
3993                         trace_seq_printf(&iter->seq, iter->fmt, str);
3994
3995                 p += i + j + 1;
3996         }
3997  print:
3998         if (*p)
3999                 trace_seq_vprintf(&iter->seq, p, ap);
4000 }
4001
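/*
 * Return the format to use when printing @fmt for this iterator. When the
 * instance has the hash-ptr option cleared, "%p" is rewritten to "%px" so
 * that raw pointer values are shown instead of hashed ones.
 */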
4002 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4003 {
4004         const char *p, *new_fmt;
4005         char *q;
4006
4007         if (WARN_ON_ONCE(!fmt))
4008                 return fmt;
4009
4010         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4011                 return fmt;
4012
4013         p = fmt;
4014         new_fmt = q = iter->fmt;
4015         while (*p) {
4016                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4017                         if (!trace_iter_expand_format(iter))
4018                                 return fmt;
4019
4020                         q += iter->fmt - new_fmt;
4021                         new_fmt = iter->fmt;
4022                 }
4023
4024                 *q++ = *p++;
4025
4026                 /* Replace %p with %px */
4027                 if (p[-1] == '%') {
4028                         if (p[0] == '%') {
4029                                 *q++ = *p++;
4030                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4031                                 *q++ = *p++;
4032                                 *q++ = 'x';
4033                         }
4034                 }
4035         }
4036         *q = '\0';
4037
4038         return new_fmt;
4039 }
4040
4041 #define STATIC_TEMP_BUF_SIZE    128
4042 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4043
4044 /* Find the next real entry, without updating the iterator itself */
4045 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4046                                           int *ent_cpu, u64 *ent_ts)
4047 {
4048         /* __find_next_entry will reset ent_size */
4049         int ent_size = iter->ent_size;
4050         struct trace_entry *entry;
4051
4052         /*
4053          * If called from ftrace_dump(), then the iter->temp buffer
4054          * will be the static_temp_buf and not created from kmalloc.
4055          * If the entry size is greater than the buffer, we cannot
4056          * save it. Just return NULL in that case. This is only
4057          * used to add markers when two consecutive events' time
4058          * stamps have a large delta. See trace_print_lat_context().
4059          */
4060         if (iter->temp == static_temp_buf &&
4061             STATIC_TEMP_BUF_SIZE < ent_size)
4062                 return NULL;
4063
4064         /*
4065          * The __find_next_entry() may call peek_next_entry(), which may
4066          * call ring_buffer_peek() that may make the contents of iter->ent
4067          * undefined. Need to copy iter->ent now.
4068          */
4069         if (iter->ent && iter->ent != iter->temp) {
4070                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4071                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4072                         void *temp;
4073                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4074                         if (!temp)
4075                                 return NULL;
4076                         kfree(iter->temp);
4077                         iter->temp = temp;
4078                         iter->temp_size = iter->ent_size;
4079                 }
4080                 memcpy(iter->temp, iter->ent, iter->ent_size);
4081                 iter->ent = iter->temp;
4082         }
4083         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4084         /* Put back the original ent_size */
4085         iter->ent_size = ent_size;
4086
4087         return entry;
4088 }
4089
4090 /* Find the next real entry, and increment the iterator to the next entry */
4091 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4092 {
4093         iter->ent = __find_next_entry(iter, &iter->cpu,
4094                                       &iter->lost_events, &iter->ts);
4095
4096         if (iter->ent)
4097                 trace_iterator_increment(iter);
4098
4099         return iter->ent ? iter : NULL;
4100 }
4101
4102 static void trace_consume(struct trace_iterator *iter)
4103 {
4104         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4105                             &iter->lost_events);
4106 }
4107
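/* seq_file ->next() callback: advance the iterator to the entry at *pos */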
4108 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4109 {
4110         struct trace_iterator *iter = m->private;
4111         int i = (int)*pos;
4112         void *ent;
4113
4114         WARN_ON_ONCE(iter->leftover);
4115
4116         (*pos)++;
4117
4118         /* can't go backwards */
4119         if (iter->idx > i)
4120                 return NULL;
4121
4122         if (iter->idx < 0)
4123                 ent = trace_find_next_entry_inc(iter);
4124         else
4125                 ent = iter;
4126
4127         while (ent && iter->idx < i)
4128                 ent = trace_find_next_entry_inc(iter);
4129
4130         iter->pos = *pos;
4131
4132         return ent;
4133 }
4134
4135 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4136 {
4137         struct ring_buffer_iter *buf_iter;
4138         unsigned long entries = 0;
4139         u64 ts;
4140
4141         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4142
4143         buf_iter = trace_buffer_iter(iter, cpu);
4144         if (!buf_iter)
4145                 return;
4146
4147         ring_buffer_iter_reset(buf_iter);
4148
4149         /*
4150          * With the max latency tracers, we could have the case that
4151          * a reset never took place on a CPU. This is evident when the
4152          * timestamp is before the start of the buffer.
4153          */
4154         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4155                 if (ts >= iter->array_buffer->time_start)
4156                         break;
4157                 entries++;
4158                 ring_buffer_iter_advance(buf_iter);
4159         }
4160
4161         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4162 }
4163
4164 /*
4165  * The current tracer is copied to avoid global locking
4166  * all around.
4167  */
4168 static void *s_start(struct seq_file *m, loff_t *pos)
4169 {
4170         struct trace_iterator *iter = m->private;
4171         struct trace_array *tr = iter->tr;
4172         int cpu_file = iter->cpu_file;
4173         void *p = NULL;
4174         loff_t l = 0;
4175         int cpu;
4176
4177         mutex_lock(&trace_types_lock);
4178         if (unlikely(tr->current_trace != iter->trace)) {
4179                 /* Close iter->trace before switching to the new current tracer */
4180                 if (iter->trace->close)
4181                         iter->trace->close(iter);
4182                 iter->trace = tr->current_trace;
4183                 /* Reopen the new current tracer */
4184                 if (iter->trace->open)
4185                         iter->trace->open(iter);
4186         }
4187         mutex_unlock(&trace_types_lock);
4188
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190         if (iter->snapshot && iter->trace->use_max_tr)
4191                 return ERR_PTR(-EBUSY);
4192 #endif
4193
4194         if (*pos != iter->pos) {
4195                 iter->ent = NULL;
4196                 iter->cpu = 0;
4197                 iter->idx = -1;
4198
4199                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4200                         for_each_tracing_cpu(cpu)
4201                                 tracing_iter_reset(iter, cpu);
4202                 } else
4203                         tracing_iter_reset(iter, cpu_file);
4204
4205                 iter->leftover = 0;
4206                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4207                         ;
4208
4209         } else {
4210                 /*
4211                  * If we overflowed the seq_file before, then we want
4212                  * to just reuse the trace_seq buffer again.
4213                  */
4214                 if (iter->leftover)
4215                         p = iter;
4216                 else {
4217                         l = *pos - 1;
4218                         p = s_next(m, p, &l);
4219                 }
4220         }
4221
4222         trace_event_read_lock();
4223         trace_access_lock(cpu_file);
4224         return p;
4225 }
4226
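/* seq_file ->stop() callback: release the locks taken at the end of s_start() */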
4227 static void s_stop(struct seq_file *m, void *p)
4228 {
4229         struct trace_iterator *iter = m->private;
4230
4231 #ifdef CONFIG_TRACER_MAX_TRACE
4232         if (iter->snapshot && iter->trace->use_max_tr)
4233                 return;
4234 #endif
4235
4236         trace_access_unlock(iter->cpu_file);
4237         trace_event_read_unlock();
4238 }
4239
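/*
 * Get the number of entries still in the ring buffer for @cpu as well as
 * the total number written (entries plus overruns), ignoring entries that
 * were skipped at the start of a latency trace.
 */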
4240 static void
4241 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4242                       unsigned long *entries, int cpu)
4243 {
4244         unsigned long count;
4245
4246         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4247         /*
4248          * If this buffer has skipped entries, then we hold all
4249          * entries for the trace and we need to ignore the
4250          * ones before the time stamp.
4251          */
4252         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4253                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4254                 /* total is the same as the entries */
4255                 *total = count;
4256         } else
4257                 *total = count +
4258                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4259         *entries = count;
4260 }
4261
4262 static void
4263 get_total_entries(struct array_buffer *buf,
4264                   unsigned long *total, unsigned long *entries)
4265 {
4266         unsigned long t, e;
4267         int cpu;
4268
4269         *total = 0;
4270         *entries = 0;
4271
4272         for_each_tracing_cpu(cpu) {
4273                 get_total_entries_cpu(buf, &t, &e, cpu);
4274                 *total += t;
4275                 *entries += e;
4276         }
4277 }
4278
4279 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4280 {
4281         unsigned long total, entries;
4282
4283         if (!tr)
4284                 tr = &global_trace;
4285
4286         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4287
4288         return entries;
4289 }
4290
4291 unsigned long trace_total_entries(struct trace_array *tr)
4292 {
4293         unsigned long total, entries;
4294
4295         if (!tr)
4296                 tr = &global_trace;
4297
4298         get_total_entries(&tr->array_buffer, &total, &entries);
4299
4300         return entries;
4301 }
4302
4303 static void print_lat_help_header(struct seq_file *m)
4304 {
4305         seq_puts(m, "#                    _------=> CPU#            \n"
4306                     "#                   / _-----=> irqs-off/BH-disabled\n"
4307                     "#                  | / _----=> need-resched    \n"
4308                     "#                  || / _---=> hardirq/softirq \n"
4309                     "#                  ||| / _--=> preempt-depth   \n"
4310                     "#                  |||| / _-=> migrate-disable \n"
4311                     "#                  ||||| /     delay           \n"
4312                     "#  cmd     pid     |||||| time  |   caller     \n"
4313                     "#     \\   /        ||||||  \\    |    /       \n");
4314 }
4315
4316 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4317 {
4318         unsigned long total;
4319         unsigned long entries;
4320
4321         get_total_entries(buf, &total, &entries);
4322         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4323                    entries, total, num_online_cpus());
4324         seq_puts(m, "#\n");
4325 }
4326
4327 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4328                                    unsigned int flags)
4329 {
4330         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4331
4332         print_event_info(buf, m);
4333
4334         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4335         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4336 }
4337
4338 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4339                                        unsigned int flags)
4340 {
4341         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4342         static const char space[] = "            ";
4343         int prec = tgid ? 12 : 2;
4344
4345         print_event_info(buf, m);
4346
4347         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4348         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4349         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4350         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4351         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4352         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4353         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4354         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4355 }
4356
4357 void
4358 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4359 {
4360         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4361         struct array_buffer *buf = iter->array_buffer;
4362         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4363         struct tracer *type = iter->trace;
4364         unsigned long entries;
4365         unsigned long total;
4366         const char *name = type->name;
4367
4368         get_total_entries(buf, &total, &entries);
4369
4370         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4371                    name, UTS_RELEASE);
4372         seq_puts(m, "# -----------------------------------"
4373                  "---------------------------------\n");
4374         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4375                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4376                    nsecs_to_usecs(data->saved_latency),
4377                    entries,
4378                    total,
4379                    buf->cpu,
4380                    preempt_model_none()      ? "server" :
4381                    preempt_model_voluntary() ? "desktop" :
4382                    preempt_model_full()      ? "preempt" :
4383                    preempt_model_rt()        ? "preempt_rt" :
4384                    "unknown",
4385                    /* These are reserved for later use */
4386                    0, 0, 0, 0);
4387 #ifdef CONFIG_SMP
4388         seq_printf(m, " #P:%d)\n", num_online_cpus());
4389 #else
4390         seq_puts(m, ")\n");
4391 #endif
4392         seq_puts(m, "#    -----------------\n");
4393         seq_printf(m, "#    | task: %.16s-%d "
4394                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4395                    data->comm, data->pid,
4396                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4397                    data->policy, data->rt_priority);
4398         seq_puts(m, "#    -----------------\n");
4399
4400         if (data->critical_start) {
4401                 seq_puts(m, "#  => started at: ");
4402                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4403                 trace_print_seq(m, &iter->seq);
4404                 seq_puts(m, "\n#  => ended at:   ");
4405                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4406                 trace_print_seq(m, &iter->seq);
4407                 seq_puts(m, "\n#\n");
4408         }
4409
4410         seq_puts(m, "#\n");
4411 }
4412
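/*
 * When the "annotate" option is set, note the first time entries from a
 * new CPU buffer show up in the output.
 */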
4413 static void test_cpu_buff_start(struct trace_iterator *iter)
4414 {
4415         struct trace_seq *s = &iter->seq;
4416         struct trace_array *tr = iter->tr;
4417
4418         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4419                 return;
4420
4421         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4422                 return;
4423
4424         if (cpumask_available(iter->started) &&
4425             cpumask_test_cpu(iter->cpu, iter->started))
4426                 return;
4427
4428         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4429                 return;
4430
4431         if (cpumask_available(iter->started))
4432                 cpumask_set_cpu(iter->cpu, iter->started);
4433
4434         /* Don't print the "buffer started" annotation for the first entry of the trace */
4435         if (iter->idx > 1)
4436                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4437                                 iter->cpu);
4438 }
4439
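/* Print one trace entry in the default human-readable text format */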
4440 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4441 {
4442         struct trace_array *tr = iter->tr;
4443         struct trace_seq *s = &iter->seq;
4444         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4445         struct trace_entry *entry;
4446         struct trace_event *event;
4447
4448         entry = iter->ent;
4449
4450         test_cpu_buff_start(iter);
4451
4452         event = ftrace_find_event(entry->type);
4453
4454         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4456                         trace_print_lat_context(iter);
4457                 else
4458                         trace_print_context(iter);
4459         }
4460
4461         if (trace_seq_has_overflowed(s))
4462                 return TRACE_TYPE_PARTIAL_LINE;
4463
4464         if (event) {
4465                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4466                         return print_event_fields(iter, event);
4467                 return event->funcs->trace(iter, sym_flags, event);
4468         }
4469
4470         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4471
4472         return trace_handle_return(s);
4473 }
4474
4475 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4476 {
4477         struct trace_array *tr = iter->tr;
4478         struct trace_seq *s = &iter->seq;
4479         struct trace_entry *entry;
4480         struct trace_event *event;
4481
4482         entry = iter->ent;
4483
4484         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4485                 trace_seq_printf(s, "%d %d %llu ",
4486                                  entry->pid, iter->cpu, iter->ts);
4487
4488         if (trace_seq_has_overflowed(s))
4489                 return TRACE_TYPE_PARTIAL_LINE;
4490
4491         event = ftrace_find_event(entry->type);
4492         if (event)
4493                 return event->funcs->raw(iter, 0, event);
4494
4495         trace_seq_printf(s, "%d ?\n", entry->type);
4496
4497         return trace_handle_return(s);
4498 }
4499
4500 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4501 {
4502         struct trace_array *tr = iter->tr;
4503         struct trace_seq *s = &iter->seq;
4504         unsigned char newline = '\n';
4505         struct trace_entry *entry;
4506         struct trace_event *event;
4507
4508         entry = iter->ent;
4509
4510         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4511                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4512                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4513                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4514                 if (trace_seq_has_overflowed(s))
4515                         return TRACE_TYPE_PARTIAL_LINE;
4516         }
4517
4518         event = ftrace_find_event(entry->type);
4519         if (event) {
4520                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4521                 if (ret != TRACE_TYPE_HANDLED)
4522                         return ret;
4523         }
4524
4525         SEQ_PUT_FIELD(s, newline);
4526
4527         return trace_handle_return(s);
4528 }
4529
4530 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4531 {
4532         struct trace_array *tr = iter->tr;
4533         struct trace_seq *s = &iter->seq;
4534         struct trace_entry *entry;
4535         struct trace_event *event;
4536
4537         entry = iter->ent;
4538
4539         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4540                 SEQ_PUT_FIELD(s, entry->pid);
4541                 SEQ_PUT_FIELD(s, iter->cpu);
4542                 SEQ_PUT_FIELD(s, iter->ts);
4543                 if (trace_seq_has_overflowed(s))
4544                         return TRACE_TYPE_PARTIAL_LINE;
4545         }
4546
4547         event = ftrace_find_event(entry->type);
4548         return event ? event->funcs->binary(iter, 0, event) :
4549                 TRACE_TYPE_HANDLED;
4550 }
4551
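/* Return 1 if there is nothing left to read in the selected CPU buffer(s) */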
4552 int trace_empty(struct trace_iterator *iter)
4553 {
4554         struct ring_buffer_iter *buf_iter;
4555         int cpu;
4556
4557         /* If we are looking at one CPU buffer, only check that one */
4558         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4559                 cpu = iter->cpu_file;
4560                 buf_iter = trace_buffer_iter(iter, cpu);
4561                 if (buf_iter) {
4562                         if (!ring_buffer_iter_empty(buf_iter))
4563                                 return 0;
4564                 } else {
4565                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4566                                 return 0;
4567                 }
4568                 return 1;
4569         }
4570
4571         for_each_tracing_cpu(cpu) {
4572                 buf_iter = trace_buffer_iter(iter, cpu);
4573                 if (buf_iter) {
4574                         if (!ring_buffer_iter_empty(buf_iter))
4575                                 return 0;
4576                 } else {
4577                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578                                 return 0;
4579                 }
4580         }
4581
4582         return 1;
4583 }
4584
4585 /*  Called with trace_event_read_lock() held. */
4586 enum print_line_t print_trace_line(struct trace_iterator *iter)
4587 {
4588         struct trace_array *tr = iter->tr;
4589         unsigned long trace_flags = tr->trace_flags;
4590         enum print_line_t ret;
4591
4592         if (iter->lost_events) {
4593                 if (iter->lost_events == (unsigned long)-1)
4594                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4595                                          iter->cpu);
4596                 else
4597                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4598                                          iter->cpu, iter->lost_events);
4599                 if (trace_seq_has_overflowed(&iter->seq))
4600                         return TRACE_TYPE_PARTIAL_LINE;
4601         }
4602
4603         if (iter->trace && iter->trace->print_line) {
4604                 ret = iter->trace->print_line(iter);
4605                 if (ret != TRACE_TYPE_UNHANDLED)
4606                         return ret;
4607         }
4608
4609         if (iter->ent->type == TRACE_BPUTS &&
4610                         trace_flags & TRACE_ITER_PRINTK &&
4611                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4612                 return trace_print_bputs_msg_only(iter);
4613
4614         if (iter->ent->type == TRACE_BPRINT &&
4615                         trace_flags & TRACE_ITER_PRINTK &&
4616                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4617                 return trace_print_bprintk_msg_only(iter);
4618
4619         if (iter->ent->type == TRACE_PRINT &&
4620                         trace_flags & TRACE_ITER_PRINTK &&
4621                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4622                 return trace_print_printk_msg_only(iter);
4623
4624         if (trace_flags & TRACE_ITER_BIN)
4625                 return print_bin_fmt(iter);
4626
4627         if (trace_flags & TRACE_ITER_HEX)
4628                 return print_hex_fmt(iter);
4629
4630         if (trace_flags & TRACE_ITER_RAW)
4631                 return print_raw_fmt(iter);
4632
4633         return print_trace_fmt(iter);
4634 }
4635
4636 void trace_latency_header(struct seq_file *m)
4637 {
4638         struct trace_iterator *iter = m->private;
4639         struct trace_array *tr = iter->tr;
4640
4641         /* print nothing if the buffers are empty */
4642         if (trace_empty(iter))
4643                 return;
4644
4645         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4646                 print_trace_header(m, iter);
4647
4648         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4649                 print_lat_help_header(m);
4650 }
4651
4652 void trace_default_header(struct seq_file *m)
4653 {
4654         struct trace_iterator *iter = m->private;
4655         struct trace_array *tr = iter->tr;
4656         unsigned long trace_flags = tr->trace_flags;
4657
4658         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4659                 return;
4660
4661         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4662                 /* print nothing if the buffers are empty */
4663                 if (trace_empty(iter))
4664                         return;
4665                 print_trace_header(m, iter);
4666                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4667                         print_lat_help_header(m);
4668         } else {
4669                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4670                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4671                                 print_func_help_header_irq(iter->array_buffer,
4672                                                            m, trace_flags);
4673                         else
4674                                 print_func_help_header(iter->array_buffer, m,
4675                                                        trace_flags);
4676                 }
4677         }
4678 }
4679
4680 static void test_ftrace_alive(struct seq_file *m)
4681 {
4682         if (!ftrace_is_dead())
4683                 return;
4684         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4685                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4686 }
4687
4688 #ifdef CONFIG_TRACER_MAX_TRACE
4689 static void show_snapshot_main_help(struct seq_file *m)
4690 {
4691         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4692                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4693                     "#                      Takes a snapshot of the main buffer.\n"
4694                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4695                     "#                      (Doesn't have to be '2' works with any number that\n"
4696                     "#                       is not a '0' or '1')\n");
4697 }
4698
4699 static void show_snapshot_percpu_help(struct seq_file *m)
4700 {
4701         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4702 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4703         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4704                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4705 #else
4706         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4707                     "#                     Must use main snapshot file to allocate.\n");
4708 #endif
4709         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4710                     "#                      (Doesn't have to be '2'; works with any number that\n"
4711                     "#                       is not a '0' or '1')\n");
4712 }
4713
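     /*
      * For reference, the help text above corresponds to usage such as
      * (path assumes the default tracefs mount point):
      *
      *   echo 1 > /sys/kernel/tracing/snapshot   # allocate + take a snapshot
      *   echo 0 > /sys/kernel/tracing/snapshot   # clear and free the snapshot
      */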
4714 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4715 {
4716         if (iter->tr->allocated_snapshot)
4717                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4718         else
4719                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4720
4721         seq_puts(m, "# Snapshot commands:\n");
4722         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4723                 show_snapshot_main_help(m);
4724         else
4725                 show_snapshot_percpu_help(m);
4726 }
4727 #else
4728 /* Should never be called */
4729 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4730 #endif
4731
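     /*
      * seq_file ->show() for the "trace" file: print the header block while
      * iter->ent is NULL, flush a line left over from an earlier seq_file
      * overflow, or format the next entry via print_trace_line().
      */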
4732 static int s_show(struct seq_file *m, void *v)
4733 {
4734         struct trace_iterator *iter = v;
4735         int ret;
4736
4737         if (iter->ent == NULL) {
4738                 if (iter->tr) {
4739                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4740                         seq_puts(m, "#\n");
4741                         test_ftrace_alive(m);
4742                 }
4743                 if (iter->snapshot && trace_empty(iter))
4744                         print_snapshot_help(m, iter);
4745                 else if (iter->trace && iter->trace->print_header)
4746                         iter->trace->print_header(m);
4747                 else
4748                         trace_default_header(m);
4749
4750         } else if (iter->leftover) {
4751                 /*
4752                  * If we filled the seq_file buffer earlier, we
4753                  * want to just show it now.
4754                  */
4755                 ret = trace_print_seq(m, &iter->seq);
4756
4757                 /* ret should this time be zero, but you never know */
4758                 iter->leftover = ret;
4759
4760         } else {
4761                 ret = print_trace_line(iter);
4762                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4763                         iter->seq.full = 0;
4764                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4765                 }
4766                 ret = trace_print_seq(m, &iter->seq);
4767                 /*
4768                  * If we overflow the seq_file buffer, then it will
4769                  * ask us for this data again at start up.
4770                  * Use that instead.
4771                  *  ret is 0 if seq_file write succeeded.
4772                  *        -1 otherwise.
4773                  */
4774                 iter->leftover = ret;
4775         }
4776
4777         return 0;
4778 }
4779
4780 /*
4781  * Should be used after trace_array_get(), trace_types_lock
4782  * ensures that i_cdev was already initialized.
4783  */
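     /*
      * Per-cpu files store "cpu + 1" in i_cdev (see trace_create_cpu_file()),
      * so subtracting one recovers the CPU number, while a NULL i_cdev (the
      * top level file) means RING_BUFFER_ALL_CPUS.
      */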
4784 static inline int tracing_get_cpu(struct inode *inode)
4785 {
4786         if (inode->i_cdev) /* See trace_create_cpu_file() */
4787                 return (long)inode->i_cdev - 1;
4788         return RING_BUFFER_ALL_CPUS;
4789 }
4790
4791 static const struct seq_operations tracer_seq_ops = {
4792         .start          = s_start,
4793         .next           = s_next,
4794         .stop           = s_stop,
4795         .show           = s_show,
4796 };
4797
4798 /*
4799  * Note, as iter itself can be allocated and freed in different
4800  * ways, this function is only used to free its content, and not
4801  * the iterator itself. The only requirement for all the allocations
4802  * is that they must zero all fields (kzalloc), as freeing works with
4803  * either allocated content or NULL.
4804  */
4805 static void free_trace_iter_content(struct trace_iterator *iter)
4806 {
4807         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4808         if (iter->fmt != static_fmt_buf)
4809                 kfree(iter->fmt);
4810
4811         kfree(iter->temp);
4812         kfree(iter->buffer_iter);
4813         mutex_destroy(&iter->mutex);
4814         free_cpumask_var(iter->started);
4815 }
4816
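     /*
      * Set up the seq_file iterator behind the "trace" and "snapshot" files:
      * pick the main or max/snapshot buffer, optionally stop tracing when
      * pause-on-trace is set, and prepare ring buffer iterators for one CPU
      * or for all of them.
      */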
4817 static struct trace_iterator *
4818 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4819 {
4820         struct trace_array *tr = inode->i_private;
4821         struct trace_iterator *iter;
4822         int cpu;
4823
4824         if (tracing_disabled)
4825                 return ERR_PTR(-ENODEV);
4826
4827         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4828         if (!iter)
4829                 return ERR_PTR(-ENOMEM);
4830
4831         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4832                                     GFP_KERNEL);
4833         if (!iter->buffer_iter)
4834                 goto release;
4835
4836         /*
4837          * trace_find_next_entry() may need to save off iter->ent.
4838          * It will place it into the iter->temp buffer. As most
4839          * events are less than 128 bytes, allocate a buffer of that size.
4840          * If one is greater, then trace_find_next_entry() will
4841          * allocate a new buffer to adjust for the bigger iter->ent.
4842          * It's not critical if it fails to get allocated here.
4843          */
4844         iter->temp = kmalloc(128, GFP_KERNEL);
4845         if (iter->temp)
4846                 iter->temp_size = 128;
4847
4848         /*
4849          * trace_event_printf() may need to modify the given format
4850          * string to replace %p with %px so that it shows the real address
4851          * instead of a hash value. However, that is only needed for event
4852          * tracing; other tracers may not need it. Defer the allocation
4853          * until it is needed.
4854          */
4855         iter->fmt = NULL;
4856         iter->fmt_size = 0;
4857
4858         mutex_lock(&trace_types_lock);
4859         iter->trace = tr->current_trace;
4860
4861         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4862                 goto fail;
4863
4864         iter->tr = tr;
4865
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867         /* Currently only the top directory has a snapshot */
4868         if (tr->current_trace->print_max || snapshot)
4869                 iter->array_buffer = &tr->max_buffer;
4870         else
4871 #endif
4872                 iter->array_buffer = &tr->array_buffer;
4873         iter->snapshot = snapshot;
4874         iter->pos = -1;
4875         iter->cpu_file = tracing_get_cpu(inode);
4876         mutex_init(&iter->mutex);
4877
4878         /* Notify the tracer early; before we stop tracing. */
4879         if (iter->trace->open)
4880                 iter->trace->open(iter);
4881
4882         /* Annotate start of buffers if we had overruns */
4883         if (ring_buffer_overruns(iter->array_buffer->buffer))
4884                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4885
4886         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4887         if (trace_clocks[tr->clock_id].in_ns)
4888                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4889
4890         /*
4891          * If pause-on-trace is enabled, then stop the trace while
4892          * dumping, unless this is the "snapshot" file
4893          */
4894         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4895                 tracing_stop_tr(tr);
4896
4897         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4898                 for_each_tracing_cpu(cpu) {
4899                         iter->buffer_iter[cpu] =
4900                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4901                                                          cpu, GFP_KERNEL);
4902                 }
4903                 ring_buffer_read_prepare_sync();
4904                 for_each_tracing_cpu(cpu) {
4905                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4906                         tracing_iter_reset(iter, cpu);
4907                 }
4908         } else {
4909                 cpu = iter->cpu_file;
4910                 iter->buffer_iter[cpu] =
4911                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4912                                                  cpu, GFP_KERNEL);
4913                 ring_buffer_read_prepare_sync();
4914                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4915                 tracing_iter_reset(iter, cpu);
4916         }
4917
4918         mutex_unlock(&trace_types_lock);
4919
4920         return iter;
4921
4922  fail:
4923         mutex_unlock(&trace_types_lock);
4924         free_trace_iter_content(iter);
4925 release:
4926         seq_release_private(inode, file);
4927         return ERR_PTR(-ENOMEM);
4928 }
4929
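     /*
      * Generic open that only checks whether tracing is usable; passing
      * NULL means no trace_array reference is taken, unlike
      * tracing_open_generic_tr() below.
      */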
4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(NULL);
4935         if (ret)
4936                 return ret;
4937
4938         filp->private_data = inode->i_private;
4939         return 0;
4940 }
4941
4942 bool tracing_is_disabled(void)
4943 {
4944         return tracing_disabled ? true : false;
4945 }
4946
4947 /*
4948  * Open and update trace_array ref count.
4949  * Must have the current trace_array passed to it.
4950  */
4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953         struct trace_array *tr = inode->i_private;
4954         int ret;
4955
4956         ret = tracing_check_open_get_tr(tr);
4957         if (ret)
4958                 return ret;
4959
4960         filp->private_data = inode->i_private;
4961
4962         return 0;
4963 }
4964
4965 /*
4966  * The private pointer of the inode is the trace_event_file.
4967  * Update the tr ref count associated to it.
4968  */
4969 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4970 {
4971         struct trace_event_file *file = inode->i_private;
4972         int ret;
4973
4974         ret = tracing_check_open_get_tr(file->tr);
4975         if (ret)
4976                 return ret;
4977
4978         mutex_lock(&event_mutex);
4979
4980         /* Fail if the file is marked for removal */
4981         if (file->flags & EVENT_FILE_FL_FREED) {
4982                 trace_array_put(file->tr);
4983                 ret = -ENODEV;
4984         } else {
4985                 event_file_get(file);
4986         }
4987
4988         mutex_unlock(&event_mutex);
4989         if (ret)
4990                 return ret;
4991
4992         filp->private_data = inode->i_private;
4993
4994         return 0;
4995 }
4996
4997 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4998 {
4999         struct trace_event_file *file = inode->i_private;
5000
5001         trace_array_put(file->tr);
5002         event_file_put(file);
5003
5004         return 0;
5005 }
5006
5007 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5008 {
5009         tracing_release_file_tr(inode, filp);
5010         return single_release(inode, filp);
5011 }
5012
5013 static int tracing_mark_open(struct inode *inode, struct file *filp)
5014 {
5015         stream_open(inode, filp);
5016         return tracing_open_generic_tr(inode, filp);
5017 }
5018
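     /*
      * Tear down what __tracing_open() set up: finish the per-cpu ring
      * buffer iterators, run the tracer's ->close() callback, restart
      * tracing if it was stopped by pause-on-trace, and drop the
      * trace_array reference.
      */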
5019 static int tracing_release(struct inode *inode, struct file *file)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022         struct seq_file *m = file->private_data;
5023         struct trace_iterator *iter;
5024         int cpu;
5025
5026         if (!(file->f_mode & FMODE_READ)) {
5027                 trace_array_put(tr);
5028                 return 0;
5029         }
5030
5031         /* Writes do not use seq_file */
5032         iter = m->private;
5033         mutex_lock(&trace_types_lock);
5034
5035         for_each_tracing_cpu(cpu) {
5036                 if (iter->buffer_iter[cpu])
5037                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5038         }
5039
5040         if (iter->trace && iter->trace->close)
5041                 iter->trace->close(iter);
5042
5043         if (!iter->snapshot && tr->stop_count)
5044                 /* reenable tracing if it was previously enabled */
5045                 tracing_start_tr(tr);
5046
5047         __trace_array_put(tr);
5048
5049         mutex_unlock(&trace_types_lock);
5050
5051         free_trace_iter_content(iter);
5052         seq_release_private(inode, file);
5053
5054         return 0;
5055 }
5056
5057 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5058 {
5059         struct trace_array *tr = inode->i_private;
5060
5061         trace_array_put(tr);
5062         return 0;
5063 }
5064
5065 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5066 {
5067         struct trace_array *tr = inode->i_private;
5068
5069         trace_array_put(tr);
5070
5071         return single_release(inode, file);
5072 }
5073
5074 static int tracing_open(struct inode *inode, struct file *file)
5075 {
5076         struct trace_array *tr = inode->i_private;
5077         struct trace_iterator *iter;
5078         int ret;
5079
5080         ret = tracing_check_open_get_tr(tr);
5081         if (ret)
5082                 return ret;
5083
5084         /* If this file was open for write, then erase contents */
5085         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5086                 int cpu = tracing_get_cpu(inode);
5087                 struct array_buffer *trace_buf = &tr->array_buffer;
5088
5089 #ifdef CONFIG_TRACER_MAX_TRACE
5090                 if (tr->current_trace->print_max)
5091                         trace_buf = &tr->max_buffer;
5092 #endif
5093
5094                 if (cpu == RING_BUFFER_ALL_CPUS)
5095                         tracing_reset_online_cpus(trace_buf);
5096                 else
5097                         tracing_reset_cpu(trace_buf, cpu);
5098         }
5099
5100         if (file->f_mode & FMODE_READ) {
5101                 iter = __tracing_open(inode, file, false);
5102                 if (IS_ERR(iter))
5103                         ret = PTR_ERR(iter);
5104                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5105                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5106         }
5107
5108         if (ret < 0)
5109                 trace_array_put(tr);
5110
5111         return ret;
5112 }
5113
5114 /*
5115  * Some tracers are not suitable for instance buffers.
5116  * A tracer is always available for the global array (toplevel)
5117  * or if it explicitly states that it is.
5118  */
5119 static bool
5120 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5121 {
5122         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5123 }
5124
5125 /* Find the next tracer that this trace array may use */
5126 static struct tracer *
5127 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5128 {
5129         while (t && !trace_ok_for_array(t, tr))
5130                 t = t->next;
5131
5132         return t;
5133 }
5134
5135 static void *
5136 t_next(struct seq_file *m, void *v, loff_t *pos)
5137 {
5138         struct trace_array *tr = m->private;
5139         struct tracer *t = v;
5140
5141         (*pos)++;
5142
5143         if (t)
5144                 t = get_tracer_for_array(tr, t->next);
5145
5146         return t;
5147 }
5148
5149 static void *t_start(struct seq_file *m, loff_t *pos)
5150 {
5151         struct trace_array *tr = m->private;
5152         struct tracer *t;
5153         loff_t l = 0;
5154
5155         mutex_lock(&trace_types_lock);
5156
5157         t = get_tracer_for_array(tr, trace_types);
5158         for (; t && l < *pos; t = t_next(m, t, &l))
5159                         ;
5160
5161         return t;
5162 }
5163
5164 static void t_stop(struct seq_file *m, void *p)
5165 {
5166         mutex_unlock(&trace_types_lock);
5167 }
5168
5169 static int t_show(struct seq_file *m, void *v)
5170 {
5171         struct tracer *t = v;
5172
5173         if (!t)
5174                 return 0;
5175
5176         seq_puts(m, t->name);
5177         if (t->next)
5178                 seq_putc(m, ' ');
5179         else
5180                 seq_putc(m, '\n');
5181
5182         return 0;
5183 }
5184
5185 static const struct seq_operations show_traces_seq_ops = {
5186         .start          = t_start,
5187         .next           = t_next,
5188         .stop           = t_stop,
5189         .show           = t_show,
5190 };
5191
5192 static int show_traces_open(struct inode *inode, struct file *file)
5193 {
5194         struct trace_array *tr = inode->i_private;
5195         struct seq_file *m;
5196         int ret;
5197
5198         ret = tracing_check_open_get_tr(tr);
5199         if (ret)
5200                 return ret;
5201
5202         ret = seq_open(file, &show_traces_seq_ops);
5203         if (ret) {
5204                 trace_array_put(tr);
5205                 return ret;
5206         }
5207
5208         m = file->private_data;
5209         m->private = tr;
5210
5211         return 0;
5212 }
5213
5214 static int show_traces_release(struct inode *inode, struct file *file)
5215 {
5216         struct trace_array *tr = inode->i_private;
5217
5218         trace_array_put(tr);
5219         return seq_release(inode, file);
5220 }
5221
5222 static ssize_t
5223 tracing_write_stub(struct file *filp, const char __user *ubuf,
5224                    size_t count, loff_t *ppos)
5225 {
5226         return count;
5227 }
5228
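     /*
      * Reads go through seq_file, so defer to seq_lseek(); write-only opens
      * bypass seq_file entirely and simply have their position reset to 0.
      */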
5229 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5230 {
5231         int ret;
5232
5233         if (file->f_mode & FMODE_READ)
5234                 ret = seq_lseek(file, offset, whence);
5235         else
5236                 file->f_pos = ret = 0;
5237
5238         return ret;
5239 }
5240
5241 static const struct file_operations tracing_fops = {
5242         .open           = tracing_open,
5243         .read           = seq_read,
5244         .read_iter      = seq_read_iter,
5245         .splice_read    = copy_splice_read,
5246         .write          = tracing_write_stub,
5247         .llseek         = tracing_lseek,
5248         .release        = tracing_release,
5249 };
5250
5251 static const struct file_operations show_traces_fops = {
5252         .open           = show_traces_open,
5253         .read           = seq_read,
5254         .llseek         = seq_lseek,
5255         .release        = show_traces_release,
5256 };
5257
5258 static ssize_t
5259 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5260                      size_t count, loff_t *ppos)
5261 {
5262         struct trace_array *tr = file_inode(filp)->i_private;
5263         char *mask_str;
5264         int len;
5265
5266         len = snprintf(NULL, 0, "%*pb\n",
5267                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5268         mask_str = kmalloc(len, GFP_KERNEL);
5269         if (!mask_str)
5270                 return -ENOMEM;
5271
5272         len = snprintf(mask_str, len, "%*pb\n",
5273                        cpumask_pr_args(tr->tracing_cpumask));
5274         if (len >= count) {
5275                 count = -EINVAL;
5276                 goto out_err;
5277         }
5278         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5279
5280 out_err:
5281         kfree(mask_str);
5282
5283         return count;
5284 }
5285
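     /*
      * Apply a new tracing cpumask: recording is disabled on CPUs being
      * removed from the mask and re-enabled on CPUs being added, for the
      * main buffer and (if configured) the max buffer, before the new mask
      * is copied into the trace_array.
      */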
5286 int tracing_set_cpumask(struct trace_array *tr,
5287                         cpumask_var_t tracing_cpumask_new)
5288 {
5289         int cpu;
5290
5291         if (!tr)
5292                 return -EINVAL;
5293
5294         local_irq_disable();
5295         arch_spin_lock(&tr->max_lock);
5296         for_each_tracing_cpu(cpu) {
5297                 /*
5298                  * Increase/decrease the disabled counter if we are
5299                  * about to flip a bit in the cpumask:
5300                  */
5301                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5302                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5303                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5304                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5305 #ifdef CONFIG_TRACER_MAX_TRACE
5306                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5307 #endif
5308                 }
5309                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5310                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5311                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5312                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5313 #ifdef CONFIG_TRACER_MAX_TRACE
5314                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5315 #endif
5316                 }
5317         }
5318         arch_spin_unlock(&tr->max_lock);
5319         local_irq_enable();
5320
5321         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5322
5323         return 0;
5324 }
5325
5326 static ssize_t
5327 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5328                       size_t count, loff_t *ppos)
5329 {
5330         struct trace_array *tr = file_inode(filp)->i_private;
5331         cpumask_var_t tracing_cpumask_new;
5332         int err;
5333
5334         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5335                 return -ENOMEM;
5336
5337         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5338         if (err)
5339                 goto err_free;
5340
5341         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5342         if (err)
5343                 goto err_free;
5344
5345         free_cpumask_var(tracing_cpumask_new);
5346
5347         return count;
5348
5349 err_free:
5350         free_cpumask_var(tracing_cpumask_new);
5351
5352         return err;
5353 }
5354
5355 static const struct file_operations tracing_cpumask_fops = {
5356         .open           = tracing_open_generic_tr,
5357         .read           = tracing_cpumask_read,
5358         .write          = tracing_cpumask_write,
5359         .release        = tracing_release_generic_tr,
5360         .llseek         = generic_file_llseek,
5361 };
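     /*
      * The tracing_cpumask file takes a hex CPU mask, e.g. (path assumes
      * the default tracefs mount point):
      *
      *   echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1
      */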
5362
5363 static int tracing_trace_options_show(struct seq_file *m, void *v)
5364 {
5365         struct tracer_opt *trace_opts;
5366         struct trace_array *tr = m->private;
5367         u32 tracer_flags;
5368         int i;
5369
5370         mutex_lock(&trace_types_lock);
5371         tracer_flags = tr->current_trace->flags->val;
5372         trace_opts = tr->current_trace->flags->opts;
5373
5374         for (i = 0; trace_options[i]; i++) {
5375                 if (tr->trace_flags & (1 << i))
5376                         seq_printf(m, "%s\n", trace_options[i]);
5377                 else
5378                         seq_printf(m, "no%s\n", trace_options[i]);
5379         }
5380
5381         for (i = 0; trace_opts[i].name; i++) {
5382                 if (tracer_flags & trace_opts[i].bit)
5383                         seq_printf(m, "%s\n", trace_opts[i].name);
5384                 else
5385                         seq_printf(m, "no%s\n", trace_opts[i].name);
5386         }
5387         mutex_unlock(&trace_types_lock);
5388
5389         return 0;
5390 }
5391
5392 static int __set_tracer_option(struct trace_array *tr,
5393                                struct tracer_flags *tracer_flags,
5394                                struct tracer_opt *opts, int neg)
5395 {
5396         struct tracer *trace = tracer_flags->trace;
5397         int ret;
5398
5399         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5400         if (ret)
5401                 return ret;
5402
5403         if (neg)
5404                 tracer_flags->val &= ~opts->bit;
5405         else
5406                 tracer_flags->val |= opts->bit;
5407         return 0;
5408 }
5409
5410 /* Try to assign a tracer specific option */
5411 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5412 {
5413         struct tracer *trace = tr->current_trace;
5414         struct tracer_flags *tracer_flags = trace->flags;
5415         struct tracer_opt *opts = NULL;
5416         int i;
5417
5418         for (i = 0; tracer_flags->opts[i].name; i++) {
5419                 opts = &tracer_flags->opts[i];
5420
5421                 if (strcmp(cmp, opts->name) == 0)
5422                         return __set_tracer_option(tr, trace->flags, opts, neg);
5423         }
5424
5425         return -EINVAL;
5426 }
5427
5428 /* Some tracers require overwrite to stay enabled */
5429 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5430 {
5431         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5432                 return -1;
5433
5434         return 0;
5435 }
5436
5437 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5438 {
5439         int *map;
5440
5441         if ((mask == TRACE_ITER_RECORD_TGID) ||
5442             (mask == TRACE_ITER_RECORD_CMD))
5443                 lockdep_assert_held(&event_mutex);
5444
5445         /* do nothing if flag is already set */
5446         if (!!(tr->trace_flags & mask) == !!enabled)
5447                 return 0;
5448
5449         /* Give the tracer a chance to approve the change */
5450         if (tr->current_trace->flag_changed)
5451                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5452                         return -EINVAL;
5453
5454         if (enabled)
5455                 tr->trace_flags |= mask;
5456         else
5457                 tr->trace_flags &= ~mask;
5458
5459         if (mask == TRACE_ITER_RECORD_CMD)
5460                 trace_event_enable_cmd_record(enabled);
5461
5462         if (mask == TRACE_ITER_RECORD_TGID) {
5463                 if (!tgid_map) {
5464                         tgid_map_max = pid_max;
5465                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5466                                        GFP_KERNEL);
5467
5468                         /*
5469                          * Pairs with smp_load_acquire() in
5470                          * trace_find_tgid_ptr() to ensure that if it observes
5471                          * the tgid_map we just allocated then it also observes
5472                          * the corresponding tgid_map_max value.
5473                          */
5474                         smp_store_release(&tgid_map, map);
5475                 }
5476                 if (!tgid_map) {
5477                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5478                         return -ENOMEM;
5479                 }
5480
5481                 trace_event_enable_tgid_record(enabled);
5482         }
5483
5484         if (mask == TRACE_ITER_EVENT_FORK)
5485                 trace_event_follow_fork(tr, enabled);
5486
5487         if (mask == TRACE_ITER_FUNC_FORK)
5488                 ftrace_pid_follow_fork(tr, enabled);
5489
5490         if (mask == TRACE_ITER_OVERWRITE) {
5491                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5492 #ifdef CONFIG_TRACER_MAX_TRACE
5493                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5494 #endif
5495         }
5496
5497         if (mask == TRACE_ITER_PRINTK) {
5498                 trace_printk_start_stop_comm(enabled);
5499                 trace_printk_control(enabled);
5500         }
5501
5502         return 0;
5503 }
5504
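     /*
      * Parse one option string from the trace_options file or the boot
      * command line.  A "no" prefix clears the option; global trace flags
      * are tried first and anything unknown is handed to the current
      * tracer's own options.  For example, "echo nooverwrite > trace_options"
      * would clear the "overwrite" flag (names come from trace_options[]).
      */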
5505 int trace_set_options(struct trace_array *tr, char *option)
5506 {
5507         char *cmp;
5508         int neg = 0;
5509         int ret;
5510         size_t orig_len = strlen(option);
5511         int len;
5512
5513         cmp = strstrip(option);
5514
5515         len = str_has_prefix(cmp, "no");
5516         if (len)
5517                 neg = 1;
5518
5519         cmp += len;
5520
5521         mutex_lock(&event_mutex);
5522         mutex_lock(&trace_types_lock);
5523
5524         ret = match_string(trace_options, -1, cmp);
5525         /* If no option could be set, test the specific tracer options */
5526         if (ret < 0)
5527                 ret = set_tracer_option(tr, cmp, neg);
5528         else
5529                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5530
5531         mutex_unlock(&trace_types_lock);
5532         mutex_unlock(&event_mutex);
5533
5534         /*
5535          * If the first trailing whitespace is replaced with '\0' by strstrip,
5536          * turn it back into a space.
5537          */
5538         if (orig_len > strlen(option))
5539                 option[strlen(option)] = ' ';
5540
5541         return ret;
5542 }
5543
5544 static void __init apply_trace_boot_options(void)
5545 {
5546         char *buf = trace_boot_options_buf;
5547         char *option;
5548
5549         while (true) {
5550                 option = strsep(&buf, ",");
5551
5552                 if (!option)
5553                         break;
5554
5555                 if (*option)
5556                         trace_set_options(&global_trace, option);
5557
5558                 /* Put back the comma to allow this to be called again */
5559                 if (buf)
5560                         *(buf - 1) = ',';
5561         }
5562 }
5563
5564 static ssize_t
5565 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5566                         size_t cnt, loff_t *ppos)
5567 {
5568         struct seq_file *m = filp->private_data;
5569         struct trace_array *tr = m->private;
5570         char buf[64];
5571         int ret;
5572
5573         if (cnt >= sizeof(buf))
5574                 return -EINVAL;
5575
5576         if (copy_from_user(buf, ubuf, cnt))
5577                 return -EFAULT;
5578
5579         buf[cnt] = 0;
5580
5581         ret = trace_set_options(tr, buf);
5582         if (ret < 0)
5583                 return ret;
5584
5585         *ppos += cnt;
5586
5587         return cnt;
5588 }
5589
5590 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5591 {
5592         struct trace_array *tr = inode->i_private;
5593         int ret;
5594
5595         ret = tracing_check_open_get_tr(tr);
5596         if (ret)
5597                 return ret;
5598
5599         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5600         if (ret < 0)
5601                 trace_array_put(tr);
5602
5603         return ret;
5604 }
5605
5606 static const struct file_operations tracing_iter_fops = {
5607         .open           = tracing_trace_options_open,
5608         .read           = seq_read,
5609         .llseek         = seq_lseek,
5610         .release        = tracing_single_release_tr,
5611         .write          = tracing_trace_options_write,
5612 };
5613
5614 static const char readme_msg[] =
5615         "tracing mini-HOWTO:\n\n"
5616         "# echo 0 > tracing_on : quick way to disable tracing\n"
5617         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5618         " Important files:\n"
5619         "  trace\t\t\t- The static contents of the buffer\n"
5620         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5621         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5622         "  current_tracer\t- function and latency tracers\n"
5623         "  available_tracers\t- list of configured tracers for current_tracer\n"
5624         "  error_log\t- error log for failed commands (that support it)\n"
5625         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5626         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5627         "  trace_clock\t\t- change the clock used to order events\n"
5628         "       local:   Per cpu clock but may not be synced across CPUs\n"
5629         "      global:   Synced across CPUs but slows tracing down.\n"
5630         "     counter:   Not a clock, but just an increment\n"
5631         "      uptime:   Jiffy counter from time of boot\n"
5632         "        perf:   Same clock that perf events use\n"
5633 #ifdef CONFIG_X86_64
5634         "     x86-tsc:   TSC cycle counter\n"
5635 #endif
5636         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5637         "       delta:   Delta difference against a buffer-wide timestamp\n"
5638         "    absolute:   Absolute (standalone) timestamp\n"
5639         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5640         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5641         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5642         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5643         "\t\t\t  Remove sub-buffer with rmdir\n"
5644         "  trace_options\t\t- Set format or modify how tracing happens\n"
5645         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5646         "\t\t\t  option name\n"
5647         "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
5648 #ifdef CONFIG_DYNAMIC_FTRACE
5649         "\n  available_filter_functions - list of functions that can be filtered on\n"
5650         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5651         "\t\t\t  functions\n"
5652         "\t     accepts: func_full_name or glob-matching-pattern\n"
5653         "\t     modules: Can select a group via module\n"
5654         "\t      Format: :mod:<module-name>\n"
5655         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5656         "\t    triggers: a command to perform when function is hit\n"
5657         "\t      Format: <function>:<trigger>[:count]\n"
5658         "\t     trigger: traceon, traceoff\n"
5659         "\t\t      enable_event:<system>:<event>\n"
5660         "\t\t      disable_event:<system>:<event>\n"
5661 #ifdef CONFIG_STACKTRACE
5662         "\t\t      stacktrace\n"
5663 #endif
5664 #ifdef CONFIG_TRACER_SNAPSHOT
5665         "\t\t      snapshot\n"
5666 #endif
5667         "\t\t      dump\n"
5668         "\t\t      cpudump\n"
5669         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5670         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5671         "\t     The first one will disable tracing every time do_fault is hit\n"
5672         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5673         "\t       The first time do_trap is hit and it disables tracing, the\n"
5674         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5675         "\t       the counter will not decrement. It only decrements when the\n"
5676         "\t       trigger did work\n"
5677         "\t     To remove trigger without count:\n"
5678         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5679         "\t     To remove trigger with a count:\n"
5680         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5681         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5682         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5683         "\t    modules: Can select a group via module command :mod:\n"
5684         "\t    Does not accept triggers\n"
5685 #endif /* CONFIG_DYNAMIC_FTRACE */
5686 #ifdef CONFIG_FUNCTION_TRACER
5687         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5688         "\t\t    (function)\n"
5689         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5690         "\t\t    (function)\n"
5691 #endif
5692 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5693         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5694         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5695         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5696 #endif
5697 #ifdef CONFIG_TRACER_SNAPSHOT
5698         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5699         "\t\t\t  snapshot buffer. Read the contents for more\n"
5700         "\t\t\t  information\n"
5701 #endif
5702 #ifdef CONFIG_STACK_TRACER
5703         "  stack_trace\t\t- Shows the max stack trace when active\n"
5704         "  stack_max_size\t- Shows current max stack size that was traced\n"
5705         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5706         "\t\t\t  new trace)\n"
5707 #ifdef CONFIG_DYNAMIC_FTRACE
5708         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5709         "\t\t\t  traces\n"
5710 #endif
5711 #endif /* CONFIG_STACK_TRACER */
5712 #ifdef CONFIG_DYNAMIC_EVENTS
5713         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5714         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5715 #endif
5716 #ifdef CONFIG_KPROBE_EVENTS
5717         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5718         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5719 #endif
5720 #ifdef CONFIG_UPROBE_EVENTS
5721         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5722         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5723 #endif
5724 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5725     defined(CONFIG_FPROBE_EVENTS)
5726         "\t  accepts: event-definitions (one definition per line)\n"
5727 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5728         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5729         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5730 #endif
5731 #ifdef CONFIG_FPROBE_EVENTS
5732         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5733         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5734 #endif
5735 #ifdef CONFIG_HIST_TRIGGERS
5736         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5737 #endif
5738         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5739         "\t           -:[<group>/][<event>]\n"
5740 #ifdef CONFIG_KPROBE_EVENTS
5741         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5742   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5743 #endif
5744 #ifdef CONFIG_UPROBE_EVENTS
5745   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5746 #endif
5747         "\t     args: <name>=fetcharg[:type]\n"
5748         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5749 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5750 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5751         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5752         "\t           <argname>[->field[->field|.field...]],\n"
5753 #else
5754         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5755 #endif
5756 #else
5757         "\t           $stack<index>, $stack, $retval, $comm,\n"
5758 #endif
5759         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5760         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5761         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5762         "\t           symstr, <type>\\[<array-size>\\]\n"
5763 #ifdef CONFIG_HIST_TRIGGERS
5764         "\t    field: <stype> <name>;\n"
5765         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5766         "\t           [unsigned] char/int/long\n"
5767 #endif
5768         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5769         "\t            of the <attached-group>/<attached-event>.\n"
5770 #endif
5771         "  events/\t\t- Directory containing all trace event subsystems:\n"
5772         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5773         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5774         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5775         "\t\t\t  events\n"
5776         "      filter\t\t- If set, only events passing filter are traced\n"
5777         "  events/<system>/<event>/\t- Directory containing control files for\n"
5778         "\t\t\t  <event>:\n"
5779         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5780         "      filter\t\t- If set, only events passing filter are traced\n"
5781         "      trigger\t\t- If set, a command to perform when event is hit\n"
5782         "\t    Format: <trigger>[:count][if <filter>]\n"
5783         "\t   trigger: traceon, traceoff\n"
5784         "\t            enable_event:<system>:<event>\n"
5785         "\t            disable_event:<system>:<event>\n"
5786 #ifdef CONFIG_HIST_TRIGGERS
5787         "\t            enable_hist:<system>:<event>\n"
5788         "\t            disable_hist:<system>:<event>\n"
5789 #endif
5790 #ifdef CONFIG_STACKTRACE
5791         "\t\t    stacktrace\n"
5792 #endif
5793 #ifdef CONFIG_TRACER_SNAPSHOT
5794         "\t\t    snapshot\n"
5795 #endif
5796 #ifdef CONFIG_HIST_TRIGGERS
5797         "\t\t    hist (see below)\n"
5798 #endif
5799         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5800         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5801         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5802         "\t                  events/block/block_unplug/trigger\n"
5803         "\t   The first disables tracing every time block_unplug is hit.\n"
5804         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5805         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5806         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5807         "\t   Like function triggers, the counter is only decremented if it\n"
5808         "\t    enabled or disabled tracing.\n"
5809         "\t   To remove a trigger without a count:\n"
5810         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5811         "\t   To remove a trigger with a count:\n"
5812         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5813         "\t   Filters can be ignored when removing a trigger.\n"
5814 #ifdef CONFIG_HIST_TRIGGERS
5815         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5816         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5817         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5818         "\t            [:values=<field1[,field2,...]>]\n"
5819         "\t            [:sort=<field1[,field2,...]>]\n"
5820         "\t            [:size=#entries]\n"
5821         "\t            [:pause][:continue][:clear]\n"
5822         "\t            [:name=histname1]\n"
5823         "\t            [:nohitcount]\n"
5824         "\t            [:<handler>.<action>]\n"
5825         "\t            [if <filter>]\n\n"
5826         "\t    Note, special fields can be used as well:\n"
5827         "\t            common_timestamp - to record current timestamp\n"
5828         "\t            common_cpu - to record the CPU the event happened on\n"
5829         "\n"
5830         "\t    A hist trigger variable can be:\n"
5831         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5832         "\t        - a reference to another variable e.g. y=$x,\n"
5833         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5834         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5835         "\n"
5836         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5837         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5838         "\t    variable reference, field or numeric literal.\n"
5839         "\n"
5840         "\t    When a matching event is hit, an entry is added to a hash\n"
5841         "\t    table using the key(s) and value(s) named, and the value of a\n"
5842         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5843         "\t    correspond to fields in the event's format description.  Keys\n"
5844         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5845         "\t    Compound keys consisting of up to two fields can be specified\n"
5846         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5847         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5848         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5849         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5850         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5851         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5852         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5853         "\t    its histogram data will be shared with other triggers of the\n"
5854         "\t    same name, and trigger hits will update this common data.\n\n"
5855         "\t    Reading the 'hist' file for the event will dump the hash\n"
5856         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5857         "\t    triggers attached to an event, there will be a table for each\n"
5858         "\t    trigger in the output.  The table displayed for a named\n"
5859         "\t    trigger will be the same as any other instance having the\n"
5860         "\t    same name.  The default format used to display a given field\n"
5861         "\t    can be modified by appending any of the following modifiers\n"
5862         "\t    to the field name, as applicable:\n\n"
5863         "\t            .hex        display a number as a hex value\n"
5864         "\t            .sym        display an address as a symbol\n"
5865         "\t            .sym-offset display an address as a symbol and offset\n"
5866         "\t            .execname   display a common_pid as a program name\n"
5867         "\t            .syscall    display a syscall id as a syscall name\n"
5868         "\t            .log2       display log2 value rather than raw number\n"
5869         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5870         "\t            .usecs      display a common_timestamp in microseconds\n"
5871         "\t            .percent    display a number as a percentage value\n"
5872         "\t            .graph      display a bar-graph of a value\n\n"
5873         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5874         "\t    trigger or to start a hist trigger but not log any events\n"
5875         "\t    until told to do so.  'continue' can be used to start or\n"
5876         "\t    restart a paused hist trigger.\n\n"
5877         "\t    The 'clear' parameter will clear the contents of a running\n"
5878         "\t    hist trigger and leave its current paused/active state\n"
5879         "\t    unchanged.\n\n"
5880         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5881         "\t    raw hitcount in the histogram.\n\n"
5882         "\t    The enable_hist and disable_hist triggers can be used to\n"
5883         "\t    have one event conditionally start and stop another event's\n"
5884         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5885         "\t    the enable_event and disable_event triggers.\n\n"
5886         "\t    Hist trigger handlers and actions are executed whenever a\n"
5887         "\t    histogram entry is added or updated.  They take the form:\n\n"
5888         "\t        <handler>.<action>\n\n"
5889         "\t    The available handlers are:\n\n"
5890         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5891         "\t        onmax(var)               - invoke if var exceeds current max\n"
5892         "\t        onchange(var)            - invoke action if var changes\n\n"
5893         "\t    The available actions are:\n\n"
5894         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5895         "\t        save(field,...)                      - save current event fields\n"
5896 #ifdef CONFIG_TRACER_SNAPSHOT
5897         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5898 #endif
5899 #ifdef CONFIG_SYNTH_EVENTS
5900         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5901         "\t  Write into this file to define/undefine new synthetic events.\n"
5902         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5903 #endif
5904 #endif
5905 ;
5906
5907 static ssize_t
5908 tracing_readme_read(struct file *filp, char __user *ubuf,
5909                        size_t cnt, loff_t *ppos)
5910 {
5911         return simple_read_from_buffer(ubuf, cnt, ppos,
5912                                         readme_msg, strlen(readme_msg));
5913 }
5914
5915 static const struct file_operations tracing_readme_fops = {
5916         .open           = tracing_open_generic,
5917         .read           = tracing_readme_read,
5918         .llseek         = generic_file_llseek,
5919 };
5920
5921 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5922 {
5923         int pid = ++(*pos);
5924
5925         return trace_find_tgid_ptr(pid);
5926 }
5927
5928 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5929 {
5930         int pid = *pos;
5931
5932         return trace_find_tgid_ptr(pid);
5933 }
5934
5935 static void saved_tgids_stop(struct seq_file *m, void *v)
5936 {
5937 }
5938
5939 static int saved_tgids_show(struct seq_file *m, void *v)
5940 {
5941         int *entry = (int *)v;
5942         int pid = entry - tgid_map;
5943         int tgid = *entry;
5944
5945         if (tgid == 0)
5946                 return SEQ_SKIP;
5947
5948         seq_printf(m, "%d %d\n", pid, tgid);
5949         return 0;
5950 }
5951
5952 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5953         .start          = saved_tgids_start,
5954         .stop           = saved_tgids_stop,
5955         .next           = saved_tgids_next,
5956         .show           = saved_tgids_show,
5957 };
5958
5959 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5960 {
5961         int ret;
5962
5963         ret = tracing_check_open_get_tr(NULL);
5964         if (ret)
5965                 return ret;
5966
5967         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5968 }
5969
5970
5971 static const struct file_operations tracing_saved_tgids_fops = {
5972         .open           = tracing_saved_tgids_open,
5973         .read           = seq_read,
5974         .llseek         = seq_lseek,
5975         .release        = seq_release,
5976 };
5977
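     /*
      * The saved_cmdlines file walks savedcmd->map_cmdline_to_pid[],
      * skipping unused slots (NO_CMDLINE_MAP); saved_cmdlines_show() then
      * resolves each pid back to its cached comm via __trace_find_cmdline().
      */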
5978 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5979 {
5980         unsigned int *ptr = v;
5981
5982         if (*pos || m->count)
5983                 ptr++;
5984
5985         (*pos)++;
5986
5987         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5988              ptr++) {
5989                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5990                         continue;
5991
5992                 return ptr;
5993         }
5994
5995         return NULL;
5996 }
5997
5998 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5999 {
6000         void *v;
6001         loff_t l = 0;
6002
6003         preempt_disable();
6004         arch_spin_lock(&trace_cmdline_lock);
6005
6006         v = &savedcmd->map_cmdline_to_pid[0];
6007         while (l <= *pos) {
6008                 v = saved_cmdlines_next(m, v, &l);
6009                 if (!v)
6010                         return NULL;
6011         }
6012
6013         return v;
6014 }
6015
6016 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6017 {
6018         arch_spin_unlock(&trace_cmdline_lock);
6019         preempt_enable();
6020 }
6021
6022 static int saved_cmdlines_show(struct seq_file *m, void *v)
6023 {
6024         char buf[TASK_COMM_LEN];
6025         unsigned int *pid = v;
6026
6027         __trace_find_cmdline(*pid, buf);
6028         seq_printf(m, "%d %s\n", *pid, buf);
6029         return 0;
6030 }
6031
6032 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6033         .start          = saved_cmdlines_start,
6034         .next           = saved_cmdlines_next,
6035         .stop           = saved_cmdlines_stop,
6036         .show           = saved_cmdlines_show,
6037 };
6038
6039 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6040 {
6041         int ret;
6042
6043         ret = tracing_check_open_get_tr(NULL);
6044         if (ret)
6045                 return ret;
6046
6047         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6048 }
6049
6050 static const struct file_operations tracing_saved_cmdlines_fops = {
6051         .open           = tracing_saved_cmdlines_open,
6052         .read           = seq_read,
6053         .llseek         = seq_lseek,
6054         .release        = seq_release,
6055 };
6056
6057 static ssize_t
6058 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6059                                  size_t cnt, loff_t *ppos)
6060 {
6061         char buf[64];
6062         int r;
6063
6064         preempt_disable();
6065         arch_spin_lock(&trace_cmdline_lock);
6066         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6067         arch_spin_unlock(&trace_cmdline_lock);
6068         preempt_enable();
6069
6070         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6071 }
6072
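     /*
      * Resize by allocating the new buffer up front, swapping it in under
      * trace_cmdline_lock, and freeing the old buffer only after the lock
      * is dropped, which keeps the critical section (entered with
      * preemption disabled) short.
      */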
6073 static int tracing_resize_saved_cmdlines(unsigned int val)
6074 {
6075         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6076
6077         s = allocate_cmdlines_buffer(val);
6078         if (!s)
6079                 return -ENOMEM;
6080
6081         preempt_disable();
6082         arch_spin_lock(&trace_cmdline_lock);
6083         savedcmd_temp = savedcmd;
6084         savedcmd = s;
6085         arch_spin_unlock(&trace_cmdline_lock);
6086         preempt_enable();
6087         free_saved_cmdlines_buffer(savedcmd_temp);
6088
6089         return 0;
6090 }
6091
6092 static ssize_t
6093 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6094                                   size_t cnt, loff_t *ppos)
6095 {
6096         unsigned long val;
6097         int ret;
6098
6099         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6100         if (ret)
6101                 return ret;
6102
6103         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6104         if (!val || val > PID_MAX_DEFAULT)
6105                 return -EINVAL;
6106
6107         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6108         if (ret < 0)
6109                 return ret;
6110
6111         *ppos += cnt;
6112
6113         return cnt;
6114 }
6115
6116 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6117         .open           = tracing_open_generic,
6118         .read           = tracing_saved_cmdlines_size_read,
6119         .write          = tracing_saved_cmdlines_size_write,
6120 };
6121
6122 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6123 static union trace_eval_map_item *
6124 update_eval_map(union trace_eval_map_item *ptr)
6125 {
6126         if (!ptr->map.eval_string) {
6127                 if (ptr->tail.next) {
6128                         ptr = ptr->tail.next;
6129                         /* Set ptr to the next real item (skip head) */
6130                         ptr++;
6131                 } else
6132                         return NULL;
6133         }
6134         return ptr;
6135 }
6136
6137 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6138 {
6139         union trace_eval_map_item *ptr = v;
6140
6141         /*
6142          * Paranoid! If ptr points to end, we don't want to increment past it.
6143          * This really should never happen.
6144          */
6145         (*pos)++;
6146         ptr = update_eval_map(ptr);
6147         if (WARN_ON_ONCE(!ptr))
6148                 return NULL;
6149
6150         ptr++;
6151         ptr = update_eval_map(ptr);
6152
6153         return ptr;
6154 }
6155
6156 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6157 {
6158         union trace_eval_map_item *v;
6159         loff_t l = 0;
6160
6161         mutex_lock(&trace_eval_mutex);
6162
6163         v = trace_eval_maps;
6164         if (v)
6165                 v++;
6166
6167         while (v && l < *pos) {
6168                 v = eval_map_next(m, v, &l);
6169         }
6170
6171         return v;
6172 }
6173
6174 static void eval_map_stop(struct seq_file *m, void *v)
6175 {
6176         mutex_unlock(&trace_eval_mutex);
6177 }
6178
6179 static int eval_map_show(struct seq_file *m, void *v)
6180 {
6181         union trace_eval_map_item *ptr = v;
6182
6183         seq_printf(m, "%s %ld (%s)\n",
6184                    ptr->map.eval_string, ptr->map.eval_value,
6185                    ptr->map.system);
6186
6187         return 0;
6188 }
6189
6190 static const struct seq_operations tracing_eval_map_seq_ops = {
6191         .start          = eval_map_start,
6192         .next           = eval_map_next,
6193         .stop           = eval_map_stop,
6194         .show           = eval_map_show,
6195 };
6196
6197 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6198 {
6199         int ret;
6200
6201         ret = tracing_check_open_get_tr(NULL);
6202         if (ret)
6203                 return ret;
6204
6205         return seq_open(filp, &tracing_eval_map_seq_ops);
6206 }
6207
6208 static const struct file_operations tracing_eval_map_fops = {
6209         .open           = tracing_eval_map_open,
6210         .read           = seq_read,
6211         .llseek         = seq_lseek,
6212         .release        = seq_release,
6213 };
6214
6215 static inline union trace_eval_map_item *
6216 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6217 {
6218         /* Return tail of array given the head */
6219         return ptr + ptr->head.length + 1;
6220 }
6221
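     /*
      * Illustrative layout (a sketch, not a definition): each array hung
      * off trace_eval_maps by trace_insert_eval_map_file() below looks like
      *
      *   item[0]          head  (module pointer + length N)
      *   item[1..N]       the N struct trace_eval_map entries
      *   item[N + 1]      tail  (pointer to the next such array, or NULL)
      *
      * which is why trace_eval_jmp_to_tail() skips length + 1 items past
      * the head to reach the tail.
      */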
6222 static void
6223 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6224                            int len)
6225 {
6226         struct trace_eval_map **stop;
6227         struct trace_eval_map **map;
6228         union trace_eval_map_item *map_array;
6229         union trace_eval_map_item *ptr;
6230
6231         stop = start + len;
6232
6233         /*
6234          * The trace_eval_maps array contains the maps plus a head and tail
6235          * item, where the head holds the module and the length of the array,
6236          * and the tail holds a pointer to the next list.
6237          */
6238         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6239         if (!map_array) {
6240                 pr_warn("Unable to allocate trace eval mapping\n");
6241                 return;
6242         }
6243
6244         mutex_lock(&trace_eval_mutex);
6245
6246         if (!trace_eval_maps)
6247                 trace_eval_maps = map_array;
6248         else {
6249                 ptr = trace_eval_maps;
6250                 for (;;) {
6251                         ptr = trace_eval_jmp_to_tail(ptr);
6252                         if (!ptr->tail.next)
6253                                 break;
6254                         ptr = ptr->tail.next;
6255
6256                 }
6257                 ptr->tail.next = map_array;
6258         }
6259         map_array->head.mod = mod;
6260         map_array->head.length = len;
6261         map_array++;
6262
6263         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6264                 map_array->map = **map;
6265                 map_array++;
6266         }
6267         memset(map_array, 0, sizeof(*map_array));
6268
6269         mutex_unlock(&trace_eval_mutex);
6270 }
6271
6272 static void trace_create_eval_file(struct dentry *d_tracer)
6273 {
6274         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6275                           NULL, &tracing_eval_map_fops);
6276 }
6277
6278 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6279 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6280 static inline void trace_insert_eval_map_file(struct module *mod,
6281                               struct trace_eval_map **start, int len) { }
6282 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6283
6284 static void trace_insert_eval_map(struct module *mod,
6285                                   struct trace_eval_map **start, int len)
6286 {
6287         struct trace_eval_map **map;
6288
6289         if (len <= 0)
6290                 return;
6291
6292         map = start;
6293
6294         trace_event_eval_update(map, len);
6295
6296         trace_insert_eval_map_file(mod, start, len);
6297 }
6298
6299 static ssize_t
6300 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6301                        size_t cnt, loff_t *ppos)
6302 {
6303         struct trace_array *tr = filp->private_data;
6304         char buf[MAX_TRACER_SIZE+2];
6305         int r;
6306
6307         mutex_lock(&trace_types_lock);
6308         r = sprintf(buf, "%s\n", tr->current_trace->name);
6309         mutex_unlock(&trace_types_lock);
6310
6311         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6312 }
6313
6314 int tracer_init(struct tracer *t, struct trace_array *tr)
6315 {
6316         tracing_reset_online_cpus(&tr->array_buffer);
6317         return t->init(tr);
6318 }
6319
6320 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6321 {
6322         int cpu;
6323
6324         for_each_tracing_cpu(cpu)
6325                 per_cpu_ptr(buf->data, cpu)->entries = val;
6326 }
6327
6328 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6329 {
6330         if (cpu == RING_BUFFER_ALL_CPUS) {
6331                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6332         } else {
6333                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6334         }
6335 }
6336
6337 #ifdef CONFIG_TRACER_MAX_TRACE
6338 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6339 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6340                                         struct array_buffer *size_buf, int cpu_id)
6341 {
6342         int cpu, ret = 0;
6343
6344         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6345                 for_each_tracing_cpu(cpu) {
6346                         ret = ring_buffer_resize(trace_buf->buffer,
6347                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6348                         if (ret < 0)
6349                                 break;
6350                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6351                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6352                 }
6353         } else {
6354                 ret = ring_buffer_resize(trace_buf->buffer,
6355                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6356                 if (ret == 0)
6357                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6358                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6359         }
6360
6361         return ret;
6362 }
6363 #endif /* CONFIG_TRACER_MAX_TRACE */
6364
6365 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6366                                         unsigned long size, int cpu)
6367 {
6368         int ret;
6369
6370         /*
6371          * If kernel or user changes the size of the ring buffer
6372          * we use the size that was given, and we can forget about
6373          * expanding it later.
6374          */
6375         trace_set_ring_buffer_expanded(tr);
6376
6377         /* May be called before buffers are initialized */
6378         if (!tr->array_buffer.buffer)
6379                 return 0;
6380
6381         /* Do not allow tracing while resizing ring buffer */
6382         tracing_stop_tr(tr);
6383
6384         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6385         if (ret < 0)
6386                 goto out_start;
6387
6388 #ifdef CONFIG_TRACER_MAX_TRACE
6389         if (!tr->allocated_snapshot)
6390                 goto out;
6391
6392         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6393         if (ret < 0) {
6394                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6395                                                      &tr->array_buffer, cpu);
6396                 if (r < 0) {
6397                         /*
6398                          * AARGH! We are left with a max buffer of a
6399                          * different size!
6400                          * The max buffer is our "snapshot" buffer.
6401                          * When a tracer needs a snapshot (one of the
6402                          * latency tracers), it swaps the max buffer
6403                          * with the saved snapshot. We succeeded in
6404                          * updating the size of the main buffer, but
6405                          * failed to update the size of the max buffer.
6406                          * Then, when we tried to reset the main buffer
6407                          * to its original size, we failed there too.
6408                          * This is very unlikely to happen, but if it
6409                          * does, warn and kill all tracing.
6410                          */
6411                         WARN_ON(1);
6412                         tracing_disabled = 1;
6413                 }
6414                 goto out_start;
6415         }
6416
6417         update_buffer_entries(&tr->max_buffer, cpu);
6418
6419  out:
6420 #endif /* CONFIG_TRACER_MAX_TRACE */
6421
6422         update_buffer_entries(&tr->array_buffer, cpu);
6423  out_start:
6424         tracing_start_tr(tr);
6425         return ret;
6426 }
6427
6428 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6429                                   unsigned long size, int cpu_id)
6430 {
6431         int ret;
6432
6433         mutex_lock(&trace_types_lock);
6434
6435         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6436                 /* make sure this cpu is enabled in the mask */
6437                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6438                         ret = -EINVAL;
6439                         goto out;
6440                 }
6441         }
6442
6443         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6444         if (ret < 0)
6445                 ret = -ENOMEM;
6446
6447 out:
6448         mutex_unlock(&trace_types_lock);
6449
6450         return ret;
6451 }
6452
6453
6454 /**
6455  * tracing_update_buffers - used by tracing facility to expand ring buffers
6456  * @tr: The tracing instance
6457  *
6458  * To save memory when tracing is configured in but never used, the
6459  * ring buffers are initially set to a minimum size. Once a user
6460  * starts to use the tracing facility, they need to grow to their
6461  * default size.
6462  *
6463  * This function is to be called when a tracer is about to be used.
6464  */
6465 int tracing_update_buffers(struct trace_array *tr)
6466 {
6467         int ret = 0;
6468
6469         mutex_lock(&trace_types_lock);
6470         if (!tr->ring_buffer_expanded)
6471                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6472                                                 RING_BUFFER_ALL_CPUS);
6473         mutex_unlock(&trace_types_lock);
6474
6475         return ret;
6476 }
6477
6478 struct trace_option_dentry;
6479
6480 static void
6481 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6482
6483 /*
6484  * Used to clear out the tracer before deletion of an instance.
6485  * Must have trace_types_lock held.
6486  */
6487 static void tracing_set_nop(struct trace_array *tr)
6488 {
6489         if (tr->current_trace == &nop_trace)
6490                 return;
6491
6492         tr->current_trace->enabled--;
6493
6494         if (tr->current_trace->reset)
6495                 tr->current_trace->reset(tr);
6496
6497         tr->current_trace = &nop_trace;
6498 }
6499
6500 static bool tracer_options_updated;
6501
6502 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6503 {
6504         /* Only enable if the directory has been created already. */
6505         if (!tr->dir)
6506                 return;
6507
6508         /* Only create trace option files after update_tracer_options() has finished */
6509         if (!tracer_options_updated)
6510                 return;
6511
6512         create_trace_option_files(tr, t);
6513 }
6514
6515 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6516 {
6517         struct tracer *t;
6518 #ifdef CONFIG_TRACER_MAX_TRACE
6519         bool had_max_tr;
6520 #endif
6521         int ret = 0;
6522
6523         mutex_lock(&trace_types_lock);
6524
6525         if (!tr->ring_buffer_expanded) {
6526                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6527                                                 RING_BUFFER_ALL_CPUS);
6528                 if (ret < 0)
6529                         goto out;
6530                 ret = 0;
6531         }
6532
6533         for (t = trace_types; t; t = t->next) {
6534                 if (strcmp(t->name, buf) == 0)
6535                         break;
6536         }
6537         if (!t) {
6538                 ret = -EINVAL;
6539                 goto out;
6540         }
6541         if (t == tr->current_trace)
6542                 goto out;
6543
6544 #ifdef CONFIG_TRACER_SNAPSHOT
6545         if (t->use_max_tr) {
6546                 local_irq_disable();
6547                 arch_spin_lock(&tr->max_lock);
6548                 if (tr->cond_snapshot)
6549                         ret = -EBUSY;
6550                 arch_spin_unlock(&tr->max_lock);
6551                 local_irq_enable();
6552                 if (ret)
6553                         goto out;
6554         }
6555 #endif
6556         /* Some tracers won't work on kernel command line */
6557         if (system_state < SYSTEM_RUNNING && t->noboot) {
6558                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6559                         t->name);
6560                 goto out;
6561         }
6562
6563         /* Some tracers are only allowed for the top level buffer */
6564         if (!trace_ok_for_array(t, tr)) {
6565                 ret = -EINVAL;
6566                 goto out;
6567         }
6568
6569         /* If trace pipe files are being read, we can't change the tracer */
6570         if (tr->trace_ref) {
6571                 ret = -EBUSY;
6572                 goto out;
6573         }
6574
6575         trace_branch_disable();
6576
6577         tr->current_trace->enabled--;
6578
6579         if (tr->current_trace->reset)
6580                 tr->current_trace->reset(tr);
6581
6582 #ifdef CONFIG_TRACER_MAX_TRACE
6583         had_max_tr = tr->current_trace->use_max_tr;
6584
6585         /* Current trace needs to be nop_trace before synchronize_rcu */
6586         tr->current_trace = &nop_trace;
6587
6588         if (had_max_tr && !t->use_max_tr) {
6589                 /*
6590                  * We need to make sure that the update_max_tr sees that
6591                  * current_trace changed to nop_trace to keep it from
6592                  * swapping the buffers after we resize it.
6593                  * update_max_tr() is called with interrupts disabled,
6594                  * so a synchronize_rcu() is sufficient.
6595                  */
6596                 synchronize_rcu();
6597                 free_snapshot(tr);
6598         }
6599
6600         if (t->use_max_tr && !tr->allocated_snapshot) {
6601                 ret = tracing_alloc_snapshot_instance(tr);
6602                 if (ret < 0)
6603                         goto out;
6604         }
6605 #else
6606         tr->current_trace = &nop_trace;
6607 #endif
6608
6609         if (t->init) {
6610                 ret = tracer_init(t, tr);
6611                 if (ret)
6612                         goto out;
6613         }
6614
6615         tr->current_trace = t;
6616         tr->current_trace->enabled++;
6617         trace_branch_enable(tr);
6618  out:
6619         mutex_unlock(&trace_types_lock);
6620
6621         return ret;
6622 }
6623
6624 static ssize_t
6625 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6626                         size_t cnt, loff_t *ppos)
6627 {
6628         struct trace_array *tr = filp->private_data;
6629         char buf[MAX_TRACER_SIZE+1];
6630         char *name;
6631         size_t ret;
6632         int err;
6633
6634         ret = cnt;
6635
6636         if (cnt > MAX_TRACER_SIZE)
6637                 cnt = MAX_TRACER_SIZE;
6638
6639         if (copy_from_user(buf, ubuf, cnt))
6640                 return -EFAULT;
6641
6642         buf[cnt] = 0;
6643
6644         name = strim(buf);
6645
6646         err = tracing_set_tracer(tr, name);
6647         if (err)
6648                 return err;
6649
6650         *ppos += ret;
6651
6652         return ret;
6653 }
6654
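     /*
      * Illustrative usage of the "current_tracer" file backed by
      * tracing_set_trace_read()/tracing_set_trace_write() above (a sketch;
      * which tracers exist depends on the kernel configuration):
      *
      *   # cat available_tracers
      *   # echo function > current_tracer   # ends up in tracing_set_tracer()
      *   # echo nop > current_tracer        # back to the no-op tracer
      */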
6655 static ssize_t
6656 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6657                    size_t cnt, loff_t *ppos)
6658 {
6659         char buf[64];
6660         int r;
6661
6662         r = snprintf(buf, sizeof(buf), "%ld\n",
6663                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6664         if (r > sizeof(buf))
6665                 r = sizeof(buf);
6666         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6667 }
6668
6669 static ssize_t
6670 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6671                     size_t cnt, loff_t *ppos)
6672 {
6673         unsigned long val;
6674         int ret;
6675
6676         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6677         if (ret)
6678                 return ret;
6679
6680         *ptr = val * 1000;
6681
6682         return cnt;
6683 }
6684
6685 static ssize_t
6686 tracing_thresh_read(struct file *filp, char __user *ubuf,
6687                     size_t cnt, loff_t *ppos)
6688 {
6689         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6690 }
6691
6692 static ssize_t
6693 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6694                      size_t cnt, loff_t *ppos)
6695 {
6696         struct trace_array *tr = filp->private_data;
6697         int ret;
6698
6699         mutex_lock(&trace_types_lock);
6700         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6701         if (ret < 0)
6702                 goto out;
6703
6704         if (tr->current_trace->update_thresh) {
6705                 ret = tr->current_trace->update_thresh(tr);
6706                 if (ret < 0)
6707                         goto out;
6708         }
6709
6710         ret = cnt;
6711 out:
6712         mutex_unlock(&trace_types_lock);
6713
6714         return ret;
6715 }
6716
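     /*
      * Illustrative usage of the "tracing_thresh" file above. Values are
      * read and written in microseconds (tracing_nsecs_write() converts to
      * nanoseconds); 0 disables the threshold. A sketch:
      *
      *   # echo 100 > tracing_thresh   # latency tracers record > 100 usecs
      *   # cat tracing_thresh
      */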
6717 #ifdef CONFIG_TRACER_MAX_TRACE
6718
6719 static ssize_t
6720 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6721                      size_t cnt, loff_t *ppos)
6722 {
6723         struct trace_array *tr = filp->private_data;
6724
6725         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6726 }
6727
6728 static ssize_t
6729 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6730                       size_t cnt, loff_t *ppos)
6731 {
6732         struct trace_array *tr = filp->private_data;
6733
6734         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6735 }
6736
6737 #endif
6738
6739 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6740 {
6741         if (cpu == RING_BUFFER_ALL_CPUS) {
6742                 if (cpumask_empty(tr->pipe_cpumask)) {
6743                         cpumask_setall(tr->pipe_cpumask);
6744                         return 0;
6745                 }
6746         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6747                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6748                 return 0;
6749         }
6750         return -EBUSY;
6751 }
6752
6753 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6754 {
6755         if (cpu == RING_BUFFER_ALL_CPUS) {
6756                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6757                 cpumask_clear(tr->pipe_cpumask);
6758         } else {
6759                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6760                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6761         }
6762 }
6763
6764 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6765 {
6766         struct trace_array *tr = inode->i_private;
6767         struct trace_iterator *iter;
6768         int cpu;
6769         int ret;
6770
6771         ret = tracing_check_open_get_tr(tr);
6772         if (ret)
6773                 return ret;
6774
6775         mutex_lock(&trace_types_lock);
6776         cpu = tracing_get_cpu(inode);
6777         ret = open_pipe_on_cpu(tr, cpu);
6778         if (ret)
6779                 goto fail_pipe_on_cpu;
6780
6781         /* create a buffer to store the information to pass to userspace */
6782         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6783         if (!iter) {
6784                 ret = -ENOMEM;
6785                 goto fail_alloc_iter;
6786         }
6787
6788         trace_seq_init(&iter->seq);
6789         iter->trace = tr->current_trace;
6790
6791         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6792                 ret = -ENOMEM;
6793                 goto fail;
6794         }
6795
6796         /* trace pipe does not show start of buffer */
6797         cpumask_setall(iter->started);
6798
6799         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6800                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6801
6802         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6803         if (trace_clocks[tr->clock_id].in_ns)
6804                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6805
6806         iter->tr = tr;
6807         iter->array_buffer = &tr->array_buffer;
6808         iter->cpu_file = cpu;
6809         mutex_init(&iter->mutex);
6810         filp->private_data = iter;
6811
6812         if (iter->trace->pipe_open)
6813                 iter->trace->pipe_open(iter);
6814
6815         nonseekable_open(inode, filp);
6816
6817         tr->trace_ref++;
6818
6819         mutex_unlock(&trace_types_lock);
6820         return ret;
6821
6822 fail:
6823         kfree(iter);
6824 fail_alloc_iter:
6825         close_pipe_on_cpu(tr, cpu);
6826 fail_pipe_on_cpu:
6827         __trace_array_put(tr);
6828         mutex_unlock(&trace_types_lock);
6829         return ret;
6830 }
6831
6832 static int tracing_release_pipe(struct inode *inode, struct file *file)
6833 {
6834         struct trace_iterator *iter = file->private_data;
6835         struct trace_array *tr = inode->i_private;
6836
6837         mutex_lock(&trace_types_lock);
6838
6839         tr->trace_ref--;
6840
6841         if (iter->trace->pipe_close)
6842                 iter->trace->pipe_close(iter);
6843         close_pipe_on_cpu(tr, iter->cpu_file);
6844         mutex_unlock(&trace_types_lock);
6845
6846         free_trace_iter_content(iter);
6847         kfree(iter);
6848
6849         trace_array_put(tr);
6850
6851         return 0;
6852 }
6853
6854 static __poll_t
6855 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6856 {
6857         struct trace_array *tr = iter->tr;
6858
6859         /* Iterators are static; they should be either filled or empty */
6860         if (trace_buffer_iter(iter, iter->cpu_file))
6861                 return EPOLLIN | EPOLLRDNORM;
6862
6863         if (tr->trace_flags & TRACE_ITER_BLOCK)
6864                 /*
6865                  * Always select as readable when in blocking mode
6866                  */
6867                 return EPOLLIN | EPOLLRDNORM;
6868         else
6869                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6870                                              filp, poll_table, iter->tr->buffer_percent);
6871 }
6872
6873 static __poll_t
6874 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6875 {
6876         struct trace_iterator *iter = filp->private_data;
6877
6878         return trace_poll(iter, filp, poll_table);
6879 }
6880
6881 /* Must be called with iter->mutex held. */
6882 static int tracing_wait_pipe(struct file *filp)
6883 {
6884         struct trace_iterator *iter = filp->private_data;
6885         int ret;
6886
6887         while (trace_empty(iter)) {
6888
6889                 if ((filp->f_flags & O_NONBLOCK)) {
6890                         return -EAGAIN;
6891                 }
6892
6893                 /*
6894                  * We block until we read something and tracing is disabled.
6895                  * We still block if tracing is disabled but we have never
6896                  * read anything. This allows a user to cat this file, and
6897                  * then enable tracing. But after we have read something,
6898                  * we give an EOF when tracing is disabled again.
6899                  *
6900                  * iter->pos will be 0 if we haven't read anything.
6901                  */
6902                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6903                         break;
6904
6905                 mutex_unlock(&iter->mutex);
6906
6907                 ret = wait_on_pipe(iter, 0);
6908
6909                 mutex_lock(&iter->mutex);
6910
6911                 if (ret)
6912                         return ret;
6913         }
6914
6915         return 1;
6916 }
6917
6918 /*
6919  * Consumer reader.
6920  */
6921 static ssize_t
6922 tracing_read_pipe(struct file *filp, char __user *ubuf,
6923                   size_t cnt, loff_t *ppos)
6924 {
6925         struct trace_iterator *iter = filp->private_data;
6926         ssize_t sret;
6927
6928         /*
6929          * Avoid more than one consumer on a single file descriptor.
6930          * This is just a matter of trace coherency; the ring buffer
6931          * itself is protected.
6932          */
6933         mutex_lock(&iter->mutex);
6934
6935         /* return any leftover data */
6936         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6937         if (sret != -EBUSY)
6938                 goto out;
6939
6940         trace_seq_init(&iter->seq);
6941
6942         if (iter->trace->read) {
6943                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6944                 if (sret)
6945                         goto out;
6946         }
6947
6948 waitagain:
6949         sret = tracing_wait_pipe(filp);
6950         if (sret <= 0)
6951                 goto out;
6952
6953         /* stop when tracing is finished */
6954         if (trace_empty(iter)) {
6955                 sret = 0;
6956                 goto out;
6957         }
6958
6959         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6960                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6961
6962         /* reset all but tr, trace, and overruns */
6963         trace_iterator_reset(iter);
6964         cpumask_clear(iter->started);
6965         trace_seq_init(&iter->seq);
6966
6967         trace_event_read_lock();
6968         trace_access_lock(iter->cpu_file);
6969         while (trace_find_next_entry_inc(iter) != NULL) {
6970                 enum print_line_t ret;
6971                 int save_len = iter->seq.seq.len;
6972
6973                 ret = print_trace_line(iter);
6974                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6975                         /*
6976                          * If one print_trace_line() fills the entire trace_seq in
6977                          * one shot, trace_seq_to_user() will return -EBUSY because
6978                          * save_len == 0. In that case we need to consume the event,
6979                          * otherwise the loop will peek at it again, looping forever.
6980                          */
6981                         if (save_len == 0) {
6982                                 iter->seq.full = 0;
6983                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6984                                 trace_consume(iter);
6985                                 break;
6986                         }
6987
6988                         /* In other cases, don't print partial lines */
6989                         iter->seq.seq.len = save_len;
6990                         break;
6991                 }
6992                 if (ret != TRACE_TYPE_NO_CONSUME)
6993                         trace_consume(iter);
6994
6995                 if (trace_seq_used(&iter->seq) >= cnt)
6996                         break;
6997
6998                 /*
6999                  * Setting the full flag means we reached the trace_seq buffer
7000                  * size and should have left via the partial-line condition above;
7001                  * it means one of the trace_seq_* functions was not used properly.
7002                  */
7003                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7004                           iter->ent->type);
7005         }
7006         trace_access_unlock(iter->cpu_file);
7007         trace_event_read_unlock();
7008
7009         /* Now copy what we have to the user */
7010         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7011         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7012                 trace_seq_init(&iter->seq);
7013
7014         /*
7015          * If there was nothing to send to user, in spite of consuming trace
7016          * entries, go back to wait for more entries.
7017          */
7018         if (sret == -EBUSY)
7019                 goto waitagain;
7020
7021 out:
7022         mutex_unlock(&iter->mutex);
7023
7024         return sret;
7025 }
7026
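     /*
      * Illustrative usage of the "trace_pipe" file serviced by
      * tracing_read_pipe() above (a sketch). Reads are destructive and
      * block until data is available, so something like
      *
      *   # cat trace_pipe > /tmp/trace.txt &
      *   # echo 1 > tracing_on
      *
      * streams events as they are produced, consuming them from the
      * ring buffer.
      */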
7027 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7028                                      unsigned int idx)
7029 {
7030         __free_page(spd->pages[idx]);
7031 }
7032
7033 static size_t
7034 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7035 {
7036         size_t count;
7037         int save_len;
7038         int ret;
7039
7040         /* Seq buffer is page-sized, exactly what we need. */
7041         for (;;) {
7042                 save_len = iter->seq.seq.len;
7043                 ret = print_trace_line(iter);
7044
7045                 if (trace_seq_has_overflowed(&iter->seq)) {
7046                         iter->seq.seq.len = save_len;
7047                         break;
7048                 }
7049
7050                 /*
7051                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
7052                  * should only be returned if iter->seq overflowed. But
7053                  * check it anyway to be safe.
7054                  */
7055                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7056                         iter->seq.seq.len = save_len;
7057                         break;
7058                 }
7059
7060                 count = trace_seq_used(&iter->seq) - save_len;
7061                 if (rem < count) {
7062                         rem = 0;
7063                         iter->seq.seq.len = save_len;
7064                         break;
7065                 }
7066
7067                 if (ret != TRACE_TYPE_NO_CONSUME)
7068                         trace_consume(iter);
7069                 rem -= count;
7070                 if (!trace_find_next_entry_inc(iter))   {
7071                         rem = 0;
7072                         iter->ent = NULL;
7073                         break;
7074                 }
7075         }
7076
7077         return rem;
7078 }
7079
7080 static ssize_t tracing_splice_read_pipe(struct file *filp,
7081                                         loff_t *ppos,
7082                                         struct pipe_inode_info *pipe,
7083                                         size_t len,
7084                                         unsigned int flags)
7085 {
7086         struct page *pages_def[PIPE_DEF_BUFFERS];
7087         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7088         struct trace_iterator *iter = filp->private_data;
7089         struct splice_pipe_desc spd = {
7090                 .pages          = pages_def,
7091                 .partial        = partial_def,
7092                 .nr_pages       = 0, /* This gets updated below. */
7093                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7094                 .ops            = &default_pipe_buf_ops,
7095                 .spd_release    = tracing_spd_release_pipe,
7096         };
7097         ssize_t ret;
7098         size_t rem;
7099         unsigned int i;
7100
7101         if (splice_grow_spd(pipe, &spd))
7102                 return -ENOMEM;
7103
7104         mutex_lock(&iter->mutex);
7105
7106         if (iter->trace->splice_read) {
7107                 ret = iter->trace->splice_read(iter, filp,
7108                                                ppos, pipe, len, flags);
7109                 if (ret)
7110                         goto out_err;
7111         }
7112
7113         ret = tracing_wait_pipe(filp);
7114         if (ret <= 0)
7115                 goto out_err;
7116
7117         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7118                 ret = -EFAULT;
7119                 goto out_err;
7120         }
7121
7122         trace_event_read_lock();
7123         trace_access_lock(iter->cpu_file);
7124
7125         /* Fill as many pages as possible. */
7126         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7127                 spd.pages[i] = alloc_page(GFP_KERNEL);
7128                 if (!spd.pages[i])
7129                         break;
7130
7131                 rem = tracing_fill_pipe_page(rem, iter);
7132
7133                 /* Copy the data into the page, so we can start over. */
7134                 ret = trace_seq_to_buffer(&iter->seq,
7135                                           page_address(spd.pages[i]),
7136                                           trace_seq_used(&iter->seq));
7137                 if (ret < 0) {
7138                         __free_page(spd.pages[i]);
7139                         break;
7140                 }
7141                 spd.partial[i].offset = 0;
7142                 spd.partial[i].len = trace_seq_used(&iter->seq);
7143
7144                 trace_seq_init(&iter->seq);
7145         }
7146
7147         trace_access_unlock(iter->cpu_file);
7148         trace_event_read_unlock();
7149         mutex_unlock(&iter->mutex);
7150
7151         spd.nr_pages = i;
7152
7153         if (i)
7154                 ret = splice_to_pipe(pipe, &spd);
7155         else
7156                 ret = 0;
7157 out:
7158         splice_shrink_spd(&spd);
7159         return ret;
7160
7161 out_err:
7162         mutex_unlock(&iter->mutex);
7163         goto out;
7164 }
7165
7166 static ssize_t
7167 tracing_entries_read(struct file *filp, char __user *ubuf,
7168                      size_t cnt, loff_t *ppos)
7169 {
7170         struct inode *inode = file_inode(filp);
7171         struct trace_array *tr = inode->i_private;
7172         int cpu = tracing_get_cpu(inode);
7173         char buf[64];
7174         int r = 0;
7175         ssize_t ret;
7176
7177         mutex_lock(&trace_types_lock);
7178
7179         if (cpu == RING_BUFFER_ALL_CPUS) {
7180                 int cpu, buf_size_same;
7181                 unsigned long size;
7182
7183                 size = 0;
7184                 buf_size_same = 1;
7185                 /* check if all cpu buffer sizes are the same */
7186                 for_each_tracing_cpu(cpu) {
7187                         /* fill in the size from the first enabled cpu */
7188                         if (size == 0)
7189                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7190                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7191                                 buf_size_same = 0;
7192                                 break;
7193                         }
7194                 }
7195
7196                 if (buf_size_same) {
7197                         if (!tr->ring_buffer_expanded)
7198                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7199                                             size >> 10,
7200                                             trace_buf_size >> 10);
7201                         else
7202                                 r = sprintf(buf, "%lu\n", size >> 10);
7203                 } else
7204                         r = sprintf(buf, "X\n");
7205         } else
7206                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7207
7208         mutex_unlock(&trace_types_lock);
7209
7210         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7211         return ret;
7212 }
7213
7214 static ssize_t
7215 tracing_entries_write(struct file *filp, const char __user *ubuf,
7216                       size_t cnt, loff_t *ppos)
7217 {
7218         struct inode *inode = file_inode(filp);
7219         struct trace_array *tr = inode->i_private;
7220         unsigned long val;
7221         int ret;
7222
7223         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7224         if (ret)
7225                 return ret;
7226
7227         /* must have at least 1 entry */
7228         if (!val)
7229                 return -EINVAL;
7230
7231         /* value is in KB */
7232         val <<= 10;
7233         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7234         if (ret < 0)
7235                 return ret;
7236
7237         *ppos += cnt;
7238
7239         return cnt;
7240 }
7241
7242 static ssize_t
7243 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7244                                 size_t cnt, loff_t *ppos)
7245 {
7246         struct trace_array *tr = filp->private_data;
7247         char buf[64];
7248         int r, cpu;
7249         unsigned long size = 0, expanded_size = 0;
7250
7251         mutex_lock(&trace_types_lock);
7252         for_each_tracing_cpu(cpu) {
7253                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7254                 if (!tr->ring_buffer_expanded)
7255                         expanded_size += trace_buf_size >> 10;
7256         }
7257         if (tr->ring_buffer_expanded)
7258                 r = sprintf(buf, "%lu\n", size);
7259         else
7260                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7261         mutex_unlock(&trace_types_lock);
7262
7263         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7264 }
7265
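     /*
      * Illustrative tracefs usage for the two entry-count files above
      * (a sketch; file names as created later in this file):
      *
      *   # cat buffer_size_kb           # per-cpu size in KiB, "X" if cpus differ
      *   # echo 4096 > buffer_size_kb   # resize every per-cpu buffer to 4 MiB
      *   # cat buffer_total_size_kb     # sum over all tracing cpus
      *
      * Writes are interpreted in KiB and go through
      * tracing_resize_ring_buffer().
      */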
7266 static ssize_t
7267 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7268                           size_t cnt, loff_t *ppos)
7269 {
7270         /*
7271          * There is no need to read what the user has written; this function
7272          * exists just so that using "echo" on this file does not fail.
7273          */
7274
7275         *ppos += cnt;
7276
7277         return cnt;
7278 }
7279
7280 static int
7281 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7282 {
7283         struct trace_array *tr = inode->i_private;
7284
7285         /* disable tracing ? */
7286         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7287                 tracer_tracing_off(tr);
7288         /* resize the ring buffer to 0 */
7289         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7290
7291         trace_array_put(tr);
7292
7293         return 0;
7294 }
7295
7296 #define TRACE_MARKER_MAX_SIZE           4096
7297
7298 static ssize_t
7299 tracing_mark_write(struct file *filp, const char __user *ubuf,
7300                                         size_t cnt, loff_t *fpos)
7301 {
7302         struct trace_array *tr = filp->private_data;
7303         struct ring_buffer_event *event;
7304         enum event_trigger_type tt = ETT_NONE;
7305         struct trace_buffer *buffer;
7306         struct print_entry *entry;
7307         int meta_size;
7308         ssize_t written;
7309         size_t size;
7310         int len;
7311
7312 /* Used in tracing_mark_raw_write() as well */
7313 #define FAULTED_STR "<faulted>"
7314 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7315
7316         if (tracing_disabled)
7317                 return -EINVAL;
7318
7319         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7320                 return -EINVAL;
7321
7322         if ((ssize_t)cnt < 0)
7323                 return -EINVAL;
7324
7325         if (cnt > TRACE_MARKER_MAX_SIZE)
7326                 cnt = TRACE_MARKER_MAX_SIZE;
7327
7328         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7329  again:
7330         size = cnt + meta_size;
7331
7332         /* If less than "<faulted>", then make sure we can still add that */
7333         if (cnt < FAULTED_SIZE)
7334                 size += FAULTED_SIZE - cnt;
7335
7336         buffer = tr->array_buffer.buffer;
7337         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7338                                             tracing_gen_ctx());
7339         if (unlikely(!event)) {
7340                 /*
7341                  * If the size was greater than what was allowed, then
7342                  * make it smaller and try again.
7343                  */
7344                 if (size > ring_buffer_max_event_size(buffer)) {
7345                         /* With cnt < FAULTED_SIZE, size should never exceed the max */
7346                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7347                                 return -EBADF;
7348                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7349                         /* The above should only happen once */
7350                         if (WARN_ON_ONCE(cnt + meta_size == size))
7351                                 return -EBADF;
7352                         goto again;
7353                 }
7354
7355                 /* Ring buffer disabled, return as if not open for write */
7356                 return -EBADF;
7357         }
7358
7359         entry = ring_buffer_event_data(event);
7360         entry->ip = _THIS_IP_;
7361
7362         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7363         if (len) {
7364                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7365                 cnt = FAULTED_SIZE;
7366                 written = -EFAULT;
7367         } else
7368                 written = cnt;
7369
7370         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7371                 /* do not add \n before testing triggers, but add \0 */
7372                 entry->buf[cnt] = '\0';
7373                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7374         }
7375
7376         if (entry->buf[cnt - 1] != '\n') {
7377                 entry->buf[cnt] = '\n';
7378                 entry->buf[cnt + 1] = '\0';
7379         } else
7380                 entry->buf[cnt] = '\0';
7381
7382         if (static_branch_unlikely(&trace_marker_exports_enabled))
7383                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7384         __buffer_unlock_commit(buffer, event);
7385
7386         if (tt)
7387                 event_triggers_post_call(tr->trace_marker_file, tt);
7388
7389         return written;
7390 }
7391
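     /*
      * Illustrative userspace use of the "trace_marker" file backed by
      * tracing_mark_write() (a sketch, error handling omitted):
      *
      *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
      *   write(fd, "hello from userspace", 20);
      *
      * Each write becomes one TRACE_PRINT event; writes longer than
      * TRACE_MARKER_MAX_SIZE are truncated, and a faulting user buffer is
      * recorded as "<faulted>".
      */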
7392 static ssize_t
7393 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7394                                         size_t cnt, loff_t *fpos)
7395 {
7396         struct trace_array *tr = filp->private_data;
7397         struct ring_buffer_event *event;
7398         struct trace_buffer *buffer;
7399         struct raw_data_entry *entry;
7400         ssize_t written;
7401         int size;
7402         int len;
7403
7404 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7405
7406         if (tracing_disabled)
7407                 return -EINVAL;
7408
7409         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7410                 return -EINVAL;
7411
7412         /* The marker must at least have a tag id */
7413         if (cnt < sizeof(unsigned int))
7414                 return -EINVAL;
7415
7416         size = sizeof(*entry) + cnt;
7417         if (cnt < FAULT_SIZE_ID)
7418                 size += FAULT_SIZE_ID - cnt;
7419
7420         buffer = tr->array_buffer.buffer;
7421
7422         if (size > ring_buffer_max_event_size(buffer))
7423                 return -EINVAL;
7424
7425         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7426                                             tracing_gen_ctx());
7427         if (!event)
7428                 /* Ring buffer disabled, return as if not open for write */
7429                 return -EBADF;
7430
7431         entry = ring_buffer_event_data(event);
7432
7433         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7434         if (len) {
7435                 entry->id = -1;
7436                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7437                 written = -EFAULT;
7438         } else
7439                 written = cnt;
7440
7441         __buffer_unlock_commit(buffer, event);
7442
7443         return written;
7444 }
7445
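     /*
      * Illustrative userspace use of the "trace_marker_raw" file backed by
      * tracing_mark_raw_write() (a sketch, error handling omitted). The
      * payload is binary and must start with an unsigned int tag id:
      *
      *   struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
      *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
      *   write(fd, &rec, sizeof(rec));
      */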
7446 static int tracing_clock_show(struct seq_file *m, void *v)
7447 {
7448         struct trace_array *tr = m->private;
7449         int i;
7450
7451         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7452                 seq_printf(m,
7453                         "%s%s%s%s", i ? " " : "",
7454                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7455                         i == tr->clock_id ? "]" : "");
7456         seq_putc(m, '\n');
7457
7458         return 0;
7459 }
7460
7461 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7462 {
7463         int i;
7464
7465         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7466                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7467                         break;
7468         }
7469         if (i == ARRAY_SIZE(trace_clocks))
7470                 return -EINVAL;
7471
7472         mutex_lock(&trace_types_lock);
7473
7474         tr->clock_id = i;
7475
7476         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7477
7478         /*
7479          * New clock may not be consistent with the previous clock.
7480          * Reset the buffer so that it doesn't have incomparable timestamps.
7481          */
7482         tracing_reset_online_cpus(&tr->array_buffer);
7483
7484 #ifdef CONFIG_TRACER_MAX_TRACE
7485         if (tr->max_buffer.buffer)
7486                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7487         tracing_reset_online_cpus(&tr->max_buffer);
7488 #endif
7489
7490         mutex_unlock(&trace_types_lock);
7491
7492         return 0;
7493 }
7494
7495 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7496                                    size_t cnt, loff_t *fpos)
7497 {
7498         struct seq_file *m = filp->private_data;
7499         struct trace_array *tr = m->private;
7500         char buf[64];
7501         const char *clockstr;
7502         int ret;
7503
7504         if (cnt >= sizeof(buf))
7505                 return -EINVAL;
7506
7507         if (copy_from_user(buf, ubuf, cnt))
7508                 return -EFAULT;
7509
7510         buf[cnt] = 0;
7511
7512         clockstr = strstrip(buf);
7513
7514         ret = tracing_set_clock(tr, clockstr);
7515         if (ret)
7516                 return ret;
7517
7518         *fpos += cnt;
7519
7520         return cnt;
7521 }
7522
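     /*
      * Illustrative usage of the "trace_clock" file handled by
      * tracing_clock_show()/tracing_clock_write() above (a sketch; the
      * available clocks come from the trace_clocks[] table):
      *
      *   # cat trace_clock        # current clock shown in [brackets]
      *   # echo mono > trace_clock
      *
      * Changing the clock resets the buffers, since timestamps taken with
      * different clocks are not comparable.
      */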
7523 static int tracing_clock_open(struct inode *inode, struct file *file)
7524 {
7525         struct trace_array *tr = inode->i_private;
7526         int ret;
7527
7528         ret = tracing_check_open_get_tr(tr);
7529         if (ret)
7530                 return ret;
7531
7532         ret = single_open(file, tracing_clock_show, inode->i_private);
7533         if (ret < 0)
7534                 trace_array_put(tr);
7535
7536         return ret;
7537 }
7538
7539 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7540 {
7541         struct trace_array *tr = m->private;
7542
7543         mutex_lock(&trace_types_lock);
7544
7545         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7546                 seq_puts(m, "delta [absolute]\n");
7547         else
7548                 seq_puts(m, "[delta] absolute\n");
7549
7550         mutex_unlock(&trace_types_lock);
7551
7552         return 0;
7553 }
7554
7555 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7556 {
7557         struct trace_array *tr = inode->i_private;
7558         int ret;
7559
7560         ret = tracing_check_open_get_tr(tr);
7561         if (ret)
7562                 return ret;
7563
7564         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7565         if (ret < 0)
7566                 trace_array_put(tr);
7567
7568         return ret;
7569 }
7570
7571 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7572 {
7573         if (rbe == this_cpu_read(trace_buffered_event))
7574                 return ring_buffer_time_stamp(buffer);
7575
7576         return ring_buffer_event_time_stamp(buffer, rbe);
7577 }
7578
7579 /*
7580  * Set or disable using the per CPU trace_buffer_event when possible.
7581  */
7582 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7583 {
7584         int ret = 0;
7585
7586         mutex_lock(&trace_types_lock);
7587
7588         if (set && tr->no_filter_buffering_ref++)
7589                 goto out;
7590
7591         if (!set) {
7592                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7593                         ret = -EINVAL;
7594                         goto out;
7595                 }
7596
7597                 --tr->no_filter_buffering_ref;
7598         }
7599  out:
7600         mutex_unlock(&trace_types_lock);
7601
7602         return ret;
7603 }
7604
7605 struct ftrace_buffer_info {
7606         struct trace_iterator   iter;
7607         void                    *spare;
7608         unsigned int            spare_cpu;
7609         unsigned int            spare_size;
7610         unsigned int            read;
7611 };
7612
7613 #ifdef CONFIG_TRACER_SNAPSHOT
7614 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7615 {
7616         struct trace_array *tr = inode->i_private;
7617         struct trace_iterator *iter;
7618         struct seq_file *m;
7619         int ret;
7620
7621         ret = tracing_check_open_get_tr(tr);
7622         if (ret)
7623                 return ret;
7624
7625         if (file->f_mode & FMODE_READ) {
7626                 iter = __tracing_open(inode, file, true);
7627                 if (IS_ERR(iter))
7628                         ret = PTR_ERR(iter);
7629         } else {
7630                 /* Writes still need the seq_file to hold the private data */
7631                 ret = -ENOMEM;
7632                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7633                 if (!m)
7634                         goto out;
7635                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7636                 if (!iter) {
7637                         kfree(m);
7638                         goto out;
7639                 }
7640                 ret = 0;
7641
7642                 iter->tr = tr;
7643                 iter->array_buffer = &tr->max_buffer;
7644                 iter->cpu_file = tracing_get_cpu(inode);
7645                 m->private = iter;
7646                 file->private_data = m;
7647         }
7648 out:
7649         if (ret < 0)
7650                 trace_array_put(tr);
7651
7652         return ret;
7653 }
7654
7655 static void tracing_swap_cpu_buffer(void *tr)
7656 {
7657         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7658 }
7659
7660 static ssize_t
7661 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7662                        loff_t *ppos)
7663 {
7664         struct seq_file *m = filp->private_data;
7665         struct trace_iterator *iter = m->private;
7666         struct trace_array *tr = iter->tr;
7667         unsigned long val;
7668         int ret;
7669
7670         ret = tracing_update_buffers(tr);
7671         if (ret < 0)
7672                 return ret;
7673
7674         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7675         if (ret)
7676                 return ret;
7677
7678         mutex_lock(&trace_types_lock);
7679
7680         if (tr->current_trace->use_max_tr) {
7681                 ret = -EBUSY;
7682                 goto out;
7683         }
7684
7685         local_irq_disable();
7686         arch_spin_lock(&tr->max_lock);
7687         if (tr->cond_snapshot)
7688                 ret = -EBUSY;
7689         arch_spin_unlock(&tr->max_lock);
7690         local_irq_enable();
7691         if (ret)
7692                 goto out;
7693
7694         switch (val) {
7695         case 0:
7696                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7697                         ret = -EINVAL;
7698                         break;
7699                 }
7700                 if (tr->allocated_snapshot)
7701                         free_snapshot(tr);
7702                 break;
7703         case 1:
7704 /* Only allow per-cpu swap if the ring buffer supports it */
7705 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7706                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7707                         ret = -EINVAL;
7708                         break;
7709                 }
7710 #endif
7711                 if (tr->allocated_snapshot)
7712                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7713                                         &tr->array_buffer, iter->cpu_file);
7714                 else
7715                         ret = tracing_alloc_snapshot_instance(tr);
7716                 if (ret < 0)
7717                         break;
7718                 /* Now, we're going to swap */
7719                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7720                         local_irq_disable();
7721                         update_max_tr(tr, current, smp_processor_id(), NULL);
7722                         local_irq_enable();
7723                 } else {
7724                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7725                                                  (void *)tr, 1);
7726                 }
7727                 break;
7728         default:
7729                 if (tr->allocated_snapshot) {
7730                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7731                                 tracing_reset_online_cpus(&tr->max_buffer);
7732                         else
7733                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7734                 }
7735                 break;
7736         }
7737
7738         if (ret >= 0) {
7739                 *ppos += cnt;
7740                 ret = cnt;
7741         }
7742 out:
7743         mutex_unlock(&trace_types_lock);
7744         return ret;
7745 }
7746
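     /*
      * Illustrative usage of the "snapshot" file handled by
      * tracing_snapshot_write() above (a sketch):
      *
      *   # echo 1 > snapshot   # allocate if needed and take a snapshot
      *   # cat snapshot        # read the snapshotted trace
      *   # echo 2 > snapshot   # clear the snapshot buffer, keep it allocated
      *   # echo 0 > snapshot   # free the snapshot buffer
      */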
7747 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7748 {
7749         struct seq_file *m = file->private_data;
7750         int ret;
7751
7752         ret = tracing_release(inode, file);
7753
7754         if (file->f_mode & FMODE_READ)
7755                 return ret;
7756
7757         /* If write only, the seq_file is just a stub */
7758         if (m)
7759                 kfree(m->private);
7760         kfree(m);
7761
7762         return 0;
7763 }
7764
7765 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7766 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7767                                     size_t count, loff_t *ppos);
7768 static int tracing_buffers_release(struct inode *inode, struct file *file);
7769 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7770                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7771
7772 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7773 {
7774         struct ftrace_buffer_info *info;
7775         int ret;
7776
7777         /* The following checks for tracefs lockdown */
7778         ret = tracing_buffers_open(inode, filp);
7779         if (ret < 0)
7780                 return ret;
7781
7782         info = filp->private_data;
7783
7784         if (info->iter.trace->use_max_tr) {
7785                 tracing_buffers_release(inode, filp);
7786                 return -EBUSY;
7787         }
7788
7789         info->iter.snapshot = true;
7790         info->iter.array_buffer = &info->iter.tr->max_buffer;
7791
7792         return ret;
7793 }
7794
7795 #endif /* CONFIG_TRACER_SNAPSHOT */
7796
7797
7798 static const struct file_operations tracing_thresh_fops = {
7799         .open           = tracing_open_generic,
7800         .read           = tracing_thresh_read,
7801         .write          = tracing_thresh_write,
7802         .llseek         = generic_file_llseek,
7803 };
7804
7805 #ifdef CONFIG_TRACER_MAX_TRACE
7806 static const struct file_operations tracing_max_lat_fops = {
7807         .open           = tracing_open_generic_tr,
7808         .read           = tracing_max_lat_read,
7809         .write          = tracing_max_lat_write,
7810         .llseek         = generic_file_llseek,
7811         .release        = tracing_release_generic_tr,
7812 };
7813 #endif
7814
7815 static const struct file_operations set_tracer_fops = {
7816         .open           = tracing_open_generic_tr,
7817         .read           = tracing_set_trace_read,
7818         .write          = tracing_set_trace_write,
7819         .llseek         = generic_file_llseek,
7820         .release        = tracing_release_generic_tr,
7821 };
7822
7823 static const struct file_operations tracing_pipe_fops = {
7824         .open           = tracing_open_pipe,
7825         .poll           = tracing_poll_pipe,
7826         .read           = tracing_read_pipe,
7827         .splice_read    = tracing_splice_read_pipe,
7828         .release        = tracing_release_pipe,
7829         .llseek         = no_llseek,
7830 };
7831
7832 static const struct file_operations tracing_entries_fops = {
7833         .open           = tracing_open_generic_tr,
7834         .read           = tracing_entries_read,
7835         .write          = tracing_entries_write,
7836         .llseek         = generic_file_llseek,
7837         .release        = tracing_release_generic_tr,
7838 };
7839
7840 static const struct file_operations tracing_total_entries_fops = {
7841         .open           = tracing_open_generic_tr,
7842         .read           = tracing_total_entries_read,
7843         .llseek         = generic_file_llseek,
7844         .release        = tracing_release_generic_tr,
7845 };
7846
7847 static const struct file_operations tracing_free_buffer_fops = {
7848         .open           = tracing_open_generic_tr,
7849         .write          = tracing_free_buffer_write,
7850         .release        = tracing_free_buffer_release,
7851 };
7852
7853 static const struct file_operations tracing_mark_fops = {
7854         .open           = tracing_mark_open,
7855         .write          = tracing_mark_write,
7856         .release        = tracing_release_generic_tr,
7857 };
7858
7859 static const struct file_operations tracing_mark_raw_fops = {
7860         .open           = tracing_mark_open,
7861         .write          = tracing_mark_raw_write,
7862         .release        = tracing_release_generic_tr,
7863 };
7864
7865 static const struct file_operations trace_clock_fops = {
7866         .open           = tracing_clock_open,
7867         .read           = seq_read,
7868         .llseek         = seq_lseek,
7869         .release        = tracing_single_release_tr,
7870         .write          = tracing_clock_write,
7871 };
7872
7873 static const struct file_operations trace_time_stamp_mode_fops = {
7874         .open           = tracing_time_stamp_mode_open,
7875         .read           = seq_read,
7876         .llseek         = seq_lseek,
7877         .release        = tracing_single_release_tr,
7878 };
7879
7880 #ifdef CONFIG_TRACER_SNAPSHOT
7881 static const struct file_operations snapshot_fops = {
7882         .open           = tracing_snapshot_open,
7883         .read           = seq_read,
7884         .write          = tracing_snapshot_write,
7885         .llseek         = tracing_lseek,
7886         .release        = tracing_snapshot_release,
7887 };
7888
7889 static const struct file_operations snapshot_raw_fops = {
7890         .open           = snapshot_raw_open,
7891         .read           = tracing_buffers_read,
7892         .release        = tracing_buffers_release,
7893         .splice_read    = tracing_buffers_splice_read,
7894         .llseek         = no_llseek,
7895 };
7896
7897 #endif /* CONFIG_TRACER_SNAPSHOT */
7898
7899 /*
7900  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7901  * @filp: The active open file structure
7902  * @ubuf: The user space buffer containing the value to write
7903  * @cnt: The number of bytes to read from the user buffer
7904  * @ppos: The current "file" position
7905  *
7906  * This function implements the write interface for a struct trace_min_max_param.
7907  * The filp->private_data must point to a trace_min_max_param structure that
7908  * defines where to write the value, the min and the max acceptable values,
7909  * and a lock to protect the write.
7910  */
7911 static ssize_t
7912 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7913 {
7914         struct trace_min_max_param *param = filp->private_data;
7915         u64 val;
7916         int err;
7917
7918         if (!param)
7919                 return -EFAULT;
7920
7921         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7922         if (err)
7923                 return err;
7924
7925         if (param->lock)
7926                 mutex_lock(param->lock);
7927
7928         if (param->min && val < *param->min)
7929                 err = -EINVAL;
7930
7931         if (param->max && val > *param->max)
7932                 err = -EINVAL;
7933
7934         if (!err)
7935                 *param->val = val;
7936
7937         if (param->lock)
7938                 mutex_unlock(param->lock);
7939
7940         if (err)
7941                 return err;
7942
7943         return cnt;
7944 }
7945
7946 /*
7947  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7948  * @filp: The active open file structure
7949  * @ubuf: The userspace provided buffer to read value into
7950  * @cnt: The maximum number of bytes to read
7951  * @ppos: The current "file" position
7952  *
7953  * This function implements the read interface for a struct trace_min_max_param.
7954  * The filp->private_data must point to a trace_min_max_param struct with valid
7955  * data.
7956  */
7957 static ssize_t
7958 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7959 {
7960         struct trace_min_max_param *param = filp->private_data;
7961         char buf[U64_STR_SIZE];
7962         int len;
7963         u64 val;
7964
7965         if (!param)
7966                 return -EFAULT;
7967
7968         val = *param->val;
7969
7970         if (cnt > sizeof(buf))
7971                 cnt = sizeof(buf);
7972
7973         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7974
7975         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7976 }
7977
7978 const struct file_operations trace_min_max_fops = {
7979         .open           = tracing_open_generic,
7980         .read           = trace_min_max_read,
7981         .write          = trace_min_max_write,
7982 };
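/*
 * Illustrative sketch (not part of the upstream code): a caller that wants a
 * range-checked u64 knob in tracefs points filp->private_data at a
 * struct trace_min_max_param and reuses the fops above.  The example_* names
 * below are hypothetical.
 *
 *	static u64 example_val;
 *	static u64 example_min = 1;
 *	static u64 example_max = 1000;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_knob", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 *
 * Writes outside [1, 1000] then fail with -EINVAL, and reads return the
 * current value followed by a newline.
 */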
7983
7984 #define TRACING_LOG_ERRS_MAX    8
7985 #define TRACING_LOG_LOC_MAX     128
7986
7987 #define CMD_PREFIX "  Command: "
7988
7989 struct err_info {
7990         const char      **errs; /* ptr to loc-specific array of err strings */
7991         u8              type;   /* index into errs -> specific err string */
7992         u16             pos;    /* caret position */
7993         u64             ts;
7994 };
7995
7996 struct tracing_log_err {
7997         struct list_head        list;
7998         struct err_info         info;
7999         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
8000         char                    *cmd;                     /* what caused err */
8001 };
8002
8003 static DEFINE_MUTEX(tracing_err_log_lock);
8004
8005 static struct tracing_log_err *alloc_tracing_log_err(int len)
8006 {
8007         struct tracing_log_err *err;
8008
8009         err = kzalloc(sizeof(*err), GFP_KERNEL);
8010         if (!err)
8011                 return ERR_PTR(-ENOMEM);
8012
8013         err->cmd = kzalloc(len, GFP_KERNEL);
8014         if (!err->cmd) {
8015                 kfree(err);
8016                 return ERR_PTR(-ENOMEM);
8017         }
8018
8019         return err;
8020 }
8021
8022 static void free_tracing_log_err(struct tracing_log_err *err)
8023 {
8024         kfree(err->cmd);
8025         kfree(err);
8026 }
8027
8028 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8029                                                    int len)
8030 {
8031         struct tracing_log_err *err;
8032         char *cmd;
8033
8034         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8035                 err = alloc_tracing_log_err(len);
8036                 if (PTR_ERR(err) != -ENOMEM)
8037                         tr->n_err_log_entries++;
8038
8039                 return err;
8040         }
8041         cmd = kzalloc(len, GFP_KERNEL);
8042         if (!cmd)
8043                 return ERR_PTR(-ENOMEM);
8044         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8045         kfree(err->cmd);
8046         err->cmd = cmd;
8047         list_del(&err->list);
8048
8049         return err;
8050 }
8051
8052 /**
8053  * err_pos - find the position of a string within a command for error careting
8054  * @cmd: The tracing command that caused the error
8055  * @str: The string to position the caret at within @cmd
8056  *
8057  * Finds the position of the first occurrence of @str within @cmd.  The
8058  * return value can be passed to tracing_log_err() for caret placement
8059  * within @cmd.
8060  *
8061  * Returns the index within @cmd of the first occurrence of @str or 0
8062  * if @str was not found.
8063  */
8064 unsigned int err_pos(char *cmd, const char *str)
8065 {
8066         char *found;
8067
8068         if (WARN_ON(!strlen(cmd)))
8069                 return 0;
8070
8071         found = strstr(cmd, str);
8072         if (found)
8073                 return found - cmd;
8074
8075         return 0;
8076 }
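/*
 * For example (hypothetical command text): with cmd = "keys=common_pid" and
 * str = "common_pid", err_pos() returns 5, which places the caret under the
 * 'c' of "common_pid" when the command is echoed into the error log.
 */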
8077
8078 /**
8079  * tracing_log_err - write an error to the tracing error log
8080  * @tr: The associated trace array for the error (NULL for top level array)
8081  * @loc: A string describing where the error occurred
8082  * @cmd: The tracing command that caused the error
8083  * @errs: The array of loc-specific static error strings
8084  * @type: The index into errs[], which produces the specific static err string
8085  * @pos: The position the caret should be placed in the cmd
8086  *
8087  * Writes an error into tracing/error_log of the form:
8088  *
8089  * <loc>: error: <text>
8090  *   Command: <cmd>
8091  *              ^
8092  *
8093  * tracing/error_log is a small log file containing the last
8094  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8095  * unless there has been a tracing error, and the error log can be
8096  * cleared and have its memory freed by writing the empty string in
8097  * truncation mode to it i.e. echo > tracing/error_log.
8098  *
8099  * NOTE: the @errs array along with the @type param are used to
8100  * produce a static error string - this string is not copied and saved
8101  * when the error is logged - only a pointer to it is saved.  See
8102  * existing callers for examples of how static strings are typically
8103  * defined for use with tracing_log_err().
8104  */
8105 void tracing_log_err(struct trace_array *tr,
8106                      const char *loc, const char *cmd,
8107                      const char **errs, u8 type, u16 pos)
8108 {
8109         struct tracing_log_err *err;
8110         int len = 0;
8111
8112         if (!tr)
8113                 tr = &global_trace;
8114
8115         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8116
8117         mutex_lock(&tracing_err_log_lock);
8118         err = get_tracing_log_err(tr, len);
8119         if (PTR_ERR(err) == -ENOMEM) {
8120                 mutex_unlock(&tracing_err_log_lock);
8121                 return;
8122         }
8123
8124         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8125         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8126
8127         err->info.errs = errs;
8128         err->info.type = type;
8129         err->info.pos = pos;
8130         err->info.ts = local_clock();
8131
8132         list_add_tail(&err->list, &tr->err_log);
8133         mutex_unlock(&tracing_err_log_lock);
8134 }
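/*
 * Illustrative caller sketch (hypothetical names): the error strings are a
 * static array indexed by @type, and the caret position typically comes from
 * err_pos():
 *
 *	static const char *foo_errs[] = {
 *		"Duplicate field",
 *		"Unknown field",
 *	};
 *
 *	tracing_log_err(tr, "foo: parse error", cmd, foo_errs,
 *			1, err_pos(cmd, field_name));
 *
 * This appends an entry of the form documented above to tracing/error_log,
 * with the caret placed under @field_name within @cmd.
 */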
8135
8136 static void clear_tracing_err_log(struct trace_array *tr)
8137 {
8138         struct tracing_log_err *err, *next;
8139
8140         mutex_lock(&tracing_err_log_lock);
8141         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8142                 list_del(&err->list);
8143                 free_tracing_log_err(err);
8144         }
8145
8146         tr->n_err_log_entries = 0;
8147         mutex_unlock(&tracing_err_log_lock);
8148 }
8149
8150 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8151 {
8152         struct trace_array *tr = m->private;
8153
8154         mutex_lock(&tracing_err_log_lock);
8155
8156         return seq_list_start(&tr->err_log, *pos);
8157 }
8158
8159 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8160 {
8161         struct trace_array *tr = m->private;
8162
8163         return seq_list_next(v, &tr->err_log, pos);
8164 }
8165
8166 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8167 {
8168         mutex_unlock(&tracing_err_log_lock);
8169 }
8170
8171 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8172 {
8173         u16 i;
8174
8175         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8176                 seq_putc(m, ' ');
8177         for (i = 0; i < pos; i++)
8178                 seq_putc(m, ' ');
8179         seq_puts(m, "^\n");
8180 }
8181
8182 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8183 {
8184         struct tracing_log_err *err = v;
8185
8186         if (err) {
8187                 const char *err_text = err->info.errs[err->info.type];
8188                 u64 sec = err->info.ts;
8189                 u32 nsec;
8190
8191                 nsec = do_div(sec, NSEC_PER_SEC);
8192                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8193                            err->loc, err_text);
8194                 seq_printf(m, "%s", err->cmd);
8195                 tracing_err_log_show_pos(m, err->info.pos);
8196         }
8197
8198         return 0;
8199 }
8200
8201 static const struct seq_operations tracing_err_log_seq_ops = {
8202         .start  = tracing_err_log_seq_start,
8203         .next   = tracing_err_log_seq_next,
8204         .stop   = tracing_err_log_seq_stop,
8205         .show   = tracing_err_log_seq_show
8206 };
8207
8208 static int tracing_err_log_open(struct inode *inode, struct file *file)
8209 {
8210         struct trace_array *tr = inode->i_private;
8211         int ret = 0;
8212
8213         ret = tracing_check_open_get_tr(tr);
8214         if (ret)
8215                 return ret;
8216
8217         /* If this file was opened for write, then erase contents */
8218         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8219                 clear_tracing_err_log(tr);
8220
8221         if (file->f_mode & FMODE_READ) {
8222                 ret = seq_open(file, &tracing_err_log_seq_ops);
8223                 if (!ret) {
8224                         struct seq_file *m = file->private_data;
8225                         m->private = tr;
8226                 } else {
8227                         trace_array_put(tr);
8228                 }
8229         }
8230         return ret;
8231 }
8232
8233 static ssize_t tracing_err_log_write(struct file *file,
8234                                      const char __user *buffer,
8235                                      size_t count, loff_t *ppos)
8236 {
8237         return count;
8238 }
8239
8240 static int tracing_err_log_release(struct inode *inode, struct file *file)
8241 {
8242         struct trace_array *tr = inode->i_private;
8243
8244         trace_array_put(tr);
8245
8246         if (file->f_mode & FMODE_READ)
8247                 seq_release(inode, file);
8248
8249         return 0;
8250 }
8251
8252 static const struct file_operations tracing_err_log_fops = {
8253         .open           = tracing_err_log_open,
8254         .write          = tracing_err_log_write,
8255         .read           = seq_read,
8256         .llseek         = tracing_lseek,
8257         .release        = tracing_err_log_release,
8258 };
8259
8260 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8261 {
8262         struct trace_array *tr = inode->i_private;
8263         struct ftrace_buffer_info *info;
8264         int ret;
8265
8266         ret = tracing_check_open_get_tr(tr);
8267         if (ret)
8268                 return ret;
8269
8270         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8271         if (!info) {
8272                 trace_array_put(tr);
8273                 return -ENOMEM;
8274         }
8275
8276         mutex_lock(&trace_types_lock);
8277
8278         info->iter.tr           = tr;
8279         info->iter.cpu_file     = tracing_get_cpu(inode);
8280         info->iter.trace        = tr->current_trace;
8281         info->iter.array_buffer = &tr->array_buffer;
8282         info->spare             = NULL;
8283         /* Force reading ring buffer for first read */
8284         info->read              = (unsigned int)-1;
8285
8286         filp->private_data = info;
8287
8288         tr->trace_ref++;
8289
8290         mutex_unlock(&trace_types_lock);
8291
8292         ret = nonseekable_open(inode, filp);
8293         if (ret < 0)
8294                 trace_array_put(tr);
8295
8296         return ret;
8297 }
8298
8299 static __poll_t
8300 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8301 {
8302         struct ftrace_buffer_info *info = filp->private_data;
8303         struct trace_iterator *iter = &info->iter;
8304
8305         return trace_poll(iter, filp, poll_table);
8306 }
8307
8308 static ssize_t
8309 tracing_buffers_read(struct file *filp, char __user *ubuf,
8310                      size_t count, loff_t *ppos)
8311 {
8312         struct ftrace_buffer_info *info = filp->private_data;
8313         struct trace_iterator *iter = &info->iter;
8314         void *trace_data;
8315         int page_size;
8316         ssize_t ret = 0;
8317         ssize_t size;
8318
8319         if (!count)
8320                 return 0;
8321
8322 #ifdef CONFIG_TRACER_MAX_TRACE
8323         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8324                 return -EBUSY;
8325 #endif
8326
8327         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8328
8329         /* Make sure the spare matches the current sub buffer size */
8330         if (info->spare) {
8331                 if (page_size != info->spare_size) {
8332                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8333                                                    info->spare_cpu, info->spare);
8334                         info->spare = NULL;
8335                 }
8336         }
8337
8338         if (!info->spare) {
8339                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8340                                                           iter->cpu_file);
8341                 if (IS_ERR(info->spare)) {
8342                         ret = PTR_ERR(info->spare);
8343                         info->spare = NULL;
8344                 } else {
8345                         info->spare_cpu = iter->cpu_file;
8346                         info->spare_size = page_size;
8347                 }
8348         }
8349         if (!info->spare)
8350                 return ret;
8351
8352         /* Do we have previous read data to read? */
8353         if (info->read < page_size)
8354                 goto read;
8355
8356  again:
8357         trace_access_lock(iter->cpu_file);
8358         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8359                                     info->spare,
8360                                     count,
8361                                     iter->cpu_file, 0);
8362         trace_access_unlock(iter->cpu_file);
8363
8364         if (ret < 0) {
8365                 if (trace_empty(iter)) {
8366                         if ((filp->f_flags & O_NONBLOCK))
8367                                 return -EAGAIN;
8368
8369                         ret = wait_on_pipe(iter, 0);
8370                         if (ret)
8371                                 return ret;
8372
8373                         goto again;
8374                 }
8375                 return 0;
8376         }
8377
8378         info->read = 0;
8379  read:
8380         size = page_size - info->read;
8381         if (size > count)
8382                 size = count;
8383         trace_data = ring_buffer_read_page_data(info->spare);
8384         ret = copy_to_user(ubuf, trace_data + info->read, size);
8385         if (ret == size)
8386                 return -EFAULT;
8387
8388         size -= ret;
8389
8390         *ppos += size;
8391         info->read += size;
8392
8393         return size;
8394 }
8395
8396 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8397 {
8398         struct ftrace_buffer_info *info = file->private_data;
8399         struct trace_iterator *iter = &info->iter;
8400
8401         iter->wait_index++;
8402         /* Make sure the waiters see the new wait_index */
8403         smp_wmb();
8404
8405         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8406
8407         return 0;
8408 }
8409
8410 static int tracing_buffers_release(struct inode *inode, struct file *file)
8411 {
8412         struct ftrace_buffer_info *info = file->private_data;
8413         struct trace_iterator *iter = &info->iter;
8414
8415         mutex_lock(&trace_types_lock);
8416
8417         iter->tr->trace_ref--;
8418
8419         __trace_array_put(iter->tr);
8420
8421         if (info->spare)
8422                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8423                                            info->spare_cpu, info->spare);
8424         kvfree(info);
8425
8426         mutex_unlock(&trace_types_lock);
8427
8428         return 0;
8429 }
8430
8431 struct buffer_ref {
8432         struct trace_buffer     *buffer;
8433         void                    *page;
8434         int                     cpu;
8435         refcount_t              refcount;
8436 };
8437
8438 static void buffer_ref_release(struct buffer_ref *ref)
8439 {
8440         if (!refcount_dec_and_test(&ref->refcount))
8441                 return;
8442         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8443         kfree(ref);
8444 }
8445
8446 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8447                                     struct pipe_buffer *buf)
8448 {
8449         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8450
8451         buffer_ref_release(ref);
8452         buf->private = 0;
8453 }
8454
8455 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8456                                 struct pipe_buffer *buf)
8457 {
8458         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8459
8460         if (refcount_read(&ref->refcount) > INT_MAX/2)
8461                 return false;
8462
8463         refcount_inc(&ref->refcount);
8464         return true;
8465 }
8466
8467 /* Pipe buffer operations for a buffer. */
8468 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8469         .release                = buffer_pipe_buf_release,
8470         .get                    = buffer_pipe_buf_get,
8471 };
8472
8473 /*
8474  * Callback from splice_to_pipe(), used to release any remaining pages
8475  * in the spd if we errored out while filling the pipe.
8476  */
8477 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8478 {
8479         struct buffer_ref *ref =
8480                 (struct buffer_ref *)spd->partial[i].private;
8481
8482         buffer_ref_release(ref);
8483         spd->partial[i].private = 0;
8484 }
8485
8486 static ssize_t
8487 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8488                             struct pipe_inode_info *pipe, size_t len,
8489                             unsigned int flags)
8490 {
8491         struct ftrace_buffer_info *info = file->private_data;
8492         struct trace_iterator *iter = &info->iter;
8493         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8494         struct page *pages_def[PIPE_DEF_BUFFERS];
8495         struct splice_pipe_desc spd = {
8496                 .pages          = pages_def,
8497                 .partial        = partial_def,
8498                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8499                 .ops            = &buffer_pipe_buf_ops,
8500                 .spd_release    = buffer_spd_release,
8501         };
8502         struct buffer_ref *ref;
8503         int page_size;
8504         int entries, i;
8505         ssize_t ret = 0;
8506
8507 #ifdef CONFIG_TRACER_MAX_TRACE
8508         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8509                 return -EBUSY;
8510 #endif
8511
8512         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8513         if (*ppos & (page_size - 1))
8514                 return -EINVAL;
8515
8516         if (len & (page_size - 1)) {
8517                 if (len < page_size)
8518                         return -EINVAL;
8519                 len &= (~(page_size - 1));
8520         }
8521
8522         if (splice_grow_spd(pipe, &spd))
8523                 return -ENOMEM;
8524
8525  again:
8526         trace_access_lock(iter->cpu_file);
8527         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8528
8529         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8530                 struct page *page;
8531                 int r;
8532
8533                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8534                 if (!ref) {
8535                         ret = -ENOMEM;
8536                         break;
8537                 }
8538
8539                 refcount_set(&ref->refcount, 1);
8540                 ref->buffer = iter->array_buffer->buffer;
8541                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8542                 if (IS_ERR(ref->page)) {
8543                         ret = PTR_ERR(ref->page);
8544                         ref->page = NULL;
8545                         kfree(ref);
8546                         break;
8547                 }
8548                 ref->cpu = iter->cpu_file;
8549
8550                 r = ring_buffer_read_page(ref->buffer, ref->page,
8551                                           len, iter->cpu_file, 1);
8552                 if (r < 0) {
8553                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8554                                                    ref->page);
8555                         kfree(ref);
8556                         break;
8557                 }
8558
8559                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8560
8561                 spd.pages[i] = page;
8562                 spd.partial[i].len = page_size;
8563                 spd.partial[i].offset = 0;
8564                 spd.partial[i].private = (unsigned long)ref;
8565                 spd.nr_pages++;
8566                 *ppos += page_size;
8567
8568                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8569         }
8570
8571         trace_access_unlock(iter->cpu_file);
8572         spd.nr_pages = i;
8573
8574         /* did we read anything? */
8575         if (!spd.nr_pages) {
8576                 long wait_index;
8577
8578                 if (ret)
8579                         goto out;
8580
8581                 ret = -EAGAIN;
8582                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8583                         goto out;
8584
8585                 wait_index = READ_ONCE(iter->wait_index);
8586
8587                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8588                 if (ret)
8589                         goto out;
8590
8591                 /* No need to wait after waking up when tracing is off */
8592                 if (!tracer_tracing_is_on(iter->tr))
8593                         goto out;
8594
8595                 /* Make sure we see the new wait_index */
8596                 smp_rmb();
8597                 if (wait_index != iter->wait_index)
8598                         goto out;
8599
8600                 goto again;
8601         }
8602
8603         ret = splice_to_pipe(pipe, &spd);
8604 out:
8605         splice_shrink_spd(&spd);
8606
8607         return ret;
8608 }
8609
8610 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8611 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8612 {
8613         struct ftrace_buffer_info *info = file->private_data;
8614         struct trace_iterator *iter = &info->iter;
8615
8616         if (cmd)
8617                 return -ENOIOCTLCMD;
8618
8619         mutex_lock(&trace_types_lock);
8620
8621         iter->wait_index++;
8622         /* Make sure the waiters see the new wait_index */
8623         smp_wmb();
8624
8625         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8626
8627         mutex_unlock(&trace_types_lock);
8628         return 0;
8629 }
8630
8631 static const struct file_operations tracing_buffers_fops = {
8632         .open           = tracing_buffers_open,
8633         .read           = tracing_buffers_read,
8634         .poll           = tracing_buffers_poll,
8635         .release        = tracing_buffers_release,
8636         .flush          = tracing_buffers_flush,
8637         .splice_read    = tracing_buffers_splice_read,
8638         .unlocked_ioctl = tracing_buffers_ioctl,
8639         .llseek         = no_llseek,
8640 };
8641
8642 static ssize_t
8643 tracing_stats_read(struct file *filp, char __user *ubuf,
8644                    size_t count, loff_t *ppos)
8645 {
8646         struct inode *inode = file_inode(filp);
8647         struct trace_array *tr = inode->i_private;
8648         struct array_buffer *trace_buf = &tr->array_buffer;
8649         int cpu = tracing_get_cpu(inode);
8650         struct trace_seq *s;
8651         unsigned long cnt;
8652         unsigned long long t;
8653         unsigned long usec_rem;
8654
8655         s = kmalloc(sizeof(*s), GFP_KERNEL);
8656         if (!s)
8657                 return -ENOMEM;
8658
8659         trace_seq_init(s);
8660
8661         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8662         trace_seq_printf(s, "entries: %ld\n", cnt);
8663
8664         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8665         trace_seq_printf(s, "overrun: %ld\n", cnt);
8666
8667         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8668         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8669
8670         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8671         trace_seq_printf(s, "bytes: %ld\n", cnt);
8672
8673         if (trace_clocks[tr->clock_id].in_ns) {
8674                 /* local or global for trace_clock */
8675                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8676                 usec_rem = do_div(t, USEC_PER_SEC);
8677                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8678                                                                 t, usec_rem);
8679
8680                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8681                 usec_rem = do_div(t, USEC_PER_SEC);
8682                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8683         } else {
8684                 /* counter or tsc mode for trace_clock */
8685                 trace_seq_printf(s, "oldest event ts: %llu\n",
8686                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8687
8688                 trace_seq_printf(s, "now ts: %llu\n",
8689                                 ring_buffer_time_stamp(trace_buf->buffer));
8690         }
8691
8692         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8693         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8694
8695         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8696         trace_seq_printf(s, "read events: %ld\n", cnt);
8697
8698         count = simple_read_from_buffer(ubuf, count, ppos,
8699                                         s->buffer, trace_seq_used(s));
8700
8701         kfree(s);
8702
8703         return count;
8704 }
8705
8706 static const struct file_operations tracing_stats_fops = {
8707         .open           = tracing_open_generic_tr,
8708         .read           = tracing_stats_read,
8709         .llseek         = generic_file_llseek,
8710         .release        = tracing_release_generic_tr,
8711 };
8712
8713 #ifdef CONFIG_DYNAMIC_FTRACE
8714
8715 static ssize_t
8716 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8717                   size_t cnt, loff_t *ppos)
8718 {
8719         ssize_t ret;
8720         char *buf;
8721         int r;
8722
8723         /* 256 should be plenty to hold the amount needed */
8724         buf = kmalloc(256, GFP_KERNEL);
8725         if (!buf)
8726                 return -ENOMEM;
8727
8728         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8729                       ftrace_update_tot_cnt,
8730                       ftrace_number_of_pages,
8731                       ftrace_number_of_groups);
8732
8733         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8734         kfree(buf);
8735         return ret;
8736 }
8737
8738 static const struct file_operations tracing_dyn_info_fops = {
8739         .open           = tracing_open_generic,
8740         .read           = tracing_read_dyn_info,
8741         .llseek         = generic_file_llseek,
8742 };
8743 #endif /* CONFIG_DYNAMIC_FTRACE */
8744
8745 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8746 static void
8747 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8748                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8749                 void *data)
8750 {
8751         tracing_snapshot_instance(tr);
8752 }
8753
8754 static void
8755 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8756                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8757                       void *data)
8758 {
8759         struct ftrace_func_mapper *mapper = data;
8760         long *count = NULL;
8761
8762         if (mapper)
8763                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8764
8765         if (count) {
8766
8767                 if (*count <= 0)
8768                         return;
8769
8770                 (*count)--;
8771         }
8772
8773         tracing_snapshot_instance(tr);
8774 }
8775
8776 static int
8777 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8778                       struct ftrace_probe_ops *ops, void *data)
8779 {
8780         struct ftrace_func_mapper *mapper = data;
8781         long *count = NULL;
8782
8783         seq_printf(m, "%ps:", (void *)ip);
8784
8785         seq_puts(m, "snapshot");
8786
8787         if (mapper)
8788                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8789
8790         if (count)
8791                 seq_printf(m, ":count=%ld\n", *count);
8792         else
8793                 seq_puts(m, ":unlimited\n");
8794
8795         return 0;
8796 }
8797
8798 static int
8799 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8800                      unsigned long ip, void *init_data, void **data)
8801 {
8802         struct ftrace_func_mapper *mapper = *data;
8803
8804         if (!mapper) {
8805                 mapper = allocate_ftrace_func_mapper();
8806                 if (!mapper)
8807                         return -ENOMEM;
8808                 *data = mapper;
8809         }
8810
8811         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8812 }
8813
8814 static void
8815 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8816                      unsigned long ip, void *data)
8817 {
8818         struct ftrace_func_mapper *mapper = data;
8819
8820         if (!ip) {
8821                 if (!mapper)
8822                         return;
8823                 free_ftrace_func_mapper(mapper, NULL);
8824                 return;
8825         }
8826
8827         ftrace_func_mapper_remove_ip(mapper, ip);
8828 }
8829
8830 static struct ftrace_probe_ops snapshot_probe_ops = {
8831         .func                   = ftrace_snapshot,
8832         .print                  = ftrace_snapshot_print,
8833 };
8834
8835 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8836         .func                   = ftrace_count_snapshot,
8837         .print                  = ftrace_snapshot_print,
8838         .init                   = ftrace_snapshot_init,
8839         .free                   = ftrace_snapshot_free,
8840 };
8841
8842 static int
8843 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8844                                char *glob, char *cmd, char *param, int enable)
8845 {
8846         struct ftrace_probe_ops *ops;
8847         void *count = (void *)-1;
8848         char *number;
8849         int ret;
8850
8851         if (!tr)
8852                 return -ENODEV;
8853
8854         /* hash funcs only work with set_ftrace_filter */
8855         if (!enable)
8856                 return -EINVAL;
8857
8858         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8859
8860         if (glob[0] == '!')
8861                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8862
8863         if (!param)
8864                 goto out_reg;
8865
8866         number = strsep(&param, ":");
8867
8868         if (!strlen(number))
8869                 goto out_reg;
8870
8871         /*
8872          * We use the callback data field (which is a pointer)
8873          * as our counter.
8874          */
8875         ret = kstrtoul(number, 0, (unsigned long *)&count);
8876         if (ret)
8877                 return ret;
8878
8879  out_reg:
8880         ret = tracing_alloc_snapshot_instance(tr);
8881         if (ret < 0)
8882                 goto out;
8883
8884         ret = register_ftrace_function_probe(glob, tr, ops, count);
8885
8886  out:
8887         return ret < 0 ? ret : 0;
8888 }
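/*
 * Usage sketch: as described in Documentation/trace/ftrace.rst, this callback
 * is reached by writing the "snapshot" function command to set_ftrace_filter,
 * e.g. (function name chosen only for illustration):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:3' >> set_ftrace_filter
 *	echo '!schedule:snapshot' >> set_ftrace_filter
 *
 * In the second form the callback sees param == "3" and registers
 * snapshot_count_probe_ops with a countdown of three snapshots; the '!'
 * prefix in the last form unregisters the probe again.
 */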
8889
8890 static struct ftrace_func_command ftrace_snapshot_cmd = {
8891         .name                   = "snapshot",
8892         .func                   = ftrace_trace_snapshot_callback,
8893 };
8894
8895 static __init int register_snapshot_cmd(void)
8896 {
8897         return register_ftrace_command(&ftrace_snapshot_cmd);
8898 }
8899 #else
8900 static inline __init int register_snapshot_cmd(void) { return 0; }
8901 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8902
8903 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8904 {
8905         if (WARN_ON(!tr->dir))
8906                 return ERR_PTR(-ENODEV);
8907
8908         /* Top directory uses NULL as the parent */
8909         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8910                 return NULL;
8911
8912         /* All sub buffers have a descriptor */
8913         return tr->dir;
8914 }
8915
8916 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8917 {
8918         struct dentry *d_tracer;
8919
8920         if (tr->percpu_dir)
8921                 return tr->percpu_dir;
8922
8923         d_tracer = tracing_get_dentry(tr);
8924         if (IS_ERR(d_tracer))
8925                 return NULL;
8926
8927         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8928
8929         MEM_FAIL(!tr->percpu_dir,
8930                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8931
8932         return tr->percpu_dir;
8933 }
8934
8935 static struct dentry *
8936 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8937                       void *data, long cpu, const struct file_operations *fops)
8938 {
8939         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8940
8941         if (ret) /* See tracing_get_cpu() */
8942                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8943         return ret;
8944 }
8945
8946 static void
8947 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8948 {
8949         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8950         struct dentry *d_cpu;
8951         char cpu_dir[30]; /* 30 characters should be more than enough */
8952
8953         if (!d_percpu)
8954                 return;
8955
8956         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8957         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8958         if (!d_cpu) {
8959                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8960                 return;
8961         }
8962
8963         /* per cpu trace_pipe */
8964         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8965                                 tr, cpu, &tracing_pipe_fops);
8966
8967         /* per cpu trace */
8968         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8969                                 tr, cpu, &tracing_fops);
8970
8971         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8972                                 tr, cpu, &tracing_buffers_fops);
8973
8974         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8975                                 tr, cpu, &tracing_stats_fops);
8976
8977         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8978                                 tr, cpu, &tracing_entries_fops);
8979
8980 #ifdef CONFIG_TRACER_SNAPSHOT
8981         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8982                                 tr, cpu, &snapshot_fops);
8983
8984         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8985                                 tr, cpu, &snapshot_raw_fops);
8986 #endif
8987 }
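/*
 * The result is a per-CPU directory tree under the instance's tracefs
 * directory, typically one such directory per CPU:
 *
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 *
 * Each file operates only on that CPU's portion of the ring buffer.
 */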
8988
8989 #ifdef CONFIG_FTRACE_SELFTEST
8990 /* Let selftest have access to static functions in this file */
8991 #include "trace_selftest.c"
8992 #endif
8993
8994 static ssize_t
8995 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8996                         loff_t *ppos)
8997 {
8998         struct trace_option_dentry *topt = filp->private_data;
8999         char *buf;
9000
9001         if (topt->flags->val & topt->opt->bit)
9002                 buf = "1\n";
9003         else
9004                 buf = "0\n";
9005
9006         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9007 }
9008
9009 static ssize_t
9010 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9011                          loff_t *ppos)
9012 {
9013         struct trace_option_dentry *topt = filp->private_data;
9014         unsigned long val;
9015         int ret;
9016
9017         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9018         if (ret)
9019                 return ret;
9020
9021         if (val != 0 && val != 1)
9022                 return -EINVAL;
9023
9024         if (!!(topt->flags->val & topt->opt->bit) != val) {
9025                 mutex_lock(&trace_types_lock);
9026                 ret = __set_tracer_option(topt->tr, topt->flags,
9027                                           topt->opt, !val);
9028                 mutex_unlock(&trace_types_lock);
9029                 if (ret)
9030                         return ret;
9031         }
9032
9033         *ppos += cnt;
9034
9035         return cnt;
9036 }
9037
9038 static int tracing_open_options(struct inode *inode, struct file *filp)
9039 {
9040         struct trace_option_dentry *topt = inode->i_private;
9041         int ret;
9042
9043         ret = tracing_check_open_get_tr(topt->tr);
9044         if (ret)
9045                 return ret;
9046
9047         filp->private_data = inode->i_private;
9048         return 0;
9049 }
9050
9051 static int tracing_release_options(struct inode *inode, struct file *file)
9052 {
9053         struct trace_option_dentry *topt = file->private_data;
9054
9055         trace_array_put(topt->tr);
9056         return 0;
9057 }
9058
9059 static const struct file_operations trace_options_fops = {
9060         .open = tracing_open_options,
9061         .read = trace_options_read,
9062         .write = trace_options_write,
9063         .llseek = generic_file_llseek,
9064         .release = tracing_release_options,
9065 };
9066
9067 /*
9068  * In order to pass in both the trace_array descriptor as well as the index
9069  * to the flag that the trace option file represents, the trace_array
9070  * has a character array of trace_flags_index[], which holds the index
9071  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9072  * The address of this character array is passed to the flag option file
9073  * read/write callbacks.
9074  *
9075  * In order to extract both the index and the trace_array descriptor,
9076  * get_tr_index() uses the following algorithm.
9077  *
9078  *   idx = *ptr;
9079  *
9080  * Since the byte that the pointer refers to holds the index itself
9081  * (remember index[1] == 1), dereferencing the pointer yields the index.
9082  *
9083  * Then, to get the trace_array descriptor, subtracting that index from
9084  * the pointer takes us back to the start of the index array.
9085  *
9086  *   ptr - idx == &index[0]
9087  *
9088  * Then a simple container_of() from that pointer gets us to the
9089  * trace_array descriptor.
9090  */
9091 static void get_tr_index(void *data, struct trace_array **ptr,
9092                          unsigned int *pindex)
9093 {
9094         *pindex = *(unsigned char *)data;
9095
9096         *ptr = container_of(data - *pindex, struct trace_array,
9097                             trace_flags_index);
9098 }
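/*
 * Worked example: if @data points at tr->trace_flags_index[3], then
 * *(unsigned char *)data == 3, data - 3 == &tr->trace_flags_index[0], and
 * container_of() on that address recovers the enclosing trace_array, so the
 * caller ends up with both tr and the flag index 3.
 */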
9099
9100 static ssize_t
9101 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9102                         loff_t *ppos)
9103 {
9104         void *tr_index = filp->private_data;
9105         struct trace_array *tr;
9106         unsigned int index;
9107         char *buf;
9108
9109         get_tr_index(tr_index, &tr, &index);
9110
9111         if (tr->trace_flags & (1 << index))
9112                 buf = "1\n";
9113         else
9114                 buf = "0\n";
9115
9116         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9117 }
9118
9119 static ssize_t
9120 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9121                          loff_t *ppos)
9122 {
9123         void *tr_index = filp->private_data;
9124         struct trace_array *tr;
9125         unsigned int index;
9126         unsigned long val;
9127         int ret;
9128
9129         get_tr_index(tr_index, &tr, &index);
9130
9131         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9132         if (ret)
9133                 return ret;
9134
9135         if (val != 0 && val != 1)
9136                 return -EINVAL;
9137
9138         mutex_lock(&event_mutex);
9139         mutex_lock(&trace_types_lock);
9140         ret = set_tracer_flag(tr, 1 << index, val);
9141         mutex_unlock(&trace_types_lock);
9142         mutex_unlock(&event_mutex);
9143
9144         if (ret < 0)
9145                 return ret;
9146
9147         *ppos += cnt;
9148
9149         return cnt;
9150 }
9151
9152 static const struct file_operations trace_options_core_fops = {
9153         .open = tracing_open_generic,
9154         .read = trace_options_core_read,
9155         .write = trace_options_core_write,
9156         .llseek = generic_file_llseek,
9157 };
9158
9159 struct dentry *trace_create_file(const char *name,
9160                                  umode_t mode,
9161                                  struct dentry *parent,
9162                                  void *data,
9163                                  const struct file_operations *fops)
9164 {
9165         struct dentry *ret;
9166
9167         ret = tracefs_create_file(name, mode, parent, data, fops);
9168         if (!ret)
9169                 pr_warn("Could not create tracefs '%s' entry\n", name);
9170
9171         return ret;
9172 }
9173
9174
9175 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9176 {
9177         struct dentry *d_tracer;
9178
9179         if (tr->options)
9180                 return tr->options;
9181
9182         d_tracer = tracing_get_dentry(tr);
9183         if (IS_ERR(d_tracer))
9184                 return NULL;
9185
9186         tr->options = tracefs_create_dir("options", d_tracer);
9187         if (!tr->options) {
9188                 pr_warn("Could not create tracefs directory 'options'\n");
9189                 return NULL;
9190         }
9191
9192         return tr->options;
9193 }
9194
9195 static void
9196 create_trace_option_file(struct trace_array *tr,
9197                          struct trace_option_dentry *topt,
9198                          struct tracer_flags *flags,
9199                          struct tracer_opt *opt)
9200 {
9201         struct dentry *t_options;
9202
9203         t_options = trace_options_init_dentry(tr);
9204         if (!t_options)
9205                 return;
9206
9207         topt->flags = flags;
9208         topt->opt = opt;
9209         topt->tr = tr;
9210
9211         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9212                                         t_options, topt, &trace_options_fops);
9213
9214 }
9215
9216 static void
9217 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9218 {
9219         struct trace_option_dentry *topts;
9220         struct trace_options *tr_topts;
9221         struct tracer_flags *flags;
9222         struct tracer_opt *opts;
9223         int cnt;
9224         int i;
9225
9226         if (!tracer)
9227                 return;
9228
9229         flags = tracer->flags;
9230
9231         if (!flags || !flags->opts)
9232                 return;
9233
9234         /*
9235          * If this is an instance, only create flags for tracers
9236          * the instance may have.
9237          */
9238         if (!trace_ok_for_array(tracer, tr))
9239                 return;
9240
9241         for (i = 0; i < tr->nr_topts; i++) {
9242                 /* Make sure there are no duplicate flags. */
9243                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9244                         return;
9245         }
9246
9247         opts = flags->opts;
9248
9249         for (cnt = 0; opts[cnt].name; cnt++)
9250                 ;
9251
9252         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9253         if (!topts)
9254                 return;
9255
9256         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9257                             GFP_KERNEL);
9258         if (!tr_topts) {
9259                 kfree(topts);
9260                 return;
9261         }
9262
9263         tr->topts = tr_topts;
9264         tr->topts[tr->nr_topts].tracer = tracer;
9265         tr->topts[tr->nr_topts].topts = topts;
9266         tr->nr_topts++;
9267
9268         for (cnt = 0; opts[cnt].name; cnt++) {
9269                 create_trace_option_file(tr, &topts[cnt], flags,
9270                                          &opts[cnt]);
9271                 MEM_FAIL(topts[cnt].entry == NULL,
9272                           "Failed to create trace option: %s",
9273                           opts[cnt].name);
9274         }
9275 }
9276
9277 static struct dentry *
9278 create_trace_option_core_file(struct trace_array *tr,
9279                               const char *option, long index)
9280 {
9281         struct dentry *t_options;
9282
9283         t_options = trace_options_init_dentry(tr);
9284         if (!t_options)
9285                 return NULL;
9286
9287         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9288                                  (void *)&tr->trace_flags_index[index],
9289                                  &trace_options_core_fops);
9290 }
9291
9292 static void create_trace_options_dir(struct trace_array *tr)
9293 {
9294         struct dentry *t_options;
9295         bool top_level = tr == &global_trace;
9296         int i;
9297
9298         t_options = trace_options_init_dentry(tr);
9299         if (!t_options)
9300                 return;
9301
9302         for (i = 0; trace_options[i]; i++) {
9303                 if (top_level ||
9304                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9305                         create_trace_option_core_file(tr, trace_options[i], i);
9306         }
9307 }
9308
9309 static ssize_t
9310 rb_simple_read(struct file *filp, char __user *ubuf,
9311                size_t cnt, loff_t *ppos)
9312 {
9313         struct trace_array *tr = filp->private_data;
9314         char buf[64];
9315         int r;
9316
9317         r = tracer_tracing_is_on(tr);
9318         r = sprintf(buf, "%d\n", r);
9319
9320         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9321 }
9322
9323 static ssize_t
9324 rb_simple_write(struct file *filp, const char __user *ubuf,
9325                 size_t cnt, loff_t *ppos)
9326 {
9327         struct trace_array *tr = filp->private_data;
9328         struct trace_buffer *buffer = tr->array_buffer.buffer;
9329         unsigned long val;
9330         int ret;
9331
9332         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9333         if (ret)
9334                 return ret;
9335
9336         if (buffer) {
9337                 mutex_lock(&trace_types_lock);
9338                 if (!!val == tracer_tracing_is_on(tr)) {
9339                         val = 0; /* do nothing */
9340                 } else if (val) {
9341                         tracer_tracing_on(tr);
9342                         if (tr->current_trace->start)
9343                                 tr->current_trace->start(tr);
9344                 } else {
9345                         tracer_tracing_off(tr);
9346                         if (tr->current_trace->stop)
9347                                 tr->current_trace->stop(tr);
9348                         /* Wake up any waiters */
9349                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9350                 }
9351                 mutex_unlock(&trace_types_lock);
9352         }
9353
9354         (*ppos)++;
9355
9356         return cnt;
9357 }
9358
9359 static const struct file_operations rb_simple_fops = {
9360         .open           = tracing_open_generic_tr,
9361         .read           = rb_simple_read,
9362         .write          = rb_simple_write,
9363         .release        = tracing_release_generic_tr,
9364         .llseek         = default_llseek,
9365 };
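/*
 * Note: these fops back the "tracing_on" control file created elsewhere in
 * this file (outside this excerpt); writing 0 or 1 to it toggles recording
 * into the ring buffer without tearing the buffer down, and writing 0 also
 * wakes any blocked readers.
 */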
9366
9367 static ssize_t
9368 buffer_percent_read(struct file *filp, char __user *ubuf,
9369                     size_t cnt, loff_t *ppos)
9370 {
9371         struct trace_array *tr = filp->private_data;
9372         char buf[64];
9373         int r;
9374
9375         r = tr->buffer_percent;
9376         r = sprintf(buf, "%d\n", r);
9377
9378         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9379 }
9380
9381 static ssize_t
9382 buffer_percent_write(struct file *filp, const char __user *ubuf,
9383                      size_t cnt, loff_t *ppos)
9384 {
9385         struct trace_array *tr = filp->private_data;
9386         unsigned long val;
9387         int ret;
9388
9389         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9390         if (ret)
9391                 return ret;
9392
9393         if (val > 100)
9394                 return -EINVAL;
9395
9396         tr->buffer_percent = val;
9397
9398         (*ppos)++;
9399
9400         return cnt;
9401 }
9402
9403 static const struct file_operations buffer_percent_fops = {
9404         .open           = tracing_open_generic_tr,
9405         .read           = buffer_percent_read,
9406         .write          = buffer_percent_write,
9407         .release        = tracing_release_generic_tr,
9408         .llseek         = default_llseek,
9409 };
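
/*
 * Editor's note: buffer_percent_write() above accepts a value from 0 to
 * 100 and stores it in tr->buffer_percent (init_tracer_tracefs() below
 * defaults it to 50); anything larger is rejected with -EINVAL. Assuming
 * the default tracefs mount point:
 *
 *   echo 0 > /sys/kernel/tracing/buffer_percent
 */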
9410
9411 static ssize_t
9412 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9413 {
9414         struct trace_array *tr = filp->private_data;
9415         size_t size;
9416         char buf[64];
9417         int order;
9418         int r;
9419
9420         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9421         size = (PAGE_SIZE << order) / 1024;
9422
9423         r = sprintf(buf, "%zd\n", size);
9424
9425         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9426 }
9427
9428 static ssize_t
9429 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9430                          size_t cnt, loff_t *ppos)
9431 {
9432         struct trace_array *tr = filp->private_data;
9433         unsigned long val;
9434         int old_order;
9435         int order;
9436         int pages;
9437         int ret;
9438
9439         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9440         if (ret)
9441                 return ret;
9442
9443         val *= 1024; /* value passed in is in KB */
9444
9445         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9446         order = fls(pages - 1);
9447
9448         /* limit between 1 and 128 system pages */
9449         if (order < 0 || order > 7)
9450                 return -EINVAL;
9451
9452         /* Do not allow tracing while changing the order of the ring buffer */
9453         tracing_stop_tr(tr);
9454
9455         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9456         if (old_order == order)
9457                 goto out;
9458
9459         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9460         if (ret)
9461                 goto out;
9462
9463 #ifdef CONFIG_TRACER_MAX_TRACE
9464
9465         if (!tr->allocated_snapshot)
9466                 goto out_max;
9467
9468         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9469         if (ret) {
9470                 /* Put back the old order */
9471                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9472                 if (WARN_ON_ONCE(cnt)) {
9473                         /*
9474                          * AARGH! We are left with different orders!
9475                          * The max buffer is our "snapshot" buffer.
9476                          * When a tracer needs a snapshot (one of the
9477                          * latency tracers), it swaps the max buffer
9478                          * with the saved snapshot. We succeeded in
9479                          * updating the order of the main buffer, but failed
9480                          * to update the order of the max buffer. But when we
9481                          * tried to reset the main buffer to the original size, we
9482                          * failed there too. This is very unlikely to
9483                          * happen, but if it does, warn and kill all
9484                          * tracing.
9485                          */
9486                         tracing_disabled = 1;
9487                 }
9488                 goto out;
9489         }
9490  out_max:
9491 #endif
9492         (*ppos)++;
9493  out:
9494         if (ret)
9495                 cnt = ret;
9496         tracing_start_tr(tr);
9497         return cnt;
9498 }
9499
9500 static const struct file_operations buffer_subbuf_size_fops = {
9501         .open           = tracing_open_generic_tr,
9502         .read           = buffer_subbuf_size_read,
9503         .write          = buffer_subbuf_size_write,
9504         .release        = tracing_release_generic_tr,
9505         .llseek         = default_llseek,
9506 };
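
/*
 * Editor's worked example for buffer_subbuf_size_write(), assuming 4K
 * pages: writing "8" means 8 KB, so val = 8192,
 * pages = DIV_ROUND_UP(8192, 4096) = 2, order = fls(2 - 1) = 1, and the
 * sub-buffer size becomes PAGE_SIZE << 1 = 8 KB. Orders outside 0..7
 * (1 to 128 system pages) are rejected with -EINVAL.
 */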
9507
9508 static struct dentry *trace_instance_dir;
9509
9510 static void
9511 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9512
9513 static int
9514 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9515 {
9516         enum ring_buffer_flags rb_flags;
9517
9518         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9519
9520         buf->tr = tr;
9521
9522         buf->buffer = ring_buffer_alloc(size, rb_flags);
9523         if (!buf->buffer)
9524                 return -ENOMEM;
9525
9526         buf->data = alloc_percpu(struct trace_array_cpu);
9527         if (!buf->data) {
9528                 ring_buffer_free(buf->buffer);
9529                 buf->buffer = NULL;
9530                 return -ENOMEM;
9531         }
9532
9533         /* Allocate the first page for all buffers */
9534         set_buffer_entries(&tr->array_buffer,
9535                            ring_buffer_size(tr->array_buffer.buffer, 0));
9536
9537         return 0;
9538 }
9539
9540 static void free_trace_buffer(struct array_buffer *buf)
9541 {
9542         if (buf->buffer) {
9543                 ring_buffer_free(buf->buffer);
9544                 buf->buffer = NULL;
9545                 free_percpu(buf->data);
9546                 buf->data = NULL;
9547         }
9548 }
9549
9550 static int allocate_trace_buffers(struct trace_array *tr, int size)
9551 {
9552         int ret;
9553
9554         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9555         if (ret)
9556                 return ret;
9557
9558 #ifdef CONFIG_TRACER_MAX_TRACE
9559         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9560                                     allocate_snapshot ? size : 1);
9561         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9562                 free_trace_buffer(&tr->array_buffer);
9563                 return -ENOMEM;
9564         }
9565         tr->allocated_snapshot = allocate_snapshot;
9566
9567         allocate_snapshot = false;
9568 #endif
9569
9570         return 0;
9571 }
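
/*
 * Editor's note: with CONFIG_TRACER_MAX_TRACE the max (snapshot) buffer
 * is always allocated alongside the main buffer, but only at the full
 * size when allocate_snapshot was requested; otherwise it is created
 * with a minimal size of 1.
 */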
9572
9573 static void free_trace_buffers(struct trace_array *tr)
9574 {
9575         if (!tr)
9576                 return;
9577
9578         free_trace_buffer(&tr->array_buffer);
9579
9580 #ifdef CONFIG_TRACER_MAX_TRACE
9581         free_trace_buffer(&tr->max_buffer);
9582 #endif
9583 }
9584
9585 static void init_trace_flags_index(struct trace_array *tr)
9586 {
9587         int i;
9588
9589         /* Used by the trace options files */
9590         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9591                 tr->trace_flags_index[i] = i;
9592 }
9593
9594 static void __update_tracer_options(struct trace_array *tr)
9595 {
9596         struct tracer *t;
9597
9598         for (t = trace_types; t; t = t->next)
9599                 add_tracer_options(tr, t);
9600 }
9601
9602 static void update_tracer_options(struct trace_array *tr)
9603 {
9604         mutex_lock(&trace_types_lock);
9605         tracer_options_updated = true;
9606         __update_tracer_options(tr);
9607         mutex_unlock(&trace_types_lock);
9608 }
9609
9610 /* Must have trace_types_lock held */
9611 struct trace_array *trace_array_find(const char *instance)
9612 {
9613         struct trace_array *tr, *found = NULL;
9614
9615         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9616                 if (tr->name && strcmp(tr->name, instance) == 0) {
9617                         found = tr;
9618                         break;
9619                 }
9620         }
9621
9622         return found;
9623 }
9624
9625 struct trace_array *trace_array_find_get(const char *instance)
9626 {
9627         struct trace_array *tr;
9628
9629         mutex_lock(&trace_types_lock);
9630         tr = trace_array_find(instance);
9631         if (tr)
9632                 tr->ref++;
9633         mutex_unlock(&trace_types_lock);
9634
9635         return tr;
9636 }
9637
9638 static int trace_array_create_dir(struct trace_array *tr)
9639 {
9640         int ret;
9641
9642         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9643         if (!tr->dir)
9644                 return -EINVAL;
9645
9646         ret = event_trace_add_tracer(tr->dir, tr);
9647         if (ret) {
9648                 tracefs_remove(tr->dir);
9649                 return ret;
9650         }
9651
9652         init_tracer_tracefs(tr, tr->dir);
9653         __update_tracer_options(tr);
9654
9655         return ret;
9656 }
9657
9658 static struct trace_array *
9659 trace_array_create_systems(const char *name, const char *systems)
9660 {
9661         struct trace_array *tr;
9662         int ret;
9663
9664         ret = -ENOMEM;
9665         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9666         if (!tr)
9667                 return ERR_PTR(ret);
9668
9669         tr->name = kstrdup(name, GFP_KERNEL);
9670         if (!tr->name)
9671                 goto out_free_tr;
9672
9673         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9674                 goto out_free_tr;
9675
9676         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9677                 goto out_free_tr;
9678
9679         if (systems) {
9680                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9681                 if (!tr->system_names)
9682                         goto out_free_tr;
9683         }
9684
9685         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9686
9687         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9688
9689         raw_spin_lock_init(&tr->start_lock);
9690
9691         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9692
9693         tr->current_trace = &nop_trace;
9694
9695         INIT_LIST_HEAD(&tr->systems);
9696         INIT_LIST_HEAD(&tr->events);
9697         INIT_LIST_HEAD(&tr->hist_vars);
9698         INIT_LIST_HEAD(&tr->err_log);
9699
9700         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9701                 goto out_free_tr;
9702
9703         /* The ring buffer is expanded by default */
9704         trace_set_ring_buffer_expanded(tr);
9705
9706         if (ftrace_allocate_ftrace_ops(tr) < 0)
9707                 goto out_free_tr;
9708
9709         ftrace_init_trace_array(tr);
9710
9711         init_trace_flags_index(tr);
9712
9713         if (trace_instance_dir) {
9714                 ret = trace_array_create_dir(tr);
9715                 if (ret)
9716                         goto out_free_tr;
9717         } else
9718                 __trace_early_add_events(tr);
9719
9720         list_add(&tr->list, &ftrace_trace_arrays);
9721
9722         tr->ref++;
9723
9724         return tr;
9725
9726  out_free_tr:
9727         ftrace_free_ftrace_ops(tr);
9728         free_trace_buffers(tr);
9729         free_cpumask_var(tr->pipe_cpumask);
9730         free_cpumask_var(tr->tracing_cpumask);
9731         kfree_const(tr->system_names);
9732         kfree(tr->name);
9733         kfree(tr);
9734
9735         return ERR_PTR(ret);
9736 }
9737
9738 static struct trace_array *trace_array_create(const char *name)
9739 {
9740         return trace_array_create_systems(name, NULL);
9741 }
9742
9743 static int instance_mkdir(const char *name)
9744 {
9745         struct trace_array *tr;
9746         int ret;
9747
9748         mutex_lock(&event_mutex);
9749         mutex_lock(&trace_types_lock);
9750
9751         ret = -EEXIST;
9752         if (trace_array_find(name))
9753                 goto out_unlock;
9754
9755         tr = trace_array_create(name);
9756
9757         ret = PTR_ERR_OR_ZERO(tr);
9758
9759 out_unlock:
9760         mutex_unlock(&trace_types_lock);
9761         mutex_unlock(&event_mutex);
9762         return ret;
9763 }
9764
9765 /**
9766  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9767  * @name: The name of the trace array to be looked up/created.
9768  * @systems: A list of systems to create event directories for (NULL for all)
9769  *
9770  * Returns a pointer to the trace array with the given name, or
9771  * NULL if it cannot be created.
9772  *
9773  * NOTE: This function increments the reference counter associated with the
9774  * trace array returned. This makes sure it cannot be freed while in use.
9775  * Use trace_array_put() once the trace array is no longer needed.
9776  * If the trace_array is to be freed, trace_array_destroy() needs to
9777  * be called after the trace_array_put(), or simply let user space delete
9778  * it from the tracefs instances directory. But until the
9779  * trace_array_put() is called, user space cannot delete it.
9780  *
9781  */
9782 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9783 {
9784         struct trace_array *tr;
9785
9786         mutex_lock(&event_mutex);
9787         mutex_lock(&trace_types_lock);
9788
9789         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9790                 if (tr->name && strcmp(tr->name, name) == 0)
9791                         goto out_unlock;
9792         }
9793
9794         tr = trace_array_create_systems(name, systems);
9795
9796         if (IS_ERR(tr))
9797                 tr = NULL;
9798 out_unlock:
9799         if (tr)
9800                 tr->ref++;
9801
9802         mutex_unlock(&trace_types_lock);
9803         mutex_unlock(&event_mutex);
9804         return tr;
9805 }
9806 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
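
/*
 * Editor's sketch (not upstream code) of typical in-kernel usage of the
 * API exported above; the instance name and error handling are purely
 * illustrative:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	// Drop the reference; then either destroy the instance ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *	// ... or let user space rmdir it from the instances directory.
 */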
9807
9808 static int __remove_instance(struct trace_array *tr)
9809 {
9810         int i;
9811
9812         /* Reference counter for a newly created trace array = 1. */
9813         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9814                 return -EBUSY;
9815
9816         list_del(&tr->list);
9817
9818         /* Disable all the flags that were enabled coming in */
9819         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9820                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9821                         set_tracer_flag(tr, 1 << i, 0);
9822         }
9823
9824         tracing_set_nop(tr);
9825         clear_ftrace_function_probes(tr);
9826         event_trace_del_tracer(tr);
9827         ftrace_clear_pids(tr);
9828         ftrace_destroy_function_files(tr);
9829         tracefs_remove(tr->dir);
9830         free_percpu(tr->last_func_repeats);
9831         free_trace_buffers(tr);
9832         clear_tracing_err_log(tr);
9833
9834         for (i = 0; i < tr->nr_topts; i++) {
9835                 kfree(tr->topts[i].topts);
9836         }
9837         kfree(tr->topts);
9838
9839         free_cpumask_var(tr->pipe_cpumask);
9840         free_cpumask_var(tr->tracing_cpumask);
9841         kfree_const(tr->system_names);
9842         kfree(tr->name);
9843         kfree(tr);
9844
9845         return 0;
9846 }
9847
9848 int trace_array_destroy(struct trace_array *this_tr)
9849 {
9850         struct trace_array *tr;
9851         int ret;
9852
9853         if (!this_tr)
9854                 return -EINVAL;
9855
9856         mutex_lock(&event_mutex);
9857         mutex_lock(&trace_types_lock);
9858
9859         ret = -ENODEV;
9860
9861         /* Make sure the trace array exists before destroying it. */
9862         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9863                 if (tr == this_tr) {
9864                         ret = __remove_instance(tr);
9865                         break;
9866                 }
9867         }
9868
9869         mutex_unlock(&trace_types_lock);
9870         mutex_unlock(&event_mutex);
9871
9872         return ret;
9873 }
9874 EXPORT_SYMBOL_GPL(trace_array_destroy);
9875
9876 static int instance_rmdir(const char *name)
9877 {
9878         struct trace_array *tr;
9879         int ret;
9880
9881         mutex_lock(&event_mutex);
9882         mutex_lock(&trace_types_lock);
9883
9884         ret = -ENODEV;
9885         tr = trace_array_find(name);
9886         if (tr)
9887                 ret = __remove_instance(tr);
9888
9889         mutex_unlock(&trace_types_lock);
9890         mutex_unlock(&event_mutex);
9891
9892         return ret;
9893 }
9894
9895 static __init void create_trace_instances(struct dentry *d_tracer)
9896 {
9897         struct trace_array *tr;
9898
9899         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9900                                                          instance_mkdir,
9901                                                          instance_rmdir);
9902         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9903                 return;
9904
9905         mutex_lock(&event_mutex);
9906         mutex_lock(&trace_types_lock);
9907
9908         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9909                 if (!tr->name)
9910                         continue;
9911                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9912                              "Failed to create instance directory\n"))
9913                         break;
9914         }
9915
9916         mutex_unlock(&trace_types_lock);
9917         mutex_unlock(&event_mutex);
9918 }
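
/*
 * Editor's note: the instance_mkdir()/instance_rmdir() callbacks
 * registered above are what make the "instances" directory work from
 * user space, e.g. (assuming the default tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo    # trace_array_create("foo")
 *   rmdir /sys/kernel/tracing/instances/foo    # __remove_instance()
 *
 * The rmdir fails with -EBUSY while the instance still holds references.
 */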
9919
9920 static void
9921 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9922 {
9923         int cpu;
9924
9925         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9926                         tr, &show_traces_fops);
9927
9928         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9929                         tr, &set_tracer_fops);
9930
9931         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9932                           tr, &tracing_cpumask_fops);
9933
9934         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9935                           tr, &tracing_iter_fops);
9936
9937         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9938                           tr, &tracing_fops);
9939
9940         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9941                           tr, &tracing_pipe_fops);
9942
9943         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9944                           tr, &tracing_entries_fops);
9945
9946         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9947                           tr, &tracing_total_entries_fops);
9948
9949         trace_create_file("free_buffer", 0200, d_tracer,
9950                           tr, &tracing_free_buffer_fops);
9951
9952         trace_create_file("trace_marker", 0220, d_tracer,
9953                           tr, &tracing_mark_fops);
9954
9955         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9956
9957         trace_create_file("trace_marker_raw", 0220, d_tracer,
9958                           tr, &tracing_mark_raw_fops);
9959
9960         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9961                           &trace_clock_fops);
9962
9963         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9964                           tr, &rb_simple_fops);
9965
9966         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9967                           &trace_time_stamp_mode_fops);
9968
9969         tr->buffer_percent = 50;
9970
9971         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9972                         tr, &buffer_percent_fops);
9973
9974         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9975                           tr, &buffer_subbuf_size_fops);
9976
9977         create_trace_options_dir(tr);
9978
9979 #ifdef CONFIG_TRACER_MAX_TRACE
9980         trace_create_maxlat_file(tr, d_tracer);
9981 #endif
9982
9983         if (ftrace_create_function_files(tr, d_tracer))
9984                 MEM_FAIL(1, "Could not allocate function filter files");
9985
9986 #ifdef CONFIG_TRACER_SNAPSHOT
9987         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9988                           tr, &snapshot_fops);
9989 #endif
9990
9991         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9992                           tr, &tracing_err_log_fops);
9993
9994         for_each_tracing_cpu(cpu)
9995                 tracing_init_tracefs_percpu(tr, cpu);
9996
9997         ftrace_init_tracefs(tr, d_tracer);
9998 }
9999
10000 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10001 {
10002         struct vfsmount *mnt;
10003         struct file_system_type *type;
10004
10005         /*
10006          * To maintain backward compatibility for tools that mount
10007          * debugfs to get to the tracing facility, tracefs is automatically
10008          * mounted to the debugfs/tracing directory.
10009          */
10010         type = get_fs_type("tracefs");
10011         if (!type)
10012                 return NULL;
10013         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10014         put_filesystem(type);
10015         if (IS_ERR(mnt))
10016                 return NULL;
10017         mntget(mnt);
10018
10019         return mnt;
10020 }
10021
10022 /**
10023  * tracing_init_dentry - initialize top level trace array
10024  *
10025  * This is called when creating files or directories in the tracing
10026  * directory. It is called via fs_initcall() by any of the boot up code
10027  * and expects to return the dentry of the top level tracing directory.
10028  */
10029 int tracing_init_dentry(void)
10030 {
10031         struct trace_array *tr = &global_trace;
10032
10033         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10034                 pr_warn("Tracing disabled due to lockdown\n");
10035                 return -EPERM;
10036         }
10037
10038         /* The top level trace array uses NULL as parent */
10039         if (tr->dir)
10040                 return 0;
10041
10042         if (WARN_ON(!tracefs_initialized()))
10043                 return -ENODEV;
10044
10045         /*
10046          * As there may still be users that expect the tracing
10047          * files to exist in debugfs/tracing, we must automount
10048          * the tracefs file system there, so older tools still
10049          * work with the newer kernel.
10050          */
10051         tr->dir = debugfs_create_automount("tracing", NULL,
10052                                            trace_automount, NULL);
10053
10054         return 0;
10055 }
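
/*
 * Editor's note: the automount set up above is why both of these paths
 * reach the same files (the second only when debugfs is mounted); the
 * paths assume the conventional tracefs and debugfs mount points:
 *
 *   /sys/kernel/tracing           - native tracefs mount
 *   /sys/kernel/debug/tracing     - tracefs automounted inside debugfs
 */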
10056
10057 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10058 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10059
10060 static struct workqueue_struct *eval_map_wq __initdata;
10061 static struct work_struct eval_map_work __initdata;
10062 static struct work_struct tracerfs_init_work __initdata;
10063
10064 static void __init eval_map_work_func(struct work_struct *work)
10065 {
10066         int len;
10067
10068         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10069         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10070 }
10071
10072 static int __init trace_eval_init(void)
10073 {
10074         INIT_WORK(&eval_map_work, eval_map_work_func);
10075
10076         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10077         if (!eval_map_wq) {
10078                 pr_err("Unable to allocate eval_map_wq\n");
10079                 /* Do work here */
10080                 eval_map_work_func(&eval_map_work);
10081                 return -ENOMEM;
10082         }
10083
10084         queue_work(eval_map_wq, &eval_map_work);
10085         return 0;
10086 }
10087
10088 subsys_initcall(trace_eval_init);
10089
10090 static int __init trace_eval_sync(void)
10091 {
10092         /* Make sure the eval map updates are finished */
10093         if (eval_map_wq)
10094                 destroy_workqueue(eval_map_wq);
10095         return 0;
10096 }
10097
10098 late_initcall_sync(trace_eval_sync);
10099
10100
10101 #ifdef CONFIG_MODULES
10102 static void trace_module_add_evals(struct module *mod)
10103 {
10104         if (!mod->num_trace_evals)
10105                 return;
10106
10107         /*
10108          * Modules with bad taint do not have events created, so do
10109          * not bother with their eval maps (enums) either.
10110          */
10111         if (trace_module_has_bad_taint(mod))
10112                 return;
10113
10114         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10115 }
10116
10117 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10118 static void trace_module_remove_evals(struct module *mod)
10119 {
10120         union trace_eval_map_item *map;
10121         union trace_eval_map_item **last = &trace_eval_maps;
10122
10123         if (!mod->num_trace_evals)
10124                 return;
10125
10126         mutex_lock(&trace_eval_mutex);
10127
10128         map = trace_eval_maps;
10129
10130         while (map) {
10131                 if (map->head.mod == mod)
10132                         break;
10133                 map = trace_eval_jmp_to_tail(map);
10134                 last = &map->tail.next;
10135                 map = map->tail.next;
10136         }
10137         if (!map)
10138                 goto out;
10139
10140         *last = trace_eval_jmp_to_tail(map)->tail.next;
10141         kfree(map);
10142  out:
10143         mutex_unlock(&trace_eval_mutex);
10144 }
10145 #else
10146 static inline void trace_module_remove_evals(struct module *mod) { }
10147 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10148
10149 static int trace_module_notify(struct notifier_block *self,
10150                                unsigned long val, void *data)
10151 {
10152         struct module *mod = data;
10153
10154         switch (val) {
10155         case MODULE_STATE_COMING:
10156                 trace_module_add_evals(mod);
10157                 break;
10158         case MODULE_STATE_GOING:
10159                 trace_module_remove_evals(mod);
10160                 break;
10161         }
10162
10163         return NOTIFY_OK;
10164 }
10165
10166 static struct notifier_block trace_module_nb = {
10167         .notifier_call = trace_module_notify,
10168         .priority = 0,
10169 };
10170 #endif /* CONFIG_MODULES */
10171
10172 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10173 {
10174
10175         event_trace_init();
10176
10177         init_tracer_tracefs(&global_trace, NULL);
10178         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10179
10180         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10181                         &global_trace, &tracing_thresh_fops);
10182
10183         trace_create_file("README", TRACE_MODE_READ, NULL,
10184                         NULL, &tracing_readme_fops);
10185
10186         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10187                         NULL, &tracing_saved_cmdlines_fops);
10188
10189         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10190                           NULL, &tracing_saved_cmdlines_size_fops);
10191
10192         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10193                         NULL, &tracing_saved_tgids_fops);
10194
10195         trace_create_eval_file(NULL);
10196
10197 #ifdef CONFIG_MODULES
10198         register_module_notifier(&trace_module_nb);
10199 #endif
10200
10201 #ifdef CONFIG_DYNAMIC_FTRACE
10202         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10203                         NULL, &tracing_dyn_info_fops);
10204 #endif
10205
10206         create_trace_instances(NULL);
10207
10208         update_tracer_options(&global_trace);
10209 }
10210
10211 static __init int tracer_init_tracefs(void)
10212 {
10213         int ret;
10214
10215         trace_access_lock_init();
10216
10217         ret = tracing_init_dentry();
10218         if (ret)
10219                 return 0;
10220
10221         if (eval_map_wq) {
10222                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10223                 queue_work(eval_map_wq, &tracerfs_init_work);
10224         } else {
10225                 tracer_init_tracefs_work_func(NULL);
10226         }
10227
10228         rv_init_interface();
10229
10230         return 0;
10231 }
10232
10233 fs_initcall(tracer_init_tracefs);
10234
10235 static int trace_die_panic_handler(struct notifier_block *self,
10236                                 unsigned long ev, void *unused);
10237
10238 static struct notifier_block trace_panic_notifier = {
10239         .notifier_call = trace_die_panic_handler,
10240         .priority = INT_MAX - 1,
10241 };
10242
10243 static struct notifier_block trace_die_notifier = {
10244         .notifier_call = trace_die_panic_handler,
10245         .priority = INT_MAX - 1,
10246 };
10247
10248 /*
10249  * The idea is to execute the following die/panic callback early, in order
10250  * to avoid showing irrelevant information in the trace (like other panic
10251  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10252  * warnings get disabled (to prevent potential log flooding).
10253  */
10254 static int trace_die_panic_handler(struct notifier_block *self,
10255                                 unsigned long ev, void *unused)
10256 {
10257         if (!ftrace_dump_on_oops)
10258                 return NOTIFY_DONE;
10259
10260         /* The die notifier requires DIE_OOPS to trigger */
10261         if (self == &trace_die_notifier && ev != DIE_OOPS)
10262                 return NOTIFY_DONE;
10263
10264         ftrace_dump(ftrace_dump_on_oops);
10265
10266         return NOTIFY_DONE;
10267 }
10268
10269 /*
10270  * printk is set to a max of 1024; we really don't need it that big.
10271  * Nothing should be printing 1000 characters anyway.
10272  */
10273 #define TRACE_MAX_PRINT         1000
10274
10275 /*
10276  * Define here KERN_TRACE so that we have one place to modify
10277  * it if we decide to change what log level the ftrace dump
10278  * should be at.
10279  */
10280 #define KERN_TRACE              KERN_EMERG
10281
10282 void
10283 trace_printk_seq(struct trace_seq *s)
10284 {
10285         /* Probably should print a warning here. */
10286         if (s->seq.len >= TRACE_MAX_PRINT)
10287                 s->seq.len = TRACE_MAX_PRINT;
10288
10289         /*
10290          * More paranoid code. Although the buffer size is set to
10291          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10292          * an extra layer of protection.
10293          */
10294         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10295                 s->seq.len = s->seq.size - 1;
10296
10297         /* should already be nul-terminated, but we are paranoid. */
10298         s->buffer[s->seq.len] = 0;
10299
10300         printk(KERN_TRACE "%s", s->buffer);
10301
10302         trace_seq_init(s);
10303 }
10304
10305 void trace_init_global_iter(struct trace_iterator *iter)
10306 {
10307         iter->tr = &global_trace;
10308         iter->trace = iter->tr->current_trace;
10309         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10310         iter->array_buffer = &global_trace.array_buffer;
10311
10312         if (iter->trace && iter->trace->open)
10313                 iter->trace->open(iter);
10314
10315         /* Annotate start of buffers if we had overruns */
10316         if (ring_buffer_overruns(iter->array_buffer->buffer))
10317                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10318
10319         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10320         if (trace_clocks[iter->tr->clock_id].in_ns)
10321                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10322
10323         /* Cannot use kmalloc for iter.temp and iter.fmt */
10324         iter->temp = static_temp_buf;
10325         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10326         iter->fmt = static_fmt_buf;
10327         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10328 }
10329
10330 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10331 {
10332         /* use static because iter can be a bit big for the stack */
10333         static struct trace_iterator iter;
10334         static atomic_t dump_running;
10335         struct trace_array *tr = &global_trace;
10336         unsigned int old_userobj;
10337         unsigned long flags;
10338         int cnt = 0, cpu;
10339
10340         /* Only allow one dump user at a time. */
10341         if (atomic_inc_return(&dump_running) != 1) {
10342                 atomic_dec(&dump_running);
10343                 return;
10344         }
10345
10346         /*
10347          * Always turn off tracing when we dump.
10348          * We don't need to show trace output of what happens
10349          * between multiple crashes.
10350          *
10351          * If the user does a sysrq-z, then they can re-enable
10352          * tracing with echo 1 > tracing_on.
10353          */
10354         tracing_off();
10355
10356         local_irq_save(flags);
10357
10358         /* Simulate the iterator */
10359         trace_init_global_iter(&iter);
10360
10361         for_each_tracing_cpu(cpu) {
10362                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10363         }
10364
10365         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10366
10367         /* don't look at user memory in panic mode */
10368         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10369
10370         switch (oops_dump_mode) {
10371         case DUMP_ALL:
10372                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10373                 break;
10374         case DUMP_ORIG:
10375                 iter.cpu_file = raw_smp_processor_id();
10376                 break;
10377         case DUMP_NONE:
10378                 goto out_enable;
10379         default:
10380                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10381                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10382         }
10383
10384         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10385
10386         /* Did function tracer already get disabled? */
10387         if (ftrace_is_dead()) {
10388                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10389                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10390         }
10391
10392         /*
10393          * We need to stop all tracing on all CPUs to read
10394          * the next buffer. This is a bit expensive, but is
10395          * not done often. We fill all that we can read,
10396          * and then release the locks again.
10397          */
10398
10399         while (!trace_empty(&iter)) {
10400
10401                 if (!cnt)
10402                         printk(KERN_TRACE "---------------------------------\n");
10403
10404                 cnt++;
10405
10406                 trace_iterator_reset(&iter);
10407                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10408
10409                 if (trace_find_next_entry_inc(&iter) != NULL) {
10410                         int ret;
10411
10412                         ret = print_trace_line(&iter);
10413                         if (ret != TRACE_TYPE_NO_CONSUME)
10414                                 trace_consume(&iter);
10415                 }
10416                 touch_nmi_watchdog();
10417
10418                 trace_printk_seq(&iter.seq);
10419         }
10420
10421         if (!cnt)
10422                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10423         else
10424                 printk(KERN_TRACE "---------------------------------\n");
10425
10426  out_enable:
10427         tr->trace_flags |= old_userobj;
10428
10429         for_each_tracing_cpu(cpu) {
10430                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10431         }
10432         atomic_dec(&dump_running);
10433         local_irq_restore(flags);
10434 }
10435 EXPORT_SYMBOL_GPL(ftrace_dump);
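
/*
 * Editor's summary of the oops_dump_mode handling in ftrace_dump() above:
 * DUMP_ALL dumps every CPU's buffer, DUMP_ORIG dumps only the CPU doing
 * the dump, DUMP_NONE skips dumping, and any other value falls back to a
 * full dump. Tracing is turned off first; after a sysrq-z dump it can be
 * re-enabled with "echo 1 > tracing_on" under the tracefs mount point.
 */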
10436
10437 #define WRITE_BUFSIZE  4096
10438
10439 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10440                                 size_t count, loff_t *ppos,
10441                                 int (*createfn)(const char *))
10442 {
10443         char *kbuf, *buf, *tmp;
10444         int ret = 0;
10445         size_t done = 0;
10446         size_t size;
10447
10448         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10449         if (!kbuf)
10450                 return -ENOMEM;
10451
10452         while (done < count) {
10453                 size = count - done;
10454
10455                 if (size >= WRITE_BUFSIZE)
10456                         size = WRITE_BUFSIZE - 1;
10457
10458                 if (copy_from_user(kbuf, buffer + done, size)) {
10459                         ret = -EFAULT;
10460                         goto out;
10461                 }
10462                 kbuf[size] = '\0';
10463                 buf = kbuf;
10464                 do {
10465                         tmp = strchr(buf, '\n');
10466                         if (tmp) {
10467                                 *tmp = '\0';
10468                                 size = tmp - buf + 1;
10469                         } else {
10470                                 size = strlen(buf);
10471                                 if (done + size < count) {
10472                                         if (buf != kbuf)
10473                                                 break;
10474                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10475                                         pr_warn("Line length is too long: Should be less than %d\n",
10476                                                 WRITE_BUFSIZE - 2);
10477                                         ret = -EINVAL;
10478                                         goto out;
10479                                 }
10480                         }
10481                         done += size;
10482
10483                         /* Remove comments */
10484                         tmp = strchr(buf, '#');
10485
10486                         if (tmp)
10487                                 *tmp = '\0';
10488
10489                         ret = createfn(buf);
10490                         if (ret)
10491                                 goto out;
10492                         buf += size;
10493
10494                 } while (done < count);
10495         }
10496         ret = done;
10497
10498 out:
10499         kfree(kbuf);
10500
10501         return ret;
10502 }
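
/*
 * Editor's example of the input accepted by trace_parse_run_command():
 * the write is split on newlines, anything after a '#' on a line is
 * dropped, and createfn() runs once per remaining line. The command
 * syntax below is only illustrative; the real grammar belongs to the
 * createfn() callback:
 *
 *   p:myprobe do_sys_open
 *   # this line is ignored
 *   -:myprobe
 */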
10503
10504 #ifdef CONFIG_TRACER_MAX_TRACE
10505 __init static bool tr_needs_alloc_snapshot(const char *name)
10506 {
10507         char *test;
10508         int len = strlen(name);
10509         bool ret;
10510
10511         if (!boot_snapshot_index)
10512                 return false;
10513
10514         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10515             boot_snapshot_info[len] == '\t')
10516                 return true;
10517
10518         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10519         if (!test)
10520                 return false;
10521
10522         sprintf(test, "\t%s\t", name);
10523         ret = strstr(boot_snapshot_info, test) == NULL;
10524         kfree(test);
10525         return ret;
10526 }
10527
10528 __init static void do_allocate_snapshot(const char *name)
10529 {
10530         if (!tr_needs_alloc_snapshot(name))
10531                 return;
10532
10533         /*
10534          * When allocate_snapshot is set, the next call to
10535          * allocate_trace_buffers() (called by trace_array_get_by_name())
10536          * will allocate the snapshot buffer. That will also clear
10537          * this flag.
10538          */
10539         allocate_snapshot = true;
10540 }
10541 #else
10542 static inline void do_allocate_snapshot(const char *name) { }
10543 #endif
10544
10545 __init static void enable_instances(void)
10546 {
10547         struct trace_array *tr;
10548         char *curr_str;
10549         char *str;
10550         char *tok;
10551
10552         /* A tab is always appended */
10553         boot_instance_info[boot_instance_index - 1] = '\0';
10554         str = boot_instance_info;
10555
10556         while ((curr_str = strsep(&str, "\t"))) {
10557
10558                 tok = strsep(&curr_str, ",");
10559
10560                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10561                         do_allocate_snapshot(tok);
10562
10563                 tr = trace_array_get_by_name(tok, NULL);
10564                 if (!tr) {
10565                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10566                         continue;
10567                 }
10568                 /* Allow user space to delete it */
10569                 trace_array_put(tr);
10570
10571                 while ((tok = strsep(&curr_str, ","))) {
10572                         early_enable_events(tr, tok, true);
10573                 }
10574         }
10575 }
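
/*
 * Editor's note on the format parsed by enable_instances() above:
 * boot_instance_info is a tab-separated list of instance descriptions,
 * each a comma-separated list whose first token is the instance name and
 * whose remaining tokens are events to enable early. For example (names
 * and events are illustrative only):
 *
 *   "foo,sched:sched_switch\tbar\t"
 *
 * creates instance "foo" with one early event and instance "bar".
 */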
10576
10577 __init static int tracer_alloc_buffers(void)
10578 {
10579         int ring_buf_size;
10580         int ret = -ENOMEM;
10581
10582
10583         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10584                 pr_warn("Tracing disabled due to lockdown\n");
10585                 return -EPERM;
10586         }
10587
10588         /*
10589          * Make sure we don't accidentally add more trace options
10590          * than we have bits for.
10591          */
10592         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10593
10594         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10595                 goto out;
10596
10597         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10598                 goto out_free_buffer_mask;
10599
10600         /* Only allocate trace_printk buffers if a trace_printk exists */
10601         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10602                 /* Must be called before global_trace.buffer is allocated */
10603                 trace_printk_init_buffers();
10604
10605         /* To save memory, keep the ring buffer size at its minimum */
10606         if (global_trace.ring_buffer_expanded)
10607                 ring_buf_size = trace_buf_size;
10608         else
10609                 ring_buf_size = 1;
10610
10611         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10612         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10613
10614         raw_spin_lock_init(&global_trace.start_lock);
10615
10616         /*
10617          * The prepare callback allocates some memory for the ring buffer. We
10618          * don't free the buffer if the CPU goes down. If we were to free
10619          * the buffer, then the user would lose any trace that was in the
10620          * buffer. The memory will be removed once the "instance" is removed.
10621          */
10622         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10623                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10624                                       NULL);
10625         if (ret < 0)
10626                 goto out_free_cpumask;
10627         /* Used for event triggers */
10628         ret = -ENOMEM;
10629         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10630         if (!temp_buffer)
10631                 goto out_rm_hp_state;
10632
10633         if (trace_create_savedcmd() < 0)
10634                 goto out_free_temp_buffer;
10635
10636         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10637                 goto out_free_savedcmd;
10638
10639         /* TODO: make the number of buffers hot pluggable with CPUs */
10640         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10641                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10642                 goto out_free_pipe_cpumask;
10643         }
10644         if (global_trace.buffer_disabled)
10645                 tracing_off();
10646
10647         if (trace_boot_clock) {
10648                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10649                 if (ret < 0)
10650                         pr_warn("Trace clock %s not defined, going back to default\n",
10651                                 trace_boot_clock);
10652         }
10653
10654         /*
10655          * register_tracer() might reference current_trace, so it
10656          * needs to be set before we register anything. This is
10657          * just a bootstrap of current_trace anyway.
10658          */
10659         global_trace.current_trace = &nop_trace;
10660
10661         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10662
10663         ftrace_init_global_array_ops(&global_trace);
10664
10665         init_trace_flags_index(&global_trace);
10666
10667         register_tracer(&nop_trace);
10668
10669         /* Function tracing may start here (via kernel command line) */
10670         init_function_trace();
10671
10672         /* All seems OK, enable tracing */
10673         tracing_disabled = 0;
10674
10675         atomic_notifier_chain_register(&panic_notifier_list,
10676                                        &trace_panic_notifier);
10677
10678         register_die_notifier(&trace_die_notifier);
10679
10680         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10681
10682         INIT_LIST_HEAD(&global_trace.systems);
10683         INIT_LIST_HEAD(&global_trace.events);
10684         INIT_LIST_HEAD(&global_trace.hist_vars);
10685         INIT_LIST_HEAD(&global_trace.err_log);
10686         list_add(&global_trace.list, &ftrace_trace_arrays);
10687
10688         apply_trace_boot_options();
10689
10690         register_snapshot_cmd();
10691
10692         test_can_verify();
10693
10694         return 0;
10695
10696 out_free_pipe_cpumask:
10697         free_cpumask_var(global_trace.pipe_cpumask);
10698 out_free_savedcmd:
10699         free_saved_cmdlines_buffer(savedcmd);
10700 out_free_temp_buffer:
10701         ring_buffer_free(temp_buffer);
10702 out_rm_hp_state:
10703         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10704 out_free_cpumask:
10705         free_cpumask_var(global_trace.tracing_cpumask);
10706 out_free_buffer_mask:
10707         free_cpumask_var(tracing_buffer_mask);
10708 out:
10709         return ret;
10710 }
10711
10712 void __init ftrace_boot_snapshot(void)
10713 {
10714 #ifdef CONFIG_TRACER_MAX_TRACE
10715         struct trace_array *tr;
10716
10717         if (!snapshot_at_boot)
10718                 return;
10719
10720         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10721                 if (!tr->allocated_snapshot)
10722                         continue;
10723
10724                 tracing_snapshot_instance(tr);
10725                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10726         }
10727 #endif
10728 }
10729
10730 void __init early_trace_init(void)
10731 {
10732         if (tracepoint_printk) {
10733                 tracepoint_print_iter =
10734                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10735                 if (MEM_FAIL(!tracepoint_print_iter,
10736                              "Failed to allocate trace iterator\n"))
10737                         tracepoint_printk = 0;
10738                 else
10739                         static_key_enable(&tracepoint_printk_key.key);
10740         }
10741         tracer_alloc_buffers();
10742
10743         init_events();
10744 }
10745
10746 void __init trace_init(void)
10747 {
10748         trace_event_init();
10749
10750         if (boot_instance_index)
10751                 enable_instances();
10752 }
10753
10754 __init static void clear_boot_tracer(void)
10755 {
10756         /*
10757          * The default bootup tracer name is kept in an init section buffer.
10758          * This function is called at late init. If we did not
10759          * find the boot tracer, then clear it out, to prevent
10760          * later registration from accessing the buffer that is
10761          * about to be freed.
10762          */
10763         if (!default_bootup_tracer)
10764                 return;
10765
10766         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10767                default_bootup_tracer);
10768         default_bootup_tracer = NULL;
10769 }
10770
10771 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10772 __init static void tracing_set_default_clock(void)
10773 {
10774         /* sched_clock_stable() is determined in late_initcall */
10775         if (!trace_boot_clock && !sched_clock_stable()) {
10776                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10777                         pr_warn("Can not set tracing clock due to lockdown\n");
10778                         return;
10779                 }
10780
10781                 printk(KERN_WARNING
10782                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10783                        "If you want to keep using the local clock, then add:\n"
10784                        "  \"trace_clock=local\"\n"
10785                        "on the kernel command line\n");
10786                 tracing_set_clock(&global_trace, "global");
10787         }
10788 }
10789 #else
10790 static inline void tracing_set_default_clock(void) { }
10791 #endif
10792
10793 __init static int late_trace_init(void)
10794 {
10795         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10796                 static_key_disable(&tracepoint_printk_key.key);
10797                 tracepoint_printk = 0;
10798         }
10799
10800         tracing_set_default_clock();
10801         clear_boot_tracer();
10802         return 0;
10803 }
10804
10805 late_initcall_sync(late_trace_init);