kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries it inserted, but concurrent insertions into the
61  * ring buffer, such as trace_printk(), could occur at the
62  * same time and give false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default. You can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
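/*
 * Illustrative layout sketch (not code from this file): assuming a module
 * that registers three eval maps, the saved array would look like
 *
 *   item[0].head -> { .mod = <owning module or NULL>, .length = 3 }
 *   item[1..3]   -> the three struct trace_eval_map entries
 *   item[4].tail -> { .next = <next saved array or NULL>, .end = NULL }
 *
 * so a walker steps over head.length map entries and then follows
 * tail.next to the next saved block, stopping when it is NULL.
 */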
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163                                    unsigned long flags, int pc);
164
165 #define MAX_TRACER_SIZE         100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168
169 static bool allocate_snapshot;
170
171 static int __init set_cmdline_ftrace(char *str)
172 {
173         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174         default_bootup_tracer = bootup_tracer_buf;
175         /* We are using ftrace early, expand it */
176         ring_buffer_expanded = true;
177         return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183         if (*str++ != '=' || !*str) {
184                 ftrace_dump_on_oops = DUMP_ALL;
185                 return 1;
186         }
187
188         if (!strcmp("orig_cpu", str)) {
189                 ftrace_dump_on_oops = DUMP_ORIG;
190                 return 1;
191         }
192
193         return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
196
197 static int __init stop_trace_on_warning(char *str)
198 {
199         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200                 __disable_trace_on_warning = 1;
201         return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204
205 static int __init boot_alloc_snapshot(char *str)
206 {
207         allocate_snapshot = true;
208         /* We also need the main ring buffer expanded */
209         ring_buffer_expanded = true;
210         return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213
214
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216
217 static int __init set_trace_boot_options(char *str)
218 {
219         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220         return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226
227 static int __init set_trace_boot_clock(char *str)
228 {
229         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230         trace_boot_clock = trace_boot_clock_buf;
231         return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234
235 static int __init set_tracepoint_printk(char *str)
236 {
237         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238                 tracepoint_printk = 1;
239         return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242
243 unsigned long long ns2usecs(u64 nsec)
244 {
245         nsec += 500;
246         do_div(nsec, 1000);
247         return nsec;
248 }
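/*
 * For example, ns2usecs(1499) == 1 and ns2usecs(1500) == 2: adding 500
 * before the divide rounds to the nearest microsecond instead of
 * truncating.
 */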
249
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS                                             \
252         (FUNCTION_DEFAULT_FLAGS |                                       \
253          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
254          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
255          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
256          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
260                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271         .trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273
274 LIST_HEAD(ftrace_trace_arrays);
275
276 int trace_array_get(struct trace_array *this_tr)
277 {
278         struct trace_array *tr;
279         int ret = -ENODEV;
280
281         mutex_lock(&trace_types_lock);
282         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283                 if (tr == this_tr) {
284                         tr->ref++;
285                         ret = 0;
286                         break;
287                 }
288         }
289         mutex_unlock(&trace_types_lock);
290
291         return ret;
292 }
293
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296         WARN_ON(!this_tr->ref);
297         this_tr->ref--;
298 }
299
300 void trace_array_put(struct trace_array *this_tr)
301 {
302         mutex_lock(&trace_types_lock);
303         __trace_array_put(this_tr);
304         mutex_unlock(&trace_types_lock);
305 }
306
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308                               struct ring_buffer *buffer,
309                               struct ring_buffer_event *event)
310 {
311         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312             !filter_match_preds(call->filter, rec)) {
313                 __trace_event_discard_commit(buffer, event);
314                 return 1;
315         }
316
317         return 0;
318 }
319
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322         vfree(pid_list->pids);
323         kfree(pid_list);
324 }
325
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336         /*
337          * If pid_max changed after filtered_pids was created, we
338          * by default ignore all pids greater than the previous pid_max.
339          */
340         if (search_pid >= filtered_pids->pid_max)
341                 return false;
342
343         return test_bit(search_pid, filtered_pids->pids);
344 }
345
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358         /*
359          * Return false, because if filtered_pids does not exist,
360          * all pids are good to trace.
361          */
362         if (!filtered_pids)
363                 return false;
364
365         return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367
368 /**
369  * trace_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * If adding a task, if @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork and tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381                                   struct task_struct *self,
382                                   struct task_struct *task)
383 {
384         if (!pid_list)
385                 return;
386
387         /* For forks, we only add if the forking task is listed */
388         if (self) {
389                 if (!trace_find_filtered_pid(pid_list, self->pid))
390                         return;
391         }
392
393         /* Sorry, but we don't support pid_max changing after setting */
394         if (task->pid >= pid_list->pid_max)
395                 return;
396
397         /* "self" is set for forks, and NULL for exits */
398         if (self)
399                 set_bit(task->pid, pid_list->pids);
400         else
401                 clear_bit(task->pid, pid_list->pids);
402 }
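/*
 * Illustrative sketch of the expected call pattern (hypothetical call
 * sites; the real callers hook the fork and exit events elsewhere in the
 * tracing code):
 *
 *   on fork:  trace_filter_add_remove_task(pid_list, parent, child);
 *   on exit:  trace_filter_add_remove_task(pid_list, NULL, task);
 *
 * A child is added only if its parent is already in @pid_list, and a task
 * is cleared from the list when it exits.
 */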
403
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418         unsigned long pid = (unsigned long)v;
419
420         (*pos)++;
421
422         /* pid is already +1 of the actual previous bit */
423         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424
425         /* Return pid + 1 to allow zero to be represented */
426         if (pid < pid_list->pid_max)
427                 return (void *)(pid + 1);
428
429         return NULL;
430 }
431
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445         unsigned long pid;
446         loff_t l = 0;
447
448         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449         if (pid >= pid_list->pid_max)
450                 return NULL;
451
452         /* Return pid + 1 so that zero can be the exit value */
453         for (pid++; pid && l < *pos;
454              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455                 ;
456         return (void *)pid;
457 }
458
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469         unsigned long pid = (unsigned long)v - 1;
470
471         seq_printf(m, "%lu\n", pid);
472         return 0;
473 }
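/*
 * Illustrative sketch of how the three helpers above are typically wired
 * into a seq_file (the p_* names are hypothetical; real users also take
 * the locks that protect their pid list in start/stop):
 *
 *   static void *p_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(pid_list, pos);
 *   }
 *
 *   static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(pid_list, v, pos);
 *   }
 *
 *   static const struct seq_operations example_pid_sops = {
 *           .start = p_start,
 *           .next  = p_next,
 *           .stop  = p_stop,
 *           .show  = trace_pid_show,
 *   };
 *
 * (p_stop, which would drop any locks taken in p_start, is omitted here.)
 * The pid+1 encoding lets a pid of zero be shown without being mistaken
 * for the NULL end-of-iteration marker.
 */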
474
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE            127
477
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479                     struct trace_pid_list **new_pid_list,
480                     const char __user *ubuf, size_t cnt)
481 {
482         struct trace_pid_list *pid_list;
483         struct trace_parser parser;
484         unsigned long val;
485         int nr_pids = 0;
486         ssize_t read = 0;
487         ssize_t ret = 0;
488         loff_t pos;
489         pid_t pid;
490
491         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492                 return -ENOMEM;
493
494         /*
495          * Always recreate a new array. The write is an all or nothing
496          * operation. Always create a new array when adding new pids by
497          * the user. If the operation fails, then the current list is
498          * not modified.
499          */
500         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501         if (!pid_list) {
502                 trace_parser_put(&parser);
503                 return -ENOMEM;
504         }
505
506         pid_list->pid_max = READ_ONCE(pid_max);
507
508         /* Only truncating will shrink pid_max */
509         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510                 pid_list->pid_max = filtered_pids->pid_max;
511
512         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513         if (!pid_list->pids) {
514                 trace_parser_put(&parser);
515                 kfree(pid_list);
516                 return -ENOMEM;
517         }
518
519         if (filtered_pids) {
520                 /* copy the current bits to the new max */
521                 for_each_set_bit(pid, filtered_pids->pids,
522                                  filtered_pids->pid_max) {
523                         set_bit(pid, pid_list->pids);
524                         nr_pids++;
525                 }
526         }
527
528         while (cnt > 0) {
529
530                 pos = 0;
531
532                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
533                 if (ret < 0 || !trace_parser_loaded(&parser))
534                         break;
535
536                 read += ret;
537                 ubuf += ret;
538                 cnt -= ret;
539
540                 ret = -EINVAL;
541                 if (kstrtoul(parser.buffer, 0, &val))
542                         break;
543                 if (val >= pid_list->pid_max)
544                         break;
545
546                 pid = (pid_t)val;
547
548                 set_bit(pid, pid_list->pids);
549                 nr_pids++;
550
551                 trace_parser_clear(&parser);
552                 ret = 0;
553         }
554         trace_parser_put(&parser);
555
556         if (ret < 0) {
557                 trace_free_pid_list(pid_list);
558                 return ret;
559         }
560
561         if (!nr_pids) {
562                 /* Cleared the list of pids */
563                 trace_free_pid_list(pid_list);
564                 read = ret;
565                 pid_list = NULL;
566         }
567
568         *new_pid_list = pid_list;
569
570         return read;
571 }
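/*
 * Illustrative sketch of a ->write() handler built on trace_pid_write()
 * (hypothetical names; the real callers also handle RCU protection and
 * locking around the pointer swap):
 *
 *   struct trace_pid_list *old = ...the current filtered_pids...;
 *   struct trace_pid_list *new;
 *   ssize_t ret;
 *
 *   ret = trace_pid_write(old, &new, ubuf, cnt);
 *   if (ret < 0)
 *           return ret;
 *   rcu_assign_pointer(filtered_pids, new);
 *   ...wait for readers, then trace_free_pid_list(old)...
 *   return ret;
 */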
572
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575         u64 ts;
576
577         /* Early boot up does not have a buffer yet */
578         if (!buf->buffer)
579                 return trace_clock_local();
580
581         ts = ring_buffer_time_stamp(buf->buffer, cpu);
582         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583
584         return ts;
585 }
586
587 u64 ftrace_now(int cpu)
588 {
589         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591
592 /**
593  * tracing_is_enabled - Show if global_trace has been disabled
594  *
595  * Shows if the global trace has been enabled or not. It uses the
596  * mirror flag "buffer_disabled" to be used in fast paths such as for
597  * the irqsoff tracer. But it may be inaccurate due to races. If you
598  * need to know the accurate state, use tracing_is_on() which is a little
599  * slower, but accurate.
600  */
601 int tracing_is_enabled(void)
602 {
603         /*
604          * For quick access (irqsoff uses this in fast path), just
605          * return the mirror variable of the state of the ring buffer.
606          * It's a little racy, but we don't really care.
607          */
608         smp_rmb();
609         return !global_trace.buffer_disabled;
610 }
611
612 /*
613  * trace_buf_size is the size in bytes that is allocated
614  * for a buffer. Note, the number of bytes is always rounded
615  * to page size.
616  *
617  * This number is purposely set to a low value of 16384 entries:
618  * if a dump on oops happens, it is much appreciated not to have
619  * to wait for all that output. In any case, this is configurable
620  * at both boot time and run time.
621  */
622 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
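/*
 * Sanity check on the constant above: 16384 entries * 88 bytes per entry
 * is 1,441,792 bytes, which the ring buffer then rounds up to whole pages
 * as noted in the comment.
 */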
623
624 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer            *trace_types __read_mostly;
628
629 /*
630  * trace_types_lock is used to protect the trace_types list.
631  */
632 DEFINE_MUTEX(trace_types_lock);
633
634 /*
635  * Serialize access to the ring buffer.
636  *
637  * The ring buffer serializes readers, but that is only low level
638  * protection. The validity of the events (returned by ring_buffer_peek()
639  * and friends) is not protected by the ring buffer.
640  *
641  * The content of events may become garbage if we allow another process to
642  * consume these events concurrently:
643  *   A) the page of the consumed events may become a normal page
644  *      (not a reader page) in the ring buffer, and this page will be
645  *      rewritten by the events producer.
646  *   B) the page of the consumed events may become a page for splice_read,
647  *      and this page will be returned to the system.
648  *
649  * These primitives allow multiple processes to access different per-cpu
650  * ring buffers concurrently.
651  *
652  * These primitives do not distinguish read-only from read-consume access.
653  * Multiple read-only accesses are also serialized.
654  */
655
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659
660 static inline void trace_access_lock(int cpu)
661 {
662         if (cpu == RING_BUFFER_ALL_CPUS) {
663                 /* gain it for accessing the whole ring buffer. */
664                 down_write(&all_cpu_access_lock);
665         } else {
666                 /* gain it for accessing a cpu ring buffer. */
667
668                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669                 down_read(&all_cpu_access_lock);
670
671                 /* Secondly block other access to this @cpu ring buffer. */
672                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
673         }
674 }
675
676 static inline void trace_access_unlock(int cpu)
677 {
678         if (cpu == RING_BUFFER_ALL_CPUS) {
679                 up_write(&all_cpu_access_lock);
680         } else {
681                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682                 up_read(&all_cpu_access_lock);
683         }
684 }
685
686 static inline void trace_access_lock_init(void)
687 {
688         int cpu;
689
690         for_each_possible_cpu(cpu)
691                 mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693
694 #else
695
696 static DEFINE_MUTEX(access_lock);
697
698 static inline void trace_access_lock(int cpu)
699 {
700         (void)cpu;
701         mutex_lock(&access_lock);
702 }
703
704 static inline void trace_access_unlock(int cpu)
705 {
706         (void)cpu;
707         mutex_unlock(&access_lock);
708 }
709
710 static inline void trace_access_lock_init(void)
711 {
712 }
713
714 #endif
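/*
 * Illustrative usage of the access primitives above (a sketch, not a
 * specific caller from this file):
 *
 *   trace_access_lock(cpu_file);      cpu_file may be RING_BUFFER_ALL_CPUS
 *   ...consume events from the selected cpu buffer(s)...
 *   trace_access_unlock(cpu_file);
 *
 * On SMP, a single-cpu reader takes all_cpu_access_lock for read plus its
 * own per-cpu mutex, so readers of different CPUs do not serialize against
 * each other, while a RING_BUFFER_ALL_CPUS reader excludes everyone by
 * taking the rwsem for write.
 */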
715
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718                                  unsigned long flags,
719                                  int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721                                       struct ring_buffer *buffer,
722                                       unsigned long flags,
723                                       int skip, int pc, struct pt_regs *regs);
724
725 #else
726 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
727                                         unsigned long flags,
728                                         int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 static inline void ftrace_trace_stack(struct trace_array *tr,
732                                       struct ring_buffer *buffer,
733                                       unsigned long flags,
734                                       int skip, int pc, struct pt_regs *regs)
735 {
736 }
737
738 #endif
739
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742                   int type, unsigned long flags, int pc)
743 {
744         struct trace_entry *ent = ring_buffer_event_data(event);
745
746         tracing_generic_entry_update(ent, type, flags, pc);
747 }
748
749 static __always_inline struct ring_buffer_event *
750 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
751                           int type,
752                           unsigned long len,
753                           unsigned long flags, int pc)
754 {
755         struct ring_buffer_event *event;
756
757         event = ring_buffer_lock_reserve(buffer, len);
758         if (event != NULL)
759                 trace_event_setup(event, type, flags, pc);
760
761         return event;
762 }
763
764 void tracer_tracing_on(struct trace_array *tr)
765 {
766         if (tr->trace_buffer.buffer)
767                 ring_buffer_record_on(tr->trace_buffer.buffer);
768         /*
769          * This flag is looked at when buffers haven't been allocated
770          * yet, or by some tracers (like irqsoff), that just want to
771          * know if the ring buffer has been disabled, but it can handle
772          * races of where it gets disabled but we still do a record.
773          * As the check is in the fast path of the tracers, it is more
774          * important to be fast than accurate.
775          */
776         tr->buffer_disabled = 0;
777         /* Make the flag seen by readers */
778         smp_wmb();
779 }
780
781 /**
782  * tracing_on - enable tracing buffers
783  *
784  * This function enables tracing buffers that may have been
785  * disabled with tracing_off.
786  */
787 void tracing_on(void)
788 {
789         tracer_tracing_on(&global_trace);
790 }
791 EXPORT_SYMBOL_GPL(tracing_on);
792
793
794 static __always_inline void
795 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
796 {
797         __this_cpu_write(trace_taskinfo_save, true);
798
799         /* If this is the temp buffer, we need to commit fully */
800         if (this_cpu_read(trace_buffered_event) == event) {
801                 /* Length is in event->array[0] */
802                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
803                 /* Release the temp buffer */
804                 this_cpu_dec(trace_buffered_event_cnt);
805         } else
806                 ring_buffer_unlock_commit(buffer, event);
807 }
808
809 /**
810  * __trace_puts - write a constant string into the trace buffer.
811  * @ip:    The address of the caller
812  * @str:   The constant string to write
813  * @size:  The size of the string.
814  */
815 int __trace_puts(unsigned long ip, const char *str, int size)
816 {
817         struct ring_buffer_event *event;
818         struct ring_buffer *buffer;
819         struct print_entry *entry;
820         unsigned long irq_flags;
821         int alloc;
822         int pc;
823
824         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
825                 return 0;
826
827         pc = preempt_count();
828
829         if (unlikely(tracing_selftest_running || tracing_disabled))
830                 return 0;
831
832         alloc = sizeof(*entry) + size + 2; /* possible \n added */
833
834         local_save_flags(irq_flags);
835         buffer = global_trace.trace_buffer.buffer;
836         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
837                                             irq_flags, pc);
838         if (!event)
839                 return 0;
840
841         entry = ring_buffer_event_data(event);
842         entry->ip = ip;
843
844         memcpy(&entry->buf, str, size);
845
846         /* Add a newline if necessary */
847         if (entry->buf[size - 1] != '\n') {
848                 entry->buf[size] = '\n';
849                 entry->buf[size + 1] = '\0';
850         } else
851                 entry->buf[size] = '\0';
852
853         __buffer_unlock_commit(buffer, event);
854         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
855
856         return size;
857 }
858 EXPORT_SYMBOL_GPL(__trace_puts);
859
860 /**
861  * __trace_bputs - write the pointer to a constant string into trace buffer
862  * @ip:    The address of the caller
863  * @str:   The constant string to write to the buffer to
864  */
865 int __trace_bputs(unsigned long ip, const char *str)
866 {
867         struct ring_buffer_event *event;
868         struct ring_buffer *buffer;
869         struct bputs_entry *entry;
870         unsigned long irq_flags;
871         int size = sizeof(struct bputs_entry);
872         int pc;
873
874         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
875                 return 0;
876
877         pc = preempt_count();
878
879         if (unlikely(tracing_selftest_running || tracing_disabled))
880                 return 0;
881
882         local_save_flags(irq_flags);
883         buffer = global_trace.trace_buffer.buffer;
884         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
885                                             irq_flags, pc);
886         if (!event)
887                 return 0;
888
889         entry = ring_buffer_event_data(event);
890         entry->ip                       = ip;
891         entry->str                      = str;
892
893         __buffer_unlock_commit(buffer, event);
894         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
895
896         return 1;
897 }
898 EXPORT_SYMBOL_GPL(__trace_bputs);
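/*
 * Sketch of typical usage (callers normally do not invoke __trace_puts()
 * or __trace_bputs() directly): the trace_puts(str) helper macro passes
 * _THIS_IP_ as @ip and, when the string is a compile-time constant, only
 * records its pointer via __trace_bputs(); otherwise it falls back to
 * __trace_puts() with strlen(str). For example:
 *
 *   trace_puts("reached the slow path\n");
 *
 * shows up in the trace output attributed to the calling address.
 */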
899
900 #ifdef CONFIG_TRACER_SNAPSHOT
901 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
902 {
903         struct tracer *tracer = tr->current_trace;
904         unsigned long flags;
905
906         if (in_nmi()) {
907                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
908                 internal_trace_puts("*** snapshot is being ignored        ***\n");
909                 return;
910         }
911
912         if (!tr->allocated_snapshot) {
913                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
914                 internal_trace_puts("*** stopping trace here!   ***\n");
915                 tracing_off();
916                 return;
917         }
918
919         /* Note, snapshot can not be used when the tracer uses it */
920         if (tracer->use_max_tr) {
921                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
922                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
923                 return;
924         }
925
926         local_irq_save(flags);
927         update_max_tr(tr, current, smp_processor_id(), cond_data);
928         local_irq_restore(flags);
929 }
930
931 void tracing_snapshot_instance(struct trace_array *tr)
932 {
933         tracing_snapshot_instance_cond(tr, NULL);
934 }
935
936 /**
937  * tracing_snapshot - take a snapshot of the current buffer.
938  *
939  * This causes a swap between the snapshot buffer and the current live
940  * tracing buffer. You can use this to take snapshots of the live
941  * trace when some condition is triggered, but continue to trace.
942  *
943  * Note, make sure to allocate the snapshot with either
944  * a tracing_snapshot_alloc(), or by doing it manually
945  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
946  *
947  * If the snapshot buffer is not allocated, it will stop tracing.
948  * Basically making a permanent snapshot.
949  */
950 void tracing_snapshot(void)
951 {
952         struct trace_array *tr = &global_trace;
953
954         tracing_snapshot_instance(tr);
955 }
956 EXPORT_SYMBOL_GPL(tracing_snapshot);
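/*
 * Sketch of in-kernel usage (illustrative; the condition and placement are
 * up to whoever is debugging):
 *
 *   tracing_alloc_snapshot();        once, from a context that can sleep
 *
 *   if (suspicious_condition)
 *           tracing_snapshot();      in the code path being debugged
 *
 * The live buffer keeps tracing after the swap, and the captured data can
 * be read back from the "snapshot" file in tracefs.
 */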
957
958 /**
959  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
960  * @tr:         The tracing instance to snapshot
961  * @cond_data:  The data to be tested conditionally, and possibly saved
962  *
963  * This is the same as tracing_snapshot() except that the snapshot is
964  * conditional - the snapshot will only happen if the
965  * cond_snapshot.update() implementation receiving the cond_data
966  * returns true, which means that the trace array's cond_snapshot
967  * update() operation used the cond_data to determine whether the
968  * snapshot should be taken, and if it was, presumably saved it along
969  * with the snapshot.
970  */
971 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
972 {
973         tracing_snapshot_instance_cond(tr, cond_data);
974 }
975 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
976
977 /**
978  * tracing_snapshot_cond_data - get the user data associated with a snapshot
979  * @tr:         The tracing instance
980  *
981  * When the user enables a conditional snapshot using
982  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
983  * with the snapshot.  This accessor is used to retrieve it.
984  *
985  * Should not be called from cond_snapshot.update(), since it takes
986  * the tr->max_lock lock, which the code calling
987  * cond_snapshot.update() has already done.
988  *
989  * Returns the cond_data associated with the trace array's snapshot.
990  */
991 void *tracing_cond_snapshot_data(struct trace_array *tr)
992 {
993         void *cond_data = NULL;
994
995         arch_spin_lock(&tr->max_lock);
996
997         if (tr->cond_snapshot)
998                 cond_data = tr->cond_snapshot->cond_data;
999
1000         arch_spin_unlock(&tr->max_lock);
1001
1002         return cond_data;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1005
1006 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1007                                         struct trace_buffer *size_buf, int cpu_id);
1008 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1009
1010 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1011 {
1012         int ret;
1013
1014         if (!tr->allocated_snapshot) {
1015
1016                 /* allocate spare buffer */
1017                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1018                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1019                 if (ret < 0)
1020                         return ret;
1021
1022                 tr->allocated_snapshot = true;
1023         }
1024
1025         return 0;
1026 }
1027
1028 static void free_snapshot(struct trace_array *tr)
1029 {
1030         /*
1031          * We don't free the ring buffer; instead, we resize it because
1032          * the max_tr ring buffer has some state (e.g. ring->clock) and
1033          * we want to preserve it.
1034          */
1035         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1036         set_buffer_entries(&tr->max_buffer, 1);
1037         tracing_reset_online_cpus(&tr->max_buffer);
1038         tr->allocated_snapshot = false;
1039 }
1040
1041 /**
1042  * tracing_alloc_snapshot - allocate snapshot buffer.
1043  *
1044  * This only allocates the snapshot buffer if it isn't already
1045  * allocated - it doesn't also take a snapshot.
1046  *
1047  * This is meant to be used in cases where the snapshot buffer needs
1048  * to be set up for events that can't sleep but need to be able to
1049  * trigger a snapshot.
1050  */
1051 int tracing_alloc_snapshot(void)
1052 {
1053         struct trace_array *tr = &global_trace;
1054         int ret;
1055
1056         ret = tracing_alloc_snapshot_instance(tr);
1057         WARN_ON(ret < 0);
1058
1059         return ret;
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1062
1063 /**
1064  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1065  *
1066  * This is similar to tracing_snapshot(), but it will allocate the
1067  * snapshot buffer if it isn't already allocated. Use this only
1068  * where it is safe to sleep, as the allocation may sleep.
1069  *
1070  * This causes a swap between the snapshot buffer and the current live
1071  * tracing buffer. You can use this to take snapshots of the live
1072  * trace when some condition is triggered, but continue to trace.
1073  */
1074 void tracing_snapshot_alloc(void)
1075 {
1076         int ret;
1077
1078         ret = tracing_alloc_snapshot();
1079         if (ret < 0)
1080                 return;
1081
1082         tracing_snapshot();
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1085
1086 /**
1087  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1088  * @tr:         The tracing instance
1089  * @cond_data:  User data to associate with the snapshot
1090  * @update:     Implementation of the cond_snapshot update function
1091  *
1092  * Check whether the conditional snapshot for the given instance has
1093  * already been enabled, or if the current tracer is already using a
1094  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1095  * save the cond_data and update function inside.
1096  *
1097  * Returns 0 if successful, error otherwise.
1098  */
1099 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1100                                  cond_update_fn_t update)
1101 {
1102         struct cond_snapshot *cond_snapshot;
1103         int ret = 0;
1104
1105         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1106         if (!cond_snapshot)
1107                 return -ENOMEM;
1108
1109         cond_snapshot->cond_data = cond_data;
1110         cond_snapshot->update = update;
1111
1112         mutex_lock(&trace_types_lock);
1113
1114         ret = tracing_alloc_snapshot_instance(tr);
1115         if (ret)
1116                 goto fail_unlock;
1117
1118         if (tr->current_trace->use_max_tr) {
1119                 ret = -EBUSY;
1120                 goto fail_unlock;
1121         }
1122
1123         /*
1124          * The cond_snapshot can only change to NULL without the
1125          * trace_types_lock. We don't care if we race with it going
1126          * to NULL, but we want to make sure that it's not set to
1127          * something other than NULL when we get here, which we can
1128          * do safely with only holding the trace_types_lock and not
1129          * having to take the max_lock.
1130          */
1131         if (tr->cond_snapshot) {
1132                 ret = -EBUSY;
1133                 goto fail_unlock;
1134         }
1135
1136         arch_spin_lock(&tr->max_lock);
1137         tr->cond_snapshot = cond_snapshot;
1138         arch_spin_unlock(&tr->max_lock);
1139
1140         mutex_unlock(&trace_types_lock);
1141
1142         return ret;
1143
1144  fail_unlock:
1145         mutex_unlock(&trace_types_lock);
1146         kfree(cond_snapshot);
1147         return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
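/*
 * Illustrative sketch of a conditional snapshot user (the callback and its
 * state are hypothetical):
 *
 *   static bool my_update(struct trace_array *tr, void *cond_data)
 *   {
 *           struct my_state *s = cond_data;
 *
 *           return s->hits > s->threshold;
 *   }
 *
 *   ret = tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *   ...
 *   tracing_snapshot_cond(tr, &my_state);    swaps only if my_update() agrees
 *   ...
 *   tracing_snapshot_cond_disable(tr);
 */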
1150
1151 /**
1152  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1153  * @tr:         The tracing instance
1154  *
1155  * Check whether the conditional snapshot for the given instance is
1156  * enabled; if so, free the cond_snapshot associated with it,
1157  * otherwise return -EINVAL.
1158  *
1159  * Returns 0 if successful, error otherwise.
1160  */
1161 int tracing_snapshot_cond_disable(struct trace_array *tr)
1162 {
1163         int ret = 0;
1164
1165         arch_spin_lock(&tr->max_lock);
1166
1167         if (!tr->cond_snapshot)
1168                 ret = -EINVAL;
1169         else {
1170                 kfree(tr->cond_snapshot);
1171                 tr->cond_snapshot = NULL;
1172         }
1173
1174         arch_spin_unlock(&tr->max_lock);
1175
1176         return ret;
1177 }
1178 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1179 #else
1180 void tracing_snapshot(void)
1181 {
1182         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1183 }
1184 EXPORT_SYMBOL_GPL(tracing_snapshot);
1185 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1186 {
1187         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1190 int tracing_alloc_snapshot(void)
1191 {
1192         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1193         return -ENODEV;
1194 }
1195 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1196 void tracing_snapshot_alloc(void)
1197 {
1198         /* Give warning */
1199         tracing_snapshot();
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204         return NULL;
1205 }
1206 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1207 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1208 {
1209         return -ENODEV;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1212 int tracing_snapshot_cond_disable(struct trace_array *tr)
1213 {
1214         return false;
1215 }
1216 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1217 #endif /* CONFIG_TRACER_SNAPSHOT */
1218
1219 void tracer_tracing_off(struct trace_array *tr)
1220 {
1221         if (tr->trace_buffer.buffer)
1222                 ring_buffer_record_off(tr->trace_buffer.buffer);
1223         /*
1224          * This flag is looked at when buffers haven't been allocated
1225          * yet, or by some tracers (like irqsoff), that just want to
1226          * know if the ring buffer has been disabled, but it can handle
1227          * races of where it gets disabled but we still do a record.
1228          * As the check is in the fast path of the tracers, it is more
1229          * important to be fast than accurate.
1230          */
1231         tr->buffer_disabled = 1;
1232         /* Make the flag seen by readers */
1233         smp_wmb();
1234 }
1235
1236 /**
1237  * tracing_off - turn off tracing buffers
1238  *
1239  * This function stops the tracing buffers from recording data.
1240  * It does not disable any overhead the tracers themselves may
1241  * be causing. This function simply causes all recording to
1242  * the ring buffers to fail.
1243  */
1244 void tracing_off(void)
1245 {
1246         tracer_tracing_off(&global_trace);
1247 }
1248 EXPORT_SYMBOL_GPL(tracing_off);
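/*
 * Sketch of the usual debugging pattern (illustrative):
 *
 *   if (some_bug_condition)
 *           tracing_off();
 *
 * placed in a suspect code path stops the ring buffers right at the point
 * of interest, so the frozen trace can be read from tracefs afterwards.
 * This is also what disable_trace_on_warning() below does when the
 * traceoff_on_warning boot option is set.
 */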
1249
1250 void disable_trace_on_warning(void)
1251 {
1252         if (__disable_trace_on_warning)
1253                 tracing_off();
1254 }
1255
1256 /**
1257  * tracer_tracing_is_on - show the real state of the ring buffer
1258  * @tr : the trace array whose ring buffer state is queried
1259  *
1260  * Shows whether the ring buffer of @tr is actually enabled or not.
1261  */
1262 bool tracer_tracing_is_on(struct trace_array *tr)
1263 {
1264         if (tr->trace_buffer.buffer)
1265                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1266         return !tr->buffer_disabled;
1267 }
1268
1269 /**
1270  * tracing_is_on - show state of ring buffers enabled
1271  */
1272 int tracing_is_on(void)
1273 {
1274         return tracer_tracing_is_on(&global_trace);
1275 }
1276 EXPORT_SYMBOL_GPL(tracing_is_on);
1277
1278 static int __init set_buf_size(char *str)
1279 {
1280         unsigned long buf_size;
1281
1282         if (!str)
1283                 return 0;
1284         buf_size = memparse(str, &str);
1285         /* nr_entries can not be zero */
1286         if (buf_size == 0)
1287                 return 0;
1288         trace_buf_size = buf_size;
1289         return 1;
1290 }
1291 __setup("trace_buf_size=", set_buf_size);
1292
1293 static int __init set_tracing_thresh(char *str)
1294 {
1295         unsigned long threshold;
1296         int ret;
1297
1298         if (!str)
1299                 return 0;
1300         ret = kstrtoul(str, 0, &threshold);
1301         if (ret < 0)
1302                 return 0;
1303         tracing_thresh = threshold * 1000;
1304         return 1;
1305 }
1306 __setup("tracing_thresh=", set_tracing_thresh);
1307
1308 unsigned long nsecs_to_usecs(unsigned long nsecs)
1309 {
1310         return nsecs / 1000;
1311 }
1312
1313 /*
1314  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1315  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1316  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1317  * of strings in the order that the evals (enum) were defined.
1318  */
1319 #undef C
1320 #define C(a, b) b
1321
1322 /* These must match the bit positions in trace_iterator_flags */
1323 static const char *trace_options[] = {
1324         TRACE_FLAGS
1325         NULL
1326 };
1327
1328 static struct {
1329         u64 (*func)(void);
1330         const char *name;
1331         int in_ns;              /* is this clock in nanoseconds? */
1332 } trace_clocks[] = {
1333         { trace_clock_local,            "local",        1 },
1334         { trace_clock_global,           "global",       1 },
1335         { trace_clock_counter,          "counter",      0 },
1336         { trace_clock_jiffies,          "uptime",       0 },
1337         { trace_clock,                  "perf",         1 },
1338         { ktime_get_mono_fast_ns,       "mono",         1 },
1339         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1340         { ktime_get_boot_fast_ns,       "boot",         1 },
1341         ARCH_TRACE_CLOCKS
1342 };
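/*
 * The active clock is selected per instance through the tracefs
 * "trace_clock" file, e.g. (illustrative):
 *
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * "local" is the default. The in_ns flag above records whether a clock
 * counts in nanoseconds; see trace_clock_in_ns() below.
 */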
1343
1344 bool trace_clock_in_ns(struct trace_array *tr)
1345 {
1346         if (trace_clocks[tr->clock_id].in_ns)
1347                 return true;
1348
1349         return false;
1350 }
1351
1352 /*
1353  * trace_parser_get_init - gets the buffer for trace parser
1354  */
1355 int trace_parser_get_init(struct trace_parser *parser, int size)
1356 {
1357         memset(parser, 0, sizeof(*parser));
1358
1359         parser->buffer = kmalloc(size, GFP_KERNEL);
1360         if (!parser->buffer)
1361                 return 1;
1362
1363         parser->size = size;
1364         return 0;
1365 }
1366
1367 /*
1368  * trace_parser_put - frees the buffer for trace parser
1369  */
1370 void trace_parser_put(struct trace_parser *parser)
1371 {
1372         kfree(parser->buffer);
1373         parser->buffer = NULL;
1374 }
1375
1376 /*
1377  * trace_get_user - reads the user input string separated by space
1378  * (matched by isspace(ch))
1379  *
1380  * For each string found the 'struct trace_parser' is updated,
1381  * and the function returns.
1382  *
1383  * Returns number of bytes read.
1384  *
1385  * See kernel/trace/trace.h for 'struct trace_parser' details.
1386  */
1387 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1388         size_t cnt, loff_t *ppos)
1389 {
1390         char ch;
1391         size_t read = 0;
1392         ssize_t ret;
1393
1394         if (!*ppos)
1395                 trace_parser_clear(parser);
1396
1397         ret = get_user(ch, ubuf++);
1398         if (ret)
1399                 goto out;
1400
1401         read++;
1402         cnt--;
1403
1404         /*
1405          * If the parser has not finished with the last write, continue
1406          * reading the user input without skipping leading spaces.
1407          */
1408         if (!parser->cont) {
1409                 /* skip white space */
1410                 while (cnt && isspace(ch)) {
1411                         ret = get_user(ch, ubuf++);
1412                         if (ret)
1413                                 goto out;
1414                         read++;
1415                         cnt--;
1416                 }
1417
1418                 parser->idx = 0;
1419
1420                 /* only spaces were written */
1421                 if (isspace(ch) || !ch) {
1422                         *ppos += read;
1423                         ret = read;
1424                         goto out;
1425                 }
1426         }
1427
1428         /* read the non-space input */
1429         while (cnt && !isspace(ch) && ch) {
1430                 if (parser->idx < parser->size - 1)
1431                         parser->buffer[parser->idx++] = ch;
1432                 else {
1433                         ret = -EINVAL;
1434                         goto out;
1435                 }
1436                 ret = get_user(ch, ubuf++);
1437                 if (ret)
1438                         goto out;
1439                 read++;
1440                 cnt--;
1441         }
1442
1443         /* We either got finished input or we have to wait for another call. */
1444         if (isspace(ch) || !ch) {
1445                 parser->buffer[parser->idx] = 0;
1446                 parser->cont = false;
1447         } else if (parser->idx < parser->size - 1) {
1448                 parser->cont = true;
1449                 parser->buffer[parser->idx++] = ch;
1450                 /* Make sure the parsed string always terminates with '\0'. */
1451                 parser->buffer[parser->idx] = 0;
1452         } else {
1453                 ret = -EINVAL;
1454                 goto out;
1455         }
1456
1457         *ppos += read;
1458         ret = read;
1459
1460 out:
1461         return ret;
1462 }
1463
1464 /* TODO add a seq_buf_to_buffer() */
1465 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1466 {
1467         int len;
1468
1469         if (trace_seq_used(s) <= s->seq.readpos)
1470                 return -EBUSY;
1471
1472         len = trace_seq_used(s) - s->seq.readpos;
1473         if (cnt > len)
1474                 cnt = len;
1475         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1476
1477         s->seq.readpos += cnt;
1478         return cnt;
1479 }
1480
1481 unsigned long __read_mostly     tracing_thresh;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 /*
1485  * Copy the new maximum trace into the separate maximum-trace
1486  * structure. (this way the maximum trace is permanently saved,
1487  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1488  */
1489 static void
1490 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1491 {
1492         struct trace_buffer *trace_buf = &tr->trace_buffer;
1493         struct trace_buffer *max_buf = &tr->max_buffer;
1494         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1495         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1496
1497         max_buf->cpu = cpu;
1498         max_buf->time_start = data->preempt_timestamp;
1499
1500         max_data->saved_latency = tr->max_latency;
1501         max_data->critical_start = data->critical_start;
1502         max_data->critical_end = data->critical_end;
1503
1504         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1505         max_data->pid = tsk->pid;
1506         /*
1507          * If tsk == current, then use current_uid(), as that does not use
1508          * RCU. The irq tracer can be called out of RCU scope.
1509          */
1510         if (tsk == current)
1511                 max_data->uid = current_uid();
1512         else
1513                 max_data->uid = task_uid(tsk);
1514
1515         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1516         max_data->policy = tsk->policy;
1517         max_data->rt_priority = tsk->rt_priority;
1518
1519         /* record this tasks comm */
1520         tracing_record_cmdline(tsk);
1521 }
1522
1523 /**
1524  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1525  * @tr: tracer
1526  * @tsk: the task with the latency
1527  * @cpu: The cpu that initiated the trace.
1528  * @cond_data: User data associated with a conditional snapshot
1529  *
1530  * Flip the buffers between the @tr and the max_tr and record information
1531  * about which task was the cause of this latency.
1532  */
1533 void
1534 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1535               void *cond_data)
1536 {
1537         if (tr->stop_count)
1538                 return;
1539
1540         WARN_ON_ONCE(!irqs_disabled());
1541
1542         if (!tr->allocated_snapshot) {
1543                 /* Only the nop tracer should hit this when disabling */
1544                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1545                 return;
1546         }
1547
1548         arch_spin_lock(&tr->max_lock);
1549
1550         /* Inherit the recordable setting from trace_buffer */
1551         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1552                 ring_buffer_record_on(tr->max_buffer.buffer);
1553         else
1554                 ring_buffer_record_off(tr->max_buffer.buffer);
1555
1556 #ifdef CONFIG_TRACER_SNAPSHOT
1557         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1558                 goto out_unlock;
1559 #endif
1560         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1561
1562         __update_max_tr(tr, tsk, cpu);
1563
1564  out_unlock:
1565         arch_spin_unlock(&tr->max_lock);
1566 }
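/*
 * Illustrative sketch of a caller (not from this file): a latency tracer
 * that has just measured a new worst-case latency typically does
 *
 *   if (delta > tr->max_latency) {
 *           tr->max_latency = delta;
 *           update_max_tr(tr, current, smp_processor_id(), NULL);
 *   }
 *
 * so the buffer that recorded the record-setting trace is preserved as
 * max_buffer while live tracing continues in the buffer that was swapped
 * in.
 */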
1567
1568 /**
1569  * update_max_tr_single - only copy one trace over, and reset the rest
1570  * @tr: tracer
1571  * @tsk: task with the latency
1572  * @cpu: the cpu of the buffer to copy.
1573  *
1574  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1575  */
1576 void
1577 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1578 {
1579         int ret;
1580
1581         if (tr->stop_count)
1582                 return;
1583
1584         WARN_ON_ONCE(!irqs_disabled());
1585         if (!tr->allocated_snapshot) {
1586                 /* Only the nop tracer should hit this when disabling */
1587                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1588                 return;
1589         }
1590
1591         arch_spin_lock(&tr->max_lock);
1592
1593         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1594
1595         if (ret == -EBUSY) {
1596                 /*
1597                  * We failed to swap the buffer due to a commit taking
1598                  * place on this CPU. We fail to record, but we reset
1599                  * the max trace buffer (no one writes directly to it)
1600                  * and flag that it failed.
1601                  */
1602                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1603                         "Failed to swap buffers due to commit in progress\n");
1604         }
1605
1606         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1607
1608         __update_max_tr(tr, tsk, cpu);
1609         arch_spin_unlock(&tr->max_lock);
1610 }
1611 #endif /* CONFIG_TRACER_MAX_TRACE */
1612
1613 static int wait_on_pipe(struct trace_iterator *iter, int full)
1614 {
1615         /* Iterators are static, they should be filled or empty */
1616         if (trace_buffer_iter(iter, iter->cpu_file))
1617                 return 0;
1618
1619         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1620                                 full);
1621 }
1622
1623 #ifdef CONFIG_FTRACE_STARTUP_TEST
1624 static bool selftests_can_run;
1625
1626 struct trace_selftests {
1627         struct list_head                list;
1628         struct tracer                   *type;
1629 };
1630
1631 static LIST_HEAD(postponed_selftests);
1632
1633 static int save_selftest(struct tracer *type)
1634 {
1635         struct trace_selftests *selftest;
1636
1637         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1638         if (!selftest)
1639                 return -ENOMEM;
1640
1641         selftest->type = type;
1642         list_add(&selftest->list, &postponed_selftests);
1643         return 0;
1644 }
1645
1646 static int run_tracer_selftest(struct tracer *type)
1647 {
1648         struct trace_array *tr = &global_trace;
1649         struct tracer *saved_tracer = tr->current_trace;
1650         int ret;
1651
1652         if (!type->selftest || tracing_selftest_disabled)
1653                 return 0;
1654
1655         /*
1656          * If a tracer registers early in boot up (before scheduling is
1657          * initialized and such), then do not run its selftests yet.
1658          * Instead, run them a little later in the boot process.
1659          */
1660         if (!selftests_can_run)
1661                 return save_selftest(type);
1662
1663         /*
1664          * Run a selftest on this tracer.
1665          * Here we reset the trace buffer, and set the current
1666          * tracer to be this tracer. The tracer can then run some
1667          * internal tracing to verify that everything is in order.
1668          * If we fail, we do not register this tracer.
1669          */
1670         tracing_reset_online_cpus(&tr->trace_buffer);
1671
1672         tr->current_trace = type;
1673
1674 #ifdef CONFIG_TRACER_MAX_TRACE
1675         if (type->use_max_tr) {
1676                 /* If we expanded the buffers, make sure the max is expanded too */
1677                 if (ring_buffer_expanded)
1678                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1679                                            RING_BUFFER_ALL_CPUS);
1680                 tr->allocated_snapshot = true;
1681         }
1682 #endif
1683
1684         /* the test is responsible for initializing and enabling */
1685         pr_info("Testing tracer %s: ", type->name);
1686         ret = type->selftest(type, tr);
1687         /* the test is responsible for resetting too */
1688         tr->current_trace = saved_tracer;
1689         if (ret) {
1690                 printk(KERN_CONT "FAILED!\n");
1691                 /* Add the warning after printing 'FAILED' */
1692                 WARN_ON(1);
1693                 return -1;
1694         }
1695         /* Only reset on passing, to avoid touching corrupted buffers */
1696         tracing_reset_online_cpus(&tr->trace_buffer);
1697
1698 #ifdef CONFIG_TRACER_MAX_TRACE
1699         if (type->use_max_tr) {
1700                 tr->allocated_snapshot = false;
1701
1702                 /* Shrink the max buffer again */
1703                 if (ring_buffer_expanded)
1704                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1705                                            RING_BUFFER_ALL_CPUS);
1706         }
1707 #endif
1708
1709         printk(KERN_CONT "PASSED\n");
1710         return 0;
1711 }
1712
1713 static __init int init_trace_selftests(void)
1714 {
1715         struct trace_selftests *p, *n;
1716         struct tracer *t, **last;
1717         int ret;
1718
1719         selftests_can_run = true;
1720
1721         mutex_lock(&trace_types_lock);
1722
1723         if (list_empty(&postponed_selftests))
1724                 goto out;
1725
1726         pr_info("Running postponed tracer tests:\n");
1727
1728         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1729                 /* This loop can take minutes when sanitizers are enabled, so
1730                  * let's make sure we allow RCU processing.
1731                  */
1732                 cond_resched();
1733                 ret = run_tracer_selftest(p->type);
1734                 /* If the test fails, then warn and remove from available_tracers */
1735                 if (ret < 0) {
1736                         WARN(1, "tracer: %s failed selftest, disabling\n",
1737                              p->type->name);
1738                         last = &trace_types;
1739                         for (t = trace_types; t; t = t->next) {
1740                                 if (t == p->type) {
1741                                         *last = t->next;
1742                                         break;
1743                                 }
1744                                 last = &t->next;
1745                         }
1746                 }
1747                 list_del(&p->list);
1748                 kfree(p);
1749         }
1750
1751  out:
1752         mutex_unlock(&trace_types_lock);
1753
1754         return 0;
1755 }
1756 core_initcall(init_trace_selftests);
1757 #else
1758 static inline int run_tracer_selftest(struct tracer *type)
1759 {
1760         return 0;
1761 }
1762 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1763
1764 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1765
1766 static void __init apply_trace_boot_options(void);
1767
1768 /**
1769  * register_tracer - register a tracer with the ftrace system.
1770  * @type: the plugin for the tracer
1771  *
1772  * Register a new plugin tracer.
1773  */
1774 int __init register_tracer(struct tracer *type)
1775 {
1776         struct tracer *t;
1777         int ret = 0;
1778
1779         if (!type->name) {
1780                 pr_info("Tracer must have a name\n");
1781                 return -1;
1782         }
1783
1784         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1785                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1786                 return -1;
1787         }
1788
1789         mutex_lock(&trace_types_lock);
1790
1791         tracing_selftest_running = true;
1792
1793         for (t = trace_types; t; t = t->next) {
1794                 if (strcmp(type->name, t->name) == 0) {
1795                         /* already found */
1796                         pr_info("Tracer %s already registered\n",
1797                                 type->name);
1798                         ret = -1;
1799                         goto out;
1800                 }
1801         }
1802
1803         if (!type->set_flag)
1804                 type->set_flag = &dummy_set_flag;
1805         if (!type->flags) {
1806                 /* Allocate a dummy tracer_flags */
1807                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1808                 if (!type->flags) {
1809                         ret = -ENOMEM;
1810                         goto out;
1811                 }
1812                 type->flags->val = 0;
1813                 type->flags->opts = dummy_tracer_opt;
1814         } else
1815                 if (!type->flags->opts)
1816                         type->flags->opts = dummy_tracer_opt;
1817
1818         /* store the tracer for __set_tracer_option */
1819         type->flags->trace = type;
1820
1821         ret = run_tracer_selftest(type);
1822         if (ret < 0)
1823                 goto out;
1824
1825         type->next = trace_types;
1826         trace_types = type;
1827         add_tracer_options(&global_trace, type);
1828
1829  out:
1830         tracing_selftest_running = false;
1831         mutex_unlock(&trace_types_lock);
1832
1833         if (ret || !default_bootup_tracer)
1834                 goto out_unlock;
1835
1836         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1837                 goto out_unlock;
1838
1839         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1840         /* Do we want this tracer to start on bootup? */
1841         tracing_set_tracer(&global_trace, type->name);
1842         default_bootup_tracer = NULL;
1843
1844         apply_trace_boot_options();
1845
1846         /* Disable other selftests, since this will break them. */
1847         tracing_selftest_disabled = true;
1848 #ifdef CONFIG_FTRACE_STARTUP_TEST
1849         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1850                type->name);
1851 #endif
1852
1853  out_unlock:
1854         return ret;
1855 }
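
/*
 * Illustrative sketch only (not part of this file): how a hypothetical
 * tracer plugin might be registered from an __init path. The "example"
 * name and callbacks are assumptions; see struct tracer in trace.h for
 * the real interface.
 *
 *	static int example_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_init,
 *		.reset	= example_reset,
 *	};
 *
 *	static int __init example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */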
1856
1857 void tracing_reset(struct trace_buffer *buf, int cpu)
1858 {
1859         struct ring_buffer *buffer = buf->buffer;
1860
1861         if (!buffer)
1862                 return;
1863
1864         ring_buffer_record_disable(buffer);
1865
1866         /* Make sure all commits have finished */
1867         synchronize_rcu();
1868         ring_buffer_reset_cpu(buffer, cpu);
1869
1870         ring_buffer_record_enable(buffer);
1871 }
1872
1873 void tracing_reset_online_cpus(struct trace_buffer *buf)
1874 {
1875         struct ring_buffer *buffer = buf->buffer;
1876         int cpu;
1877
1878         if (!buffer)
1879                 return;
1880
1881         ring_buffer_record_disable(buffer);
1882
1883         /* Make sure all commits have finished */
1884         synchronize_rcu();
1885
1886         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1887
1888         for_each_online_cpu(cpu)
1889                 ring_buffer_reset_cpu(buffer, cpu);
1890
1891         ring_buffer_record_enable(buffer);
1892 }
1893
1894 /* Must have trace_types_lock held */
1895 void tracing_reset_all_online_cpus(void)
1896 {
1897         struct trace_array *tr;
1898
1899         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1900                 if (!tr->clear_trace)
1901                         continue;
1902                 tr->clear_trace = false;
1903                 tracing_reset_online_cpus(&tr->trace_buffer);
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905                 tracing_reset_online_cpus(&tr->max_buffer);
1906 #endif
1907         }
1908 }
1909
1910 static int *tgid_map;
1911
1912 #define SAVED_CMDLINES_DEFAULT 128
1913 #define NO_CMDLINE_MAP UINT_MAX
1914 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1915 struct saved_cmdlines_buffer {
1916         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1917         unsigned *map_cmdline_to_pid;
1918         unsigned cmdline_num;
1919         int cmdline_idx;
1920         char *saved_cmdlines;
1921 };
1922 static struct saved_cmdlines_buffer *savedcmd;
1923
1924 /* temporary disable recording */
1925 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1926
1927 static inline char *get_saved_cmdlines(int idx)
1928 {
1929         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1930 }
1931
1932 static inline void set_cmdline(int idx, const char *cmdline)
1933 {
1934         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1935 }
1936
1937 static int allocate_cmdlines_buffer(unsigned int val,
1938                                     struct saved_cmdlines_buffer *s)
1939 {
1940         s->map_cmdline_to_pid = kmalloc_array(val,
1941                                               sizeof(*s->map_cmdline_to_pid),
1942                                               GFP_KERNEL);
1943         if (!s->map_cmdline_to_pid)
1944                 return -ENOMEM;
1945
1946         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1947         if (!s->saved_cmdlines) {
1948                 kfree(s->map_cmdline_to_pid);
1949                 return -ENOMEM;
1950         }
1951
1952         s->cmdline_idx = 0;
1953         s->cmdline_num = val;
1954         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1955                sizeof(s->map_pid_to_cmdline));
1956         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1957                val * sizeof(*s->map_cmdline_to_pid));
1958
1959         return 0;
1960 }
1961
1962 static int trace_create_savedcmd(void)
1963 {
1964         int ret;
1965
1966         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1967         if (!savedcmd)
1968                 return -ENOMEM;
1969
1970         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1971         if (ret < 0) {
1972                 kfree(savedcmd);
1973                 savedcmd = NULL;
1974                 return -ENOMEM;
1975         }
1976
1977         return 0;
1978 }
1979
1980 int is_tracing_stopped(void)
1981 {
1982         return global_trace.stop_count;
1983 }
1984
1985 /**
1986  * tracing_start - quick start of the tracer
1987  *
1988  * If tracing is enabled but was stopped by tracing_stop,
1989  * this will start the tracer back up.
1990  */
1991 void tracing_start(void)
1992 {
1993         struct ring_buffer *buffer;
1994         unsigned long flags;
1995
1996         if (tracing_disabled)
1997                 return;
1998
1999         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2000         if (--global_trace.stop_count) {
2001                 if (global_trace.stop_count < 0) {
2002                         /* Someone screwed up their debugging */
2003                         WARN_ON_ONCE(1);
2004                         global_trace.stop_count = 0;
2005                 }
2006                 goto out;
2007         }
2008
2009         /* Prevent the buffers from switching */
2010         arch_spin_lock(&global_trace.max_lock);
2011
2012         buffer = global_trace.trace_buffer.buffer;
2013         if (buffer)
2014                 ring_buffer_record_enable(buffer);
2015
2016 #ifdef CONFIG_TRACER_MAX_TRACE
2017         buffer = global_trace.max_buffer.buffer;
2018         if (buffer)
2019                 ring_buffer_record_enable(buffer);
2020 #endif
2021
2022         arch_spin_unlock(&global_trace.max_lock);
2023
2024  out:
2025         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2026 }
2027
2028 static void tracing_start_tr(struct trace_array *tr)
2029 {
2030         struct ring_buffer *buffer;
2031         unsigned long flags;
2032
2033         if (tracing_disabled)
2034                 return;
2035
2036         /* If global, we need to also start the max tracer */
2037         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2038                 return tracing_start();
2039
2040         raw_spin_lock_irqsave(&tr->start_lock, flags);
2041
2042         if (--tr->stop_count) {
2043                 if (tr->stop_count < 0) {
2044                         /* Someone screwed up their debugging */
2045                         WARN_ON_ONCE(1);
2046                         tr->stop_count = 0;
2047                 }
2048                 goto out;
2049         }
2050
2051         buffer = tr->trace_buffer.buffer;
2052         if (buffer)
2053                 ring_buffer_record_enable(buffer);
2054
2055  out:
2056         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2057 }
2058
2059 /**
2060  * tracing_stop - quick stop of the tracer
2061  *
2062  * Light weight way to stop tracing. Use in conjunction with
2063  * tracing_start.
2064  */
2065 void tracing_stop(void)
2066 {
2067         struct ring_buffer *buffer;
2068         unsigned long flags;
2069
2070         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071         if (global_trace.stop_count++)
2072                 goto out;
2073
2074         /* Prevent the buffers from switching */
2075         arch_spin_lock(&global_trace.max_lock);
2076
2077         buffer = global_trace.trace_buffer.buffer;
2078         if (buffer)
2079                 ring_buffer_record_disable(buffer);
2080
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082         buffer = global_trace.max_buffer.buffer;
2083         if (buffer)
2084                 ring_buffer_record_disable(buffer);
2085 #endif
2086
2087         arch_spin_unlock(&global_trace.max_lock);
2088
2089  out:
2090         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2091 }
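
/*
 * Illustrative sketch only: tracing_stop() and tracing_start() are meant
 * to be used as a matched pair, since stop_count nests:
 *
 *	tracing_stop();
 *	... inspect or copy out the buffers without new events racing in ...
 *	tracing_start();
 */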
2092
2093 static void tracing_stop_tr(struct trace_array *tr)
2094 {
2095         struct ring_buffer *buffer;
2096         unsigned long flags;
2097
2098         /* If global, we need to also stop the max tracer */
2099         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2100                 return tracing_stop();
2101
2102         raw_spin_lock_irqsave(&tr->start_lock, flags);
2103         if (tr->stop_count++)
2104                 goto out;
2105
2106         buffer = tr->trace_buffer.buffer;
2107         if (buffer)
2108                 ring_buffer_record_disable(buffer);
2109
2110  out:
2111         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2112 }
2113
2114 static int trace_save_cmdline(struct task_struct *tsk)
2115 {
2116         unsigned pid, idx;
2117
2118         /* treat recording of idle task as a success */
2119         if (!tsk->pid)
2120                 return 1;
2121
2122         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2123                 return 0;
2124
2125         /*
2126          * It's not the end of the world if we don't get
2127          * the lock, but we also don't want to spin
2128          * nor do we want to disable interrupts,
2129          * so if we miss here, then better luck next time.
2130          */
2131         if (!arch_spin_trylock(&trace_cmdline_lock))
2132                 return 0;
2133
2134         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2135         if (idx == NO_CMDLINE_MAP) {
2136                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2137
2138                 /*
2139                  * Check whether the cmdline buffer at idx has a pid
2140                  * mapped. We are going to overwrite that entry so we
2141                  * need to clear the map_pid_to_cmdline. Otherwise we
2142                  * would read the new comm for the old pid.
2143                  */
2144                 pid = savedcmd->map_cmdline_to_pid[idx];
2145                 if (pid != NO_CMDLINE_MAP)
2146                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2147
2148                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2149                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2150
2151                 savedcmd->cmdline_idx = idx;
2152         }
2153
2154         set_cmdline(idx, tsk->comm);
2155
2156         arch_spin_unlock(&trace_cmdline_lock);
2157
2158         return 1;
2159 }
2160
2161 static void __trace_find_cmdline(int pid, char comm[])
2162 {
2163         unsigned map;
2164
2165         if (!pid) {
2166                 strcpy(comm, "<idle>");
2167                 return;
2168         }
2169
2170         if (WARN_ON_ONCE(pid < 0)) {
2171                 strcpy(comm, "<XXX>");
2172                 return;
2173         }
2174
2175         if (pid > PID_MAX_DEFAULT) {
2176                 strcpy(comm, "<...>");
2177                 return;
2178         }
2179
2180         map = savedcmd->map_pid_to_cmdline[pid];
2181         if (map != NO_CMDLINE_MAP)
2182                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2183         else
2184                 strcpy(comm, "<...>");
2185 }
2186
2187 void trace_find_cmdline(int pid, char comm[])
2188 {
2189         preempt_disable();
2190         arch_spin_lock(&trace_cmdline_lock);
2191
2192         __trace_find_cmdline(pid, comm);
2193
2194         arch_spin_unlock(&trace_cmdline_lock);
2195         preempt_enable();
2196 }
2197
2198 int trace_find_tgid(int pid)
2199 {
2200         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2201                 return 0;
2202
2203         return tgid_map[pid];
2204 }
2205
2206 static int trace_save_tgid(struct task_struct *tsk)
2207 {
2208         /* treat recording of idle task as a success */
2209         if (!tsk->pid)
2210                 return 1;
2211
2212         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2213                 return 0;
2214
2215         tgid_map[tsk->pid] = tsk->tgid;
2216         return 1;
2217 }
2218
2219 static bool tracing_record_taskinfo_skip(int flags)
2220 {
2221         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2222                 return true;
2223         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2224                 return true;
2225         if (!__this_cpu_read(trace_taskinfo_save))
2226                 return true;
2227         return false;
2228 }
2229
2230 /**
2231  * tracing_record_taskinfo - record the task info of a task
2232  *
2233  * @task:  task to record
2234  * @flags: TRACE_RECORD_CMDLINE for recording comm
2235  *         TRACE_RECORD_TGID for recording tgid
2236  */
2237 void tracing_record_taskinfo(struct task_struct *task, int flags)
2238 {
2239         bool done;
2240
2241         if (tracing_record_taskinfo_skip(flags))
2242                 return;
2243
2244         /*
2245          * Record as much task information as possible. If some fail, continue
2246          * to try to record the others.
2247          */
2248         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2249         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2250
2251         /* If recording any information failed, retry again soon. */
2252         if (!done)
2253                 return;
2254
2255         __this_cpu_write(trace_taskinfo_save, false);
2256 }
2257
2258 /**
2259  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2260  *
2261  * @prev: previous task during sched_switch
2262  * @next: next task during sched_switch
2263  * @flags: TRACE_RECORD_CMDLINE for recording comm
2264  *         TRACE_RECORD_TGID for recording tgid
2265  */
2266 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2267                                           struct task_struct *next, int flags)
2268 {
2269         bool done;
2270
2271         if (tracing_record_taskinfo_skip(flags))
2272                 return;
2273
2274         /*
2275          * Record as much task information as possible. If some fail, continue
2276          * to try to record the others.
2277          */
2278         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2279         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2280         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2281         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2282
2283         /* If recording any information failed, retry again soon. */
2284         if (!done)
2285                 return;
2286
2287         __this_cpu_write(trace_taskinfo_save, false);
2288 }
2289
2290 /* Helpers to record specific task information */
2291 void tracing_record_cmdline(struct task_struct *task)
2292 {
2293         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2294 }
2295
2296 void tracing_record_tgid(struct task_struct *task)
2297 {
2298         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2299 }
2300
2301 /*
2302  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2303  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2304  * simplifies those functions and keeps them in sync.
2305  */
2306 enum print_line_t trace_handle_return(struct trace_seq *s)
2307 {
2308         return trace_seq_has_overflowed(s) ?
2309                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_handle_return);
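
/*
 * Illustrative sketch only: a typical trace_event output callback ends
 * with trace_handle_return(), so an overflowed trace_seq is reported as
 * a partial line. The event name and field below are hypothetical.
 *
 *	static enum print_line_t
 *	example_event_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example: value=%d\n", 42);
 *		return trace_handle_return(s);
 *	}
 */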
2312
2313 void
2314 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2315                              unsigned long flags, int pc)
2316 {
2317         struct task_struct *tsk = current;
2318
2319         entry->preempt_count            = pc & 0xff;
2320         entry->pid                      = (tsk) ? tsk->pid : 0;
2321         entry->type                     = type;
2322         entry->flags =
2323 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2324                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2325 #else
2326                 TRACE_FLAG_IRQS_NOSUPPORT |
2327 #endif
2328                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2329                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2330                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2331                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2332                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2333 }
2334 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2335
2336 struct ring_buffer_event *
2337 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2338                           int type,
2339                           unsigned long len,
2340                           unsigned long flags, int pc)
2341 {
2342         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2343 }
2344
2345 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2346 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2347 static int trace_buffered_event_ref;
2348
2349 /**
2350  * trace_buffered_event_enable - enable buffering events
2351  *
2352  * When events are being filtered, it is quicker to use a temporary
2353  * buffer to write the event data into if there's a likely chance
2354  * that it will not be committed. The discard of the ring buffer
2355  * is not as fast as committing, and is much slower than copying
2356  * a commit.
2357  *
2358  * When an event is to be filtered, allocate per cpu buffers to
2359  * write the event data into, and if the event is filtered and discarded
2360  * it is simply dropped, otherwise, the entire data is to be committed
2361  * in one shot.
2362  */
2363 void trace_buffered_event_enable(void)
2364 {
2365         struct ring_buffer_event *event;
2366         struct page *page;
2367         int cpu;
2368
2369         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2370
2371         if (trace_buffered_event_ref++)
2372                 return;
2373
2374         for_each_tracing_cpu(cpu) {
2375                 page = alloc_pages_node(cpu_to_node(cpu),
2376                                         GFP_KERNEL | __GFP_NORETRY, 0);
2377                 if (!page)
2378                         goto failed;
2379
2380                 event = page_address(page);
2381                 memset(event, 0, sizeof(*event));
2382
2383                 per_cpu(trace_buffered_event, cpu) = event;
2384
2385                 preempt_disable();
2386                 if (cpu == smp_processor_id() &&
2387                     this_cpu_read(trace_buffered_event) !=
2388                     per_cpu(trace_buffered_event, cpu))
2389                         WARN_ON_ONCE(1);
2390                 preempt_enable();
2391         }
2392
2393         return;
2394  failed:
2395         trace_buffered_event_disable();
2396 }
2397
2398 static void enable_trace_buffered_event(void *data)
2399 {
2400         /* Probably not needed, but do it anyway */
2401         smp_rmb();
2402         this_cpu_dec(trace_buffered_event_cnt);
2403 }
2404
2405 static void disable_trace_buffered_event(void *data)
2406 {
2407         this_cpu_inc(trace_buffered_event_cnt);
2408 }
2409
2410 /**
2411  * trace_buffered_event_disable - disable buffering events
2412  *
2413  * When a filter is removed, it is faster to not use the buffered
2414  * events, and to commit directly into the ring buffer. Free up
2415  * the temp buffers when there are no more users. This requires
2416  * special synchronization with current events.
2417  */
2418 void trace_buffered_event_disable(void)
2419 {
2420         int cpu;
2421
2422         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2423
2424         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2425                 return;
2426
2427         if (--trace_buffered_event_ref)
2428                 return;
2429
2430         preempt_disable();
2431         /* For each CPU, set the buffer as used. */
2432         smp_call_function_many(tracing_buffer_mask,
2433                                disable_trace_buffered_event, NULL, 1);
2434         preempt_enable();
2435
2436         /* Wait for all current users to finish */
2437         synchronize_rcu();
2438
2439         for_each_tracing_cpu(cpu) {
2440                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2441                 per_cpu(trace_buffered_event, cpu) = NULL;
2442         }
2443         /*
2444          * Make sure trace_buffered_event is NULL before clearing
2445          * trace_buffered_event_cnt.
2446          */
2447         smp_wmb();
2448
2449         preempt_disable();
2450         /* Do the work on each cpu */
2451         smp_call_function_many(tracing_buffer_mask,
2452                                enable_trace_buffered_event, NULL, 1);
2453         preempt_enable();
2454 }
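
/*
 * Illustrative sketch only: trace_buffered_event_enable() and
 * trace_buffered_event_disable() are reference counted and expect
 * event_mutex to be held, so calls must be balanced:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install a filter that may discard events ...
 *	mutex_unlock(&event_mutex);
 *
 *	and later, again under event_mutex:
 *
 *	trace_buffered_event_disable();
 */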
2455
2456 static struct ring_buffer *temp_buffer;
2457
2458 struct ring_buffer_event *
2459 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2460                           struct trace_event_file *trace_file,
2461                           int type, unsigned long len,
2462                           unsigned long flags, int pc)
2463 {
2464         struct ring_buffer_event *entry;
2465         int val;
2466
2467         *current_rb = trace_file->tr->trace_buffer.buffer;
2468
2469         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2470              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2471             (entry = this_cpu_read(trace_buffered_event))) {
2472                 /* Try to use the per cpu buffer first */
2473                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2474                 if (val == 1) {
2475                         trace_event_setup(entry, type, flags, pc);
2476                         entry->array[0] = len;
2477                         return entry;
2478                 }
2479                 this_cpu_dec(trace_buffered_event_cnt);
2480         }
2481
2482         entry = __trace_buffer_lock_reserve(*current_rb,
2483                                             type, len, flags, pc);
2484         /*
2485          * If tracing is off, but we have triggers enabled
2486          * we still need to look at the event data. Use the temp_buffer
2487          * to store the trace event for the trigger to use. It's recursion
2488          * safe and will not be recorded anywhere.
2489          */
2490         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2491                 *current_rb = temp_buffer;
2492                 entry = __trace_buffer_lock_reserve(*current_rb,
2493                                                     type, len, flags, pc);
2494         }
2495         return entry;
2496 }
2497 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2498
2499 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2500 static DEFINE_MUTEX(tracepoint_printk_mutex);
2501
2502 static void output_printk(struct trace_event_buffer *fbuffer)
2503 {
2504         struct trace_event_call *event_call;
2505         struct trace_event *event;
2506         unsigned long flags;
2507         struct trace_iterator *iter = tracepoint_print_iter;
2508
2509         /* We should never get here if iter is NULL */
2510         if (WARN_ON_ONCE(!iter))
2511                 return;
2512
2513         event_call = fbuffer->trace_file->event_call;
2514         if (!event_call || !event_call->event.funcs ||
2515             !event_call->event.funcs->trace)
2516                 return;
2517
2518         event = &fbuffer->trace_file->event_call->event;
2519
2520         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2521         trace_seq_init(&iter->seq);
2522         iter->ent = fbuffer->entry;
2523         event_call->event.funcs->trace(iter, 0, event);
2524         trace_seq_putc(&iter->seq, 0);
2525         printk("%s", iter->seq.buffer);
2526
2527         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2528 }
2529
2530 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2531                              void __user *buffer, size_t *lenp,
2532                              loff_t *ppos)
2533 {
2534         int save_tracepoint_printk;
2535         int ret;
2536
2537         mutex_lock(&tracepoint_printk_mutex);
2538         save_tracepoint_printk = tracepoint_printk;
2539
2540         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2541
2542         /*
2543          * This will force exiting early, as tracepoint_printk
2544          * is always zero when tracepoint_print_iter is not allocated
2545          */
2546         if (!tracepoint_print_iter)
2547                 tracepoint_printk = 0;
2548
2549         if (save_tracepoint_printk == tracepoint_printk)
2550                 goto out;
2551
2552         if (tracepoint_printk)
2553                 static_key_enable(&tracepoint_printk_key.key);
2554         else
2555                 static_key_disable(&tracepoint_printk_key.key);
2556
2557  out:
2558         mutex_unlock(&tracepoint_printk_mutex);
2559
2560         return ret;
2561 }
2562
2563 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2564 {
2565         if (static_key_false(&tracepoint_printk_key.key))
2566                 output_printk(fbuffer);
2567
2568         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2569                                     fbuffer->event, fbuffer->entry,
2570                                     fbuffer->flags, fbuffer->pc);
2571 }
2572 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2573
2574 /*
2575  * Skip 3:
2576  *
2577  *   trace_buffer_unlock_commit_regs()
2578  *   trace_event_buffer_commit()
2579  *   trace_event_raw_event_xxx()
2580  */
2581 # define STACK_SKIP 3
2582
2583 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2584                                      struct ring_buffer *buffer,
2585                                      struct ring_buffer_event *event,
2586                                      unsigned long flags, int pc,
2587                                      struct pt_regs *regs)
2588 {
2589         __buffer_unlock_commit(buffer, event);
2590
2591         /*
2592          * If regs is not set, then skip the necessary functions.
2593          * Note, we can still get here via blktrace, wakeup tracer
2594          * and mmiotrace, but that's ok if they lose a function or
2595          * two. They are not that meaningful.
2596          */
2597         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2598         ftrace_trace_userstack(buffer, flags, pc);
2599 }
2600
2601 /*
2602  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2603  */
2604 void
2605 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2606                                    struct ring_buffer_event *event)
2607 {
2608         __buffer_unlock_commit(buffer, event);
2609 }
2610
2611 static void
2612 trace_process_export(struct trace_export *export,
2613                struct ring_buffer_event *event)
2614 {
2615         struct trace_entry *entry;
2616         unsigned int size = 0;
2617
2618         entry = ring_buffer_event_data(event);
2619         size = ring_buffer_event_length(event);
2620         export->write(export, entry, size);
2621 }
2622
2623 static DEFINE_MUTEX(ftrace_export_lock);
2624
2625 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2626
2627 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2628
2629 static inline void ftrace_exports_enable(void)
2630 {
2631         static_branch_enable(&ftrace_exports_enabled);
2632 }
2633
2634 static inline void ftrace_exports_disable(void)
2635 {
2636         static_branch_disable(&ftrace_exports_enabled);
2637 }
2638
2639 static void ftrace_exports(struct ring_buffer_event *event)
2640 {
2641         struct trace_export *export;
2642
2643         preempt_disable_notrace();
2644
2645         export = rcu_dereference_raw_check(ftrace_exports_list);
2646         while (export) {
2647                 trace_process_export(export, event);
2648                 export = rcu_dereference_raw_check(export->next);
2649         }
2650
2651         preempt_enable_notrace();
2652 }
2653
2654 static inline void
2655 add_trace_export(struct trace_export **list, struct trace_export *export)
2656 {
2657         rcu_assign_pointer(export->next, *list);
2658         /*
2659          * We are inserting export into the list, but another
2660          * CPU might be walking that list. We need to make sure
2661          * the export->next pointer is valid before another CPU sees
2662          * the export pointer inserted into the list.
2663          */
2664         rcu_assign_pointer(*list, export);
2665 }
2666
2667 static inline int
2668 rm_trace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670         struct trace_export **p;
2671
2672         for (p = list; *p != NULL; p = &(*p)->next)
2673                 if (*p == export)
2674                         break;
2675
2676         if (*p != export)
2677                 return -1;
2678
2679         rcu_assign_pointer(*p, (*p)->next);
2680
2681         return 0;
2682 }
2683
2684 static inline void
2685 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687         if (*list == NULL)
2688                 ftrace_exports_enable();
2689
2690         add_trace_export(list, export);
2691 }
2692
2693 static inline int
2694 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2695 {
2696         int ret;
2697
2698         ret = rm_trace_export(list, export);
2699         if (*list == NULL)
2700                 ftrace_exports_disable();
2701
2702         return ret;
2703 }
2704
2705 int register_ftrace_export(struct trace_export *export)
2706 {
2707         if (WARN_ON_ONCE(!export->write))
2708                 return -1;
2709
2710         mutex_lock(&ftrace_export_lock);
2711
2712         add_ftrace_export(&ftrace_exports_list, export);
2713
2714         mutex_unlock(&ftrace_export_lock);
2715
2716         return 0;
2717 }
2718 EXPORT_SYMBOL_GPL(register_ftrace_export);
2719
2720 int unregister_ftrace_export(struct trace_export *export)
2721 {
2722         int ret;
2723
2724         mutex_lock(&ftrace_export_lock);
2725
2726         ret = rm_ftrace_export(&ftrace_exports_list, export);
2727
2728         mutex_unlock(&ftrace_export_lock);
2729
2730         return ret;
2731 }
2732 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
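
/*
 * Illustrative sketch only (hypothetical module code): a minimal
 * trace_export that forwards each exported event to some other sink.
 * Only the ->write() callback is required by register_ftrace_export().
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		... push the 'size' bytes at 'entry' somewhere else ...
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */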
2733
2734 void
2735 trace_function(struct trace_array *tr,
2736                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2737                int pc)
2738 {
2739         struct trace_event_call *call = &event_function;
2740         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2741         struct ring_buffer_event *event;
2742         struct ftrace_entry *entry;
2743
2744         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2745                                             flags, pc);
2746         if (!event)
2747                 return;
2748         entry   = ring_buffer_event_data(event);
2749         entry->ip                       = ip;
2750         entry->parent_ip                = parent_ip;
2751
2752         if (!call_filter_check_discard(call, entry, buffer, event)) {
2753                 if (static_branch_unlikely(&ftrace_exports_enabled))
2754                         ftrace_exports(event);
2755                 __buffer_unlock_commit(buffer, event);
2756         }
2757 }
2758
2759 #ifdef CONFIG_STACKTRACE
2760
2761 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2762 #define FTRACE_KSTACK_NESTING   4
2763
2764 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2765
2766 struct ftrace_stack {
2767         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2768 };
2769
2770
2771 struct ftrace_stacks {
2772         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2773 };
2774
2775 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2776 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2777
2778 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2779                                  unsigned long flags,
2780                                  int skip, int pc, struct pt_regs *regs)
2781 {
2782         struct trace_event_call *call = &event_kernel_stack;
2783         struct ring_buffer_event *event;
2784         unsigned int size, nr_entries;
2785         struct ftrace_stack *fstack;
2786         struct stack_entry *entry;
2787         int stackidx;
2788
2789         /*
2790          * Add one, for this function and the call to stack_trace_save().
2791          * If regs is set, then these functions will not be in the way.
2792          */
2793 #ifndef CONFIG_UNWINDER_ORC
2794         if (!regs)
2795                 skip++;
2796 #endif
2797
2798         /*
2799          * Since events can happen in NMIs there's no safe way to
2800          * use a single per cpu stack. We reserve one of the
2801          * FTRACE_KSTACK_NESTING slots, and if an interrupt or NMI comes
2802          * in, it will just use the next slot in the nesting array.
2803          */
2804         preempt_disable_notrace();
2805
2806         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2807
2808         /* This should never happen. If it does, yell once and skip */
2809         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2810                 goto out;
2811
2812         /*
2813          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2814          * interrupt will either see the value pre increment or post
2815          * increment. If the interrupt happens pre increment it will have
2816          * restored the counter when it returns.  We just need a barrier to
2817          * keep gcc from moving things around.
2818          */
2819         barrier();
2820
2821         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2822         size = ARRAY_SIZE(fstack->calls);
2823
2824         if (regs) {
2825                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2826                                                    size, skip);
2827         } else {
2828                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2829         }
2830
2831         size = nr_entries * sizeof(unsigned long);
2832         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2833                                             sizeof(*entry) + size, flags, pc);
2834         if (!event)
2835                 goto out;
2836         entry = ring_buffer_event_data(event);
2837
2838         memcpy(&entry->caller, fstack->calls, size);
2839         entry->size = nr_entries;
2840
2841         if (!call_filter_check_discard(call, entry, buffer, event))
2842                 __buffer_unlock_commit(buffer, event);
2843
2844  out:
2845         /* Again, don't let gcc optimize things here */
2846         barrier();
2847         __this_cpu_dec(ftrace_stack_reserve);
2848         preempt_enable_notrace();
2849
2850 }
2851
2852 static inline void ftrace_trace_stack(struct trace_array *tr,
2853                                       struct ring_buffer *buffer,
2854                                       unsigned long flags,
2855                                       int skip, int pc, struct pt_regs *regs)
2856 {
2857         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2858                 return;
2859
2860         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2861 }
2862
2863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2864                    int pc)
2865 {
2866         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2867
2868         if (rcu_is_watching()) {
2869                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2870                 return;
2871         }
2872
2873         /*
2874          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2875          * but if the above rcu_is_watching() failed, then the NMI
2876          * triggered someplace critical, and rcu_irq_enter() should
2877          * not be called from NMI.
2878          */
2879         if (unlikely(in_nmi()))
2880                 return;
2881
2882         rcu_irq_enter_irqson();
2883         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2884         rcu_irq_exit_irqson();
2885 }
2886
2887 /**
2888  * trace_dump_stack - record a stack back trace in the trace buffer
2889  * @skip: Number of functions to skip (helper handlers)
2890  */
2891 void trace_dump_stack(int skip)
2892 {
2893         unsigned long flags;
2894
2895         if (tracing_disabled || tracing_selftest_running)
2896                 return;
2897
2898         local_save_flags(flags);
2899
2900 #ifndef CONFIG_UNWINDER_ORC
2901         /* Skip 1 to skip this function. */
2902         skip++;
2903 #endif
2904         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2905                              flags, skip, preempt_count(), NULL);
2906 }
2907 EXPORT_SYMBOL_GPL(trace_dump_stack);
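
/*
 * Illustrative sketch only: trace_dump_stack(0) records the caller's
 * stack into the global trace buffer, which is handy for finding out
 * how a (hypothetical) slow path was reached:
 *
 *	if (took_slow_path)
 *		trace_dump_stack(0);
 */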
2908
2909 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911
2912 static void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915         struct trace_event_call *call = &event_user_stack;
2916         struct ring_buffer_event *event;
2917         struct userstack_entry *entry;
2918
2919         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2920                 return;
2921
2922         /*
2923          * NMIs cannot handle page faults, even with fixups.
2924          * Saving the user stack can (and often does) fault.
2925          */
2926         if (unlikely(in_nmi()))
2927                 return;
2928
2929         /*
2930          * Prevent recursion, since the user stack tracing may
2931          * trigger other kernel events.
2932          */
2933         preempt_disable();
2934         if (__this_cpu_read(user_stack_count))
2935                 goto out;
2936
2937         __this_cpu_inc(user_stack_count);
2938
2939         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2940                                             sizeof(*entry), flags, pc);
2941         if (!event)
2942                 goto out_drop_count;
2943         entry   = ring_buffer_event_data(event);
2944
2945         entry->tgid             = current->tgid;
2946         memset(&entry->caller, 0, sizeof(entry->caller));
2947
2948         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2949         if (!call_filter_check_discard(call, entry, buffer, event))
2950                 __buffer_unlock_commit(buffer, event);
2951
2952  out_drop_count:
2953         __this_cpu_dec(user_stack_count);
2954  out:
2955         preempt_enable();
2956 }
2957 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2958 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2959                                    unsigned long flags, int pc)
2960 {
2961 }
2962 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2963
2964 #endif /* CONFIG_STACKTRACE */
2965
2966 /* created for use with alloc_percpu */
2967 struct trace_buffer_struct {
2968         int nesting;
2969         char buffer[4][TRACE_BUF_SIZE];
2970 };
2971
2972 static struct trace_buffer_struct *trace_percpu_buffer;
2973
2974 /*
2975  * This allows for lockless recording.  If we're nested too deeply, then
2976  * this returns NULL.
2977  */
2978 static char *get_trace_buf(void)
2979 {
2980         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2981
2982         if (!buffer || buffer->nesting >= 4)
2983                 return NULL;
2984
2985         buffer->nesting++;
2986
2987         /* Interrupts must see nesting incremented before we use the buffer */
2988         barrier();
2989         return &buffer->buffer[buffer->nesting][0];
2990 }
2991
2992 static void put_trace_buf(void)
2993 {
2994         /* Don't let the decrement of nesting leak before this */
2995         barrier();
2996         this_cpu_dec(trace_percpu_buffer->nesting);
2997 }
2998
2999 static int alloc_percpu_trace_buffer(void)
3000 {
3001         struct trace_buffer_struct *buffers;
3002
3003         buffers = alloc_percpu(struct trace_buffer_struct);
3004         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3005                 return -ENOMEM;
3006
3007         trace_percpu_buffer = buffers;
3008         return 0;
3009 }
3010
3011 static int buffers_allocated;
3012
3013 void trace_printk_init_buffers(void)
3014 {
3015         if (buffers_allocated)
3016                 return;
3017
3018         if (alloc_percpu_trace_buffer())
3019                 return;
3020
3021         /* trace_printk() is for debug use only. Don't use it in production. */
3022
3023         pr_warn("\n");
3024         pr_warn("**********************************************************\n");
3025         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3026         pr_warn("**                                                      **\n");
3027         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3028         pr_warn("**                                                      **\n");
3029         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3030         pr_warn("** unsafe for production use.                           **\n");
3031         pr_warn("**                                                      **\n");
3032         pr_warn("** If you see this message and you are not debugging    **\n");
3033         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3034         pr_warn("**                                                      **\n");
3035         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3036         pr_warn("**********************************************************\n");
3037
3038         /* Expand the buffers to set size */
3039         tracing_update_buffers();
3040
3041         buffers_allocated = 1;
3042
3043         /*
3044          * trace_printk_init_buffers() can be called by modules.
3045          * If that happens, then we need to start cmdline recording
3046          * directly here. If the global_trace.buffer is already
3047          * allocated here, then this was called by module code.
3048          */
3049         if (global_trace.trace_buffer.buffer)
3050                 tracing_start_cmdline_record();
3051 }
3052 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3053
3054 void trace_printk_start_comm(void)
3055 {
3056         /* Start tracing comms if trace printk is set */
3057         if (!buffers_allocated)
3058                 return;
3059         tracing_start_cmdline_record();
3060 }
3061
3062 static void trace_printk_start_stop_comm(int enabled)
3063 {
3064         if (!buffers_allocated)
3065                 return;
3066
3067         if (enabled)
3068                 tracing_start_cmdline_record();
3069         else
3070                 tracing_stop_cmdline_record();
3071 }
3072
3073 /**
3074  * trace_vbprintk - write binary msg to tracing buffer
3075  * @ip:    The address of the caller
3076  * @fmt:   The string format to write to the buffer
3077  * @args:  Arguments for @fmt
3078  */
3079 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3080 {
3081         struct trace_event_call *call = &event_bprint;
3082         struct ring_buffer_event *event;
3083         struct ring_buffer *buffer;
3084         struct trace_array *tr = &global_trace;
3085         struct bprint_entry *entry;
3086         unsigned long flags;
3087         char *tbuffer;
3088         int len = 0, size, pc;
3089
3090         if (unlikely(tracing_selftest_running || tracing_disabled))
3091                 return 0;
3092
3093         /* Don't pollute graph traces with trace_vprintk internals */
3094         pause_graph_tracing();
3095
3096         pc = preempt_count();
3097         preempt_disable_notrace();
3098
3099         tbuffer = get_trace_buf();
3100         if (!tbuffer) {
3101                 len = 0;
3102                 goto out_nobuffer;
3103         }
3104
3105         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3106
3107         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3108                 goto out;
3109
3110         local_save_flags(flags);
3111         size = sizeof(*entry) + sizeof(u32) * len;
3112         buffer = tr->trace_buffer.buffer;
3113         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3114                                             flags, pc);
3115         if (!event)
3116                 goto out;
3117         entry = ring_buffer_event_data(event);
3118         entry->ip                       = ip;
3119         entry->fmt                      = fmt;
3120
3121         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3122         if (!call_filter_check_discard(call, entry, buffer, event)) {
3123                 __buffer_unlock_commit(buffer, event);
3124                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3125         }
3126
3127 out:
3128         put_trace_buf();
3129
3130 out_nobuffer:
3131         preempt_enable_notrace();
3132         unpause_graph_tracing();
3133
3134         return len;
3135 }
3136 EXPORT_SYMBOL_GPL(trace_vbprintk);
3137
3138 __printf(3, 0)
3139 static int
3140 __trace_array_vprintk(struct ring_buffer *buffer,
3141                       unsigned long ip, const char *fmt, va_list args)
3142 {
3143         struct trace_event_call *call = &event_print;
3144         struct ring_buffer_event *event;
3145         int len = 0, size, pc;
3146         struct print_entry *entry;
3147         unsigned long flags;
3148         char *tbuffer;
3149
3150         if (tracing_disabled || tracing_selftest_running)
3151                 return 0;
3152
3153         /* Don't pollute graph traces with trace_vprintk internals */
3154         pause_graph_tracing();
3155
3156         pc = preempt_count();
3157         preempt_disable_notrace();
3158
3159
3160         tbuffer = get_trace_buf();
3161         if (!tbuffer) {
3162                 len = 0;
3163                 goto out_nobuffer;
3164         }
3165
3166         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3167
3168         local_save_flags(flags);
3169         size = sizeof(*entry) + len + 1;
3170         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3171                                             flags, pc);
3172         if (!event)
3173                 goto out;
3174         entry = ring_buffer_event_data(event);
3175         entry->ip = ip;
3176
3177         memcpy(&entry->buf, tbuffer, len + 1);
3178         if (!call_filter_check_discard(call, entry, buffer, event)) {
3179                 __buffer_unlock_commit(buffer, event);
3180                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3181         }
3182
3183 out:
3184         put_trace_buf();
3185
3186 out_nobuffer:
3187         preempt_enable_notrace();
3188         unpause_graph_tracing();
3189
3190         return len;
3191 }
3192
3193 __printf(3, 0)
3194 int trace_array_vprintk(struct trace_array *tr,
3195                         unsigned long ip, const char *fmt, va_list args)
3196 {
3197         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3198 }
3199
3200 __printf(3, 0)
3201 int trace_array_printk(struct trace_array *tr,
3202                        unsigned long ip, const char *fmt, ...)
3203 {
3204         int ret;
3205         va_list ap;
3206
3207         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3208                 return 0;
3209
3210         va_start(ap, fmt);
3211         ret = trace_array_vprintk(tr, ip, fmt, ap);
3212         va_end(ap);
3213         return ret;
3214 }
3215 EXPORT_SYMBOL_GPL(trace_array_printk);
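
/*
 * Illustrative sketch only: writing into a specific trace instance,
 * assuming the caller already holds a struct trace_array *tr for it
 * (tr and id below are hypothetical):
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 */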
3216
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219                            unsigned long ip, const char *fmt, ...)
3220 {
3221         int ret;
3222         va_list ap;
3223
3224         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225                 return 0;
3226
3227         va_start(ap, fmt);
3228         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229         va_end(ap);
3230         return ret;
3231 }
3232
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236         return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
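
/*
 * Illustrative sketch only: callers normally use the trace_printk()
 * macro rather than these helpers directly; depending on the format,
 * it is routed either to the binary trace_vbprintk() path above or to
 * trace_vprintk():
 *
 *	trace_printk("processed %d packets in %llu ns\n", count, delta);
 *
 * (count and delta are hypothetical local variables.)
 */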
3239
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243
3244         iter->idx++;
3245         if (buf_iter)
3246                 ring_buffer_read(buf_iter, NULL);
3247 }
3248
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251                 unsigned long *lost_events)
3252 {
3253         struct ring_buffer_event *event;
3254         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255
3256         if (buf_iter)
3257                 event = ring_buffer_iter_peek(buf_iter, ts);
3258         else
3259                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260                                          lost_events);
3261
3262         if (event) {
3263                 iter->ent_size = ring_buffer_event_length(event);
3264                 return ring_buffer_event_data(event);
3265         }
3266         iter->ent_size = 0;
3267         return NULL;
3268 }
3269
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272                   unsigned long *missing_events, u64 *ent_ts)
3273 {
3274         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275         struct trace_entry *ent, *next = NULL;
3276         unsigned long lost_events = 0, next_lost = 0;
3277         int cpu_file = iter->cpu_file;
3278         u64 next_ts = 0, ts;
3279         int next_cpu = -1;
3280         int next_size = 0;
3281         int cpu;
3282
3283         /*
3284          * If we are in a per_cpu trace file, don't bother iterating over
3285          * all CPUs; peek directly at the one CPU.
3286          */
3287         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3289                         return NULL;
3290                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291                 if (ent_cpu)
3292                         *ent_cpu = cpu_file;
3293
3294                 return ent;
3295         }
3296
3297         for_each_tracing_cpu(cpu) {
3298
3299                 if (ring_buffer_empty_cpu(buffer, cpu))
3300                         continue;
3301
3302                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303
3304                 /*
3305                  * Pick the entry with the smallest timestamp:
3306                  */
3307                 if (ent && (!next || ts < next_ts)) {
3308                         next = ent;
3309                         next_cpu = cpu;
3310                         next_ts = ts;
3311                         next_lost = lost_events;
3312                         next_size = iter->ent_size;
3313                 }
3314         }
3315
3316         iter->ent_size = next_size;
3317
3318         if (ent_cpu)
3319                 *ent_cpu = next_cpu;
3320
3321         if (ent_ts)
3322                 *ent_ts = next_ts;
3323
3324         if (missing_events)
3325                 *missing_events = next_lost;
3326
3327         return next;
3328 }
3329
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332                                           int *ent_cpu, u64 *ent_ts)
3333 {
3334         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340         iter->ent = __find_next_entry(iter, &iter->cpu,
3341                                       &iter->lost_events, &iter->ts);
3342
3343         if (iter->ent)
3344                 trace_iterator_increment(iter);
3345
3346         return iter->ent ? iter : NULL;
3347 }
3348
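/*
 * Consuming read: remove the next event on iter->cpu from the live ring
 * buffer, updating iter->ts and iter->lost_events.  This is the path used
 * by the trace_pipe readers, as opposed to the non-destructive iterator
 * walk used by the "trace" seq_file.
 */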
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352                             &iter->lost_events);
3353 }
3354
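/*
 * seq_file ->next callback.  The iterator itself is handed back as the
 * "position" object; entries are walked forward until the software index
 * catches up with *pos.  The trace iterator cannot seek backwards, so a
 * request for an earlier position returns NULL.
 */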
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357         struct trace_iterator *iter = m->private;
3358         int i = (int)*pos;
3359         void *ent;
3360
3361         WARN_ON_ONCE(iter->leftover);
3362
3363         (*pos)++;
3364
3365         /* can't go backwards */
3366         if (iter->idx > i)
3367                 return NULL;
3368
3369         if (iter->idx < 0)
3370                 ent = trace_find_next_entry_inc(iter);
3371         else
3372                 ent = iter;
3373
3374         while (ent && iter->idx < i)
3375                 ent = trace_find_next_entry_inc(iter);
3376
3377         iter->pos = *pos;
3378
3379         return ent;
3380 }
3381
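/*
 * Reset the per-CPU buffer iterator to the start of the buffer.  Entries
 * stamped before trace_buffer->time_start are skipped and accounted in
 * skipped_entries so that later entry counts can exclude them.
 */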
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384         struct ring_buffer_event *event;
3385         struct ring_buffer_iter *buf_iter;
3386         unsigned long entries = 0;
3387         u64 ts;
3388
3389         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390
3391         buf_iter = trace_buffer_iter(iter, cpu);
3392         if (!buf_iter)
3393                 return;
3394
3395         ring_buffer_iter_reset(buf_iter);
3396
3397         /*
3398          * With the max latency tracers we can hit the case where a reset
3399          * never took place on a cpu. This shows up as timestamps that come
3400          * before the start of the buffer; skip (and count) those entries.
3401          */
3402         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403                 if (ts >= iter->trace_buffer->time_start)
3404                         break;
3405                 entries++;
3406                 ring_buffer_read(buf_iter, NULL);
3407         }
3408
3409         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411
3412 /*
3413  * The current tracer is copied so that a global lock does not have to
3414  * be held for the whole duration of the read.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418         struct trace_iterator *iter = m->private;
3419         struct trace_array *tr = iter->tr;
3420         int cpu_file = iter->cpu_file;
3421         void *p = NULL;
3422         loff_t l = 0;
3423         int cpu;
3424
3425         /*
3426          * Copy the tracer to avoid holding a global lock for the whole read.
3427          * iter->trace is a copy of current_trace, so the name pointers can
3428          * be compared directly instead of using strcmp(): iter->trace->name
3429          * points to the same string as current_trace->name.
3430          */
3431         mutex_lock(&trace_types_lock);
3432         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433                 *iter->trace = *tr->current_trace;
3434         mutex_unlock(&trace_types_lock);
3435
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437         if (iter->snapshot && iter->trace->use_max_tr)
3438                 return ERR_PTR(-EBUSY);
3439 #endif
3440
3441         if (!iter->snapshot)
3442                 atomic_inc(&trace_record_taskinfo_disabled);
3443
3444         if (*pos != iter->pos) {
3445                 iter->ent = NULL;
3446                 iter->cpu = 0;
3447                 iter->idx = -1;
3448
3449                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450                         for_each_tracing_cpu(cpu)
3451                                 tracing_iter_reset(iter, cpu);
3452                 } else
3453                         tracing_iter_reset(iter, cpu_file);
3454
3455                 iter->leftover = 0;
3456                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457                         ;
3458
3459         } else {
3460                 /*
3461                  * If we overflowed the seq_file before, then we want
3462                  * to just reuse the leftover trace_seq buffer.
3463                  */
3464                 if (iter->leftover)
3465                         p = iter;
3466                 else {
3467                         l = *pos - 1;
3468                         p = s_next(m, p, &l);
3469                 }
3470         }
3471
3472         trace_event_read_lock();
3473         trace_access_lock(cpu_file);
3474         return p;
3475 }
3476
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479         struct trace_iterator *iter = m->private;
3480
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482         if (iter->snapshot && iter->trace->use_max_tr)
3483                 return;
3484 #endif
3485
3486         if (!iter->snapshot)
3487                 atomic_dec(&trace_record_taskinfo_disabled);
3488
3489         trace_access_unlock(iter->cpu_file);
3490         trace_event_read_unlock();
3491 }
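/*
 * Note: s_start(), s_next() and s_stop() above, together with the s_show()
 * callback defined further down in this file, are what get wired into the
 * seq_operations used by the "trace" file.  A minimal sketch of that
 * wiring, assuming the upstream naming (tracer_seq_ops, s_show):
 *
 *	static const struct seq_operations tracer_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 */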