kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46
47 #include "trace.h"
48 #include "trace_output.h"
49
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will look into the ring-buffer to count the
59  * entries inserted during the selftest, although some concurrent
60  * insertions into the ring-buffer, such as trace_printk(), could occur
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77         { }
78 };
79
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83         return 0;
84 }
85
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100
101 cpumask_var_t __read_mostly     tracing_buffer_mask;
102
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputting them to a
110  * serial console.
111  *
112  * It is off by default, but you can enable it either by specifying
113  * "ftrace_dump_on_oops" on the kernel command line, or by setting
114  * /proc/sys/kernel/ftrace_dump_on_oops.
115  * Set it to 1 to dump the buffers of all CPUs.
116  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
117  */
118
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127         struct module                   *mod;
128         unsigned long                   length;
129 };
130
131 union trace_eval_map_item;
132
133 struct trace_eval_map_tail {
134         /*
135          * "end" is first and points to NULL as it must be different
136          * than "mod" or "eval_string"
137          */
138         union trace_eval_map_item       *next;
139         const char                      *end;   /* points to NULL */
140 };
141
142 static DEFINE_MUTEX(trace_eval_mutex);
143
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152         struct trace_eval_map           map;
153         struct trace_eval_map_head      head;
154         struct trace_eval_map_tail      tail;
155 };
156
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161
162 #define MAX_TRACER_SIZE         100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165
166 static bool allocate_snapshot;
167
168 static int __init set_cmdline_ftrace(char *str)
169 {
170         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171         default_bootup_tracer = bootup_tracer_buf;
172         /* We are using ftrace early, expand it */
173         ring_buffer_expanded = true;
174         return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
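/*
 * Example (illustration): booting with "ftrace=function" would copy
 * "function" into bootup_tracer_buf, and register_tracer() would then
 * switch to that tracer once it is registered during boot.
 */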
177
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180         if (*str++ != '=' || !*str) {
181                 ftrace_dump_on_oops = DUMP_ALL;
182                 return 1;
183         }
184
185         if (!strcmp("orig_cpu", str)) {
186                 ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
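/*
 * Example (illustration): per the parsing above, "ftrace_dump_on_oops" on
 * the command line selects DUMP_ALL, while "ftrace_dump_on_oops=orig_cpu"
 * selects DUMP_ORIG.
 */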
193
194 static int __init stop_trace_on_warning(char *str)
195 {
196         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197                 __disable_trace_on_warning = 1;
198         return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201
202 static int __init boot_alloc_snapshot(char *str)
203 {
204         allocate_snapshot = true;
205         /* We also need the main ring buffer expanded */
206         ring_buffer_expanded = true;
207         return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
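/*
 * Example (illustration): booting with "alloc_snapshot" requests that the
 * snapshot buffer be allocated up front, so tracing_snapshot() below does
 * not hit its "SNAPSHOT NOT ALLOCATED" case.
 */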
210
211
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213
214 static int __init set_trace_boot_options(char *str)
215 {
216         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217         return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223
224 static int __init set_trace_boot_clock(char *str)
225 {
226         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227         trace_boot_clock = trace_boot_clock_buf;
228         return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
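/*
 * Example (illustration): "trace_clock=global" stores "global" here; the
 * accepted names are the ones listed in the trace_clocks[] array further
 * down (local, global, counter, uptime, perf, mono, mono_raw, boot).
 */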
231
232 static int __init set_tracepoint_printk(char *str)
233 {
234         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235                 tracepoint_printk = 1;
236         return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239
240 unsigned long long ns2usecs(u64 nsec)
241 {
242         nsec += 500;
243         do_div(nsec, 1000);
244         return nsec;
245 }
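/*
 * Worked example: ns2usecs(1499) == 1 and ns2usecs(1500) == 2; the "+ 500"
 * rounds to the nearest microsecond before the divide by 1000.
 */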
246
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS                                             \
249         (FUNCTION_DEFAULT_FLAGS |                                       \
250          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
251          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
252          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
253          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
257                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268         .trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270
271 LIST_HEAD(ftrace_trace_arrays);
272
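/*
 * trace_array_get - take a reference on a trace_array
 *
 * Returns 0 and increments @this_tr->ref if @this_tr is still on the
 * ftrace_trace_arrays list, or -ENODEV if it is not. The reference is
 * dropped with trace_array_put().
 */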
273 int trace_array_get(struct trace_array *this_tr)
274 {
275         struct trace_array *tr;
276         int ret = -ENODEV;
277
278         mutex_lock(&trace_types_lock);
279         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280                 if (tr == this_tr) {
281                         tr->ref++;
282                         ret = 0;
283                         break;
284                 }
285         }
286         mutex_unlock(&trace_types_lock);
287
288         return ret;
289 }
290
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293         WARN_ON(!this_tr->ref);
294         this_tr->ref--;
295 }
296
297 void trace_array_put(struct trace_array *this_tr)
298 {
299         mutex_lock(&trace_types_lock);
300         __trace_array_put(this_tr);
301         mutex_unlock(&trace_types_lock);
302 }
303
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305                               struct ring_buffer *buffer,
306                               struct ring_buffer_event *event)
307 {
308         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309             !filter_match_preds(call->filter, rec)) {
310                 __trace_event_discard_commit(buffer, event);
311                 return 1;
312         }
313
314         return 0;
315 }
316
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319         vfree(pid_list->pids);
320         kfree(pid_list);
321 }
322
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333         /*
334          * If pid_max changed after filtered_pids was created, we
335          * by default ignore all pids greater than the previous pid_max.
336          */
337         if (search_pid >= filtered_pids->pid_max)
338                 return false;
339
340         return test_bit(search_pid, filtered_pids->pids);
341 }
342
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355         /*
356          * Return false, because if filtered_pids does not exist,
357          * all pids are good to trace.
358          */
359         if (!filtered_pids)
360                 return false;
361
362         return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * When adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork, and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378                                   struct task_struct *self,
379                                   struct task_struct *task)
380 {
381         if (!pid_list)
382                 return;
383
384         /* For forks, we only add if the forking task is listed */
385         if (self) {
386                 if (!trace_find_filtered_pid(pid_list, self->pid))
387                         return;
388         }
389
390         /* Sorry, but we don't support pid_max changing after setting */
391         if (task->pid >= pid_list->pid_max)
392                 return;
393
394         /* "self" is set for forks, and NULL for exits */
395         if (self)
396                 set_bit(task->pid, pid_list->pids);
397         else
398                 clear_bit(task->pid, pid_list->pids);
399 }
400
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415         unsigned long pid = (unsigned long)v;
416
417         (*pos)++;
418
419         /* pid already is +1 of the actual previous bit */
420         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421
422         /* Return pid + 1 to allow zero to be represented */
423         if (pid < pid_list->pid_max)
424                 return (void *)(pid + 1);
425
426         return NULL;
427 }
428
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442         unsigned long pid;
443         loff_t l = 0;
444
445         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446         if (pid >= pid_list->pid_max)
447                 return NULL;
448
449         /* Return pid + 1 so that zero can be the exit value */
450         for (pid++; pid && l < *pos;
451              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452                 ;
453         return (void *)pid;
454 }
455
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466         unsigned long pid = (unsigned long)v - 1;
467
468         seq_printf(m, "%lu\n", pid);
469         return 0;
470 }
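/*
 * Worked example of the "+1" encoding used by trace_pid_start() and
 * trace_pid_next(): pid 0 is handed to seq_file as (void *)1 so it is not
 * mistaken for the NULL end-of-iteration marker, and trace_pid_show()
 * subtracts 1 again before printing.
 */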
471
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE            127
474
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476                     struct trace_pid_list **new_pid_list,
477                     const char __user *ubuf, size_t cnt)
478 {
479         struct trace_pid_list *pid_list;
480         struct trace_parser parser;
481         unsigned long val;
482         int nr_pids = 0;
483         ssize_t read = 0;
484         ssize_t ret = 0;
485         loff_t pos;
486         pid_t pid;
487
488         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489                 return -ENOMEM;
490
491         /*
492          * Always recreate a new array. The write is an all or nothing
493          * operation. Always create a new array when adding new pids by
494          * the user. If the operation fails, then the current list is
495          * not modified.
496          */
497         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498         if (!pid_list) {
499                 trace_parser_put(&parser);      /* don't leak the parser buffer */
500                 return -ENOMEM;
501         }
500
501         pid_list->pid_max = READ_ONCE(pid_max);
502
503         /* Only truncating will shrink pid_max */
504         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505                 pid_list->pid_max = filtered_pids->pid_max;
506
507         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508         if (!pid_list->pids) {
509                 trace_parser_put(&parser);      /* don't leak the parser buffer */
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
512
513         if (filtered_pids) {
514                 /* copy the current bits to the new max */
515                 for_each_set_bit(pid, filtered_pids->pids,
516                                  filtered_pids->pid_max) {
517                         set_bit(pid, pid_list->pids);
518                         nr_pids++;
519                 }
520         }
521
522         while (cnt > 0) {
523
524                 pos = 0;
525
526                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
527                 if (ret < 0 || !trace_parser_loaded(&parser))
528                         break;
529
530                 read += ret;
531                 ubuf += ret;
532                 cnt -= ret;
533
534                 ret = -EINVAL;
535                 if (kstrtoul(parser.buffer, 0, &val))
536                         break;
537                 if (val >= pid_list->pid_max)
538                         break;
539
540                 pid = (pid_t)val;
541
542                 set_bit(pid, pid_list->pids);
543                 nr_pids++;
544
545                 trace_parser_clear(&parser);
546                 ret = 0;
547         }
548         trace_parser_put(&parser);
549
550         if (ret < 0) {
551                 trace_free_pid_list(pid_list);
552                 return ret;
553         }
554
555         if (!nr_pids) {
556                 /* Cleared the list of pids */
557                 trace_free_pid_list(pid_list);
558                 read = ret;
559                 pid_list = NULL;
560         }
561
562         *new_pid_list = pid_list;
563
564         return read;
565 }
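/*
 * Example (illustration): input is parsed as whitespace-separated pid
 * values, so writing "1 2 3" builds a new list with those three bits set
 * (plus any pids carried over from @filtered_pids); if the end result
 * contains no pids at all, the new list is freed and *new_pid_list is set
 * to NULL.
 */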
566
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569         u64 ts;
570
571         /* Early boot up does not have a buffer yet */
572         if (!buf->buffer)
573                 return trace_clock_local();
574
575         ts = ring_buffer_time_stamp(buf->buffer, cpu);
576         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577
578         return ts;
579 }
580
581 u64 ftrace_now(int cpu)
582 {
583         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled", which can be checked in fast paths such
591  * as the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597         /*
598          * For quick access (irqsoff uses this in fast path), just
599          * return the mirror variable of the state of the ring buffer.
600          * It's a little racy, but we don't really care.
601          */
602         smp_rmb();
603         return !global_trace.buffer_disabled;
604 }
605
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it will be much appreciated
613  * to not have to wait for all that output. In any case, this can be
614  * configured at both boot time and run time.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
617
618 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer            *trace_types __read_mostly;
622
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627
628 /*
629  * serialize the access of the ring buffer
630  *
631  * The ring buffer serializes readers, but that is only low-level protection.
632  * The validity of the events (which are returned by ring_buffer_peek() etc.)
633  * is not protected by the ring buffer.
634  *
635  * The content of events may become garbage if we allow other processes to
636  * consume these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not a reader page) in the ring buffer, and this page will be rewritten
639  *      by the events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to the system.
642  *
643  * These primitives allow multiple processes to access different cpu ring
644  * buffers concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multiple read-only accesses are also serialized.
648  */
649
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653
654 static inline void trace_access_lock(int cpu)
655 {
656         if (cpu == RING_BUFFER_ALL_CPUS) {
657                 /* gain it for accessing the whole ring buffer. */
658                 down_write(&all_cpu_access_lock);
659         } else {
660                 /* gain it for accessing a cpu ring buffer. */
661
662                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663                 down_read(&all_cpu_access_lock);
664
665                 /* Secondly block other access to this @cpu ring buffer. */
666                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
667         }
668 }
669
670 static inline void trace_access_unlock(int cpu)
671 {
672         if (cpu == RING_BUFFER_ALL_CPUS) {
673                 up_write(&all_cpu_access_lock);
674         } else {
675                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676                 up_read(&all_cpu_access_lock);
677         }
678 }
679
680 static inline void trace_access_lock_init(void)
681 {
682         int cpu;
683
684         for_each_possible_cpu(cpu)
685                 mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687
688 #else
689
690 static DEFINE_MUTEX(access_lock);
691
692 static inline void trace_access_lock(int cpu)
693 {
694         (void)cpu;
695         mutex_lock(&access_lock);
696 }
697
698 static inline void trace_access_unlock(int cpu)
699 {
700         (void)cpu;
701         mutex_unlock(&access_lock);
702 }
703
704 static inline void trace_access_lock_init(void)
705 {
706 }
707
708 #endif
709
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712                                  unsigned long flags,
713                                  int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715                                       struct ring_buffer *buffer,
716                                       unsigned long flags,
717                                       int skip, int pc, struct pt_regs *regs);
718
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                         unsigned long flags,
722                                         int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726                                       struct ring_buffer *buffer,
727                                       unsigned long flags,
728                                       int skip, int pc, struct pt_regs *regs)
729 {
730 }
731
732 #endif
733
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736                   int type, unsigned long flags, int pc)
737 {
738         struct trace_entry *ent = ring_buffer_event_data(event);
739
740         tracing_generic_entry_update(ent, flags, pc);
741         ent->type = type;
742 }
743
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746                           int type,
747                           unsigned long len,
748                           unsigned long flags, int pc)
749 {
750         struct ring_buffer_event *event;
751
752         event = ring_buffer_lock_reserve(buffer, len);
753         if (event != NULL)
754                 trace_event_setup(event, type, flags, pc);
755
756         return event;
757 }
758
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761         if (tr->trace_buffer.buffer)
762                 ring_buffer_record_on(tr->trace_buffer.buffer);
763         /*
764          * This flag is looked at when buffers haven't been allocated
765          * yet, or by some tracers (like irqsoff), that just want to
766          * know if the ring buffer has been disabled, but it can handle
767          * races of where it gets disabled but we still do a record.
768          * As the check is in the fast path of the tracers, it is more
769          * important to be fast than accurate.
770          */
771         tr->buffer_disabled = 0;
772         /* Make the flag seen by readers */
773         smp_wmb();
774 }
775
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784         tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787
788
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792         __this_cpu_write(trace_taskinfo_save, true);
793
794         /* If this is the temp buffer, we need to commit fully */
795         if (this_cpu_read(trace_buffered_event) == event) {
796                 /* Length is in event->array[0] */
797                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
798                 /* Release the temp buffer */
799                 this_cpu_dec(trace_buffered_event_cnt);
800         } else
801                 ring_buffer_unlock_commit(buffer, event);
802 }
803
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:    The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812         struct ring_buffer_event *event;
813         struct ring_buffer *buffer;
814         struct print_entry *entry;
815         unsigned long irq_flags;
816         int alloc;
817         int pc;
818
819         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820                 return 0;
821
822         pc = preempt_count();
823
824         if (unlikely(tracing_selftest_running || tracing_disabled))
825                 return 0;
826
827         alloc = sizeof(*entry) + size + 2; /* possible \n added */
828
829         local_save_flags(irq_flags);
830         buffer = global_trace.trace_buffer.buffer;
831         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832                                             irq_flags, pc);
833         if (!event)
834                 return 0;
835
836         entry = ring_buffer_event_data(event);
837         entry->ip = ip;
838
839         memcpy(&entry->buf, str, size);
840
841         /* Add a newline if necessary */
842         if (entry->buf[size - 1] != '\n') {
843                 entry->buf[size] = '\n';
844                 entry->buf[size + 1] = '\0';
845         } else
846                 entry->buf[size] = '\0';
847
848         __buffer_unlock_commit(buffer, event);
849         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850
851         return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:    The address of the caller
858  * @str:   The constant string to write to the buffer
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862         struct ring_buffer_event *event;
863         struct ring_buffer *buffer;
864         struct bputs_entry *entry;
865         unsigned long irq_flags;
866         int size = sizeof(struct bputs_entry);
867         int pc;
868
869         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870                 return 0;
871
872         pc = preempt_count();
873
874         if (unlikely(tracing_selftest_running || tracing_disabled))
875                 return 0;
876
877         local_save_flags(irq_flags);
878         buffer = global_trace.trace_buffer.buffer;
879         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880                                             irq_flags, pc);
881         if (!event)
882                 return 0;
883
884         entry = ring_buffer_event_data(event);
885         entry->ip                       = ip;
886         entry->str                      = str;
887
888         __buffer_unlock_commit(buffer, event);
889         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890
891         return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 static void tracing_snapshot_instance(struct trace_array *tr)
897 {
898         struct tracer *tracer = tr->current_trace;
899         unsigned long flags;
900
901         if (in_nmi()) {
902                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903                 internal_trace_puts("*** snapshot is being ignored        ***\n");
904                 return;
905         }
906
907         if (!tr->allocated_snapshot) {
908                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909                 internal_trace_puts("*** stopping trace here!   ***\n");
910                 tracing_off();
911                 return;
912         }
913
914         /* Note, snapshot can not be used when the tracer uses it */
915         if (tracer->use_max_tr) {
916                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918                 return;
919         }
920
921         local_irq_save(flags);
922         update_max_tr(tr, current, smp_processor_id());
923         local_irq_restore(flags);
924 }
925
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942         struct trace_array *tr = &global_trace;
943
944         tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949                                         struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951
952 static int alloc_snapshot(struct trace_array *tr)
953 {
954         int ret;
955
956         if (!tr->allocated_snapshot) {
957
958                 /* allocate spare buffer */
959                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
960                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961                 if (ret < 0)
962                         return ret;
963
964                 tr->allocated_snapshot = true;
965         }
966
967         return 0;
968 }
969
970 static void free_snapshot(struct trace_array *tr)
971 {
972         /*
973          * We don't free the ring buffer; instead, we resize it because
974          * the max_tr ring buffer has some state (e.g. ring->clock) and
975          * we want to preserve it.
976          */
977         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978         set_buffer_entries(&tr->max_buffer, 1);
979         tracing_reset_online_cpus(&tr->max_buffer);
980         tr->allocated_snapshot = false;
981 }
982
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996         int ret;
997
998         ret = alloc_snapshot(tr);
999         WARN_ON(ret < 0);
1000
1001         return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018         int ret;
1019
1020         ret = tracing_alloc_snapshot();
1021         if (ret < 0)
1022                 return;
1023
1024         tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
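/*
 * Usage sketch (illustration): a caller that wants to capture the trace
 * when it detects a problem can call tracing_snapshot_alloc() from a
 * context that may sleep, or pre-allocate with tracing_alloc_snapshot()
 * and later call tracing_snapshot() at the point of interest (but not
 * from NMI, which tracing_snapshot_instance() rejects above).
 */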
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036         return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041         /* Give warning */
1042         tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049         if (tr->trace_buffer.buffer)
1050                 ring_buffer_record_off(tr->trace_buffer.buffer);
1051         /*
1052          * This flag is looked at when buffers haven't been allocated
1053          * yet, or by some tracers (like irqsoff), that just want to
1054          * know if the ring buffer has been disabled, but it can handle
1055          * races of where it gets disabled but we still do a record.
1056          * As the check is in the fast path of the tracers, it is more
1057          * important to be fast than accurate.
1058          */
1059         tr->buffer_disabled = 1;
1060         /* Make the flag seen by readers */
1061         smp_wmb();
1062 }
1063
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074         tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077
1078 void disable_trace_on_warning(void)
1079 {
1080         if (__disable_trace_on_warning)
1081                 tracing_off();
1082 }
1083
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr: the trace array to check whether its ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092         if (tr->trace_buffer.buffer)
1093                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094         return !tr->buffer_disabled;
1095 }
1096
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102         return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105
1106 static int __init set_buf_size(char *str)
1107 {
1108         unsigned long buf_size;
1109
1110         if (!str)
1111                 return 0;
1112         buf_size = memparse(str, &str);
1113         /* nr_entries can not be zero */
1114         if (buf_size == 0)
1115                 return 0;
1116         trace_buf_size = buf_size;
1117         return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
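/*
 * Example (illustration): "trace_buf_size=1M" is parsed with memparse(),
 * so K/M/G suffixes are accepted; the per-cpu size is then rounded to
 * page size as noted in the comment above TRACE_BUF_SIZE_DEFAULT.
 */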
1120
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123         unsigned long threshold;
1124         int ret;
1125
1126         if (!str)
1127                 return 0;
1128         ret = kstrtoul(str, 0, &threshold);
1129         if (ret < 0)
1130                 return 0;
1131         tracing_thresh = threshold * 1000;
1132         return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
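/*
 * Worked example: "tracing_thresh=100" stores 100 * 1000 = 100000 in
 * tracing_thresh, i.e. the value is given in microseconds on the command
 * line but kept in nanoseconds internally.
 */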
1135
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138         return nsecs / 1000;
1139 }
1140
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149
1150 /* These must match the bit positions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152         TRACE_FLAGS
1153         NULL
1154 };
1155
1156 static struct {
1157         u64 (*func)(void);
1158         const char *name;
1159         int in_ns;              /* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161         { trace_clock_local,            "local",        1 },
1162         { trace_clock_global,           "global",       1 },
1163         { trace_clock_counter,          "counter",      0 },
1164         { trace_clock_jiffies,          "uptime",       0 },
1165         { trace_clock,                  "perf",         1 },
1166         { ktime_get_mono_fast_ns,       "mono",         1 },
1167         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1168         { ktime_get_mono_fast_ns,       "boot",         1 },
1169         ARCH_TRACE_CLOCKS
1170 };
1171
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174         if (trace_clocks[tr->clock_id].in_ns)
1175                 return true;
1176
1177         return false;
1178 }
1179
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185         memset(parser, 0, sizeof(*parser));
1186
1187         parser->buffer = kmalloc(size, GFP_KERNEL);
1188         if (!parser->buffer)
1189                 return 1;
1190
1191         parser->size = size;
1192         return 0;
1193 }
1194
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200         kfree(parser->buffer);
1201         parser->buffer = NULL;
1202 }
1203
1204 /*
1205  * trace_get_user - reads the user input string separated by space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216         size_t cnt, loff_t *ppos)
1217 {
1218         char ch;
1219         size_t read = 0;
1220         ssize_t ret;
1221
1222         if (!*ppos)
1223                 trace_parser_clear(parser);
1224
1225         ret = get_user(ch, ubuf++);
1226         if (ret)
1227                 goto out;
1228
1229         read++;
1230         cnt--;
1231
1232         /*
1233          * The parser is not finished with the last write,
1234          * continue reading the user input without skipping spaces.
1235          */
1236         if (!parser->cont) {
1237                 /* skip white space */
1238                 while (cnt && isspace(ch)) {
1239                         ret = get_user(ch, ubuf++);
1240                         if (ret)
1241                                 goto out;
1242                         read++;
1243                         cnt--;
1244                 }
1245
1246                 parser->idx = 0;
1247
1248                 /* only spaces were written */
1249                 if (isspace(ch) || !ch) {
1250                         *ppos += read;
1251                         ret = read;
1252                         goto out;
1253                 }
1254         }
1255
1256         /* read the non-space input */
1257         while (cnt && !isspace(ch) && ch) {
1258                 if (parser->idx < parser->size - 1)
1259                         parser->buffer[parser->idx++] = ch;
1260                 else {
1261                         ret = -EINVAL;
1262                         goto out;
1263                 }
1264                 ret = get_user(ch, ubuf++);
1265                 if (ret)
1266                         goto out;
1267                 read++;
1268                 cnt--;
1269         }
1270
1271         /* We either got finished input or we have to wait for another call. */
1272         if (isspace(ch) || !ch) {
1273                 parser->buffer[parser->idx] = 0;
1274                 parser->cont = false;
1275         } else if (parser->idx < parser->size - 1) {
1276                 parser->cont = true;
1277                 parser->buffer[parser->idx++] = ch;
1278                 /* Make sure the parsed string always terminates with '\0'. */
1279                 parser->buffer[parser->idx] = 0;
1280         } else {
1281                 ret = -EINVAL;
1282                 goto out;
1283         }
1284
1285         *ppos += read;
1286         ret = read;
1287
1288 out:
1289         return ret;
1290 }
1291
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295         int len;
1296
1297         if (trace_seq_used(s) <= s->seq.readpos)
1298                 return -EBUSY;
1299
1300         len = trace_seq_used(s) - s->seq.readpos;
1301         if (cnt > len)
1302                 cnt = len;
1303         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305         s->seq.readpos += cnt;
1306         return cnt;
1307 }
1308
1309 unsigned long __read_mostly     tracing_thresh;
1310
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct trace_buffer *trace_buf = &tr->trace_buffer;
1321         struct trace_buffer *max_buf = &tr->max_buffer;
1322         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325         max_buf->cpu = cpu;
1326         max_buf->time_start = data->preempt_timestamp;
1327
1328         max_data->saved_latency = tr->max_latency;
1329         max_data->critical_start = data->critical_start;
1330         max_data->critical_end = data->critical_end;
1331
1332         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333         max_data->pid = tsk->pid;
1334         /*
1335          * If tsk == current, then use current_uid(), as that does not use
1336          * RCU. The irq tracer can be called out of RCU scope.
1337          */
1338         if (tsk == current)
1339                 max_data->uid = current_uid();
1340         else
1341                 max_data->uid = task_uid(tsk);
1342
1343         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344         max_data->policy = tsk->policy;
1345         max_data->rt_priority = tsk->rt_priority;
1346
1347         /* record this tasks comm */
1348         tracing_record_cmdline(tsk);
1349 }
1350
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363         struct ring_buffer *buf;
1364
1365         if (tr->stop_count)
1366                 return;
1367
1368         WARN_ON_ONCE(!irqs_disabled());
1369
1370         if (!tr->allocated_snapshot) {
1371                 /* Only the nop tracer should hit this when disabling */
1372                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373                 return;
1374         }
1375
1376         arch_spin_lock(&tr->max_lock);
1377
1378         buf = tr->trace_buffer.buffer;
1379         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380         tr->max_buffer.buffer = buf;
1381
1382         __update_max_tr(tr, tsk, cpu);
1383         arch_spin_unlock(&tr->max_lock);
1384 }
1385
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr: tracer
1389  * @tsk: task with the latency
1390  * @cpu: the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397         int ret;
1398
1399         if (tr->stop_count)
1400                 return;
1401
1402         WARN_ON_ONCE(!irqs_disabled());
1403         if (!tr->allocated_snapshot) {
1404                 /* Only the nop tracer should hit this when disabling */
1405                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406                 return;
1407         }
1408
1409         arch_spin_lock(&tr->max_lock);
1410
1411         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412
1413         if (ret == -EBUSY) {
1414                 /*
1415                  * We failed to swap the buffer due to a commit taking
1416                  * place on this CPU. We fail to record, but we reset
1417                  * the max trace buffer (no one writes directly to it)
1418                  * and flag that it failed.
1419                  */
1420                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421                         "Failed to swap buffers due to commit in progress\n");
1422         }
1423
1424         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425
1426         __update_max_tr(tr, tsk, cpu);
1427         arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433         /* Iterators are static, they should be filled or empty */
1434         if (trace_buffer_iter(iter, iter->cpu_file))
1435                 return 0;
1436
1437         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438                                 full);
1439 }
1440
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443
1444 struct trace_selftests {
1445         struct list_head                list;
1446         struct tracer                   *type;
1447 };
1448
1449 static LIST_HEAD(postponed_selftests);
1450
1451 static int save_selftest(struct tracer *type)
1452 {
1453         struct trace_selftests *selftest;
1454
1455         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456         if (!selftest)
1457                 return -ENOMEM;
1458
1459         selftest->type = type;
1460         list_add(&selftest->list, &postponed_selftests);
1461         return 0;
1462 }
1463
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466         struct trace_array *tr = &global_trace;
1467         struct tracer *saved_tracer = tr->current_trace;
1468         int ret;
1469
1470         if (!type->selftest || tracing_selftest_disabled)
1471                 return 0;
1472
1473         /*
1474          * If a tracer registers early in boot up (before scheduling is
1475          * initialized and such), then do not run its selftests yet.
1476          * Instead, run it a little later in the boot process.
1477          */
1478         if (!selftests_can_run)
1479                 return save_selftest(type);
1480
1481         /*
1482          * Run a selftest on this tracer.
1483          * Here we reset the trace buffer, and set the current
1484          * tracer to be this tracer. The tracer can then run some
1485          * internal tracing to verify that everything is in order.
1486          * If we fail, we do not register this tracer.
1487          */
1488         tracing_reset_online_cpus(&tr->trace_buffer);
1489
1490         tr->current_trace = type;
1491
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493         if (type->use_max_tr) {
1494                 /* If we expanded the buffers, make sure the max is expanded too */
1495                 if (ring_buffer_expanded)
1496                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497                                            RING_BUFFER_ALL_CPUS);
1498                 tr->allocated_snapshot = true;
1499         }
1500 #endif
1501
1502         /* the test is responsible for initializing and enabling */
1503         pr_info("Testing tracer %s: ", type->name);
1504         ret = type->selftest(type, tr);
1505         /* the test is responsible for resetting too */
1506         tr->current_trace = saved_tracer;
1507         if (ret) {
1508                 printk(KERN_CONT "FAILED!\n");
1509                 /* Add the warning after printing 'FAILED' */
1510                 WARN_ON(1);
1511                 return -1;
1512         }
1513         /* Only reset on passing, to avoid touching corrupted buffers */
1514         tracing_reset_online_cpus(&tr->trace_buffer);
1515
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517         if (type->use_max_tr) {
1518                 tr->allocated_snapshot = false;
1519
1520                 /* Shrink the max buffer again */
1521                 if (ring_buffer_expanded)
1522                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1523                                            RING_BUFFER_ALL_CPUS);
1524         }
1525 #endif
1526
1527         printk(KERN_CONT "PASSED\n");
1528         return 0;
1529 }
1530
1531 static __init int init_trace_selftests(void)
1532 {
1533         struct trace_selftests *p, *n;
1534         struct tracer *t, **last;
1535         int ret;
1536
1537         selftests_can_run = true;
1538
1539         mutex_lock(&trace_types_lock);
1540
1541         if (list_empty(&postponed_selftests))
1542                 goto out;
1543
1544         pr_info("Running postponed tracer tests:\n");
1545
1546         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547                 ret = run_tracer_selftest(p->type);
1548                 /* If the test fails, then warn and remove from available_tracers */
1549                 if (ret < 0) {
1550                         WARN(1, "tracer: %s failed selftest, disabling\n",
1551                              p->type->name);
1552                         last = &trace_types;
1553                         for (t = trace_types; t; t = t->next) {
1554                                 if (t == p->type) {
1555                                         *last = t->next;
1556                                         break;
1557                                 }
1558                                 last = &t->next;
1559                         }
1560                 }
1561                 list_del(&p->list);
1562                 kfree(p);
1563         }
1564
1565  out:
1566         mutex_unlock(&trace_types_lock);
1567
1568         return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574         return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579
1580 static void __init apply_trace_boot_options(void);
1581
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type: the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590         struct tracer *t;
1591         int ret = 0;
1592
1593         if (!type->name) {
1594                 pr_info("Tracer must have a name\n");
1595                 return -1;
1596         }
1597
1598         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600                 return -1;
1601         }
1602
1603         mutex_lock(&trace_types_lock);
1604
1605         tracing_selftest_running = true;
1606
1607         for (t = trace_types; t; t = t->next) {
1608                 if (strcmp(type->name, t->name) == 0) {
1609                         /* already found */
1610                         pr_info("Tracer %s already registered\n",
1611                                 type->name);
1612                         ret = -1;
1613                         goto out;
1614                 }
1615         }
1616
1617         if (!type->set_flag)
1618                 type->set_flag = &dummy_set_flag;
1619         if (!type->flags) {
1620                 /* allocate a dummy tracer_flags */
1621                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622                 if (!type->flags) {
1623                         ret = -ENOMEM;
1624                         goto out;
1625                 }
1626                 type->flags->val = 0;
1627                 type->flags->opts = dummy_tracer_opt;
1628         } else
1629                 if (!type->flags->opts)
1630                         type->flags->opts = dummy_tracer_opt;
1631
1632         /* store the tracer for __set_tracer_option */
1633         type->flags->trace = type;
1634
1635         ret = run_tracer_selftest(type);
1636         if (ret < 0)
1637                 goto out;
1638
1639         type->next = trace_types;
1640         trace_types = type;
1641         add_tracer_options(&global_trace, type);
1642
1643  out:
1644         tracing_selftest_running = false;
1645         mutex_unlock(&trace_types_lock);
1646
1647         if (ret || !default_bootup_tracer)
1648                 goto out_unlock;
1649
1650         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651                 goto out_unlock;
1652
1653         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654         /* Do we want this tracer to start on bootup? */
1655         tracing_set_tracer(&global_trace, type->name);
1656         default_bootup_tracer = NULL;
1657
1658         apply_trace_boot_options();
1659
1660         /* Disable other selftests, since this tracer will break them. */
1661         tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664                type->name);
1665 #endif
1666
1667  out_unlock:
1668         return ret;
1669 }
1670
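/*
 * Illustrative sketch (not part of the original file): a built-in tracer
 * is typically registered from an early initcall. The name and callbacks
 * below are hypothetical; see struct tracer in trace.h for the full set
 * of hooks.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static int __init init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */
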
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673         struct ring_buffer *buffer = buf->buffer;
1674
1675         if (!buffer)
1676                 return;
1677
1678         ring_buffer_record_disable(buffer);
1679
1680         /* Make sure all commits have finished */
1681         synchronize_sched();
1682         ring_buffer_reset_cpu(buffer, cpu);
1683
1684         ring_buffer_record_enable(buffer);
1685 }
1686
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689         struct ring_buffer *buffer = buf->buffer;
1690         int cpu;
1691
1692         if (!buffer)
1693                 return;
1694
1695         ring_buffer_record_disable(buffer);
1696
1697         /* Make sure all commits have finished */
1698         synchronize_sched();
1699
1700         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701
1702         for_each_online_cpu(cpu)
1703                 ring_buffer_reset_cpu(buffer, cpu);
1704
1705         ring_buffer_record_enable(buffer);
1706 }
1707
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711         struct trace_array *tr;
1712
1713         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714                 if (!tr->clear_trace)
1715                         continue;
1716                 tr->clear_trace = false;
1717                 tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719                 tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721         }
1722 }
1723
1724 static int *tgid_map;
1725
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731         unsigned *map_cmdline_to_pid;
1732         unsigned cmdline_num;
1733         int cmdline_idx;
1734         char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737
1738 /* temporary disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752                                     struct saved_cmdlines_buffer *s)
1753 {
1754         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1755                                         GFP_KERNEL);
1756         if (!s->map_cmdline_to_pid)
1757                 return -ENOMEM;
1758
1759         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1760         if (!s->saved_cmdlines) {
1761                 kfree(s->map_cmdline_to_pid);
1762                 return -ENOMEM;
1763         }
1764
1765         s->cmdline_idx = 0;
1766         s->cmdline_num = val;
1767         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1768                sizeof(s->map_pid_to_cmdline));
1769         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1770                val * sizeof(*s->map_cmdline_to_pid));
1771
1772         return 0;
1773 }
1774
1775 static int trace_create_savedcmd(void)
1776 {
1777         int ret;
1778
1779         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1780         if (!savedcmd)
1781                 return -ENOMEM;
1782
1783         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1784         if (ret < 0) {
1785                 kfree(savedcmd);
1786                 savedcmd = NULL;
1787                 return -ENOMEM;
1788         }
1789
1790         return 0;
1791 }
1792
1793 int is_tracing_stopped(void)
1794 {
1795         return global_trace.stop_count;
1796 }
1797
1798 /**
1799  * tracing_start - quick start of the tracer
1800  *
1801  * If tracing is enabled but was stopped by tracing_stop,
1802  * this will start the tracer back up.
1803  */
1804 void tracing_start(void)
1805 {
1806         struct ring_buffer *buffer;
1807         unsigned long flags;
1808
1809         if (tracing_disabled)
1810                 return;
1811
1812         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1813         if (--global_trace.stop_count) {
1814                 if (global_trace.stop_count < 0) {
1815                         /* Someone screwed up their debugging */
1816                         WARN_ON_ONCE(1);
1817                         global_trace.stop_count = 0;
1818                 }
1819                 goto out;
1820         }
1821
1822         /* Prevent the buffers from switching */
1823         arch_spin_lock(&global_trace.max_lock);
1824
1825         buffer = global_trace.trace_buffer.buffer;
1826         if (buffer)
1827                 ring_buffer_record_enable(buffer);
1828
1829 #ifdef CONFIG_TRACER_MAX_TRACE
1830         buffer = global_trace.max_buffer.buffer;
1831         if (buffer)
1832                 ring_buffer_record_enable(buffer);
1833 #endif
1834
1835         arch_spin_unlock(&global_trace.max_lock);
1836
1837  out:
1838         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1839 }
1840
1841 static void tracing_start_tr(struct trace_array *tr)
1842 {
1843         struct ring_buffer *buffer;
1844         unsigned long flags;
1845
1846         if (tracing_disabled)
1847                 return;
1848
1849         /* If global, we need to also start the max tracer */
1850         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1851                 return tracing_start();
1852
1853         raw_spin_lock_irqsave(&tr->start_lock, flags);
1854
1855         if (--tr->stop_count) {
1856                 if (tr->stop_count < 0) {
1857                         /* Someone screwed up their debugging */
1858                         WARN_ON_ONCE(1);
1859                         tr->stop_count = 0;
1860                 }
1861                 goto out;
1862         }
1863
1864         buffer = tr->trace_buffer.buffer;
1865         if (buffer)
1866                 ring_buffer_record_enable(buffer);
1867
1868  out:
1869         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1870 }
1871
1872 /**
1873  * tracing_stop - quick stop of the tracer
1874  *
1875  * Light weight way to stop tracing. Use in conjunction with
1876  * tracing_start.
1877  */
1878 void tracing_stop(void)
1879 {
1880         struct ring_buffer *buffer;
1881         unsigned long flags;
1882
1883         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1884         if (global_trace.stop_count++)
1885                 goto out;
1886
1887         /* Prevent the buffers from switching */
1888         arch_spin_lock(&global_trace.max_lock);
1889
1890         buffer = global_trace.trace_buffer.buffer;
1891         if (buffer)
1892                 ring_buffer_record_disable(buffer);
1893
1894 #ifdef CONFIG_TRACER_MAX_TRACE
1895         buffer = global_trace.max_buffer.buffer;
1896         if (buffer)
1897                 ring_buffer_record_disable(buffer);
1898 #endif
1899
1900         arch_spin_unlock(&global_trace.max_lock);
1901
1902  out:
1903         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1904 }
1905
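/*
 * Illustrative sketch (not from the original file): tracing_stop() and
 * tracing_start() are meant to be used as a lightweight pair around code
 * that wants the buffers quiesced, e.g.:
 *
 *	tracing_stop();
 *	... inspect or dump the trace buffers without new events racing in ...
 *	tracing_start();
 */
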
1906 static void tracing_stop_tr(struct trace_array *tr)
1907 {
1908         struct ring_buffer *buffer;
1909         unsigned long flags;
1910
1911         /* If global, we need to also stop the max tracer */
1912         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1913                 return tracing_stop();
1914
1915         raw_spin_lock_irqsave(&tr->start_lock, flags);
1916         if (tr->stop_count++)
1917                 goto out;
1918
1919         buffer = tr->trace_buffer.buffer;
1920         if (buffer)
1921                 ring_buffer_record_disable(buffer);
1922
1923  out:
1924         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1925 }
1926
1927 static int trace_save_cmdline(struct task_struct *tsk)
1928 {
1929         unsigned pid, idx;
1930
1931         /* treat recording of idle task as a success */
1932         if (!tsk->pid)
1933                 return 1;
1934
1935         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1936                 return 0;
1937
1938         /*
1939          * It's not the end of the world if we don't get
1940          * the lock, but we also don't want to spin
1941          * nor do we want to disable interrupts,
1942          * so if we miss here, then better luck next time.
1943          */
1944         if (!arch_spin_trylock(&trace_cmdline_lock))
1945                 return 0;
1946
1947         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1948         if (idx == NO_CMDLINE_MAP) {
1949                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1950
1951                 /*
1952                  * Check whether the cmdline buffer at idx has a pid
1953                  * mapped. We are going to overwrite that entry so we
1954                  * need to clear the map_pid_to_cmdline. Otherwise we
1955                  * would read the new comm for the old pid.
1956                  */
1957                 pid = savedcmd->map_cmdline_to_pid[idx];
1958                 if (pid != NO_CMDLINE_MAP)
1959                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1960
1961                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1962                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1963
1964                 savedcmd->cmdline_idx = idx;
1965         }
1966
1967         set_cmdline(idx, tsk->comm);
1968
1969         arch_spin_unlock(&trace_cmdline_lock);
1970
1971         return 1;
1972 }
1973
1974 static void __trace_find_cmdline(int pid, char comm[])
1975 {
1976         unsigned map;
1977
1978         if (!pid) {
1979                 strcpy(comm, "<idle>");
1980                 return;
1981         }
1982
1983         if (WARN_ON_ONCE(pid < 0)) {
1984                 strcpy(comm, "<XXX>");
1985                 return;
1986         }
1987
1988         if (pid > PID_MAX_DEFAULT) {
1989                 strcpy(comm, "<...>");
1990                 return;
1991         }
1992
1993         map = savedcmd->map_pid_to_cmdline[pid];
1994         if (map != NO_CMDLINE_MAP)
1995                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1996         else
1997                 strcpy(comm, "<...>");
1998 }
1999
2000 void trace_find_cmdline(int pid, char comm[])
2001 {
2002         preempt_disable();
2003         arch_spin_lock(&trace_cmdline_lock);
2004
2005         __trace_find_cmdline(pid, comm);
2006
2007         arch_spin_unlock(&trace_cmdline_lock);
2008         preempt_enable();
2009 }
2010
2011 int trace_find_tgid(int pid)
2012 {
2013         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2014                 return 0;
2015
2016         return tgid_map[pid];
2017 }
2018
2019 static int trace_save_tgid(struct task_struct *tsk)
2020 {
2021         /* treat recording of idle task as a success */
2022         if (!tsk->pid)
2023                 return 1;
2024
2025         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2026                 return 0;
2027
2028         tgid_map[tsk->pid] = tsk->tgid;
2029         return 1;
2030 }
2031
2032 static bool tracing_record_taskinfo_skip(int flags)
2033 {
2034         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2035                 return true;
2036         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2037                 return true;
2038         if (!__this_cpu_read(trace_taskinfo_save))
2039                 return true;
2040         return false;
2041 }
2042
2043 /**
2044  * tracing_record_taskinfo - record the task info of a task
2045  *
2046  * @task:  task to record
2047  * @flags: TRACE_RECORD_CMDLINE for recording comm
2048  *         TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo(struct task_struct *task, int flags)
2051 {
2052         bool done;
2053
2054         if (tracing_record_taskinfo_skip(flags))
2055                 return;
2056
2057         /*
2058          * Record as much task information as possible. If some fail, continue
2059          * to try to record the others.
2060          */
2061         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2062         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2063
2064         /* If recording any information failed, retry soon. */
2065         if (!done)
2066                 return;
2067
2068         __this_cpu_write(trace_taskinfo_save, false);
2069 }
2070
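/*
 * Illustrative example (not part of the original file): a caller that wants
 * both pieces of task info recorded would do something like
 *
 *	tracing_record_taskinfo(current,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */
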
2071 /**
2072  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2073  *
2074  * @prev:  previous task during sched_switch
2075  * @next:  next task during sched_switch
2076  * @flags: TRACE_RECORD_CMDLINE for recording comm
2077  *         TRACE_RECORD_TGID for recording tgid
2078  */
2079 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2080                                           struct task_struct *next, int flags)
2081 {
2082         bool done;
2083
2084         if (tracing_record_taskinfo_skip(flags))
2085                 return;
2086
2087         /*
2088          * Record as much task information as possible. If some fail, continue
2089          * to try to record the others.
2090          */
2091         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2092         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2093         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2094         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2095
2096         /* If recording any information failed, retry again soon. */
2097         /* If recording any information failed, retry soon. */
2098                 return;
2099
2100         __this_cpu_write(trace_taskinfo_save, false);
2101 }
2102
2103 /* Helpers to record a specific task information */
2104 void tracing_record_cmdline(struct task_struct *task)
2105 {
2106         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2107 }
2108
2109 void tracing_record_tgid(struct task_struct *task)
2110 {
2111         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2112 }
2113
2114 /*
2115  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2116  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2117  * simplifies those functions and keeps them in sync.
2118  */
2119 enum print_line_t trace_handle_return(struct trace_seq *s)
2120 {
2121         return trace_seq_has_overflowed(s) ?
2122                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2123 }
2124 EXPORT_SYMBOL_GPL(trace_handle_return);
2125
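/*
 * Illustrative sketch (not part of the original file): a trace_event output
 * callback typically ends with this helper; the event and callback names
 * below are hypothetical.
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my_event: ...\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */
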
2126 void
2127 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2128                              int pc)
2129 {
2130         struct task_struct *tsk = current;
2131
2132         entry->preempt_count            = pc & 0xff;
2133         entry->pid                      = (tsk) ? tsk->pid : 0;
2134         entry->flags =
2135 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2136                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2137 #else
2138                 TRACE_FLAG_IRQS_NOSUPPORT |
2139 #endif
2140                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2141                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2142                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2143                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2144                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2145 }
2146 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2147
2148 struct ring_buffer_event *
2149 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2150                           int type,
2151                           unsigned long len,
2152                           unsigned long flags, int pc)
2153 {
2154         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2155 }
2156
2157 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2158 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2159 static int trace_buffered_event_ref;
2160
2161 /**
2162  * trace_buffered_event_enable - enable buffering events
2163  *
2164  * When events are being filtered, it is quicker to use a temporary
2165  * buffer to write the event data into if there's a likely chance
2166  * that it will not be committed. The discard of the ring buffer
2167  * is not as fast as committing, and is much slower than copying
2168  * a commit.
2169  *
2170  * When an event is to be filtered, allocate per cpu buffers to
2171  * write the event data into, and if the event is filtered and discarded
2172  * it is simply dropped, otherwise, the entire data is to be committed
2173  * in one shot.
2174  */
2175 void trace_buffered_event_enable(void)
2176 {
2177         struct ring_buffer_event *event;
2178         struct page *page;
2179         int cpu;
2180
2181         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2182
2183         if (trace_buffered_event_ref++)
2184                 return;
2185
2186         for_each_tracing_cpu(cpu) {
2187                 page = alloc_pages_node(cpu_to_node(cpu),
2188                                         GFP_KERNEL | __GFP_NORETRY, 0);
2189                 if (!page)
2190                         goto failed;
2191
2192                 event = page_address(page);
2193                 memset(event, 0, sizeof(*event));
2194
2195                 per_cpu(trace_buffered_event, cpu) = event;
2196
2197                 preempt_disable();
2198                 if (cpu == smp_processor_id() &&
2199                     this_cpu_read(trace_buffered_event) !=
2200                     per_cpu(trace_buffered_event, cpu))
2201                         WARN_ON_ONCE(1);
2202                 preempt_enable();
2203         }
2204
2205         return;
2206  failed:
2207         trace_buffered_event_disable();
2208 }
2209
2210 static void enable_trace_buffered_event(void *data)
2211 {
2212         /* Probably not needed, but do it anyway */
2213         smp_rmb();
2214         this_cpu_dec(trace_buffered_event_cnt);
2215 }
2216
2217 static void disable_trace_buffered_event(void *data)
2218 {
2219         this_cpu_inc(trace_buffered_event_cnt);
2220 }
2221
2222 /**
2223  * trace_buffered_event_disable - disable buffering events
2224  *
2225  * When a filter is removed, it is faster to not use the buffered
2226  * events, and to commit directly into the ring buffer. Free up
2227  * the temp buffers when there are no more users. This requires
2228  * special synchronization with current events.
2229  */
2230 void trace_buffered_event_disable(void)
2231 {
2232         int cpu;
2233
2234         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2235
2236         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2237                 return;
2238
2239         if (--trace_buffered_event_ref)
2240                 return;
2241
2242         preempt_disable();
2243         /* For each CPU, set the buffer as used. */
2244         smp_call_function_many(tracing_buffer_mask,
2245                                disable_trace_buffered_event, NULL, 1);
2246         preempt_enable();
2247
2248         /* Wait for all current users to finish */
2249         synchronize_sched();
2250
2251         for_each_tracing_cpu(cpu) {
2252                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2253                 per_cpu(trace_buffered_event, cpu) = NULL;
2254         }
2255         /*
2256          * Make sure trace_buffered_event is NULL before clearing
2257          * trace_buffered_event_cnt.
2258          */
2259         smp_wmb();
2260
2261         preempt_disable();
2262         /* Do the work on each cpu */
2263         smp_call_function_many(tracing_buffer_mask,
2264                                enable_trace_buffered_event, NULL, 1);
2265         preempt_enable();
2266 }
2267
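/*
 * Illustrative sketch (not part of the original file): the two calls form a
 * reference-counted pair around filter installation, with event_mutex held
 * by the caller:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the event filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and a matching trace_buffered_event_disable() (again under event_mutex)
 * once the filter is removed.
 */
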
2268 static struct ring_buffer *temp_buffer;
2269
2270 struct ring_buffer_event *
2271 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2272                           struct trace_event_file *trace_file,
2273                           int type, unsigned long len,
2274                           unsigned long flags, int pc)
2275 {
2276         struct ring_buffer_event *entry;
2277         int val;
2278
2279         *current_rb = trace_file->tr->trace_buffer.buffer;
2280
2281         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2282              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2283             (entry = this_cpu_read(trace_buffered_event))) {
2284                 /* Try to use the per cpu buffer first */
2285                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2286                 if (val == 1) {
2287                         trace_event_setup(entry, type, flags, pc);
2288                         entry->array[0] = len;
2289                         return entry;
2290                 }
2291                 this_cpu_dec(trace_buffered_event_cnt);
2292         }
2293
2294         entry = __trace_buffer_lock_reserve(*current_rb,
2295                                             type, len, flags, pc);
2296         /*
2297          * If tracing is off, but we have triggers enabled
2298          * we still need to look at the event data. Use the temp_buffer
2299          * to store the trace event for the trigger to use. It's recursion
2300          * safe and will not be recorded anywhere.
2301          */
2302         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2303                 *current_rb = temp_buffer;
2304                 entry = __trace_buffer_lock_reserve(*current_rb,
2305                                                     type, len, flags, pc);
2306         }
2307         return entry;
2308 }
2309 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2310
2311 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2312 static DEFINE_MUTEX(tracepoint_printk_mutex);
2313
2314 static void output_printk(struct trace_event_buffer *fbuffer)
2315 {
2316         struct trace_event_call *event_call;
2317         struct trace_event *event;
2318         unsigned long flags;
2319         struct trace_iterator *iter = tracepoint_print_iter;
2320
2321         /* We should never get here if iter is NULL */
2322         if (WARN_ON_ONCE(!iter))
2323                 return;
2324
2325         event_call = fbuffer->trace_file->event_call;
2326         if (!event_call || !event_call->event.funcs ||
2327             !event_call->event.funcs->trace)
2328                 return;
2329
2330         event = &fbuffer->trace_file->event_call->event;
2331
2332         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2333         trace_seq_init(&iter->seq);
2334         iter->ent = fbuffer->entry;
2335         event_call->event.funcs->trace(iter, 0, event);
2336         trace_seq_putc(&iter->seq, 0);
2337         printk("%s", iter->seq.buffer);
2338
2339         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2340 }
2341
2342 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2343                              void __user *buffer, size_t *lenp,
2344                              loff_t *ppos)
2345 {
2346         int save_tracepoint_printk;
2347         int ret;
2348
2349         mutex_lock(&tracepoint_printk_mutex);
2350         save_tracepoint_printk = tracepoint_printk;
2351
2352         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2353
2354         /*
2355          * This will force exiting early, as tracepoint_printk
2356          * is always zero when tracepoint_print_iter is not allocated.
2357          */
2358         if (!tracepoint_print_iter)
2359                 tracepoint_printk = 0;
2360
2361         if (save_tracepoint_printk == tracepoint_printk)
2362                 goto out;
2363
2364         if (tracepoint_printk)
2365                 static_key_enable(&tracepoint_printk_key.key);
2366         else
2367                 static_key_disable(&tracepoint_printk_key.key);
2368
2369  out:
2370         mutex_unlock(&tracepoint_printk_mutex);
2371
2372         return ret;
2373 }
2374
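/*
 * Illustrative note (not part of the original file): this handler backs the
 * kernel.tracepoint_printk sysctl, so the static key is typically toggled
 * from user space, e.g.:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 */
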
2375 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2376 {
2377         if (static_key_false(&tracepoint_printk_key.key))
2378                 output_printk(fbuffer);
2379
2380         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2381                                     fbuffer->event, fbuffer->entry,
2382                                     fbuffer->flags, fbuffer->pc);
2383 }
2384 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2385
2386 /*
2387  * Skip 3:
2388  *
2389  *   trace_buffer_unlock_commit_regs()
2390  *   trace_event_buffer_commit()
2391  *   trace_event_raw_event_xxx()
2392  */
2393 # define STACK_SKIP 3
2394
2395 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2396                                      struct ring_buffer *buffer,
2397                                      struct ring_buffer_event *event,
2398                                      unsigned long flags, int pc,
2399                                      struct pt_regs *regs)
2400 {
2401         __buffer_unlock_commit(buffer, event);
2402
2403         /*
2404          * If regs is not set, then skip the necessary functions.
2405          * Note, we can still get here via blktrace, wakeup tracer
2406          * and mmiotrace, but that's ok if they lose a function or
2407          * two. They are not that meaningful.
2408          */
2409         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2410         ftrace_trace_userstack(buffer, flags, pc);
2411 }
2412
2413 /*
2414  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2415  */
2416 void
2417 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2418                                    struct ring_buffer_event *event)
2419 {
2420         __buffer_unlock_commit(buffer, event);
2421 }
2422
2423 static void
2424 trace_process_export(struct trace_export *export,
2425                struct ring_buffer_event *event)
2426 {
2427         struct trace_entry *entry;
2428         unsigned int size = 0;
2429
2430         entry = ring_buffer_event_data(event);
2431         size = ring_buffer_event_length(event);
2432         export->write(export, entry, size);
2433 }
2434
2435 static DEFINE_MUTEX(ftrace_export_lock);
2436
2437 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2438
2439 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2440
2441 static inline void ftrace_exports_enable(void)
2442 {
2443         static_branch_enable(&ftrace_exports_enabled);
2444 }
2445
2446 static inline void ftrace_exports_disable(void)
2447 {
2448         static_branch_disable(&ftrace_exports_enabled);
2449 }
2450
2451 void ftrace_exports(struct ring_buffer_event *event)
2452 {
2453         struct trace_export *export;
2454
2455         preempt_disable_notrace();
2456
2457         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2458         while (export) {
2459                 trace_process_export(export, event);
2460                 export = rcu_dereference_raw_notrace(export->next);
2461         }
2462
2463         preempt_enable_notrace();
2464 }
2465
2466 static inline void
2467 add_trace_export(struct trace_export **list, struct trace_export *export)
2468 {
2469         rcu_assign_pointer(export->next, *list);
2470         /*
2471          * We are entering export into the list but another
2472          * CPU might be walking that list. We need to make sure
2473          * the export->next pointer is valid before another CPU sees
2474          * the export pointer included into the list.
2475          */
2476         rcu_assign_pointer(*list, export);
2477 }
2478
2479 static inline int
2480 rm_trace_export(struct trace_export **list, struct trace_export *export)
2481 {
2482         struct trace_export **p;
2483
2484         for (p = list; *p != NULL; p = &(*p)->next)
2485                 if (*p == export)
2486                         break;
2487
2488         if (*p != export)
2489                 return -1;
2490
2491         rcu_assign_pointer(*p, (*p)->next);
2492
2493         return 0;
2494 }
2495
2496 static inline void
2497 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2498 {
2499         if (*list == NULL)
2500                 ftrace_exports_enable();
2501
2502         add_trace_export(list, export);
2503 }
2504
2505 static inline int
2506 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2507 {
2508         int ret;
2509
2510         ret = rm_trace_export(list, export);
2511         if (*list == NULL)
2512                 ftrace_exports_disable();
2513
2514         return ret;
2515 }
2516
2517 int register_ftrace_export(struct trace_export *export)
2518 {
2519         if (WARN_ON_ONCE(!export->write))
2520                 return -1;
2521
2522         mutex_lock(&ftrace_export_lock);
2523
2524         add_ftrace_export(&ftrace_exports_list, export);
2525
2526         mutex_unlock(&ftrace_export_lock);
2527
2528         return 0;
2529 }
2530 EXPORT_SYMBOL_GPL(register_ftrace_export);
2531
2532 int unregister_ftrace_export(struct trace_export *export)
2533 {
2534         int ret;
2535
2536         mutex_lock(&ftrace_export_lock);
2537
2538         ret = rm_ftrace_export(&ftrace_exports_list, export);
2539
2540         mutex_unlock(&ftrace_export_lock);
2541
2542         return ret;
2543 }
2544 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2545
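/*
 * Illustrative sketch (not part of the original file): an exporter supplies
 * a write() callback in a struct trace_export and registers it. The names
 * below are hypothetical; the exact callback prototype is the one declared
 * for struct trace_export in <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... push the raw trace entry to some transport ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */
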
2546 void
2547 trace_function(struct trace_array *tr,
2548                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2549                int pc)
2550 {
2551         struct trace_event_call *call = &event_function;
2552         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2553         struct ring_buffer_event *event;
2554         struct ftrace_entry *entry;
2555
2556         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2557                                             flags, pc);
2558         if (!event)
2559                 return;
2560         entry   = ring_buffer_event_data(event);
2561         entry->ip                       = ip;
2562         entry->parent_ip                = parent_ip;
2563
2564         if (!call_filter_check_discard(call, entry, buffer, event)) {
2565                 if (static_branch_unlikely(&ftrace_exports_enabled))
2566                         ftrace_exports(event);
2567                 __buffer_unlock_commit(buffer, event);
2568         }
2569 }
2570
2571 #ifdef CONFIG_STACKTRACE
2572
2573 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2574 struct ftrace_stack {
2575         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2576 };
2577
2578 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2579 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2580
2581 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2582                                  unsigned long flags,
2583                                  int skip, int pc, struct pt_regs *regs)
2584 {
2585         struct trace_event_call *call = &event_kernel_stack;
2586         struct ring_buffer_event *event;
2587         struct stack_entry *entry;
2588         struct stack_trace trace;
2589         int use_stack;
2590         int size = FTRACE_STACK_ENTRIES;
2591
2592         trace.nr_entries        = 0;
2593         trace.skip              = skip;
2594
2595         /*
2596          * Add one, for this function and the call to save_stack_trace()
2597          * If regs is set, then these functions will not be in the way.
2598          */
2599 #ifndef CONFIG_UNWINDER_ORC
2600         if (!regs)
2601                 trace.skip++;
2602 #endif
2603
2604         /*
2605          * Since events can happen in NMIs there's no safe way to
2606          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2607          * or NMI comes in, it will just have to use the default
2608          * FTRACE_STACK_ENTRIES.
2609          */
2610         preempt_disable_notrace();
2611
2612         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2613         /*
2614          * We don't need any atomic variables, just a barrier.
2615          * If an interrupt comes in, we don't care, because it would
2616          * have exited and put the counter back to what we want.
2617          * We just need a barrier to keep gcc from moving things
2618          * around.
2619          */
2620         barrier();
2621         if (use_stack == 1) {
2622                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2623                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2624
2625                 if (regs)
2626                         save_stack_trace_regs(regs, &trace);
2627                 else
2628                         save_stack_trace(&trace);
2629
2630                 if (trace.nr_entries > size)
2631                         size = trace.nr_entries;
2632         } else
2633                 /* From now on, use_stack is a boolean */
2634                 use_stack = 0;
2635
2636         size *= sizeof(unsigned long);
2637
2638         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2639                                             sizeof(*entry) + size, flags, pc);
2640         if (!event)
2641                 goto out;
2642         entry = ring_buffer_event_data(event);
2643
2644         memset(&entry->caller, 0, size);
2645
2646         if (use_stack)
2647                 memcpy(&entry->caller, trace.entries,
2648                        trace.nr_entries * sizeof(unsigned long));
2649         else {
2650                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2651                 trace.entries           = entry->caller;
2652                 if (regs)
2653                         save_stack_trace_regs(regs, &trace);
2654                 else
2655                         save_stack_trace(&trace);
2656         }
2657
2658         entry->size = trace.nr_entries;
2659
2660         if (!call_filter_check_discard(call, entry, buffer, event))
2661                 __buffer_unlock_commit(buffer, event);
2662
2663  out:
2664         /* Again, don't let gcc optimize things here */
2665         barrier();
2666         __this_cpu_dec(ftrace_stack_reserve);
2667         preempt_enable_notrace();
2668
2669 }
2670
2671 static inline void ftrace_trace_stack(struct trace_array *tr,
2672                                       struct ring_buffer *buffer,
2673                                       unsigned long flags,
2674                                       int skip, int pc, struct pt_regs *regs)
2675 {
2676         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2677                 return;
2678
2679         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2680 }
2681
2682 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2683                    int pc)
2684 {
2685         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2686
2687         if (rcu_is_watching()) {
2688                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2689                 return;
2690         }
2691
2692         /*
2693          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2694          * but if the above rcu_is_watching() failed, then the NMI
2695          * triggered someplace critical, and rcu_irq_enter() should
2696          * not be called from NMI.
2697          */
2698         if (unlikely(in_nmi()))
2699                 return;
2700
2701         rcu_irq_enter_irqson();
2702         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2703         rcu_irq_exit_irqson();
2704 }
2705
2706 /**
2707  * trace_dump_stack - record a stack back trace in the trace buffer
2708  * @skip: Number of functions to skip (helper handlers)
2709  */
2710 void trace_dump_stack(int skip)
2711 {
2712         unsigned long flags;
2713
2714         if (tracing_disabled || tracing_selftest_running)
2715                 return;
2716
2717         local_save_flags(flags);
2718
2719 #ifndef CONFIG_UNWINDER_ORC
2720         /* Skip 1 to skip this function. */
2721         skip++;
2722 #endif
2723         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2724                              flags, skip, preempt_count(), NULL);
2725 }
2726
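/*
 * Illustrative example (not part of the original file): sprinkling
 *
 *	trace_dump_stack(0);
 *
 * into code being debugged records the caller's stack trace in the trace
 * buffer.
 */
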
2727 static DEFINE_PER_CPU(int, user_stack_count);
2728
2729 void
2730 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2731 {
2732         struct trace_event_call *call = &event_user_stack;
2733         struct ring_buffer_event *event;
2734         struct userstack_entry *entry;
2735         struct stack_trace trace;
2736
2737         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2738                 return;
2739
2740         /*
2741          * NMIs cannot handle page faults, even with fixups.
2742          * Saving the user stack can (and often does) fault.
2743          */
2744         if (unlikely(in_nmi()))
2745                 return;
2746
2747         /*
2748          * prevent recursion, since the user stack tracing may
2749          * trigger other kernel events.
2750          */
2751         preempt_disable();
2752         if (__this_cpu_read(user_stack_count))
2753                 goto out;
2754
2755         __this_cpu_inc(user_stack_count);
2756
2757         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2758                                             sizeof(*entry), flags, pc);
2759         if (!event)
2760                 goto out_drop_count;
2761         entry   = ring_buffer_event_data(event);
2762
2763         entry->tgid             = current->tgid;
2764         memset(&entry->caller, 0, sizeof(entry->caller));
2765
2766         trace.nr_entries        = 0;
2767         trace.max_entries       = FTRACE_STACK_ENTRIES;
2768         trace.skip              = 0;
2769         trace.entries           = entry->caller;
2770
2771         save_stack_trace_user(&trace);
2772         if (!call_filter_check_discard(call, entry, buffer, event))
2773                 __buffer_unlock_commit(buffer, event);
2774
2775  out_drop_count:
2776         __this_cpu_dec(user_stack_count);
2777  out:
2778         preempt_enable();
2779 }
2780
2781 #ifdef UNUSED
2782 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2783 {
2784         ftrace_trace_userstack(tr, flags, preempt_count());
2785 }
2786 #endif /* UNUSED */
2787
2788 #endif /* CONFIG_STACKTRACE */
2789
2790 /* created for use with alloc_percpu */
2791 struct trace_buffer_struct {
2792         int nesting;
2793         char buffer[4][TRACE_BUF_SIZE];
2794 };
2795
2796 static struct trace_buffer_struct *trace_percpu_buffer;
2797
2798 /*
2799  * This allows for lockless recording.  If we're nested too deeply, then
2800  * this returns NULL.
2801  */
2802 static char *get_trace_buf(void)
2803 {
2804         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2805
2806         if (!buffer || buffer->nesting >= 4)
2807                 return NULL;
2808
2809         buffer->nesting++;
2810
2811         /* Interrupts must see nesting incremented before we use the buffer */
2812         barrier();
2813         return &buffer->buffer[buffer->nesting][0];
2814 }
2815
2816 static void put_trace_buf(void)
2817 {
2818         /* Don't let the decrement of nesting leak before this */
2819         barrier();
2820         this_cpu_dec(trace_percpu_buffer->nesting);
2821 }
2822
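/*
 * Illustrative sketch (not part of the original file): users of the percpu
 * buffer bracket their formatting with the get/put pair:
 *
 *	char *tbuffer = get_trace_buf();
 *
 *	if (tbuffer) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 */
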
2823 static int alloc_percpu_trace_buffer(void)
2824 {
2825         struct trace_buffer_struct *buffers;
2826
2827         buffers = alloc_percpu(struct trace_buffer_struct);
2828         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2829                 return -ENOMEM;
2830
2831         trace_percpu_buffer = buffers;
2832         return 0;
2833 }
2834
2835 static int buffers_allocated;
2836
2837 void trace_printk_init_buffers(void)
2838 {
2839         if (buffers_allocated)
2840                 return;
2841
2842         if (alloc_percpu_trace_buffer())
2843                 return;
2844
2845         /* trace_printk() is for debug use only. Don't use it in production. */
2846
2847         pr_warn("\n");
2848         pr_warn("**********************************************************\n");
2849         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2850         pr_warn("**                                                      **\n");
2851         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2852         pr_warn("**                                                      **\n");
2853         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2854         pr_warn("** unsafe for production use.                           **\n");
2855         pr_warn("**                                                      **\n");
2856         pr_warn("** If you see this message and you are not debugging    **\n");
2857         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2858         pr_warn("**                                                      **\n");
2859         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2860         pr_warn("**********************************************************\n");
2861
2862         /* Expand the buffers to set size */
2863         tracing_update_buffers();
2864
2865         buffers_allocated = 1;
2866
2867         /*
2868          * trace_printk_init_buffers() can be called by modules.
2869          * If that happens, then we need to start cmdline recording
2870          * directly here. If the global_trace.buffer is already
2871          * allocated here, then this was called by module code.
2872          */
2873         if (global_trace.trace_buffer.buffer)
2874                 tracing_start_cmdline_record();
2875 }
2876
2877 void trace_printk_start_comm(void)
2878 {
2879         /* Start tracing comms if trace printk is set */
2880         if (!buffers_allocated)
2881                 return;
2882         tracing_start_cmdline_record();
2883 }
2884
2885 static void trace_printk_start_stop_comm(int enabled)
2886 {
2887         if (!buffers_allocated)
2888                 return;
2889
2890         if (enabled)
2891                 tracing_start_cmdline_record();
2892         else
2893                 tracing_stop_cmdline_record();
2894 }
2895
2896 /**
2897  * trace_vbprintk - write binary msg to tracing buffer
2898  *
2899  */
2900 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2901 {
2902         struct trace_event_call *call = &event_bprint;
2903         struct ring_buffer_event *event;
2904         struct ring_buffer *buffer;
2905         struct trace_array *tr = &global_trace;
2906         struct bprint_entry *entry;
2907         unsigned long flags;
2908         char *tbuffer;
2909         int len = 0, size, pc;
2910
2911         if (unlikely(tracing_selftest_running || tracing_disabled))
2912                 return 0;
2913
2914         /* Don't pollute graph traces with trace_vprintk internals */
2915         pause_graph_tracing();
2916
2917         pc = preempt_count();
2918         preempt_disable_notrace();
2919
2920         tbuffer = get_trace_buf();
2921         if (!tbuffer) {
2922                 len = 0;
2923                 goto out_nobuffer;
2924         }
2925
2926         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2927
2928         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2929                 goto out;
2930
2931         local_save_flags(flags);
2932         size = sizeof(*entry) + sizeof(u32) * len;
2933         buffer = tr->trace_buffer.buffer;
2934         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2935                                             flags, pc);
2936         if (!event)
2937                 goto out;
2938         entry = ring_buffer_event_data(event);
2939         entry->ip                       = ip;
2940         entry->fmt                      = fmt;
2941
2942         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2943         if (!call_filter_check_discard(call, entry, buffer, event)) {
2944                 __buffer_unlock_commit(buffer, event);
2945                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2946         }
2947
2948 out:
2949         put_trace_buf();
2950
2951 out_nobuffer:
2952         preempt_enable_notrace();
2953         unpause_graph_tracing();
2954
2955         return len;
2956 }
2957 EXPORT_SYMBOL_GPL(trace_vbprintk);
2958
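/*
 * Illustrative note (not part of the original file): this is normally not
 * called directly; it sits underneath trace_printk() when the format string
 * can be recorded in binary form, e.g.:
 *
 *	trace_printk("x=%d\n", x);
 */
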
2959 static int
2960 __trace_array_vprintk(struct ring_buffer *buffer,
2961                       unsigned long ip, const char *fmt, va_list args)
2962 {
2963         struct trace_event_call *call = &event_print;
2964         struct ring_buffer_event *event;
2965         int len = 0, size, pc;
2966         struct print_entry *entry;
2967         unsigned long flags;
2968         char *tbuffer;
2969
2970         if (tracing_disabled || tracing_selftest_running)
2971                 return 0;
2972
2973         /* Don't pollute graph traces with trace_vprintk internals */
2974         pause_graph_tracing();
2975
2976         pc = preempt_count();
2977         preempt_disable_notrace();
2978
2979
2980         tbuffer = get_trace_buf();
2981         if (!tbuffer) {
2982                 len = 0;
2983                 goto out_nobuffer;
2984         }
2985
2986         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2987
2988         local_save_flags(flags);
2989         size = sizeof(*entry) + len + 1;
2990         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2991                                             flags, pc);
2992         if (!event)
2993                 goto out;
2994         entry = ring_buffer_event_data(event);
2995         entry->ip = ip;
2996
2997         memcpy(&entry->buf, tbuffer, len + 1);
2998         if (!call_filter_check_discard(call, entry, buffer, event)) {
2999                 __buffer_unlock_commit(buffer, event);
3000                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3001         }
3002
3003 out:
3004         put_trace_buf();
3005
3006 out_nobuffer:
3007         preempt_enable_notrace();
3008         unpause_graph_tracing();
3009
3010         return len;
3011 }
3012
3013 int trace_array_vprintk(struct trace_array *tr,
3014                         unsigned long ip, const char *fmt, va_list args)
3015 {
3016         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3017 }
3018
3019 int trace_array_printk(struct trace_array *tr,
3020                        unsigned long ip, const char *fmt, ...)
3021 {
3022         int ret;
3023         va_list ap;
3024
3025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3026                 return 0;
3027
3028         va_start(ap, fmt);
3029         ret = trace_array_vprintk(tr, ip, fmt, ap);
3030         va_end(ap);
3031         return ret;
3032 }
3033
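/*
 * Illustrative example (not part of the original file): given a trace_array
 * the caller already holds a pointer to (here "tr" and "stage" are
 * hypothetical), a message is written into that instance's buffer with:
 *
 *	trace_array_printk(tr, _THIS_IP_, "stage %d reached\n", stage);
 */
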
3034 int trace_array_printk_buf(struct ring_buffer *buffer,
3035                            unsigned long ip, const char *fmt, ...)
3036 {
3037         int ret;
3038         va_list ap;
3039
3040         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3041                 return 0;
3042
3043         va_start(ap, fmt);
3044         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3045         va_end(ap);
3046         return ret;
3047 }
3048
3049 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3050 {
3051         return trace_array_vprintk(&global_trace, ip, fmt, args);
3052 }
3053 EXPORT_SYMBOL_GPL(trace_vprintk);
3054
3055 static void trace_iterator_increment(struct trace_iterator *iter)
3056 {
3057         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3058
3059         iter->idx++;
3060         if (buf_iter)
3061                 ring_buffer_read(buf_iter, NULL);
3062 }
3063
3064 static struct trace_entry *
3065 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3066                 unsigned long *lost_events)
3067 {
3068         struct ring_buffer_event *event;
3069         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3070
3071         if (buf_iter)
3072                 event = ring_buffer_iter_peek(buf_iter, ts);
3073         else
3074                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3075                                          lost_events);
3076
3077         if (event) {
3078                 iter->ent_size = ring_buffer_event_length(event);
3079                 return ring_buffer_event_data(event);
3080         }
3081         iter->ent_size = 0;
3082         return NULL;
3083 }
3084
3085 static struct trace_entry *
3086 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3087                   unsigned long *missing_events, u64 *ent_ts)
3088 {
3089         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3090         struct trace_entry *ent, *next = NULL;
3091         unsigned long lost_events = 0, next_lost = 0;
3092         int cpu_file = iter->cpu_file;
3093         u64 next_ts = 0, ts;
3094         int next_cpu = -1;
3095         int next_size = 0;
3096         int cpu;
3097
3098         /*
3099          * If we are in a per_cpu trace file, don't bother iterating over
3100          * all CPUs; peek at that CPU directly.
3101          */
3102         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3103                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3104                         return NULL;
3105                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3106                 if (ent_cpu)
3107                         *ent_cpu = cpu_file;
3108
3109                 return ent;
3110         }
3111
3112         for_each_tracing_cpu(cpu) {
3113
3114                 if (ring_buffer_empty_cpu(buffer, cpu))
3115                         continue;
3116
3117                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3118
3119                 /*
3120                  * Pick the entry with the smallest timestamp:
3121                  */
3122                 if (ent && (!next || ts < next_ts)) {
3123                         next = ent;
3124                         next_cpu = cpu;
3125                         next_ts = ts;
3126                         next_lost = lost_events;
3127                         next_size = iter->ent_size;
3128                 }
3129         }
3130
3131         iter->ent_size = next_size;
3132
3133         if (ent_cpu)
3134                 *ent_cpu = next_cpu;
3135
3136         if (ent_ts)
3137                 *ent_ts = next_ts;
3138
3139         if (missing_events)
3140                 *missing_events = next_lost;
3141
3142         return next;
3143 }
3144
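/*
 * Illustrative example (not part of the original file): with per-cpu
 * buffers holding, say, cpu0 = { ts 105, ts 190 } and cpu1 = { ts 120 },
 * successive calls hand back the ts 105, ts 120 and ts 190 entries in that
 * order, i.e. a simple merge by timestamp across CPUs.
 */
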
3145 /* Find the next real entry, without updating the iterator itself */
3146 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3147                                           int *ent_cpu, u64 *ent_ts)
3148 {
3149         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3150 }
3151
3152 /* Find the next real entry, and increment the iterator to the next entry */
3153 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3154 {
3155         iter->ent = __find_next_entry(iter, &iter->cpu,
3156                                       &iter->lost_events, &iter->ts);
3157
3158         if (iter->ent)
3159                 trace_iterator_increment(iter);
3160
3161         return iter->ent ? iter : NULL;
3162 }
3163
3164 static void trace_consume(struct trace_iterator *iter)
3165 {
3166         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3167                             &iter->lost_events);
3168 }
3169
3170 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3171 {
3172         struct trace_iterator *iter = m->private;
3173         int i = (int)*pos;
3174         void *ent;
3175
3176         WARN_ON_ONCE(iter->leftover);
3177
3178         (*pos)++;
3179
3180         /* can't go backwards */
3181         if (iter->idx > i)
3182                 return NULL;
3183
3184         if (iter->idx < 0)
3185                 ent = trace_find_next_entry_inc(iter);
3186         else
3187                 ent = iter;
3188
3189         while (ent && iter->idx < i)
3190                 ent = trace_find_next_entry_inc(iter);
3191
3192         iter->pos = *pos;
3193
3194         return ent;
3195 }
3196
3197 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3198 {
3199         struct ring_buffer_event *event;
3200         struct ring_buffer_iter *buf_iter;
3201         unsigned long entries = 0;
3202         u64 ts;
3203
3204         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3205
3206         buf_iter = trace_buffer_iter(iter, cpu);
3207         if (!buf_iter)
3208                 return;
3209
3210         ring_buffer_iter_reset(buf_iter);
3211
3212         /*
3213          * With the max latency tracers, a reset may never have taken
3214          * place on a cpu. This is evident from the timestamp being
3215          * before the start of the buffer.
3216          */
3217         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3218                 if (ts >= iter->trace_buffer->time_start)
3219                         break;
3220                 entries++;
3221                 ring_buffer_read(buf_iter, NULL);
3222         }
3223
3224         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3225 }
3226
3227 /*
3228  * The current tracer is copied to avoid taking a global lock
3229  * all around.
3230  */
3231 static void *s_start(struct seq_file *m, loff_t *pos)
3232 {
3233         struct trace_iterator *iter = m->private;
3234         struct trace_array *tr = iter->tr;
3235         int cpu_file = iter->cpu_file;
3236         void *p = NULL;
3237         loff_t l = 0;
3238         int cpu;
3239
3240         /*
3241          * copy the tracer to avoid using a global lock all around.
3242          * iter->trace is a copy of current_trace; the pointer to the
3243          * name may be used instead of a strcmp(), as iter->trace->name
3244          * will point to the same string as current_trace->name.
3245          */
3246         mutex_lock(&trace_types_lock);
3247         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3248                 *iter->trace = *tr->current_trace;
3249         mutex_unlock(&trace_types_lock);
3250
3251 #ifdef CONFIG_TRACER_MAX_TRACE
3252         if (iter->snapshot && iter->trace->use_max_tr)
3253                 return ERR_PTR(-EBUSY);
3254 #endif
3255
3256         if (!iter->snapshot)
3257                 atomic_inc(&trace_record_taskinfo_disabled);
3258
3259         if (*pos != iter->pos) {
3260                 iter->ent = NULL;
3261                 iter->cpu = 0;
3262                 iter->idx = -1;
3263
3264                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3265                         for_each_tracing_cpu(cpu)
3266                                 tracing_iter_reset(iter, cpu);
3267                 } else
3268                         tracing_iter_reset(iter, cpu_file);
3269
3270                 iter->leftover = 0;
3271                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3272                         ;
3273
3274         } else {
3275                 /*
3276                  * If we overflowed the seq_file before, then we want
3277                  * to just reuse the trace_seq buffer again.
3278                  */
3279                 if (iter->leftover)
3280                         p = iter;
3281                 else {
3282                         l = *pos - 1;
3283                         p = s_next(m, p, &l);
3284                 }
3285         }
3286
3287         trace_event_read_lock();
3288         trace_access_lock(cpu_file);
3289         return p;
3290 }
3291
3292 static void s_stop(struct seq_file *m, void *p)
3293 {
3294         struct trace_iterator *iter = m->private;
3295
3296 #ifdef CONFIG_TRACER_MAX_TRACE
3297         if (iter->snapshot && iter->trace->use_max_tr)
3298                 return;
3299 #endif
3300
3301         if (!iter->snapshot)
3302                 atomic_dec(&trace_record_taskinfo_disabled);
3303
3304         trace_access_unlock(iter->cpu_file);
3305         trace_event_read_unlock();
3306 }
3307
3308 static void
3309 get_total_entries(struct trace_buffer *buf,
3310                   unsigned long *total, unsigned long *entries)
3311 {
3312         unsigned long count;
3313         int cpu;
3314
3315         *total = 0;
3316         *entries = 0;
3317
3318         for_each_tracing_cpu(cpu) {
3319                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3320                 /*
3321                  * If this buffer has skipped entries, then we hold all
3322                  * entries for the trace and we need to ignore the
3323                  * ones before the time stamp.
3324                  */
3325                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3326                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3327                         /* total is the same as the entries */
3328                         *total += count;
3329                 } else
3330                         *total += count +
3331                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3332                 *entries += count;
3333         }
3334 }
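
/*
 * Worked example (illustrative, not part of the original source): if one
 * CPU recorded 100 events and overwrote 20 older ones, it contributes
 * 100 to *entries and 120 to *total.  If instead 10 of its oldest events
 * were marked as skipped by tracing_iter_reset(), it contributes 90 to
 * both counts, since skipped events are neither printed nor counted as
 * written.
 */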
3335
3336 static void print_lat_help_header(struct seq_file *m)
3337 {
3338         seq_puts(m, "#                  _------=> CPU#            \n"
3339                     "#                 / _-----=> irqs-off        \n"
3340                     "#                | / _----=> need-resched    \n"
3341                     "#                || / _---=> hardirq/softirq \n"
3342                     "#                ||| / _--=> preempt-depth   \n"
3343                     "#                |||| /     delay            \n"
3344                     "#  cmd     pid   ||||| time  |   caller      \n"
3345                     "#     \\   /      |||||  \\    |   /         \n");
3346 }
3347
3348 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3349 {
3350         unsigned long total;
3351         unsigned long entries;
3352
3353         get_total_entries(buf, &total, &entries);
3354         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3355                    entries, total, num_online_cpus());
3356         seq_puts(m, "#\n");
3357 }
3358
3359 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3360                                    unsigned int flags)
3361 {
3362         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3363
3364         print_event_info(buf, m);
3365
3366         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3367         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3368 }
3369
3370 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3371                                        unsigned int flags)
3372 {
3373         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3374         const char tgid_space[] = "          ";
3375         const char space[] = "  ";
3376
3377         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3378                    tgid ? tgid_space : space);
3379         seq_printf(m, "#                          %s / _----=> need-resched\n",
3380                    tgid ? tgid_space : space);
3381         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3382                    tgid ? tgid_space : space);
3383         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3384                    tgid ? tgid_space : space);
3385         seq_printf(m, "#                          %s||| /     delay\n",
3386                    tgid ? tgid_space : space);
3387         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3388                    tgid ? "   TGID   " : space);
3389         seq_printf(m, "#              | |       | %s||||       |         |\n",
3390                    tgid ? "     |    " : space);
3391 }
3392
3393 void
3394 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3395 {
3396         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3397         struct trace_buffer *buf = iter->trace_buffer;
3398         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3399         struct tracer *type = iter->trace;
3400         unsigned long entries;
3401         unsigned long total;
3402         const char *name = "preemption";
3403
3404         name = type->name;
3405
3406         get_total_entries(buf, &total, &entries);
3407
3408         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3409                    name, UTS_RELEASE);
3410         seq_puts(m, "# -----------------------------------"
3411                  "---------------------------------\n");
3412         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3413                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3414                    nsecs_to_usecs(data->saved_latency),
3415                    entries,
3416                    total,
3417                    buf->cpu,
3418 #if defined(CONFIG_PREEMPT_NONE)
3419                    "server",
3420 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3421                    "desktop",
3422 #elif defined(CONFIG_PREEMPT)
3423                    "preempt",
3424 #else
3425                    "unknown",
3426 #endif
3427                    /* These are reserved for later use */
3428                    0, 0, 0, 0);
3429 #ifdef CONFIG_SMP
3430         seq_printf(m, " #P:%d)\n", num_online_cpus());
3431 #else
3432         seq_puts(m, ")\n");
3433 #endif
3434         seq_puts(m, "#    -----------------\n");
3435         seq_printf(m, "#    | task: %.16s-%d "
3436                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3437                    data->comm, data->pid,
3438                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3439                    data->policy, data->rt_priority);
3440         seq_puts(m, "#    -----------------\n");
3441
3442         if (data->critical_start) {
3443                 seq_puts(m, "#  => started at: ");
3444                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3445                 trace_print_seq(m, &iter->seq);
3446                 seq_puts(m, "\n#  => ended at:   ");
3447                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3448                 trace_print_seq(m, &iter->seq);
3449                 seq_puts(m, "\n#\n");
3450         }
3451
3452         seq_puts(m, "#\n");
3453 }
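
/*
 * Example of the banner produced above (illustrative only; the values
 * are invented, the layout follows the seq_printf() calls):
 *
 * # irqsoff latency trace v1.1.5 on <UTS_RELEASE>
 * # --------------------------------------------------------------------
 * # latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
 * #    -----------------
 * #    | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
 * #    -----------------
 * #  => started at: ...
 * #  => ended at:   ...
 */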
3454
3455 static void test_cpu_buff_start(struct trace_iterator *iter)
3456 {
3457         struct trace_seq *s = &iter->seq;
3458         struct trace_array *tr = iter->tr;
3459
3460         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3461                 return;
3462
3463         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3464                 return;
3465
3466         if (cpumask_available(iter->started) &&
3467             cpumask_test_cpu(iter->cpu, iter->started))
3468                 return;
3469
3470         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3471                 return;
3472
3473         if (cpumask_available(iter->started))
3474                 cpumask_set_cpu(iter->cpu, iter->started);
3475
3476         /* Don't print started cpu buffer for the first entry of the trace */
3477         if (iter->idx > 1)
3478                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3479                                 iter->cpu);
3480 }
3481
3482 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3483 {
3484         struct trace_array *tr = iter->tr;
3485         struct trace_seq *s = &iter->seq;
3486         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3487         struct trace_entry *entry;
3488         struct trace_event *event;
3489
3490         entry = iter->ent;
3491
3492         test_cpu_buff_start(iter);
3493
3494         event = ftrace_find_event(entry->type);
3495
3496         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3497                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3498                         trace_print_lat_context(iter);
3499                 else
3500                         trace_print_context(iter);
3501         }
3502
3503         if (trace_seq_has_overflowed(s))
3504                 return TRACE_TYPE_PARTIAL_LINE;
3505
3506         if (event)
3507                 return event->funcs->trace(iter, sym_flags, event);
3508
3509         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3510
3511         return trace_handle_return(s);
3512 }
3513
3514 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3515 {
3516         struct trace_array *tr = iter->tr;
3517         struct trace_seq *s = &iter->seq;
3518         struct trace_entry *entry;
3519         struct trace_event *event;
3520
3521         entry = iter->ent;
3522
3523         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3524                 trace_seq_printf(s, "%d %d %llu ",
3525                                  entry->pid, iter->cpu, iter->ts);
3526
3527         if (trace_seq_has_overflowed(s))
3528                 return TRACE_TYPE_PARTIAL_LINE;
3529
3530         event = ftrace_find_event(entry->type);
3531         if (event)
3532                 return event->funcs->raw(iter, 0, event);
3533
3534         trace_seq_printf(s, "%d ?\n", entry->type);
3535
3536         return trace_handle_return(s);
3537 }
3538
3539 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3540 {
3541         struct trace_array *tr = iter->tr;
3542         struct trace_seq *s = &iter->seq;
3543         unsigned char newline = '\n';
3544         struct trace_entry *entry;
3545         struct trace_event *event;
3546
3547         entry = iter->ent;
3548
3549         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3550                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3551                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3552                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3553                 if (trace_seq_has_overflowed(s))
3554                         return TRACE_TYPE_PARTIAL_LINE;
3555         }
3556
3557         event = ftrace_find_event(entry->type);
3558         if (event) {
3559                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3560                 if (ret != TRACE_TYPE_HANDLED)
3561                         return ret;
3562         }
3563
3564         SEQ_PUT_FIELD(s, newline);
3565
3566         return trace_handle_return(s);
3567 }
3568
3569 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3570 {
3571         struct trace_array *tr = iter->tr;
3572         struct trace_seq *s = &iter->seq;
3573         struct trace_entry *entry;
3574         struct trace_event *event;
3575
3576         entry = iter->ent;
3577
3578         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3579                 SEQ_PUT_FIELD(s, entry->pid);
3580                 SEQ_PUT_FIELD(s, iter->cpu);
3581                 SEQ_PUT_FIELD(s, iter->ts);
3582                 if (trace_seq_has_overflowed(s))
3583                         return TRACE_TYPE_PARTIAL_LINE;
3584         }
3585
3586         event = ftrace_find_event(entry->type);
3587         return event ? event->funcs->binary(iter, 0, event) :
3588                 TRACE_TYPE_HANDLED;
3589 }
3590
3591 int trace_empty(struct trace_iterator *iter)
3592 {
3593         struct ring_buffer_iter *buf_iter;
3594         int cpu;
3595
3596         /* If we are looking at one CPU buffer, only check that one */
3597         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3598                 cpu = iter->cpu_file;
3599                 buf_iter = trace_buffer_iter(iter, cpu);
3600                 if (buf_iter) {
3601                         if (!ring_buffer_iter_empty(buf_iter))
3602                                 return 0;
3603                 } else {
3604                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3605                                 return 0;
3606                 }
3607                 return 1;
3608         }
3609
3610         for_each_tracing_cpu(cpu) {
3611                 buf_iter = trace_buffer_iter(iter, cpu);
3612                 if (buf_iter) {
3613                         if (!ring_buffer_iter_empty(buf_iter))
3614                                 return 0;
3615                 } else {
3616                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617                                 return 0;
3618                 }
3619         }
3620
3621         return 1;
3622 }
3623
3624 /*  Called with trace_event_read_lock() held. */
3625 enum print_line_t print_trace_line(struct trace_iterator *iter)
3626 {
3627         struct trace_array *tr = iter->tr;
3628         unsigned long trace_flags = tr->trace_flags;
3629         enum print_line_t ret;
3630
3631         if (iter->lost_events) {
3632                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3633                                  iter->cpu, iter->lost_events);
3634                 if (trace_seq_has_overflowed(&iter->seq))
3635                         return TRACE_TYPE_PARTIAL_LINE;
3636         }
3637
3638         if (iter->trace && iter->trace->print_line) {
3639                 ret = iter->trace->print_line(iter);
3640                 if (ret != TRACE_TYPE_UNHANDLED)
3641                         return ret;
3642         }
3643
3644         if (iter->ent->type == TRACE_BPUTS &&
3645                         trace_flags & TRACE_ITER_PRINTK &&
3646                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3647                 return trace_print_bputs_msg_only(iter);
3648
3649         if (iter->ent->type == TRACE_BPRINT &&
3650                         trace_flags & TRACE_ITER_PRINTK &&
3651                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3652                 return trace_print_bprintk_msg_only(iter);
3653
3654         if (iter->ent->type == TRACE_PRINT &&
3655                         trace_flags & TRACE_ITER_PRINTK &&
3656                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3657                 return trace_print_printk_msg_only(iter);
3658
3659         if (trace_flags & TRACE_ITER_BIN)
3660                 return print_bin_fmt(iter);
3661
3662         if (trace_flags & TRACE_ITER_HEX)
3663                 return print_hex_fmt(iter);
3664
3665         if (trace_flags & TRACE_ITER_RAW)
3666                 return print_raw_fmt(iter);
3667
3668         return print_trace_fmt(iter);
3669 }
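
/*
 * Note added for clarity: when more than one of the output-format flags
 * is set, the order of the checks above gives "bin" precedence over
 * "hex", "hex" over "raw", and "raw" over the default formatted output.
 */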
3670
3671 void trace_latency_header(struct seq_file *m)
3672 {
3673         struct trace_iterator *iter = m->private;
3674         struct trace_array *tr = iter->tr;
3675
3676         /* print nothing if the buffers are empty */
3677         if (trace_empty(iter))
3678                 return;
3679
3680         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3681                 print_trace_header(m, iter);
3682
3683         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3684                 print_lat_help_header(m);
3685 }
3686
3687 void trace_default_header(struct seq_file *m)
3688 {
3689         struct trace_iterator *iter = m->private;
3690         struct trace_array *tr = iter->tr;
3691         unsigned long trace_flags = tr->trace_flags;
3692
3693         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3694                 return;
3695
3696         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3697                 /* print nothing if the buffers are empty */
3698                 if (trace_empty(iter))
3699                         return;
3700                 print_trace_header(m, iter);
3701                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3702                         print_lat_help_header(m);
3703         } else {
3704                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3705                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3706                                 print_func_help_header_irq(iter->trace_buffer,
3707                                                            m, trace_flags);
3708                         else
3709                                 print_func_help_header(iter->trace_buffer, m,
3710                                                        trace_flags);
3711                 }
3712         }
3713 }
3714
3715 static void test_ftrace_alive(struct seq_file *m)
3716 {
3717         if (!ftrace_is_dead())
3718                 return;
3719         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3720                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3721 }
3722
3723 #ifdef CONFIG_TRACER_MAX_TRACE
3724 static void show_snapshot_main_help(struct seq_file *m)
3725 {
3726         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3727                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3728                     "#                      Takes a snapshot of the main buffer.\n"
3729                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3730                     "#                      (Doesn't have to be '2', works with any number that\n"
3731                     "#                       is not a '0' or '1')\n");
3732 }
3733
3734 static void show_snapshot_percpu_help(struct seq_file *m)
3735 {
3736         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3737 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3738         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3739                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3740 #else
3741         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3742                     "#                     Must use main snapshot file to allocate.\n");
3743 #endif
3744         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3745                     "#                      (Doesn't have to be '2', works with any number that\n"
3746                     "#                       is not a '0' or '1')\n");
3747 }
3748
3749 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3750 {
3751         if (iter->tr->allocated_snapshot)
3752                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3753         else
3754                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3755
3756         seq_puts(m, "# Snapshot commands:\n");
3757         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3758                 show_snapshot_main_help(m);
3759         else
3760                 show_snapshot_percpu_help(m);
3761 }
3762 #else
3763 /* Should never be called */
3764 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3765 #endif
3766
3767 static int s_show(struct seq_file *m, void *v)
3768 {
3769         struct trace_iterator *iter = v;
3770         int ret;
3771
3772         if (iter->ent == NULL) {
3773                 if (iter->tr) {
3774                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3775                         seq_puts(m, "#\n");
3776                         test_ftrace_alive(m);
3777                 }
3778                 if (iter->snapshot && trace_empty(iter))
3779                         print_snapshot_help(m, iter);
3780                 else if (iter->trace && iter->trace->print_header)
3781                         iter->trace->print_header(m);
3782                 else
3783                         trace_default_header(m);
3784
3785         } else if (iter->leftover) {
3786                 /*
3787                  * If we filled the seq_file buffer earlier, we
3788                  * want to just show it now.
3789                  */
3790                 ret = trace_print_seq(m, &iter->seq);
3791
3792                 /* ret should this time be zero, but you never know */
3793                 iter->leftover = ret;
3794
3795         } else {
3796                 print_trace_line(iter);
3797                 ret = trace_print_seq(m, &iter->seq);
3798                 /*
3799                  * If we overflow the seq_file buffer, then it will
3800                  * ask us for this data again at start up.
3801                  * Use that instead.
3802                  *  ret is 0 if seq_file write succeeded.
3803                  *        -1 otherwise.
3804                  */
3805                 iter->leftover = ret;
3806         }
3807
3808         return 0;
3809 }
3810
3811 /*
3812  * Should be used after trace_array_get(); trace_types_lock
3813  * ensures that i_cdev was already initialized.
3814  */
3815 static inline int tracing_get_cpu(struct inode *inode)
3816 {
3817         if (inode->i_cdev) /* See trace_create_cpu_file() */
3818                 return (long)inode->i_cdev - 1;
3819         return RING_BUFFER_ALL_CPUS;
3820 }
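
/*
 * Illustrative assumption (see trace_create_cpu_file(), referenced
 * above): the per-cpu "trace" files are expected to store (cpu + 1) in
 * i_cdev, so that CPU 0 can be told apart from the NULL i_cdev of the
 * top-level files; the "- 1" above recovers the CPU number.
 */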
3821
3822 static const struct seq_operations tracer_seq_ops = {
3823         .start          = s_start,
3824         .next           = s_next,
3825         .stop           = s_stop,
3826         .show           = s_show,
3827 };
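
/*
 * A minimal sketch of how the seq_file core is assumed to drive these
 * callbacks when the "trace" file is read (error handling omitted, see
 * seq_read() for the real loop):
 *
 *        p = s_start(m, &pos);
 *        while (p && !IS_ERR(p) && !seq_has_overflowed(m)) {
 *                s_show(m, p);
 *                p = s_next(m, p, &pos);
 *        }
 *        s_stop(m, p);
 *
 * s_start() also handles a re-read at the same position by replaying the
 * leftover trace_seq buffer instead of advancing the iterator.
 */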
3828
3829 static struct trace_iterator *
3830 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3831 {
3832         struct trace_array *tr = inode->i_private;
3833         struct trace_iterator *iter;
3834         int cpu;
3835
3836         if (tracing_disabled)
3837                 return ERR_PTR(-ENODEV);
3838
3839         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3840         if (!iter)
3841                 return ERR_PTR(-ENOMEM);
3842
3843         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3844                                     GFP_KERNEL);
3845         if (!iter->buffer_iter)
3846                 goto release;
3847
3848         /*
3849          * We make a copy of the current tracer to avoid concurrent
3850          * changes on it while we are reading.
3851          */
3852         mutex_lock(&trace_types_lock);
3853         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3854         if (!iter->trace)
3855                 goto fail;
3856
3857         *iter->trace = *tr->current_trace;
3858
3859         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3860                 goto fail;
3861
3862         iter->tr = tr;
3863
3864 #ifdef CONFIG_TRACER_MAX_TRACE
3865         /* Currently only the top directory has a snapshot */
3866         if (tr->current_trace->print_max || snapshot)
3867                 iter->trace_buffer = &tr->max_buffer;
3868         else
3869 #endif
3870                 iter->trace_buffer = &tr->trace_buffer;
3871         iter->snapshot = snapshot;
3872         iter->pos = -1;
3873         iter->cpu_file = tracing_get_cpu(inode);
3874         mutex_init(&iter->mutex);
3875
3876         /* Notify the tracer early; before we stop tracing. */
3877         if (iter->trace && iter->trace->open)
3878                 iter->trace->open(iter);
3879
3880         /* Annotate start of buffers if we had overruns */
3881         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3882                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3883
3884         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3885         if (trace_clocks[tr->clock_id].in_ns)
3886                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3887
3888         /* stop the trace while dumping if we are not opening "snapshot" */
3889         if (!iter->snapshot)
3890                 tracing_stop_tr(tr);
3891
3892         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3893                 for_each_tracing_cpu(cpu) {
3894                         iter->buffer_iter[cpu] =
3895                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3896                 }
3897                 ring_buffer_read_prepare_sync();
3898                 for_each_tracing_cpu(cpu) {
3899                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3900                         tracing_iter_reset(iter, cpu);
3901                 }
3902         } else {
3903                 cpu = iter->cpu_file;
3904                 iter->buffer_iter[cpu] =
3905                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3906                 ring_buffer_read_prepare_sync();
3907                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3908                 tracing_iter_reset(iter, cpu);
3909         }
3910
3911         mutex_unlock(&trace_types_lock);
3912
3913         return iter;
3914
3915  fail:
3916         mutex_unlock(&trace_types_lock);
3917         kfree(iter->trace);
3918         kfree(iter->buffer_iter);
3919 release:
3920         seq_release_private(inode, file);
3921         return ERR_PTR(-ENOMEM);
3922 }
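
/*
 * Note added for clarity: ring_buffer_read_prepare() is called for every
 * CPU before the single ring_buffer_read_prepare_sync(), and only then
 * is ring_buffer_read_start() run on each iterator, so the
 * synchronization needed to quiesce the writers is paid once for the
 * whole set of per-cpu buffers rather than once per CPU.
 */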
3923
3924 int tracing_open_generic(struct inode *inode, struct file *filp)
3925 {
3926         if (tracing_disabled)
3927                 return -ENODEV;
3928
3929         filp->private_data = inode->i_private;
3930         return 0;
3931 }
3932
3933 bool tracing_is_disabled(void)
3934 {
3935         return tracing_disabled ? true : false;
3936 }
3937
3938 /*
3939  * Open and update trace_array ref count.
3940  * Must have the current trace_array passed to it.
3941  */
3942 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3943 {
3944         struct trace_array *tr = inode->i_private;
3945
3946         if (tracing_disabled)
3947                 return -ENODEV;
3948
3949         if (trace_array_get(tr) < 0)
3950                 return -ENODEV;
3951
3952         filp->private_data = inode->i_private;
3953
3954         return 0;
3955 }
3956
3957 static int tracing_release(struct inode *inode, struct file *file)
3958 {
3959         struct trace_array *tr = inode->i_private;
3960         struct seq_file *m = file->private_data;
3961         struct trace_iterator *iter;
3962         int cpu;
3963
3964         if (!(file->f_mode & FMODE_READ)) {
3965                 trace_array_put(tr);
3966                 return 0;
3967         }
3968
3969         /* Writes do not use seq_file */
3970         iter = m->private;
3971         mutex_lock(&trace_types_lock);
3972
3973         for_each_tracing_cpu(cpu) {
3974                 if (iter->buffer_iter[cpu])
3975                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3976         }
3977
3978         if (iter->trace && iter->trace->close)
3979                 iter->trace->close(iter);
3980
3981         if (!iter->snapshot)
3982                 /* reenable tracing if it was previously enabled */
3983                 tracing_start_tr(tr);
3984
3985         __trace_array_put(tr);
3986
3987         mutex_unlock(&trace_types_lock);
3988
3989         mutex_destroy(&iter->mutex);
3990         free_cpumask_var(iter->started);
3991         kfree(iter->trace);
3992         kfree(iter->buffer_iter);
3993         seq_release_private(inode, file);
3994
3995         return 0;
3996 }
3997
3998 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3999 {
4000         struct trace_array *tr = inode->i_private;
4001
4002         trace_array_put(tr);
4003         return 0;
4004 }
4005
4006 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4007 {
4008         struct trace_array *tr = inode->i_private;
4009
4010         trace_array_put(tr);
4011
4012         return single_release(inode, file);
4013 }
4014
4015 static int tracing_open(struct inode *inode, struct file *file)
4016 {
4017         struct trace_array *tr = inode->i_private;
4018         struct trace_iterator *iter;
4019         int ret = 0;
4020
4021         if (trace_array_get(tr) < 0)
4022                 return -ENODEV;
4023
4024         /* If this file was open for write, then erase contents */
4025         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4026                 int cpu = tracing_get_cpu(inode);
4027                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4028
4029 #ifdef CONFIG_TRACER_MAX_TRACE
4030                 if (tr->current_trace->print_max)
4031                         trace_buf = &tr->max_buffer;
4032 #endif
4033
4034                 if (cpu == RING_BUFFER_ALL_CPUS)
4035                         tracing_reset_online_cpus(trace_buf);
4036                 else
4037                         tracing_reset(trace_buf, cpu);
4038         }
4039
4040         if (file->f_mode & FMODE_READ) {
4041                 iter = __tracing_open(inode, file, false);
4042                 if (IS_ERR(iter))
4043                         ret = PTR_ERR(iter);
4044                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4045                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4046         }
4047
4048         if (ret < 0)
4049                 trace_array_put(tr);
4050
4051         return ret;
4052 }
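
/*
 * Usage examples (illustrative, mirroring the O_TRUNC handling above):
 *
 *        cat trace                        # read without clearing
 *        echo > trace                     # O_TRUNC open: clears the buffer(s)
 *        echo > per_cpu/cpu1/trace        # clears only CPU 1's buffer
 */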
4053
4054 /*
4055  * Some tracers are not suitable for instance buffers.
4056  * A tracer is always available for the global array (toplevel)
4057  * or if it explicitly states that it is.
4058  */
4059 static bool
4060 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4061 {
4062         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4063 }
4064
4065 /* Find the next tracer that this trace array may use */
4066 static struct tracer *
4067 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4068 {
4069         while (t && !trace_ok_for_array(t, tr))
4070                 t = t->next;
4071
4072         return t;
4073 }
4074
4075 static void *
4076 t_next(struct seq_file *m, void *v, loff_t *pos)
4077 {
4078         struct trace_array *tr = m->private;
4079         struct tracer *t = v;
4080
4081         (*pos)++;
4082
4083         if (t)
4084                 t = get_tracer_for_array(tr, t->next);
4085
4086         return t;
4087 }
4088
4089 static void *t_start(struct seq_file *m, loff_t *pos)
4090 {
4091         struct trace_array *tr = m->private;
4092         struct tracer *t;
4093         loff_t l = 0;
4094
4095         mutex_lock(&trace_types_lock);
4096
4097         t = get_tracer_for_array(tr, trace_types);
4098         for (; t && l < *pos; t = t_next(m, t, &l))
4099                 ;
4100
4101         return t;
4102 }
4103
4104 static void t_stop(struct seq_file *m, void *p)
4105 {
4106         mutex_unlock(&trace_types_lock);
4107 }
4108
4109 static int t_show(struct seq_file *m, void *v)
4110 {
4111         struct tracer *t = v;
4112
4113         if (!t)
4114                 return 0;
4115
4116         seq_puts(m, t->name);
4117         if (t->next)
4118                 seq_putc(m, ' ');
4119         else
4120                 seq_putc(m, '\n');
4121
4122         return 0;
4123 }
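
/*
 * Example of what this produces for "available_tracers" (illustrative;
 * the exact list depends on the kernel configuration):
 *
 *        blk function_graph wakeup irqsoff function nop
 */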
4124
4125 static const struct seq_operations show_traces_seq_ops = {
4126         .start          = t_start,
4127         .next           = t_next,
4128         .stop           = t_stop,
4129         .show           = t_show,
4130 };
4131
4132 static int show_traces_open(struct inode *inode, struct file *file)
4133 {
4134         struct trace_array *tr = inode->i_private;
4135         struct seq_file *m;
4136         int ret;
4137
4138         if (tracing_disabled)
4139                 return -ENODEV;
4140
4141         ret = seq_open(file, &show_traces_seq_ops);
4142         if (ret)
4143                 return ret;
4144
4145         m = file->private_data;
4146         m->private = tr;
4147
4148         return 0;
4149 }
4150
4151 static ssize_t
4152 tracing_write_stub(struct file *filp, const char __user *ubuf,
4153                    size_t count, loff_t *ppos)
4154 {
4155         return count;
4156 }
4157
4158 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4159 {
4160         int ret;
4161
4162         if (file->f_mode & FMODE_READ)
4163                 ret = seq_lseek(file, offset, whence);
4164         else
4165                 file->f_pos = ret = 0;
4166
4167         return ret;
4168 }
4169
4170 static const struct file_operations tracing_fops = {
4171         .open           = tracing_open,
4172         .read           = seq_read,
4173         .write          = tracing_write_stub,
4174         .llseek         = tracing_lseek,
4175         .release        = tracing_release,
4176 };
4177
4178 static const struct file_operations show_traces_fops = {
4179         .open           = show_traces_open,
4180         .read           = seq_read,
4181         .release        = seq_release,
4182         .llseek         = seq_lseek,
4183 };
4184
4185 static ssize_t
4186 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4187                      size_t count, loff_t *ppos)
4188 {
4189         struct trace_array *tr = file_inode(filp)->i_private;
4190         char *mask_str;
4191         int len;
4192
4193         len = snprintf(NULL, 0, "%*pb\n",
4194                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4195         mask_str = kmalloc(len, GFP_KERNEL);
4196         if (!mask_str)
4197                 return -ENOMEM;
4198
4199         len = snprintf(mask_str, len, "%*pb\n",
4200                        cpumask_pr_args(tr->tracing_cpumask));
4201         if (len >= count) {
4202                 count = -EINVAL;
4203                 goto out_err;
4204         }
4205         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4206
4207 out_err:
4208         kfree(mask_str);
4209
4210         return count;
4211 }
4212
4213 static ssize_t
4214 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4215                       size_t count, loff_t *ppos)
4216 {
4217         struct trace_array *tr = file_inode(filp)->i_private;
4218         cpumask_var_t tracing_cpumask_new;
4219         int err, cpu;
4220
4221         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4222                 return -ENOMEM;
4223
4224         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4225         if (err)
4226                 goto err_unlock;
4227
4228         local_irq_disable();
4229         arch_spin_lock(&tr->max_lock);
4230         for_each_tracing_cpu(cpu) {
4231                 /*
4232                  * Increase/decrease the disabled counter if we are
4233                  * about to flip a bit in the cpumask:
4234                  */
4235                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4236                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4237                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4238                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4239                 }
4240                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4241                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4242                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4243                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4244                 }
4245         }
4246         arch_spin_unlock(&tr->max_lock);
4247         local_irq_enable();
4248
4249         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4250         free_cpumask_var(tracing_cpumask_new);
4251
4252         return count;
4253
4254 err_unlock:
4255         free_cpumask_var(tracing_cpumask_new);
4256
4257         return err;
4258 }
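
/*
 * Usage example (illustrative): cpumask_parse_user() takes a hex mask,
 * so
 *
 *        echo 3 > tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1.  The loop above disables recording on
 * every CPU whose bit is being cleared and re-enables the newly set ones
 * before the new mask is copied into tr->tracing_cpumask.
 */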
4259
4260 static const struct file_operations tracing_cpumask_fops = {
4261         .open           = tracing_open_generic_tr,
4262         .read           = tracing_cpumask_read,
4263         .write          = tracing_cpumask_write,
4264         .release        = tracing_release_generic_tr,
4265         .llseek         = generic_file_llseek,
4266 };
4267
4268 static int tracing_trace_options_show(struct seq_file *m, void *v)
4269 {
4270         struct tracer_opt *trace_opts;
4271         struct trace_array *tr = m->private;
4272         u32 tracer_flags;
4273         int i;
4274
4275         mutex_lock(&trace_types_lock);
4276         tracer_flags = tr->current_trace->flags->val;
4277         trace_opts = tr->current_trace->flags->opts;
4278
4279         for (i = 0; trace_options[i]; i++) {
4280                 if (tr->trace_flags & (1 << i))
4281                         seq_printf(m, "%s\n", trace_options[i]);
4282                 else
4283                         seq_printf(m, "no%s\n", trace_options[i]);
4284         }
4285
4286         for (i = 0; trace_opts[i].name; i++) {
4287                 if (tracer_flags & trace_opts[i].bit)
4288                         seq_printf(m, "%s\n", trace_opts[i].name);
4289                 else
4290                         seq_printf(m, "no%s\n", trace_opts[i].name);
4291         }
4292         mutex_unlock(&trace_types_lock);
4293
4294         return 0;
4295 }
4296
4297 static int __set_tracer_option(struct trace_array *tr,
4298                                struct tracer_flags *tracer_flags,
4299                                struct tracer_opt *opts, int neg)
4300 {
4301         struct tracer *trace = tracer_flags->trace;
4302         int ret;
4303
4304         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4305         if (ret)
4306                 return ret;
4307
4308         if (neg)
4309                 tracer_flags->val &= ~opts->bit;
4310         else
4311                 tracer_flags->val |= opts->bit;
4312         return 0;
4313 }
4314
4315 /* Try to assign a tracer specific option */
4316 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4317 {
4318         struct tracer *trace = tr->current_trace;
4319         struct tracer_flags *tracer_flags = trace->flags;
4320         struct tracer_opt *opts = NULL;
4321         int i;
4322
4323         for (i = 0; tracer_flags->opts[i].name; i++) {
4324                 opts = &tracer_flags->opts[i];
4325
4326                 if (strcmp(cmp, opts->name) == 0)
4327                         return __set_tracer_option(tr, trace->flags, opts, neg);
4328         }
4329
4330         return -EINVAL;
4331 }
4332
4333 /* Some tracers require overwrite to stay enabled */
4334 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4335 {
4336         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4337                 return -1;
4338
4339         return 0;
4340 }
4341
4342 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4343 {
4344         /* do nothing if the flag already has the requested state */
4345         if (!!(tr->trace_flags & mask) == !!enabled)
4346                 return 0;
4347
4348         /* Give the tracer a chance to approve the change */
4349         if (tr->current_trace->flag_changed)
4350                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4351                         return -EINVAL;
4352
4353         if (enabled)
4354                 tr->trace_flags |= mask;
4355         else
4356                 tr->trace_flags &= ~mask;
4357
4358         if (mask == TRACE_ITER_RECORD_CMD)
4359                 trace_event_enable_cmd_record(enabled);
4360
4361         if (mask == TRACE_ITER_RECORD_TGID) {
4362                 if (!tgid_map)
4363                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1, sizeof(*tgid_map),
4364                                            GFP_KERNEL);
4365                 if (!tgid_map) {
4366                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4367                         return -ENOMEM;
4368                 }
4369
4370                 trace_event_enable_tgid_record(enabled);
4371         }
4372
4373         if (mask == TRACE_ITER_EVENT_FORK)
4374                 trace_event_follow_fork(tr, enabled);
4375
4376         if (mask == TRACE_ITER_FUNC_FORK)
4377                 ftrace_pid_follow_fork(tr, enabled);
4378
4379         if (mask == TRACE_ITER_OVERWRITE) {
4380                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4381 #ifdef CONFIG_TRACER_MAX_TRACE
4382                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4383 #endif
4384         }
4385
4386         if (mask == TRACE_ITER_PRINTK) {
4387                 trace_printk_start_stop_comm(enabled);
4388                 trace_printk_control(enabled);
4389         }
4390
4391         return 0;
4392 }
4393
4394 static int trace_set_options(struct trace_array *tr, char *option)
4395 {
4396         char *cmp;
4397         int neg = 0;
4398         int ret = -ENODEV;
4399         int i;
4400         size_t orig_len = strlen(option);
4401
4402         cmp = strstrip(option);
4403
4404         if (strncmp(cmp, "no", 2) == 0) {
4405                 neg = 1;
4406                 cmp += 2;
4407         }
4408
4409         mutex_lock(&trace_types_lock);
4410
4411         for (i = 0; trace_options[i]; i++) {
4412                 if (strcmp(cmp, trace_options[i]) == 0) {
4413                         ret = set_tracer_flag(tr, 1 << i, !neg);
4414                         break;
4415                 }
4416         }
4417
4418         /* If no option could be set, test the specific tracer options */
4419         if (!trace_options[i])
4420                 ret = set_tracer_option(tr, cmp, neg);
4421
4422         mutex_unlock(&trace_types_lock);
4423
4424         /*
4425          * If the first trailing whitespace is replaced with '\0' by strstrip,
4426          * turn it back into a space.
4427          */
4428         if (orig_len > strlen(option))
4429                 option[strlen(option)] = ' ';
4430
4431         return ret;
4432 }
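
/*
 * Usage examples (illustrative; the option names come from
 * trace_options[] and the current tracer's own flags):
 *
 *        echo sym-addr > trace_options            # set a core option
 *        echo noprint-parent > trace_options      # the "no" prefix clears one
 */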
4433
4434 static void __init apply_trace_boot_options(void)
4435 {
4436         char *buf = trace_boot_options_buf;
4437         char *option;
4438
4439         while (true) {
4440                 option = strsep(&buf, ",");
4441
4442                 if (!option)
4443                         break;
4444
4445                 if (*option)
4446                         trace_set_options(&global_trace, option);
4447
4448                 /* Put back the comma to allow this to be called again */
4449                 if (buf)
4450                         *(buf - 1) = ',';
4451         }
4452 }
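
/*
 * Illustrative example, assuming trace_boot_options_buf was filled from
 * a "trace_options=" kernel command-line parameter such as
 *
 *        trace_options=sym-offset,noirq-info
 *
 * the string is split on ',' above and each piece is handed to
 * trace_set_options().
 */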
4453
4454 static ssize_t
4455 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4456                         size_t cnt, loff_t *ppos)
4457 {
4458         struct seq_file *m = filp->private_data;
4459         struct trace_array *tr = m->private;
4460         char buf[64];
4461         int ret;
4462
4463         if (cnt >= sizeof(buf))
4464                 return -EINVAL;
4465
4466         if (copy_from_user(buf, ubuf, cnt))
4467                 return -EFAULT;
4468
4469         buf[cnt] = 0;
4470
4471         ret = trace_set_options(tr, buf);
4472         if (ret < 0)
4473                 return ret;
4474
4475         *ppos += cnt;
4476
4477         return cnt;
4478 }
4479
4480 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4481 {
4482         struct trace_array *tr = inode->i_private;
4483         int ret;
4484
4485         if (tracing_disabled)
4486                 return -ENODEV;
4487
4488         if (trace_array_get(tr) < 0)
4489                 return -ENODEV;
4490
4491         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4492         if (ret < 0)
4493                 trace_array_put(tr);
4494
4495         return ret;
4496 }
4497
4498 static const struct file_operations tracing_iter_fops = {
4499         .open           = tracing_trace_options_open,
4500         .read           = seq_read,
4501         .llseek         = seq_lseek,
4502         .release        = tracing_single_release_tr,
4503         .write          = tracing_trace_options_write,
4504 };
4505
4506 static const char readme_msg[] =
4507         "tracing mini-HOWTO:\n\n"
4508         "# echo 0 > tracing_on : quick way to disable tracing\n"
4509         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4510         " Important files:\n"
4511         "  trace\t\t\t- The static contents of the buffer\n"
4512         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4513         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4514         "  current_tracer\t- function and latency tracers\n"
4515         "  available_tracers\t- list of configured tracers for current_tracer\n"
4516         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4517         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4518         "  trace_clock\t\t- change the clock used to order events\n"
4519         "       local:   Per cpu clock but may not be synced across CPUs\n"
4520         "      global:   Synced across CPUs but slows tracing down.\n"
4521         "     counter:   Not a clock, but just an increment\n"
4522         "      uptime:   Jiffy counter from time of boot\n"
4523         "        perf:   Same clock that perf events use\n"
4524 #ifdef CONFIG_X86_64
4525         "     x86-tsc:   TSC cycle counter\n"
4526 #endif
4527         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4528         "       delta:   Delta difference against a buffer-wide timestamp\n"
4529         "    absolute:   Absolute (standalone) timestamp\n"
4530         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4531         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4532         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4533         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4534         "\t\t\t  Remove sub-buffer with rmdir\n"
4535         "  trace_options\t\t- Set format or modify how tracing happens\n"
4536         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4537         "\t\t\t  option name\n"
4538         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4539 #ifdef CONFIG_DYNAMIC_FTRACE
4540         "\n  available_filter_functions - list of functions that can be filtered on\n"
4541         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4542         "\t\t\t  functions\n"
4543         "\t     accepts: func_full_name or glob-matching-pattern\n"
4544         "\t     modules: Can select a group via module\n"
4545         "\t      Format: :mod:<module-name>\n"
4546         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4547         "\t    triggers: a command to perform when function is hit\n"
4548         "\t      Format: <function>:<trigger>[:count]\n"
4549         "\t     trigger: traceon, traceoff\n"
4550         "\t\t      enable_event:<system>:<event>\n"
4551         "\t\t      disable_event:<system>:<event>\n"
4552 #ifdef CONFIG_STACKTRACE
4553         "\t\t      stacktrace\n"
4554 #endif
4555 #ifdef CONFIG_TRACER_SNAPSHOT
4556         "\t\t      snapshot\n"
4557 #endif
4558         "\t\t      dump\n"
4559         "\t\t      cpudump\n"
4560         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4561         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4562         "\t     The first one will disable tracing every time do_fault is hit\n"
4563         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4564         "\t       The first time do_trap is hit and it disables tracing, the\n"
4565         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4566         "\t       the counter will not decrement. It only decrements when the\n"
4567         "\t       trigger did work\n"
4568         "\t     To remove trigger without count:\n"
4569         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4570         "\t     To remove trigger with a count:\n"
4571         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4572         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4573         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4574         "\t    modules: Can select a group via module command :mod:\n"
4575         "\t    Does not accept triggers\n"
4576 #endif /* CONFIG_DYNAMIC_FTRACE */
4577 #ifdef CONFIG_FUNCTION_TRACER
4578         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4579         "\t\t    (function)\n"
4580 #endif
4581 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4582         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4583         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4584         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4585 #endif
4586 #ifdef CONFIG_TRACER_SNAPSHOT
4587         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4588         "\t\t\t  snapshot buffer. Read the contents for more\n"
4589         "\t\t\t  information\n"
4590 #endif
4591 #ifdef CONFIG_STACK_TRACER
4592         "  stack_trace\t\t- Shows the max stack trace when active\n"
4593         "  stack_max_size\t- Shows current max stack size that was traced\n"
4594         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4595         "\t\t\t  new trace)\n"
4596 #ifdef CONFIG_DYNAMIC_FTRACE
4597         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4598         "\t\t\t  traces\n"
4599 #endif
4600 #endif /* CONFIG_STACK_TRACER */
4601 #ifdef CONFIG_KPROBE_EVENTS
4602         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4603         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4604 #endif
4605 #ifdef CONFIG_UPROBE_EVENTS
4606         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4607         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4608 #endif
4609 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4610         "\t  accepts: event-definitions (one definition per line)\n"
4611         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4612         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4613         "\t           -:[<group>/]<event>\n"
4614 #ifdef CONFIG_KPROBE_EVENTS
4615         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4616         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4617 #endif
4618 #ifdef CONFIG_UPROBE_EVENTS
4619         "\t    place: <path>:<offset>\n"
4620 #endif
4621         "\t     args: <name>=fetcharg[:type]\n"
4622         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4623         "\t           $stack<index>, $stack, $retval, $comm\n"
4624         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4625         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4626 #endif
4627         "  events/\t\t- Directory containing all trace event subsystems:\n"
4628         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4629         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4630         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4631         "\t\t\t  events\n"
4632         "      filter\t\t- If set, only events passing filter are traced\n"
4633         "  events/<system>/<event>/\t- Directory containing control files for\n"
4634         "\t\t\t  <event>:\n"
4635         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4636         "      filter\t\t- If set, only events passing filter are traced\n"
4637         "      trigger\t\t- If set, a command to perform when event is hit\n"
4638         "\t    Format: <trigger>[:count][if <filter>]\n"
4639         "\t   trigger: traceon, traceoff\n"
4640         "\t            enable_event:<system>:<event>\n"
4641         "\t            disable_event:<system>:<event>\n"
4642 #ifdef CONFIG_HIST_TRIGGERS
4643         "\t            enable_hist:<system>:<event>\n"
4644         "\t            disable_hist:<system>:<event>\n"
4645 #endif
4646 #ifdef CONFIG_STACKTRACE
4647         "\t\t    stacktrace\n"
4648 #endif
4649 #ifdef CONFIG_TRACER_SNAPSHOT
4650         "\t\t    snapshot\n"
4651 #endif
4652 #ifdef CONFIG_HIST_TRIGGERS
4653         "\t\t    hist (see below)\n"
4654 #endif
4655         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4656         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4657         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4658         "\t                  events/block/block_unplug/trigger\n"
4659         "\t   The first disables tracing every time block_unplug is hit.\n"
4660         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4661         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4662         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4663         "\t   Like function triggers, the counter is only decremented if it\n"
4664         "\t    enabled or disabled tracing.\n"
4665         "\t   To remove a trigger without a count:\n"
4666         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4667         "\t   To remove a trigger with a count:\n"
4668         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4669         "\t   Filters can be ignored when removing a trigger.\n"
4670 #ifdef CONFIG_HIST_TRIGGERS
4671         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4672         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4673         "\t            [:values=<field1[,field2,...]>]\n"
4674         "\t            [:sort=<field1[,field2,...]>]\n"
4675         "\t            [:size=#entries]\n"
4676         "\t            [:pause][:continue][:clear]\n"
4677         "\t            [:name=histname1]\n"
4678         "\t            [if <filter>]\n\n"
4679         "\t    When a matching event is hit, an entry is added to a hash\n"
4680         "\t    table using the key(s) and value(s) named, and the value of a\n"
4681         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4682         "\t    correspond to fields in the event's format description.  Keys\n"
4683         "\t    can be any field, or the special string 'stacktrace'.\n"
4684         "\t    Compound keys consisting of up to two fields can be specified\n"
4685         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4686         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4687         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4688         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4689         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4690         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4691         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4692         "\t    its histogram data will be shared with other triggers of the\n"
4693         "\t    same name, and trigger hits will update this common data.\n\n"
4694         "\t    Reading the 'hist' file for the event will dump the hash\n"
4695         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4696         "\t    triggers attached to an event, there will be a table for each\n"
4697         "\t    trigger in the output.  The table displayed for a named\n"
4698         "\t    trigger will be the same as any other instance having the\n"
4699         "\t    same name.  The default format used to display a given field\n"
4700         "\t    can be modified by appending any of the following modifiers\n"
4701         "\t    to the field name, as applicable:\n\n"
4702         "\t            .hex        display a number as a hex value\n"
4703         "\t            .sym        display an address as a symbol\n"
4704         "\t            .sym-offset display an address as a symbol and offset\n"
4705         "\t            .execname   display a common_pid as a program name\n"
4706         "\t            .syscall    display a syscall id as a syscall name\n"
4707         "\t            .log2       display log2 value rather than raw number\n"
4708         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4709         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4710         "\t    trigger or to start a hist trigger but not log any events\n"
4711         "\t    until told to do so.  'continue' can be used to start or\n"
4712         "\t    restart a paused hist trigger.\n\n"
4713         "\t    The 'clear' parameter will clear the contents of a running\n"
4714         "\t    hist trigger and leave its current paused/active state\n"
4715         "\t    unchanged.\n\n"
4716         "\t    The enable_hist and disable_hist triggers can be used to\n"
4717         "\t    have one event conditionally start and stop another event's\n"
4718         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4719         "\t    the enable_event and disable_event triggers.\n"
4720 #endif
4721 ;
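/*
 * Illustrative only: a minimal walk-through of the trigger syntax documented
 * in readme_msg above. The mount point and the kmem/kmalloc event with its
 * call_site and bytes_req fields are assumptions for the example, not
 * something this file defines:
 *
 *   # attach a hist trigger keyed on call_site, summing bytes_req
 *   echo 'hist:keys=call_site:values=bytes_req' > \
 *        /sys/kernel/tracing/events/kmem/kmalloc/trigger
 *
 *   # read back the aggregated table
 *   cat /sys/kernel/tracing/events/kmem/kmalloc/hist
 *
 *   # remove the trigger again (note the leading '!')
 *   echo '!hist:keys=call_site:values=bytes_req' > \
 *        /sys/kernel/tracing/events/kmem/kmalloc/trigger
 */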
4722
4723 static ssize_t
4724 tracing_readme_read(struct file *filp, char __user *ubuf,
4725                        size_t cnt, loff_t *ppos)
4726 {
4727         return simple_read_from_buffer(ubuf, cnt, ppos,
4728                                         readme_msg, strlen(readme_msg));
4729 }
4730
4731 static const struct file_operations tracing_readme_fops = {
4732         .open           = tracing_open_generic,
4733         .read           = tracing_readme_read,
4734         .llseek         = generic_file_llseek,
4735 };
4736
4737 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4738 {
4739         int *ptr = v;
4740
4741         if (*pos || m->count)
4742                 ptr++;
4743
4744         (*pos)++;
4745
4746         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4747                 if (trace_find_tgid(*ptr))
4748                         return ptr;
4749         }
4750
4751         return NULL;
4752 }
4753
4754 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4755 {
4756         void *v;
4757         loff_t l = 0;
4758
4759         if (!tgid_map)
4760                 return NULL;
4761
4762         v = &tgid_map[0];
4763         while (l <= *pos) {
4764                 v = saved_tgids_next(m, v, &l);
4765                 if (!v)
4766                         return NULL;
4767         }
4768
4769         return v;
4770 }
4771
4772 static void saved_tgids_stop(struct seq_file *m, void *v)
4773 {
4774 }
4775
4776 static int saved_tgids_show(struct seq_file *m, void *v)
4777 {
4778         int pid = (int *)v - tgid_map;
4779
4780         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4781         return 0;
4782 }
4783
4784 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4785         .start          = saved_tgids_start,
4786         .stop           = saved_tgids_stop,
4787         .next           = saved_tgids_next,
4788         .show           = saved_tgids_show,
4789 };
4790
4791 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4792 {
4793         if (tracing_disabled)
4794                 return -ENODEV;
4795
4796         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4797 }
4798
4799
4800 static const struct file_operations tracing_saved_tgids_fops = {
4801         .open           = tracing_saved_tgids_open,
4802         .read           = seq_read,
4803         .llseek         = seq_lseek,
4804         .release        = seq_release,
4805 };
4806
4807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4808 {
4809         unsigned int *ptr = v;
4810
4811         if (*pos || m->count)
4812                 ptr++;
4813
4814         (*pos)++;
4815
4816         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4817              ptr++) {
4818                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4819                         continue;
4820
4821                 return ptr;
4822         }
4823
4824         return NULL;
4825 }
4826
4827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4828 {
4829         void *v;
4830         loff_t l = 0;
4831
4832         preempt_disable();
4833         arch_spin_lock(&trace_cmdline_lock);
4834
4835         v = &savedcmd->map_cmdline_to_pid[0];
4836         while (l <= *pos) {
4837                 v = saved_cmdlines_next(m, v, &l);
4838                 if (!v)
4839                         return NULL;
4840         }
4841
4842         return v;
4843 }
4844
4845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4846 {
4847         arch_spin_unlock(&trace_cmdline_lock);
4848         preempt_enable();
4849 }
4850
4851 static int saved_cmdlines_show(struct seq_file *m, void *v)
4852 {
4853         char buf[TASK_COMM_LEN];
4854         unsigned int *pid = v;
4855
4856         __trace_find_cmdline(*pid, buf);
4857         seq_printf(m, "%d %s\n", *pid, buf);
4858         return 0;
4859 }
4860
4861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4862         .start          = saved_cmdlines_start,
4863         .next           = saved_cmdlines_next,
4864         .stop           = saved_cmdlines_stop,
4865         .show           = saved_cmdlines_show,
4866 };
4867
4868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4869 {
4870         if (tracing_disabled)
4871                 return -ENODEV;
4872
4873         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4874 }
4875
4876 static const struct file_operations tracing_saved_cmdlines_fops = {
4877         .open           = tracing_saved_cmdlines_open,
4878         .read           = seq_read,
4879         .llseek         = seq_lseek,
4880         .release        = seq_release,
4881 };
4882
4883 static ssize_t
4884 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4885                                  size_t cnt, loff_t *ppos)
4886 {
4887         char buf[64];
4888         int r;
4889
4890         arch_spin_lock(&trace_cmdline_lock);
4891         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4892         arch_spin_unlock(&trace_cmdline_lock);
4893
4894         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4895 }
4896
4897 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4898 {
4899         kfree(s->saved_cmdlines);
4900         kfree(s->map_cmdline_to_pid);
4901         kfree(s);
4902 }
4903
4904 static int tracing_resize_saved_cmdlines(unsigned int val)
4905 {
4906         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4907
4908         s = kmalloc(sizeof(*s), GFP_KERNEL);
4909         if (!s)
4910                 return -ENOMEM;
4911
4912         if (allocate_cmdlines_buffer(val, s) < 0) {
4913                 kfree(s);
4914                 return -ENOMEM;
4915         }
4916
4917         arch_spin_lock(&trace_cmdline_lock);
4918         savedcmd_temp = savedcmd;
4919         savedcmd = s;
4920         arch_spin_unlock(&trace_cmdline_lock);
4921         free_saved_cmdlines_buffer(savedcmd_temp);
4922
4923         return 0;
4924 }
4925
4926 static ssize_t
4927 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4928                                   size_t cnt, loff_t *ppos)
4929 {
4930         unsigned long val;
4931         int ret;
4932
4933         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4934         if (ret)
4935                 return ret;
4936
4937         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4938         if (!val || val > PID_MAX_DEFAULT)
4939                 return -EINVAL;
4940
4941         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4942         if (ret < 0)
4943                 return ret;
4944
4945         *ppos += cnt;
4946
4947         return cnt;
4948 }
4949
4950 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4951         .open           = tracing_open_generic,
4952         .read           = tracing_saved_cmdlines_size_read,
4953         .write          = tracing_saved_cmdlines_size_write,
4954 };
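/*
 * Usage sketch for the handlers above (illustrative; the tracefs mount point
 * and the "saved_cmdlines_size" file name are assumptions based on the usual
 * tracefs layout):
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size        # current entry count
 *   echo 8192 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Writes are parsed as a decimal count and must be between 1 and
 * PID_MAX_DEFAULT, otherwise tracing_saved_cmdlines_size_write() returns
 * -EINVAL.
 */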
4955
4956 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4957 static union trace_eval_map_item *
4958 update_eval_map(union trace_eval_map_item *ptr)
4959 {
4960         if (!ptr->map.eval_string) {
4961                 if (ptr->tail.next) {
4962                         ptr = ptr->tail.next;
4963                         /* Set ptr to the next real item (skip head) */
4964                         ptr++;
4965                 } else
4966                         return NULL;
4967         }
4968         return ptr;
4969 }
4970
4971 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4972 {
4973         union trace_eval_map_item *ptr = v;
4974
4975         /*
4976          * Paranoid! If ptr points to end, we don't want to increment past it.
4977          * This really should never happen.
4978          */
4979         ptr = update_eval_map(ptr);
4980         if (WARN_ON_ONCE(!ptr))
4981                 return NULL;
4982
4983         ptr++;
4984
4985         (*pos)++;
4986
4987         ptr = update_eval_map(ptr);
4988
4989         return ptr;
4990 }
4991
4992 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4993 {
4994         union trace_eval_map_item *v;
4995         loff_t l = 0;
4996
4997         mutex_lock(&trace_eval_mutex);
4998
4999         v = trace_eval_maps;
5000         if (v)
5001                 v++;
5002
5003         while (v && l < *pos) {
5004                 v = eval_map_next(m, v, &l);
5005         }
5006
5007         return v;
5008 }
5009
5010 static void eval_map_stop(struct seq_file *m, void *v)
5011 {
5012         mutex_unlock(&trace_eval_mutex);
5013 }
5014
5015 static int eval_map_show(struct seq_file *m, void *v)
5016 {
5017         union trace_eval_map_item *ptr = v;
5018
5019         seq_printf(m, "%s %ld (%s)\n",
5020                    ptr->map.eval_string, ptr->map.eval_value,
5021                    ptr->map.system);
5022
5023         return 0;
5024 }
5025
5026 static const struct seq_operations tracing_eval_map_seq_ops = {
5027         .start          = eval_map_start,
5028         .next           = eval_map_next,
5029         .stop           = eval_map_stop,
5030         .show           = eval_map_show,
5031 };
5032
5033 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5034 {
5035         if (tracing_disabled)
5036                 return -ENODEV;
5037
5038         return seq_open(filp, &tracing_eval_map_seq_ops);
5039 }
5040
5041 static const struct file_operations tracing_eval_map_fops = {
5042         .open           = tracing_eval_map_open,
5043         .read           = seq_read,
5044         .llseek         = seq_lseek,
5045         .release        = seq_release,
5046 };
5047
5048 static inline union trace_eval_map_item *
5049 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5050 {
5051         /* Return tail of array given the head */
5052         return ptr + ptr->head.length + 1;
5053 }
5054
5055 static void
5056 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5057                            int len)
5058 {
5059         struct trace_eval_map **stop;
5060         struct trace_eval_map **map;
5061         union trace_eval_map_item *map_array;
5062         union trace_eval_map_item *ptr;
5063
5064         stop = start + len;
5065
5066         /*
5067          * The trace_eval_maps contains the map plus a head and tail item,
5068          * where the head holds the module and length of array, and the
5069          * tail holds a pointer to the next list.
5070          */
5071         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5072         if (!map_array) {
5073                 pr_warn("Unable to allocate trace eval mapping\n");
5074                 return;
5075         }
5076
5077         mutex_lock(&trace_eval_mutex);
5078
5079         if (!trace_eval_maps)
5080                 trace_eval_maps = map_array;
5081         else {
5082                 ptr = trace_eval_maps;
5083                 for (;;) {
5084                         ptr = trace_eval_jmp_to_tail(ptr);
5085                         if (!ptr->tail.next)
5086                                 break;
5087                         ptr = ptr->tail.next;
5088
5089                 }
5090                 ptr->tail.next = map_array;
5091         }
5092         map_array->head.mod = mod;
5093         map_array->head.length = len;
5094         map_array++;
5095
5096         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5097                 map_array->map = **map;
5098                 map_array++;
5099         }
5100         memset(map_array, 0, sizeof(*map_array));
5101
5102         mutex_unlock(&trace_eval_mutex);
5103 }
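/*
 * Layout sketch of the map_array built above (head and tail entries wrap the
 * per-module maps, matching the comment in trace_insert_eval_map_file()):
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next (zeroed here) ]
 *
 * trace_eval_jmp_to_tail() relies on this layout: ptr + ptr->head.length + 1
 * lands on the tail entry, whose ->tail.next chains to the next module's array.
 */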
5104
5105 static void trace_create_eval_file(struct dentry *d_tracer)
5106 {
5107         trace_create_file("eval_map", 0444, d_tracer,
5108                           NULL, &tracing_eval_map_fops);
5109 }
5110
5111 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5112 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5113 static inline void trace_insert_eval_map_file(struct module *mod,
5114                               struct trace_eval_map **start, int len) { }
5115 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5116
5117 static void trace_insert_eval_map(struct module *mod,
5118                                   struct trace_eval_map **start, int len)
5119 {
5120         struct trace_eval_map **map;
5121
5122         if (len <= 0)
5123                 return;
5124
5125         map = start;
5126
5127         trace_event_eval_update(map, len);
5128
5129         trace_insert_eval_map_file(mod, start, len);
5130 }
5131
5132 static ssize_t
5133 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5134                        size_t cnt, loff_t *ppos)
5135 {
5136         struct trace_array *tr = filp->private_data;
5137         char buf[MAX_TRACER_SIZE+2];
5138         int r;
5139
5140         mutex_lock(&trace_types_lock);
5141         r = sprintf(buf, "%s\n", tr->current_trace->name);
5142         mutex_unlock(&trace_types_lock);
5143
5144         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5145 }
5146
5147 int tracer_init(struct tracer *t, struct trace_array *tr)
5148 {
5149         tracing_reset_online_cpus(&tr->trace_buffer);
5150         return t->init(tr);
5151 }
5152
5153 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5154 {
5155         int cpu;
5156
5157         for_each_tracing_cpu(cpu)
5158                 per_cpu_ptr(buf->data, cpu)->entries = val;
5159 }
5160
5161 #ifdef CONFIG_TRACER_MAX_TRACE
5162 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5163 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5164                                         struct trace_buffer *size_buf, int cpu_id)
5165 {
5166         int cpu, ret = 0;
5167
5168         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5169                 for_each_tracing_cpu(cpu) {
5170                         ret = ring_buffer_resize(trace_buf->buffer,
5171                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5172                         if (ret < 0)
5173                                 break;
5174                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5175                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5176                 }
5177         } else {
5178                 ret = ring_buffer_resize(trace_buf->buffer,
5179                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5180                 if (ret == 0)
5181                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5182                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5183         }
5184
5185         return ret;
5186 }
5187 #endif /* CONFIG_TRACER_MAX_TRACE */
5188
5189 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5190                                         unsigned long size, int cpu)
5191 {
5192         int ret;
5193
5194         /*
5195          * If kernel or user changes the size of the ring buffer
5196          * we use the size that was given, and we can forget about
5197          * expanding it later.
5198          */
5199         ring_buffer_expanded = true;
5200
5201         /* May be called before buffers are initialized */
5202         if (!tr->trace_buffer.buffer)
5203                 return 0;
5204
5205         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5206         if (ret < 0)
5207                 return ret;
5208
5209 #ifdef CONFIG_TRACER_MAX_TRACE
5210         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5211             !tr->current_trace->use_max_tr)
5212                 goto out;
5213
5214         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5215         if (ret < 0) {
5216                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5217                                                      &tr->trace_buffer, cpu);
5218                 if (r < 0) {
5219                         /*
5220                          * AARGH! We are left with a differently
5221                          * sized max buffer!
5222                          * The max buffer is our "snapshot" buffer.
5223                          * When a tracer needs a snapshot (one of the
5224                          * latency tracers), it swaps the max buffer
5225                          * with the saved snapshot. We succeeded in
5226                          * updating the size of the main buffer, but failed
5227                          * to update the size of the max buffer. When we then
5228                          * tried to reset the main buffer to its original
5229                          * size, that failed too. This is very unlikely to
5230                          * happen, but if it does, warn and kill all
5231                          * tracing.
5232                          */
5233                         WARN_ON(1);
5234                         tracing_disabled = 1;
5235                 }
5236                 return ret;
5237         }
5238
5239         if (cpu == RING_BUFFER_ALL_CPUS)
5240                 set_buffer_entries(&tr->max_buffer, size);
5241         else
5242                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5243
5244  out:
5245 #endif /* CONFIG_TRACER_MAX_TRACE */
5246
5247         if (cpu == RING_BUFFER_ALL_CPUS)
5248                 set_buffer_entries(&tr->trace_buffer, size);
5249         else
5250                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5251
5252         return ret;
5253 }
5254
5255 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5256                                           unsigned long size, int cpu_id)
5257 {
5258         int ret = size;
5259
5260         mutex_lock(&trace_types_lock);
5261
5262         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5263                 /* make sure this cpu is enabled in the mask */
5264                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5265                         ret = -EINVAL;
5266                         goto out;
5267                 }
5268         }
5269
5270         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5271         if (ret < 0)
5272                 ret = -ENOMEM;
5273
5274 out:
5275         mutex_unlock(&trace_types_lock);
5276
5277         return ret;
5278 }
5279
5280
5281 /**
5282  * tracing_update_buffers - used by tracing facility to expand ring buffers
5283  *
5284  * To save memory on systems where tracing is configured in but never
5285  * used, the ring buffers are set to a minimum size. Once a user starts
5286  * to use the tracing facility, the buffers need to grow to their
5287  * default size.
5288  *
5289  * This function is to be called when a tracer is about to be used.
5290  */
5291 int tracing_update_buffers(void)
5292 {
5293         int ret = 0;
5294
5295         mutex_lock(&trace_types_lock);
5296         if (!ring_buffer_expanded)
5297                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5298                                                 RING_BUFFER_ALL_CPUS);
5299         mutex_unlock(&trace_types_lock);
5300
5301         return ret;
5302 }
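/*
 * Typical call pattern (a sketch; it mirrors how tracing_snapshot_write()
 * below uses it): expand the buffers before anything that needs them at
 * their full size.
 *
 *   ret = tracing_update_buffers();
 *   if (ret < 0)
 *           return ret;
 */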
5303
5304 struct trace_option_dentry;
5305
5306 static void
5307 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5308
5309 /*
5310  * Used to clear out the tracer before deletion of an instance.
5311  * Must have trace_types_lock held.
5312  */
5313 static void tracing_set_nop(struct trace_array *tr)
5314 {
5315         if (tr->current_trace == &nop_trace)
5316                 return;
5317
5318         tr->current_trace->enabled--;
5319
5320         if (tr->current_trace->reset)
5321                 tr->current_trace->reset(tr);
5322
5323         tr->current_trace = &nop_trace;
5324 }
5325
5326 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5327 {
5328         /* Only enable if the directory has been created already. */
5329         if (!tr->dir)
5330                 return;
5331
5332         create_trace_option_files(tr, t);
5333 }
5334
5335 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5336 {
5337         struct tracer *t;
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339         bool had_max_tr;
5340 #endif
5341         int ret = 0;
5342
5343         mutex_lock(&trace_types_lock);
5344
5345         if (!ring_buffer_expanded) {
5346                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5347                                                 RING_BUFFER_ALL_CPUS);
5348                 if (ret < 0)
5349                         goto out;
5350                 ret = 0;
5351         }
5352
5353         for (t = trace_types; t; t = t->next) {
5354                 if (strcmp(t->name, buf) == 0)
5355                         break;
5356         }
5357         if (!t) {
5358                 ret = -EINVAL;
5359                 goto out;
5360         }
5361         if (t == tr->current_trace)
5362                 goto out;
5363
5364         /* Some tracers won't work on kernel command line */
5365         if (system_state < SYSTEM_RUNNING && t->noboot) {
5366                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5367                         t->name);
5368                 goto out;
5369         }
5370
5371         /* Some tracers are only allowed for the top level buffer */
5372         if (!trace_ok_for_array(t, tr)) {
5373                 ret = -EINVAL;
5374                 goto out;
5375         }
5376
5377         /* If trace pipe files are being read, we can't change the tracer */
5378         if (tr->current_trace->ref) {
5379                 ret = -EBUSY;
5380                 goto out;
5381         }
5382
5383         trace_branch_disable();
5384
5385         tr->current_trace->enabled--;
5386
5387         if (tr->current_trace->reset)
5388                 tr->current_trace->reset(tr);
5389
5390         /* Current trace needs to be nop_trace before synchronize_sched */
5391         tr->current_trace = &nop_trace;
5392
5393 #ifdef CONFIG_TRACER_MAX_TRACE
5394         had_max_tr = tr->allocated_snapshot;
5395
5396         if (had_max_tr && !t->use_max_tr) {
5397                 /*
5398                  * We need to make sure that the update_max_tr sees that
5399                  * current_trace changed to nop_trace to keep it from
5400                  * swapping the buffers after we resize it.
5401                  * The update_max_tr is called with interrupts disabled,
5402                  * so a synchronize_sched() is sufficient.
5403                  */
5404                 synchronize_sched();
5405                 free_snapshot(tr);
5406         }
5407 #endif
5408
5409 #ifdef CONFIG_TRACER_MAX_TRACE
5410         if (t->use_max_tr && !had_max_tr) {
5411                 ret = alloc_snapshot(tr);
5412                 if (ret < 0)
5413                         goto out;
5414         }
5415 #endif
5416
5417         if (t->init) {
5418                 ret = tracer_init(t, tr);
5419                 if (ret)
5420                         goto out;
5421         }
5422
5423         tr->current_trace = t;
5424         tr->current_trace->enabled++;
5425         trace_branch_enable(tr);
5426  out:
5427         mutex_unlock(&trace_types_lock);
5428
5429         return ret;
5430 }
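/*
 * From user space this is reached through the "current_trace" file (a usage
 * sketch; the tracefs path and file name are assumptions based on the usual
 * layout, and the named tracer must be built in):
 *
 *   echo function > /sys/kernel/tracing/current_trace
 *   echo nop      > /sys/kernel/tracing/current_trace   # back to no tracer
 *
 * tracing_set_trace_write() below strips trailing whitespace from the written
 * name before handing it to tracing_set_tracer().
 */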
5431
5432 static ssize_t
5433 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5434                         size_t cnt, loff_t *ppos)
5435 {
5436         struct trace_array *tr = filp->private_data;
5437         char buf[MAX_TRACER_SIZE+1];
5438         int i;
5439         size_t ret;
5440         int err;
5441
5442         ret = cnt;
5443
5444         if (cnt > MAX_TRACER_SIZE)
5445                 cnt = MAX_TRACER_SIZE;
5446
5447         if (copy_from_user(buf, ubuf, cnt))
5448                 return -EFAULT;
5449
5450         buf[cnt] = 0;
5451
5452         /* strip ending whitespace. */
5453         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5454                 buf[i] = 0;
5455
5456         err = tracing_set_tracer(tr, buf);
5457         if (err)
5458                 return err;
5459
5460         *ppos += ret;
5461
5462         return ret;
5463 }
5464
5465 static ssize_t
5466 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5467                    size_t cnt, loff_t *ppos)
5468 {
5469         char buf[64];
5470         int r;
5471
5472         r = snprintf(buf, sizeof(buf), "%ld\n",
5473                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5474         if (r > sizeof(buf))
5475                 r = sizeof(buf);
5476         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5477 }
5478
5479 static ssize_t
5480 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5481                     size_t cnt, loff_t *ppos)
5482 {
5483         unsigned long val;
5484         int ret;
5485
5486         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5487         if (ret)
5488                 return ret;
5489
5490         *ptr = val * 1000;
5491
5492         return cnt;
5493 }
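/*
 * Note on units (a sketch of the conversion done by the helpers above):
 * values are exposed to user space in microseconds but stored in nanoseconds,
 * so reads go through nsecs_to_usecs() and writes multiply by 1000. For
 * example, assuming the usual tracefs path and "tracing_thresh" file name:
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh   # threshold of 100 usecs
 */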
5494
5495 static ssize_t
5496 tracing_thresh_read(struct file *filp, char __user *ubuf,
5497                     size_t cnt, loff_t *ppos)
5498 {
5499         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5500 }
5501
5502 static ssize_t
5503 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5504                      size_t cnt, loff_t *ppos)
5505 {
5506         struct trace_array *tr = filp->private_data;
5507         int ret;
5508
5509         mutex_lock(&trace_types_lock);
5510         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5511         if (ret < 0)
5512                 goto out;
5513
5514         if (tr->current_trace->update_thresh) {
5515                 ret = tr->current_trace->update_thresh(tr);
5516                 if (ret < 0)
5517                         goto out;
5518         }
5519
5520         ret = cnt;
5521 out:
5522         mutex_unlock(&trace_types_lock);
5523
5524         return ret;
5525 }
5526
5527 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5528
5529 static ssize_t
5530 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5531                      size_t cnt, loff_t *ppos)
5532 {
5533         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5534 }
5535
5536 static ssize_t
5537 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5538                       size_t cnt, loff_t *ppos)
5539 {
5540         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5541 }
5542
5543 #endif
5544
5545 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5546 {
5547         struct trace_array *tr = inode->i_private;
5548         struct trace_iterator *iter;
5549         int ret = 0;
5550
5551         if (tracing_disabled)
5552                 return -ENODEV;
5553
5554         if (trace_array_get(tr) < 0)
5555                 return -ENODEV;
5556
5557         mutex_lock(&trace_types_lock);
5558
5559         /* create a buffer to store the information to pass to userspace */
5560         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5561         if (!iter) {
5562                 ret = -ENOMEM;
5563                 __trace_array_put(tr);
5564                 goto out;
5565         }
5566
5567         trace_seq_init(&iter->seq);
5568         iter->trace = tr->current_trace;
5569
5570         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5571                 ret = -ENOMEM;
5572                 goto fail;
5573         }
5574
5575         /* trace pipe does not show start of buffer */
5576         cpumask_setall(iter->started);
5577
5578         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5579                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5580
5581         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5582         if (trace_clocks[tr->clock_id].in_ns)
5583                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5584
5585         iter->tr = tr;
5586         iter->trace_buffer = &tr->trace_buffer;
5587         iter->cpu_file = tracing_get_cpu(inode);
5588         mutex_init(&iter->mutex);
5589         filp->private_data = iter;
5590
5591         if (iter->trace->pipe_open)
5592                 iter->trace->pipe_open(iter);
5593
5594         nonseekable_open(inode, filp);
5595
5596         tr->current_trace->ref++;
5597 out:
5598         mutex_unlock(&trace_types_lock);
5599         return ret;
5600
5601 fail:
5602         kfree(iter->trace);
5603         kfree(iter);
5604         __trace_array_put(tr);
5605         mutex_unlock(&trace_types_lock);
5606         return ret;
5607 }
5608
5609 static int tracing_release_pipe(struct inode *inode, struct file *file)
5610 {
5611         struct trace_iterator *iter = file->private_data;
5612         struct trace_array *tr = inode->i_private;
5613
5614         mutex_lock(&trace_types_lock);
5615
5616         tr->current_trace->ref--;
5617
5618         if (iter->trace->pipe_close)
5619                 iter->trace->pipe_close(iter);
5620
5621         mutex_unlock(&trace_types_lock);
5622
5623         free_cpumask_var(iter->started);
5624         mutex_destroy(&iter->mutex);
5625         kfree(iter);
5626
5627         trace_array_put(tr);
5628
5629         return 0;
5630 }
5631
5632 static __poll_t
5633 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5634 {
5635         struct trace_array *tr = iter->tr;
5636
5637         /* Iterators are static, they should be filled or empty */
5638         if (trace_buffer_iter(iter, iter->cpu_file))
5639                 return EPOLLIN | EPOLLRDNORM;
5640
5641         if (tr->trace_flags & TRACE_ITER_BLOCK)
5642                 /*
5643                  * Always select as readable when in blocking mode
5644                  */
5645                 return EPOLLIN | EPOLLRDNORM;
5646         else
5647                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5648                                              filp, poll_table);
5649 }
5650
5651 static __poll_t
5652 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5653 {
5654         struct trace_iterator *iter = filp->private_data;
5655
5656         return trace_poll(iter, filp, poll_table);
5657 }
5658
5659 /* Must be called with iter->mutex held. */
5660 static int tracing_wait_pipe(struct file *filp)
5661 {
5662         struct trace_iterator *iter = filp->private_data;
5663         int ret;
5664
5665         while (trace_empty(iter)) {
5666
5667                 if ((filp->f_flags & O_NONBLOCK)) {
5668                         return -EAGAIN;
5669                 }
5670
5671                 /*
5672                  * We block until we have read something and tracing is
5673                  * disabled. If tracing is disabled but nothing has been
5674                  * read yet, we keep blocking. This allows a user to cat
5675                  * this file and then enable tracing. But after we have read
5676                  * something, we give an EOF when tracing is disabled again.
5677                  *
5678                  * iter->pos will be 0 if we haven't read anything.
5679                  */
5680                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5681                         break;
5682
5683                 mutex_unlock(&iter->mutex);
5684
5685                 ret = wait_on_pipe(iter, false);
5686
5687                 mutex_lock(&iter->mutex);
5688
5689                 if (ret)
5690                         return ret;
5691         }
5692
5693         return 1;
5694 }
5695
5696 /*
5697  * Consumer reader.
5698  */
5699 static ssize_t
5700 tracing_read_pipe(struct file *filp, char __user *ubuf,
5701                   size_t cnt, loff_t *ppos)
5702 {
5703         struct trace_iterator *iter = filp->private_data;
5704         ssize_t sret;
5705
5706         /*
5707          * Avoid more than one consumer on a single file descriptor.
5708          * This is just a matter of trace coherency; the ring buffer itself
5709          * is protected.
5710          */
5711         mutex_lock(&iter->mutex);
5712
5713         /* return any leftover data */
5714         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5715         if (sret != -EBUSY)
5716                 goto out;
5717
5718         trace_seq_init(&iter->seq);
5719
5720         if (iter->trace->read) {
5721                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5722                 if (sret)
5723                         goto out;
5724         }
5725
5726 waitagain:
5727         sret = tracing_wait_pipe(filp);
5728         if (sret <= 0)
5729                 goto out;
5730
5731         /* stop when tracing is finished */
5732         if (trace_empty(iter)) {
5733                 sret = 0;
5734                 goto out;
5735         }
5736
5737         if (cnt >= PAGE_SIZE)
5738                 cnt = PAGE_SIZE - 1;
5739
5740         /* reset all but tr, trace, and overruns */
5741         memset(&iter->seq, 0,
5742                sizeof(struct trace_iterator) -
5743                offsetof(struct trace_iterator, seq));
5744         cpumask_clear(iter->started);
5745         iter->pos = -1;
5746
5747         trace_event_read_lock();
5748         trace_access_lock(iter->cpu_file);
5749         while (trace_find_next_entry_inc(iter) != NULL) {
5750                 enum print_line_t ret;
5751                 int save_len = iter->seq.seq.len;
5752
5753                 ret = print_trace_line(iter);
5754                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5755                         /* don't print partial lines */
5756                         iter->seq.seq.len = save_len;
5757                         break;
5758                 }
5759                 if (ret != TRACE_TYPE_NO_CONSUME)
5760                         trace_consume(iter);
5761
5762                 if (trace_seq_used(&iter->seq) >= cnt)
5763                         break;
5764
5765                 /*
5766                  * Setting the full flag means we reached the trace_seq buffer
5767                  * size and should have left via the partial-line check above.
5768                  * One of the trace_seq_* functions is not being used properly.
5769                  */
5770                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5771                           iter->ent->type);
5772         }
5773         trace_access_unlock(iter->cpu_file);
5774         trace_event_read_unlock();
5775
5776         /* Now copy what we have to the user */
5777         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5778         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5779                 trace_seq_init(&iter->seq);
5780
5781         /*
5782          * If there was nothing to send to user, in spite of consuming trace
5783          * entries, go back to wait for more entries.
5784          */
5785         if (sret == -EBUSY)
5786                 goto waitagain;
5787
5788 out:
5789         mutex_unlock(&iter->mutex);
5790
5791         return sret;
5792 }
5793
5794 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5795                                      unsigned int idx)
5796 {
5797         __free_page(spd->pages[idx]);
5798 }
5799
5800 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5801         .can_merge              = 0,
5802         .confirm                = generic_pipe_buf_confirm,
5803         .release                = generic_pipe_buf_release,
5804         .steal                  = generic_pipe_buf_steal,
5805         .get                    = generic_pipe_buf_get,
5806 };
5807
5808 static size_t
5809 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5810 {
5811         size_t count;
5812         int save_len;
5813         int ret;
5814
5815         /* Seq buffer is page-sized, exactly what we need. */
5816         for (;;) {
5817                 save_len = iter->seq.seq.len;
5818                 ret = print_trace_line(iter);
5819
5820                 if (trace_seq_has_overflowed(&iter->seq)) {
5821                         iter->seq.seq.len = save_len;
5822                         break;
5823                 }
5824
5825                 /*
5826                  * This should not be hit, because it should only
5827                  * be set if the iter->seq overflowed. But check it
5828                  * anyway to be safe.
5829                  */
5830                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5831                         iter->seq.seq.len = save_len;
5832                         break;
5833                 }
5834
5835                 count = trace_seq_used(&iter->seq) - save_len;
5836                 if (rem < count) {
5837                         rem = 0;
5838                         iter->seq.seq.len = save_len;
5839                         break;
5840                 }
5841
5842                 if (ret != TRACE_TYPE_NO_CONSUME)
5843                         trace_consume(iter);
5844                 rem -= count;
5845                 if (!trace_find_next_entry_inc(iter))   {
5846                         rem = 0;
5847                         iter->ent = NULL;
5848                         break;
5849                 }
5850         }
5851
5852         return rem;
5853 }
5854
5855 static ssize_t tracing_splice_read_pipe(struct file *filp,
5856                                         loff_t *ppos,
5857                                         struct pipe_inode_info *pipe,
5858                                         size_t len,
5859                                         unsigned int flags)
5860 {
5861         struct page *pages_def[PIPE_DEF_BUFFERS];
5862         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5863         struct trace_iterator *iter = filp->private_data;
5864         struct splice_pipe_desc spd = {
5865                 .pages          = pages_def,
5866                 .partial        = partial_def,
5867                 .nr_pages       = 0, /* This gets updated below. */
5868                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5869                 .ops            = &tracing_pipe_buf_ops,
5870                 .spd_release    = tracing_spd_release_pipe,
5871         };
5872         ssize_t ret;
5873         size_t rem;
5874         unsigned int i;
5875
5876         if (splice_grow_spd(pipe, &spd))
5877                 return -ENOMEM;
5878
5879         mutex_lock(&iter->mutex);
5880
5881         if (iter->trace->splice_read) {
5882                 ret = iter->trace->splice_read(iter, filp,
5883                                                ppos, pipe, len, flags);
5884                 if (ret)
5885                         goto out_err;
5886         }
5887
5888         ret = tracing_wait_pipe(filp);
5889         if (ret <= 0)
5890                 goto out_err;
5891
5892         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5893                 ret = -EFAULT;
5894                 goto out_err;
5895         }
5896
5897         trace_event_read_lock();
5898         trace_access_lock(iter->cpu_file);
5899
5900         /* Fill as many pages as possible. */
5901         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5902                 spd.pages[i] = alloc_page(GFP_KERNEL);
5903                 if (!spd.pages[i])
5904                         break;
5905
5906                 rem = tracing_fill_pipe_page(rem, iter);
5907
5908                 /* Copy the data into the page, so we can start over. */
5909                 ret = trace_seq_to_buffer(&iter->seq,
5910                                           page_address(spd.pages[i]),
5911                                           trace_seq_used(&iter->seq));
5912                 if (ret < 0) {
5913                         __free_page(spd.pages[i]);
5914                         break;
5915                 }
5916                 spd.partial[i].offset = 0;
5917                 spd.partial[i].len = trace_seq_used(&iter->seq);
5918
5919                 trace_seq_init(&iter->seq);
5920         }
5921
5922         trace_access_unlock(iter->cpu_file);
5923         trace_event_read_unlock();
5924         mutex_unlock(&iter->mutex);
5925
5926         spd.nr_pages = i;
5927
5928         if (i)
5929                 ret = splice_to_pipe(pipe, &spd);
5930         else
5931                 ret = 0;
5932 out:
5933         splice_shrink_spd(&spd);
5934         return ret;
5935
5936 out_err:
5937         mutex_unlock(&iter->mutex);
5938         goto out;
5939 }
5940
5941 static ssize_t
5942 tracing_entries_read(struct file *filp, char __user *ubuf,
5943                      size_t cnt, loff_t *ppos)
5944 {
5945         struct inode *inode = file_inode(filp);
5946         struct trace_array *tr = inode->i_private;
5947         int cpu = tracing_get_cpu(inode);
5948         char buf[64];
5949         int r = 0;
5950         ssize_t ret;
5951
5952         mutex_lock(&trace_types_lock);
5953
5954         if (cpu == RING_BUFFER_ALL_CPUS) {
5955                 int cpu, buf_size_same;
5956                 unsigned long size;
5957
5958                 size = 0;
5959                 buf_size_same = 1;
5960                 /* check if all cpu sizes are same */
5961                 for_each_tracing_cpu(cpu) {
5962                         /* fill in the size from first enabled cpu */
5963                         if (size == 0)
5964                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5965                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5966                                 buf_size_same = 0;
5967                                 break;
5968                         }
5969                 }
5970
5971                 if (buf_size_same) {
5972                         if (!ring_buffer_expanded)
5973                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5974                                             size >> 10,
5975                                             trace_buf_size >> 10);
5976                         else
5977                                 r = sprintf(buf, "%lu\n", size >> 10);
5978                 } else
5979                         r = sprintf(buf, "X\n");
5980         } else
5981                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5982
5983         mutex_unlock(&trace_types_lock);
5984
5985         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5986         return ret;
5987 }
5988
5989 static ssize_t
5990 tracing_entries_write(struct file *filp, const char __user *ubuf,
5991                       size_t cnt, loff_t *ppos)
5992 {
5993         struct inode *inode = file_inode(filp);
5994         struct trace_array *tr = inode->i_private;
5995         unsigned long val;
5996         int ret;
5997
5998         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5999         if (ret)
6000                 return ret;
6001
6002         /* must have at least 1 entry */
6003         if (!val)
6004                 return -EINVAL;
6005
6006         /* value is in KB */
6007         val <<= 10;
6008         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6009         if (ret < 0)
6010                 return ret;
6011
6012         *ppos += cnt;
6013
6014         return cnt;
6015 }
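/*
 * Usage sketch for the entries handlers above (paths and file names are
 * assumptions based on the usual tracefs layout): the written value is in
 * KiB and is shifted left by 10 before resizing, either per CPU or for all
 * CPUs at once.
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb             # all CPUs
 *   echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */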
6016
6017 static ssize_t
6018 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6019                                 size_t cnt, loff_t *ppos)
6020 {
6021         struct trace_array *tr = filp->private_data;
6022         char buf[64];
6023         int r, cpu;
6024         unsigned long size = 0, expanded_size = 0;
6025
6026         mutex_lock(&trace_types_lock);
6027         for_each_tracing_cpu(cpu) {
6028                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6029                 if (!ring_buffer_expanded)
6030                         expanded_size += trace_buf_size >> 10;
6031         }
6032         if (ring_buffer_expanded)
6033                 r = sprintf(buf, "%lu\n", size);
6034         else
6035                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6036         mutex_unlock(&trace_types_lock);
6037
6038         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6039 }
6040
6041 static ssize_t
6042 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6043                           size_t cnt, loff_t *ppos)
6044 {
6045         /*
6046          * There is no need to read what the user has written; this function
6047          * only exists so that "echo" into this file does not return an error.
6048          */
6049
6050         *ppos += cnt;
6051
6052         return cnt;
6053 }
6054
6055 static int
6056 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6057 {
6058         struct trace_array *tr = inode->i_private;
6059
6060         /* disable tracing? */
6061         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6062                 tracer_tracing_off(tr);
6063         /* resize the ring buffer to 0 */
6064         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6065
6066         trace_array_put(tr);
6067
6068         return 0;
6069 }
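/*
 * Usage sketch (illustrative; the tracefs path and "free_buffer" file name
 * are assumptions based on the usual layout): any write is accepted, and
 * closing the file shrinks the ring buffer to zero; if TRACE_ITER_STOP_ON_FREE
 * is set in the trace flags, tracing is turned off first.
 *
 *   echo 1 > /sys/kernel/tracing/free_buffer
 */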
6070
6071 static ssize_t
6072 tracing_mark_write(struct file *filp, const char __user *ubuf,
6073                                         size_t cnt, loff_t *fpos)
6074 {
6075         struct trace_array *tr = filp->private_data;
6076         struct ring_buffer_event *event;
6077         struct ring_buffer *buffer;
6078         struct print_entry *entry;
6079         unsigned long irq_flags;
6080         const char faulted[] = "<faulted>";
6081         ssize_t written;
6082         int size;
6083         int len;
6084
6085 /* Used in tracing_mark_raw_write() as well */
6086 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6087
6088         if (tracing_disabled)
6089                 return -EINVAL;
6090
6091         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6092                 return -EINVAL;
6093
6094         if (cnt > TRACE_BUF_SIZE)
6095                 cnt = TRACE_BUF_SIZE;
6096
6097         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6098
6099         local_save_flags(irq_flags);
6100         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6101
6102         /* If less than "<faulted>", then make sure we can still add that */
6103         if (cnt < FAULTED_SIZE)
6104                 size += FAULTED_SIZE - cnt;
6105
6106         buffer = tr->trace_buffer.buffer;
6107         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6108                                             irq_flags, preempt_count());
6109         if (unlikely(!event))
6110                 /* Ring buffer disabled, return as if not open for write */
6111                 return -EBADF;
6112
6113         entry = ring_buffer_event_data(event);
6114         entry->ip = _THIS_IP_;
6115
6116         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6117         if (len) {
6118                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6119                 cnt = FAULTED_SIZE;
6120                 written = -EFAULT;
6121         } else
6122                 written = cnt;
6123         len = cnt;
6124
6125         if (entry->buf[cnt - 1] != '\n') {
6126                 entry->buf[cnt] = '\n';
6127                 entry->buf[cnt + 1] = '\0';
6128         } else
6129                 entry->buf[cnt] = '\0';
6130
6131         __buffer_unlock_commit(buffer, event);
6132
6133         if (written > 0)
6134                 *fpos += written;
6135
6136         return written;
6137 }
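/*
 * A minimal user-space sketch of writing to the file backed by
 * tracing_mark_write() above (the tracefs path is an assumption, and error
 * handling is omitted for brevity):
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *   if (fd >= 0) {
 *           write(fd, "hello from user space", 21);
 *           close(fd);
 *   }
 *
 * The handler appends a '\n' if the payload does not already end with one,
 * and substitutes "<faulted>" if the user buffer cannot be copied.
 */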
6138
6139 /* Limit it for now to 3K (including tag) */
6140 #define RAW_DATA_MAX_SIZE (1024*3)
6141
6142 static ssize_t
6143 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6144                                         size_t cnt, loff_t *fpos)
6145 {
6146         struct trace_array *tr = filp->private_data;
6147         struct ring_buffer_event *event;
6148         struct ring_buffer *buffer;
6149         struct raw_data_entry *entry;
6150         const char faulted[] = "<faulted>";
6151         unsigned long irq_flags;
6152         ssize_t written;
6153         int size;
6154         int len;
6155
6156 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6157
6158         if (tracing_disabled)
6159                 return -EINVAL;
6160
6161         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6162                 return -EINVAL;
6163
6164         /* The marker must at least have a tag id */
6165         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6166                 return -EINVAL;
6167
6168         if (cnt > TRACE_BUF_SIZE)
6169                 cnt = TRACE_BUF_SIZE;
6170
6171         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6172
6173         local_save_flags(irq_flags);
6174         size = sizeof(*entry) + cnt;
6175         if (cnt < FAULT_SIZE_ID)
6176                 size += FAULT_SIZE_ID - cnt;
6177
6178         buffer = tr->trace_buffer.buffer;
6179         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6180                                             irq_flags, preempt_count());
6181         if (!event)
6182                 /* Ring buffer disabled, return as if not open for write */
6183                 return -EBADF;
6184
6185         entry = ring_buffer_event_data(event);
6186
6187         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6188         if (len) {
6189                 entry->id = -1;
6190                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6191                 written = -EFAULT;
6192         } else
6193                 written = cnt;
6194
6195         __buffer_unlock_commit(buffer, event);
6196
6197         if (written > 0)
6198                 *fpos += written;
6199
6200         return written;
6201 }
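/*
 * Sketch of the raw marker payload expected by the handler above: the first
 * sizeof(unsigned int) bytes are a tag id, followed by opaque data, up to
 * RAW_DATA_MAX_SIZE in total. Illustrative only; the struct name is local to
 * this example and "trace_marker_raw" is the usual tracefs file name:
 *
 *   struct my_raw_marker {
 *           unsigned int id;
 *           char         data[32];
 *   } m = { .id = 42, .data = "payload" };
 *
 *   write(fd, &m, sizeof(m));    // fd opened on trace_marker_raw
 */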
6202
6203 static int tracing_clock_show(struct seq_file *m, void *v)
6204 {
6205         struct trace_array *tr = m->private;
6206         int i;
6207
6208         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6209                 seq_printf(m,
6210                         "%s%s%s%s", i ? " " : "",
6211                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6212                         i == tr->clock_id ? "]" : "");
6213         seq_putc(m, '\n');
6214
6215         return 0;
6216 }
6217
6218 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6219 {
6220         int i;
6221
6222         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6223                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6224                         break;
6225         }
6226         if (i == ARRAY_SIZE(trace_clocks))
6227                 return -EINVAL;
6228
6229         mutex_lock(&trace_types_lock);
6230
6231         tr->clock_id = i;
6232
6233         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6234
6235         /*
6236          * New clock may not be consistent with the previous clock.
6237          * Reset the buffer so that it doesn't have incomparable timestamps.
6238          */
6239         tracing_reset_online_cpus(&tr->trace_buffer);
6240
6241 #ifdef CONFIG_TRACER_MAX_TRACE
6242         if (tr->max_buffer.buffer)
6243                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6244         tracing_reset_online_cpus(&tr->max_buffer);
6245 #endif
6246
6247         mutex_unlock(&trace_types_lock);
6248
6249         return 0;
6250 }
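/*
 * Usage sketch (illustrative; clock names come from the trace_clocks[] table
 * printed by tracing_clock_show(), and the tracefs path and "trace_clock"
 * file name are assumptions based on the usual layout):
 *
 *   cat /sys/kernel/tracing/trace_clock           # current clock in [brackets]
 *   echo global > /sys/kernel/tracing/trace_clock
 *
 * Note that switching clocks resets the per-CPU buffers so that entries with
 * incomparable timestamps are not mixed.
 */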
6251
6252 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6253                                    size_t cnt, loff_t *fpos)
6254 {
6255         struct seq_file *m = filp->private_data;
6256         struct trace_array *tr = m->private;
6257         char buf[64];
6258         const char *clockstr;
6259         int ret;
6260
6261         if (cnt >= sizeof(buf))
6262                 return -EINVAL;
6263
6264         if (copy_from_user(buf, ubuf, cnt))
6265                 return -EFAULT;
6266
6267         buf[cnt] = 0;
6268
6269         clockstr = strstrip(buf);
6270
6271         ret = tracing_set_clock(tr, clockstr);
6272         if (ret)
6273                 return ret;
6274
6275         *fpos += cnt;
6276
6277         return cnt;
6278 }
6279
6280 static int tracing_clock_open(struct inode *inode, struct file *file)
6281 {
6282         struct trace_array *tr = inode->i_private;
6283         int ret;
6284
6285         if (tracing_disabled)
6286                 return -ENODEV;
6287
6288         if (trace_array_get(tr))
6289                 return -ENODEV;
6290
6291         ret = single_open(file, tracing_clock_show, inode->i_private);
6292         if (ret < 0)
6293                 trace_array_put(tr);
6294
6295         return ret;
6296 }
6297
6298 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6299 {
6300         struct trace_array *tr = m->private;
6301
6302         mutex_lock(&trace_types_lock);
6303
6304         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6305                 seq_puts(m, "delta [absolute]\n");
6306         else
6307                 seq_puts(m, "[delta] absolute\n");
6308
6309         mutex_unlock(&trace_types_lock);
6310
6311         return 0;
6312 }
6313
6314 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6315 {
6316         struct trace_array *tr = inode->i_private;
6317         int ret;
6318
6319         if (tracing_disabled)
6320                 return -ENODEV;
6321
6322         if (trace_array_get(tr))
6323                 return -ENODEV;
6324
6325         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6326         if (ret < 0)
6327                 trace_array_put(tr);
6328
6329         return ret;
6330 }
6331
6332 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6333 {
6334         int ret = 0;
6335
6336         mutex_lock(&trace_types_lock);
6337
6338         if (abs && tr->time_stamp_abs_ref++)
6339                 goto out;
6340
6341         if (!abs) {
6342                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6343                         ret = -EINVAL;
6344                         goto out;
6345                 }
6346
6347                 if (--tr->time_stamp_abs_ref)
6348                         goto out;
6349         }
6350
6351         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6352
6353 #ifdef CONFIG_TRACER_MAX_TRACE
6354         if (tr->max_buffer.buffer)
6355                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6356 #endif
6357  out:
6358         mutex_unlock(&trace_types_lock);
6359
6360         return ret;
6361 }
6362
6363 struct ftrace_buffer_info {
6364         struct trace_iterator   iter;
6365         void                    *spare;
6366         unsigned int            spare_cpu;
6367         unsigned int            read;
6368 };
6369
6370 #ifdef CONFIG_TRACER_SNAPSHOT
6371 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6372 {
6373         struct trace_array *tr = inode->i_private;
6374         struct trace_iterator *iter;
6375         struct seq_file *m;
6376         int ret = 0;
6377
6378         if (trace_array_get(tr) < 0)
6379                 return -ENODEV;
6380
6381         if (file->f_mode & FMODE_READ) {
6382                 iter = __tracing_open(inode, file, true);
6383                 if (IS_ERR(iter))
6384                         ret = PTR_ERR(iter);
6385         } else {
6386                 /* Writes still need the seq_file to hold the private data */
6387                 ret = -ENOMEM;
6388                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6389                 if (!m)
6390                         goto out;
6391                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6392                 if (!iter) {
6393                         kfree(m);
6394                         goto out;
6395                 }
6396                 ret = 0;
6397
6398                 iter->tr = tr;
6399                 iter->trace_buffer = &tr->max_buffer;
6400                 iter->cpu_file = tracing_get_cpu(inode);
6401                 m->private = iter;
6402                 file->private_data = m;
6403         }
6404 out:
6405         if (ret < 0)
6406                 trace_array_put(tr);
6407
6408         return ret;
6409 }
6410
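/*
 * Writing to the "snapshot" file controls the snapshot buffer:
 *   echo 0 > snapshot   - free the snapshot buffer (not allowed on the
 *                         per-cpu snapshot files)
 *   echo 1 > snapshot   - allocate the snapshot buffer if needed and swap
 *                         it with the live buffer
 *   any other value     - clear the snapshot buffer without freeing it
 * (paths are relative to the tracefs mount point, e.g. /sys/kernel/tracing)
 */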
6411 static ssize_t
6412 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6413                        loff_t *ppos)
6414 {
6415         struct seq_file *m = filp->private_data;
6416         struct trace_iterator *iter = m->private;
6417         struct trace_array *tr = iter->tr;
6418         unsigned long val;
6419         int ret;
6420
6421         ret = tracing_update_buffers();
6422         if (ret < 0)
6423                 return ret;
6424
6425         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6426         if (ret)
6427                 return ret;
6428
6429         mutex_lock(&trace_types_lock);
6430
6431         if (tr->current_trace->use_max_tr) {
6432                 ret = -EBUSY;
6433                 goto out;
6434         }
6435
6436         switch (val) {
6437         case 0:
6438                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6439                         ret = -EINVAL;
6440                         break;
6441                 }
6442                 if (tr->allocated_snapshot)
6443                         free_snapshot(tr);
6444                 break;
6445         case 1:
6446 /* Only allow per-cpu swap if the ring buffer supports it */
6447 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6448                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6449                         ret = -EINVAL;
6450                         break;
6451                 }
6452 #endif
6453                 if (!tr->allocated_snapshot) {
6454                         ret = alloc_snapshot(tr);
6455                         if (ret < 0)
6456                                 break;
6457                 }
6458                 local_irq_disable();
6459                 /* Now, we're going to swap */
6460                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6461                         update_max_tr(tr, current, smp_processor_id());
6462                 else
6463                         update_max_tr_single(tr, current, iter->cpu_file);
6464                 local_irq_enable();
6465                 break;
6466         default:
6467                 if (tr->allocated_snapshot) {
6468                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6469                                 tracing_reset_online_cpus(&tr->max_buffer);
6470                         else
6471                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6472                 }
6473                 break;
6474         }
6475
6476         if (ret >= 0) {
6477                 *ppos += cnt;
6478                 ret = cnt;
6479         }
6480 out:
6481         mutex_unlock(&trace_types_lock);
6482         return ret;
6483 }
6484
6485 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6486 {
6487         struct seq_file *m = file->private_data;
6488         int ret;
6489
6490         ret = tracing_release(inode, file);
6491
6492         if (file->f_mode & FMODE_READ)
6493                 return ret;
6494
6495         /* If write only, the seq_file is just a stub */
6496         if (m)
6497                 kfree(m->private);
6498         kfree(m);
6499
6500         return 0;
6501 }
6502
6503 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6504 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6505                                     size_t count, loff_t *ppos);
6506 static int tracing_buffers_release(struct inode *inode, struct file *file);
6507 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6508                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6509
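/*
 * Open handler for the per-cpu "snapshot_raw" files: reuse the raw buffer
 * open path but point the iterator at the snapshot (max) buffer, and refuse
 * to open while the current tracer manages the max buffer itself.
 */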
6510 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6511 {
6512         struct ftrace_buffer_info *info;
6513         int ret;
6514
6515         ret = tracing_buffers_open(inode, filp);
6516         if (ret < 0)
6517                 return ret;
6518
6519         info = filp->private_data;
6520
6521         if (info->iter.trace->use_max_tr) {
6522                 tracing_buffers_release(inode, filp);
6523                 return -EBUSY;
6524         }
6525
6526         info->iter.snapshot = true;
6527         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6528
6529         return ret;
6530 }
6531
6532 #endif /* CONFIG_TRACER_SNAPSHOT */
6533
6534
6535 static const struct file_operations tracing_thresh_fops = {
6536         .open           = tracing_open_generic,
6537         .read           = tracing_thresh_read,
6538         .write          = tracing_thresh_write,
6539         .llseek         = generic_file_llseek,
6540 };
6541
6542 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6543 static const struct file_operations tracing_max_lat_fops = {
6544         .open           = tracing_open_generic,
6545         .read           = tracing_max_lat_read,
6546         .write          = tracing_max_lat_write,
6547         .llseek         = generic_file_llseek,
6548 };
6549 #endif
6550
6551 static const struct file_operations set_tracer_fops = {
6552         .open           = tracing_open_generic,
6553         .read           = tracing_set_trace_read,
6554         .write          = tracing_set_trace_write,
6555         .llseek         = generic_file_llseek,
6556 };
6557
6558 static const struct file_operations tracing_pipe_fops = {
6559         .open           = tracing_open_pipe,
6560         .poll           = tracing_poll_pipe,
6561         .read           = tracing_read_pipe,
6562         .splice_read    = tracing_splice_read_pipe,
6563         .release        = tracing_release_pipe,
6564         .llseek         = no_llseek,
6565 };
6566
6567 static const struct file_operations tracing_entries_fops = {
6568         .open           = tracing_open_generic_tr,
6569         .read           = tracing_entries_read,
6570         .write          = tracing_entries_write,
6571         .llseek         = generic_file_llseek,
6572         .release        = tracing_release_generic_tr,
6573 };
6574
6575 static const struct file_operations tracing_total_entries_fops = {
6576         .open           = tracing_open_generic_tr,
6577         .read           = tracing_total_entries_read,
6578         .llseek         = generic_file_llseek,
6579         .release        = tracing_release_generic_tr,
6580 };
6581
6582 static const struct file_operations tracing_free_buffer_fops = {
6583         .open           = tracing_open_generic_tr,
6584         .write          = tracing_free_buffer_write,
6585         .release        = tracing_free_buffer_release,
6586 };
6587
6588 static const struct file_operations tracing_mark_fops = {
6589         .open           = tracing_open_generic_tr,
6590         .write          = tracing_mark_write,
6591         .llseek         = generic_file_llseek,
6592         .release        = tracing_release_generic_tr,
6593 };
6594
6595 static const struct file_operations tracing_mark_raw_fops = {
6596         .open           = tracing_open_generic_tr,
6597         .write          = tracing_mark_raw_write,
6598         .llseek         = generic_file_llseek,
6599         .release        = tracing_release_generic_tr,
6600 };
6601
6602 static const struct file_operations trace_clock_fops = {
6603         .open           = tracing_clock_open,
6604         .read           = seq_read,
6605         .llseek         = seq_lseek,
6606         .release        = tracing_single_release_tr,
6607         .write          = tracing_clock_write,
6608 };
6609
6610 static const struct file_operations trace_time_stamp_mode_fops = {
6611         .open           = tracing_time_stamp_mode_open,
6612         .read           = seq_read,
6613         .llseek         = seq_lseek,
6614         .release        = tracing_single_release_tr,
6615 };
6616
6617 #ifdef CONFIG_TRACER_SNAPSHOT
6618 static const struct file_operations snapshot_fops = {
6619         .open           = tracing_snapshot_open,
6620         .read           = seq_read,
6621         .write          = tracing_snapshot_write,
6622         .llseek         = tracing_lseek,
6623         .release        = tracing_snapshot_release,
6624 };
6625
6626 static const struct file_operations snapshot_raw_fops = {
6627         .open           = snapshot_raw_open,
6628         .read           = tracing_buffers_read,
6629         .release        = tracing_buffers_release,
6630         .splice_read    = tracing_buffers_splice_read,
6631         .llseek         = no_llseek,
6632 };
6633
6634 #endif /* CONFIG_TRACER_SNAPSHOT */
6635
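/*
 * Open handler for trace_pipe_raw: readers get raw, page-sized chunks of the
 * ring buffer.  A reference is taken on both the trace array and the current
 * tracer so neither can go away while the file is open.
 */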
6636 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6637 {
6638         struct trace_array *tr = inode->i_private;
6639         struct ftrace_buffer_info *info;
6640         int ret;
6641
6642         if (tracing_disabled)
6643                 return -ENODEV;
6644
6645         if (trace_array_get(tr) < 0)
6646                 return -ENODEV;
6647
6648         info = kzalloc(sizeof(*info), GFP_KERNEL);
6649         if (!info) {
6650                 trace_array_put(tr);
6651                 return -ENOMEM;
6652         }
6653
6654         mutex_lock(&trace_types_lock);
6655
6656         info->iter.tr           = tr;
6657         info->iter.cpu_file     = tracing_get_cpu(inode);
6658         info->iter.trace        = tr->current_trace;
6659         info->iter.trace_buffer = &tr->trace_buffer;
6660         info->spare             = NULL;
6661         /* Force reading ring buffer for first read */
6662         info->read              = (unsigned int)-1;
6663
6664         filp->private_data = info;
6665
6666         tr->current_trace->ref++;
6667
6668         mutex_unlock(&trace_types_lock);
6669
6670         ret = nonseekable_open(inode, filp);
6671         if (ret < 0)
6672                 trace_array_put(tr);
6673
6674         return ret;
6675 }
6676
6677 static __poll_t
6678 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6679 {
6680         struct ftrace_buffer_info *info = filp->private_data;
6681         struct trace_iterator *iter = &info->iter;
6682
6683         return trace_poll(iter, filp, poll_table);
6684 }
6685
6686 static ssize_t
6687 tracing_buffers_read(struct file *filp, char __user *ubuf,
6688                      size_t count, loff_t *ppos)
6689 {
6690         struct ftrace_buffer_info *info = filp->private_data;
6691         struct trace_iterator *iter = &info->iter;
6692         ssize_t ret = 0;
6693         ssize_t size;
6694
6695         if (!count)
6696                 return 0;
6697
6698 #ifdef CONFIG_TRACER_MAX_TRACE
6699         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6700                 return -EBUSY;
6701 #endif
6702
6703         if (!info->spare) {
6704                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6705                                                           iter->cpu_file);
6706                 if (IS_ERR(info->spare)) {
6707                         ret = PTR_ERR(info->spare);
6708                         info->spare = NULL;
6709                 } else {
6710                         info->spare_cpu = iter->cpu_file;
6711                 }
6712         }
6713         if (!info->spare)
6714                 return ret;
6715
6716         /* Do we have previous read data to read? */
6717         if (info->read < PAGE_SIZE)
6718                 goto read;
6719
6720  again:
6721         trace_access_lock(iter->cpu_file);
6722         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6723                                     &info->spare,
6724                                     count,
6725                                     iter->cpu_file, 0);
6726         trace_access_unlock(iter->cpu_file);
6727
6728         if (ret < 0) {
6729                 if (trace_empty(iter)) {
6730                         if ((filp->f_flags & O_NONBLOCK))
6731                                 return -EAGAIN;
6732
6733                         ret = wait_on_pipe(iter, false);
6734                         if (ret)
6735                                 return ret;
6736
6737                         goto again;
6738                 }
6739                 return 0;
6740         }
6741
6742         info->read = 0;
6743  read:
6744         size = PAGE_SIZE - info->read;
6745         if (size > count)
6746                 size = count;
6747
6748         ret = copy_to_user(ubuf, info->spare + info->read, size);
6749         if (ret == size)
6750                 return -EFAULT;
6751
6752         size -= ret;
6753
6754         *ppos += size;
6755         info->read += size;
6756
6757         return size;
6758 }
6759
6760 static int tracing_buffers_release(struct inode *inode, struct file *file)
6761 {
6762         struct ftrace_buffer_info *info = file->private_data;
6763         struct trace_iterator *iter = &info->iter;
6764
6765         mutex_lock(&trace_types_lock);
6766
6767         iter->tr->current_trace->ref--;
6768
6769         __trace_array_put(iter->tr);
6770
6771         if (info->spare)
6772                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6773                                            info->spare_cpu, info->spare);
6774         kfree(info);
6775
6776         mutex_unlock(&trace_types_lock);
6777
6778         return 0;
6779 }
6780
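/*
 * Reference-counted handle tying a ring-buffer read page to its buffer and
 * CPU.  The splice path below stores it in pipe_buffer->private and the page
 * is returned to the ring buffer when the last reference is dropped.
 */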
6781 struct buffer_ref {
6782         struct ring_buffer      *buffer;
6783         void                    *page;
6784         int                     cpu;
6785         int                     ref;
6786 };
6787
6788 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6789                                     struct pipe_buffer *buf)
6790 {
6791         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6792
6793         if (--ref->ref)
6794                 return;
6795
6796         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6797         kfree(ref);
6798         buf->private = 0;
6799 }
6800
6801 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6802                                 struct pipe_buffer *buf)
6803 {
6804         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6805
6806         ref->ref++;
6807 }
6808
6809 /* Pipe buffer operations for a buffer. */
6810 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6811         .can_merge              = 0,
6812         .confirm                = generic_pipe_buf_confirm,
6813         .release                = buffer_pipe_buf_release,
6814         .steal                  = generic_pipe_buf_steal,
6815         .get                    = buffer_pipe_buf_get,
6816 };
6817
6818 /*
6819  * Callback from splice_to_pipe(): release any pages left in the spd
6820  * if we errored out while filling the pipe.
6821  */
6822 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6823 {
6824         struct buffer_ref *ref =
6825                 (struct buffer_ref *)spd->partial[i].private;
6826
6827         if (--ref->ref)
6828                 return;
6829
6830         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6831         kfree(ref);
6832         spd->partial[i].private = 0;
6833 }
6834
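/*
 * Splice implementation for trace_pipe_raw: hand whole ring-buffer pages to
 * the pipe without copying.  The file offset must be page aligned and the
 * length is rounded down to whole pages; every page spliced out is wrapped
 * in a buffer_ref so it goes back to the ring buffer once the pipe is done
 * with it.
 */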
6835 static ssize_t
6836 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6837                             struct pipe_inode_info *pipe, size_t len,
6838                             unsigned int flags)
6839 {
6840         struct ftrace_buffer_info *info = file->private_data;
6841         struct trace_iterator *iter = &info->iter;
6842         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6843         struct page *pages_def[PIPE_DEF_BUFFERS];
6844         struct splice_pipe_desc spd = {
6845                 .pages          = pages_def,
6846                 .partial        = partial_def,
6847                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6848                 .ops            = &buffer_pipe_buf_ops,
6849                 .spd_release    = buffer_spd_release,
6850         };
6851         struct buffer_ref *ref;
6852         int entries, i;
6853         ssize_t ret = 0;
6854
6855 #ifdef CONFIG_TRACER_MAX_TRACE
6856         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6857                 return -EBUSY;
6858 #endif
6859
6860         if (*ppos & (PAGE_SIZE - 1))
6861                 return -EINVAL;
6862
6863         if (len & (PAGE_SIZE - 1)) {
6864                 if (len < PAGE_SIZE)
6865                         return -EINVAL;
6866                 len &= PAGE_MASK;
6867         }
6868
6869         if (splice_grow_spd(pipe, &spd))
6870                 return -ENOMEM;
6871
6872  again:
6873         trace_access_lock(iter->cpu_file);
6874         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6875
6876         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6877                 struct page *page;
6878                 int r;
6879
6880                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6881                 if (!ref) {
6882                         ret = -ENOMEM;
6883                         break;
6884                 }
6885
6886                 ref->ref = 1;
6887                 ref->buffer = iter->trace_buffer->buffer;
6888                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6889                 if (IS_ERR(ref->page)) {
6890                         ret = PTR_ERR(ref->page);
6891                         ref->page = NULL;
6892                         kfree(ref);
6893                         break;
6894                 }
6895                 ref->cpu = iter->cpu_file;
6896
6897                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6898                                           len, iter->cpu_file, 1);
6899                 if (r < 0) {
6900                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6901                                                    ref->page);
6902                         kfree(ref);
6903                         break;
6904                 }
6905
6906                 page = virt_to_page(ref->page);
6907
6908                 spd.pages[i] = page;
6909                 spd.partial[i].len = PAGE_SIZE;
6910                 spd.partial[i].offset = 0;
6911                 spd.partial[i].private = (unsigned long)ref;
6912                 spd.nr_pages++;
6913                 *ppos += PAGE_SIZE;
6914
6915                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6916         }
6917
6918         trace_access_unlock(iter->cpu_file);
6919         spd.nr_pages = i;
6920
6921         /* did we read anything? */
6922         if (!spd.nr_pages) {
6923                 if (ret)
6924                         goto out;
6925
6926                 ret = -EAGAIN;
6927                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6928                         goto out;
6929
6930                 ret = wait_on_pipe(iter, true);
6931                 if (ret)
6932                         goto out;
6933
6934                 goto again;
6935         }
6936
6937         ret = splice_to_pipe(pipe, &spd);
6938 out:
6939         splice_shrink_spd(&spd);
6940
6941         return ret;
6942 }
6943
6944 static const struct file_operations tracing_buffers_fops = {
6945         .open           = tracing_buffers_open,
6946         .read           = tracing_buffers_read,
6947         .poll           = tracing_buffers_poll,
6948         .release        = tracing_buffers_release,
6949         .splice_read    = tracing_buffers_splice_read,
6950         .llseek         = no_llseek,
6951 };
6952
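/*
 * Per-cpu "stats" file: report entry, overrun, byte, dropped-event and
 * read-event counts for one CPU's buffer, plus its oldest and current
 * timestamps.
 */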
6953 static ssize_t
6954 tracing_stats_read(struct file *filp, char __user *ubuf,
6955                    size_t count, loff_t *ppos)
6956 {
6957         struct inode *inode = file_inode(filp);
6958         struct trace_array *tr = inode->i_private;
6959         struct trace_buffer *trace_buf = &tr->trace_buffer;
6960         int cpu = tracing_get_cpu(inode);
6961         struct trace_seq *s;
6962         unsigned long cnt;
6963         unsigned long long t;
6964         unsigned long usec_rem;
6965
6966         s = kmalloc(sizeof(*s), GFP_KERNEL);
6967         if (!s)
6968                 return -ENOMEM;
6969
6970         trace_seq_init(s);
6971
6972         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6973         trace_seq_printf(s, "entries: %ld\n", cnt);
6974
6975         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6976         trace_seq_printf(s, "overrun: %ld\n", cnt);
6977
6978         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6979         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6980
6981         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6982         trace_seq_printf(s, "bytes: %ld\n", cnt);
6983
6984         if (trace_clocks[tr->clock_id].in_ns) {
6985                 /* local or global for trace_clock */
6986                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6987                 usec_rem = do_div(t, USEC_PER_SEC);
6988                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6989                                                                 t, usec_rem);
6990
6991                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6992                 usec_rem = do_div(t, USEC_PER_SEC);
6993                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6994         } else {
6995                 /* counter or tsc mode for trace_clock */
6996                 trace_seq_printf(s, "oldest event ts: %llu\n",
6997                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6998
6999                 trace_seq_printf(s, "now ts: %llu\n",
7000                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7001         }
7002
7003         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7004         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7005
7006         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7007         trace_seq_printf(s, "read events: %ld\n", cnt);
7008
7009         count = simple_read_from_buffer(ubuf, count, ppos,
7010                                         s->buffer, trace_seq_used(s));
7011
7012         kfree(s);
7013
7014         return count;
7015 }
7016
7017 static const struct file_operations tracing_stats_fops = {
7018         .open           = tracing_open_generic_tr,
7019         .read           = tracing_stats_read,
7020         .llseek         = generic_file_llseek,
7021         .release        = tracing_release_generic_tr,
7022 };
7023
7024 #ifdef CONFIG_DYNAMIC_FTRACE
7025
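/*
 * Dump the unsigned long counter that the file's private data points at;
 * used for the dynamic ftrace statistics exposed under tracefs.
 */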
7026 static ssize_t
7027 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7028                   size_t cnt, loff_t *ppos)
7029 {
7030         unsigned long *p = filp->private_data;
7031         char buf[64]; /* Not too big for a shallow stack */
7032         int r;
7033
7034         r = scnprintf(buf, 63, "%ld", *p);
7035         buf[r++] = '\n';
7036
7037         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7038 }
7039
7040 static const struct file_operations tracing_dyn_info_fops = {
7041         .open           = tracing_open_generic,
7042         .read           = tracing_read_dyn_info,
7043         .llseek         = generic_file_llseek,
7044 };
7045 #endif /* CONFIG_DYNAMIC_FTRACE */
7046
7047 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7048 static void
7049 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7050                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7051                 void *data)
7052 {
7053         tracing_snapshot_instance(tr);
7054 }
7055
7056 static void
7057 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7058                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7059                       void *data)
7060 {
7061         struct ftrace_func_mapper *mapper = data;
7062         long *count = NULL;
7063
7064         if (mapper)
7065                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7066
7067         if (count) {
7068
7069                 if (*count <= 0)
7070                         return;
7071
7072                 (*count)--;
7073         }
7074
7075         tracing_snapshot_instance(tr);
7076 }
7077
7078 static int
7079 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7080                       struct ftrace_probe_ops *ops, void *data)
7081 {
7082         struct ftrace_func_mapper *mapper = data;
7083         long *count = NULL;
7084
7085         seq_printf(m, "%ps:", (void *)ip);
7086
7087         seq_puts(m, "snapshot");
7088
7089         if (mapper)
7090                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7091
7092         if (count)
7093                 seq_printf(m, ":count=%ld\n", *count);
7094         else
7095                 seq_puts(m, ":unlimited\n");
7096
7097         return 0;
7098 }
7099
7100 static int
7101 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7102                      unsigned long ip, void *init_data, void **data)
7103 {
7104         struct ftrace_func_mapper *mapper = *data;
7105
7106         if (!mapper) {
7107                 mapper = allocate_ftrace_func_mapper();
7108                 if (!mapper)
7109                         return -ENOMEM;
7110                 *data = mapper;
7111         }
7112
7113         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7114 }
7115
7116 static void
7117 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7118                      unsigned long ip, void *data)
7119 {
7120         struct ftrace_func_mapper *mapper = data;
7121
7122         if (!ip) {
7123                 if (!mapper)
7124                         return;
7125                 free_ftrace_func_mapper(mapper, NULL);
7126                 return;
7127         }
7128
7129         ftrace_func_mapper_remove_ip(mapper, ip);
7130 }
7131
7132 static struct ftrace_probe_ops snapshot_probe_ops = {
7133         .func                   = ftrace_snapshot,
7134         .print                  = ftrace_snapshot_print,
7135 };
7136
7137 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7138         .func                   = ftrace_count_snapshot,
7139         .print                  = ftrace_snapshot_print,
7140         .init                   = ftrace_snapshot_init,
7141         .free                   = ftrace_snapshot_free,
7142 };
7143
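/*
 * Handle the "snapshot" command written to set_ftrace_filter, e.g.
 *   echo 'do_IRQ:snapshot' >> set_ftrace_filter
 *   echo 'do_IRQ:snapshot:5' >> set_ftrace_filter
 * (the function name above is only an example).  Without a count the
 * snapshot is taken on every hit of the traced function; with a count it is
 * taken at most that many times.  Prefixing the glob with '!' removes the
 * probe again.
 */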
7144 static int
7145 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7146                                char *glob, char *cmd, char *param, int enable)
7147 {
7148         struct ftrace_probe_ops *ops;
7149         void *count = (void *)-1;
7150         char *number;
7151         int ret;
7152
7153         if (!tr)
7154                 return -ENODEV;
7155
7156         /* hash funcs only work with set_ftrace_filter */
7157         if (!enable)
7158                 return -EINVAL;
7159
7160         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7161
7162         if (glob[0] == '!')
7163                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7164
7165         if (!param)
7166                 goto out_reg;
7167
7168         number = strsep(&param, ":");
7169
7170         if (!strlen(number))
7171                 goto out_reg;
7172
7173         /*
7174          * We use the callback data field (which is a pointer)
7175          * as our counter.
7176          */
7177         ret = kstrtoul(number, 0, (unsigned long *)&count);
7178         if (ret)
7179                 return ret;
7180
7181  out_reg:
7182         ret = alloc_snapshot(tr);
7183         if (ret < 0)
7184                 goto out;
7185
7186         ret = register_ftrace_function_probe(glob, tr, ops, count);
7187
7188  out:
7189         return ret < 0 ? ret : 0;
7190 }
7191
7192 static struct ftrace_func_command ftrace_snapshot_cmd = {
7193         .name                   = "snapshot",
7194         .func                   = ftrace_trace_snapshot_callback,
7195 };
7196
7197 static __init int register_snapshot_cmd(void)
7198 {
7199         return register_ftrace_command(&ftrace_snapshot_cmd);
7200 }
7201 #else
7202 static inline __init int register_snapshot_cmd(void) { return 0; }
7203 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7204
7205 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7206 {
7207         if (WARN_ON(!tr->dir))
7208                 return ERR_PTR(-ENODEV);
7209
7210         /* Top directory uses NULL as the parent */
7211         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7212                 return NULL;
7213
7214         /* All sub buffers have a descriptor */
7215         return tr->dir;
7216 }
7217
7218 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7219 {
7220         struct dentry *d_tracer;
7221
7222         if (tr->percpu_dir)
7223                 return tr->percpu_dir;
7224
7225         d_tracer = tracing_get_dentry(tr);
7226         if (IS_ERR(d_tracer))
7227                 return NULL;
7228
7229         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7230
7231         WARN_ONCE(!tr->percpu_dir,
7232                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7233
7234         return tr->percpu_dir;
7235 }
7236
7237 static struct dentry *
7238 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7239                       void *data, long cpu, const struct file_operations *fops)
7240 {
7241         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7242
7243         if (ret) /* See tracing_get_cpu() */
7244                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7245         return ret;
7246 }
7247
7248 static void
7249 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7250 {
7251         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7252         struct dentry *d_cpu;
7253         char cpu_dir[30]; /* 30 characters should be more than enough */
7254
7255         if (!d_percpu)
7256                 return;
7257
7258         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7259         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7260         if (!d_cpu) {
7261                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7262                 return;
7263         }
7264
7265         /* per cpu trace_pipe */
7266         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7267                                 tr, cpu, &tracing_pipe_fops);
7268
7269         /* per cpu trace */
7270         trace_create_cpu_file("trace", 0644, d_cpu,
7271                                 tr, cpu, &tracing_fops);
7272
7273         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7274                                 tr, cpu, &tracing_buffers_fops);
7275
7276         trace_create_cpu_file("stats", 0444, d_cpu,
7277                                 tr, cpu, &tracing_stats_fops);
7278
7279         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7280                                 tr, cpu, &tracing_entries_fops);
7281
7282 #ifdef CONFIG_TRACER_SNAPSHOT
7283         trace_create_cpu_file("snapshot", 0644, d_cpu,
7284                                 tr, cpu, &snapshot_fops);
7285
7286         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7287                                 tr, cpu, &snapshot_raw_fops);
7288 #endif
7289 }
7290
7291 #ifdef CONFIG_FTRACE_SELFTEST
7292 /* Let selftest have access to static functions in this file */
7293 #include "trace_selftest.c"
7294 #endif
7295
7296 static ssize_t
7297 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7298                         loff_t *ppos)
7299 {
7300         struct trace_option_dentry *topt = filp->private_data;
7301         char *buf;
7302
7303         if (topt->flags->val & topt->opt->bit)
7304                 buf = "1\n";
7305         else
7306                 buf = "0\n";
7307
7308         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7309 }
7310
7311 static ssize_t
7312 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7313                          loff_t *ppos)
7314 {
7315         struct trace_option_dentry *topt = filp->private_data;
7316         unsigned long val;
7317         int ret;
7318
7319         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7320         if (ret)
7321                 return ret;
7322
7323         if (val != 0 && val != 1)
7324                 return -EINVAL;
7325
7326         if (!!(topt->flags->val & topt->opt->bit) != val) {
7327                 mutex_lock(&trace_types_lock);
7328                 ret = __set_tracer_option(topt->tr, topt->flags,
7329                                           topt->opt, !val);
7330                 mutex_unlock(&trace_types_lock);
7331                 if (ret)
7332                         return ret;
7333         }
7334
7335         *ppos += cnt;
7336
7337         return cnt;
7338 }
7339
7340
7341 static const struct file_operations trace_options_fops = {
7342         .open = tracing_open_generic,
7343         .read = trace_options_read,
7344         .write = trace_options_write,
7345         .llseek = generic_file_llseek,
7346 };
7347
7348 /*
7349  * In order to pass in both the trace_array descriptor and the index
7350  * to the flag that the trace option file represents, the trace_array
7351  * has a character array of trace_flags_index[], which holds the index
7352  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7353  * The address of this character array is passed to the flag option file
7354  * read/write callbacks.
7355  *
7356  * In order to extract both the index and the trace_array descriptor,
7357  * get_tr_index() uses the following algorithm.
7358  *
7359  *   idx = *ptr;
7360  *
7361  * Dereferencing that pointer yields the index itself, because the
7362  * array is initialized so that index[i] == i.
7363  *
7364  * Then, to get the trace_array descriptor, we subtract that index
7365  * from the pointer to land on the start of the index array.
7366  *
7367  *   ptr - idx == &index[0]
7368  *
7369  * Then a simple container_of() from that pointer gets us to the
7370  * trace_array descriptor.
7371  */
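/*
 * For example, the option file for flag bit 3 is handed
 * &tr->trace_flags_index[3], which stores the value 3: subtracting 3 from
 * that pointer lands on &tr->trace_flags_index[0], and container_of() on
 * that address recovers the enclosing trace_array.
 */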
7372 static void get_tr_index(void *data, struct trace_array **ptr,
7373                          unsigned int *pindex)
7374 {
7375         *pindex = *(unsigned char *)data;
7376
7377         *ptr = container_of(data - *pindex, struct trace_array,
7378                             trace_flags_index);
7379 }
7380
7381 static ssize_t
7382 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7383                         loff_t *ppos)
7384 {
7385         void *tr_index = filp->private_data;
7386         struct trace_array *tr;
7387         unsigned int index;
7388         char *buf;
7389
7390         get_tr_index(tr_index, &tr, &index);
7391
7392         if (tr->trace_flags & (1 << index))
7393                 buf = "1\n";
7394         else
7395                 buf = "0\n";
7396
7397         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7398 }
7399
7400 static ssize_t
7401 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7402                          loff_t *ppos)
7403 {
7404         void *tr_index = filp->private_data;
7405         struct trace_array *tr;
7406         unsigned int index;
7407         unsigned long val;
7408         int ret;
7409
7410         get_tr_index(tr_index, &tr, &index);
7411
7412         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7413         if (ret)
7414                 return ret;
7415
7416         if (val != 0 && val != 1)
7417                 return -EINVAL;
7418
7419         mutex_lock(&trace_types_lock);
7420         ret = set_tracer_flag(tr, 1 << index, val);
7421         mutex_unlock(&trace_types_lock);
7422
7423         if (ret < 0)
7424                 return ret;
7425
7426         *ppos += cnt;
7427
7428         return cnt;
7429 }
7430
7431 static const struct file_operations trace_options_core_fops = {
7432         .open = tracing_open_generic,
7433         .read = trace_options_core_read,
7434         .write = trace_options_core_write,
7435         .llseek = generic_file_llseek,
7436 };
7437
7438 struct dentry *trace_create_file(const char *name,
7439                                  umode_t mode,
7440                                  struct dentry *parent,
7441                                  void *data,
7442                                  const struct file_operations *fops)
7443 {
7444         struct dentry *ret;
7445
7446         ret = tracefs_create_file(name, mode, parent, data, fops);
7447         if (!ret)
7448                 pr_warn("Could not create tracefs '%s' entry\n", name);
7449
7450         return ret;
7451 }
7452
7453
7454 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7455 {
7456         struct dentry *d_tracer;
7457
7458         if (tr->options)
7459                 return tr->options;
7460
7461         d_tracer = tracing_get_dentry(tr);
7462         if (IS_ERR(d_tracer))
7463                 return NULL;
7464
7465         tr->options = tracefs_create_dir("options", d_tracer);
7466         if (!tr->options) {
7467                 pr_warn("Could not create tracefs directory 'options'\n");
7468                 return NULL;
7469         }
7470
7471         return tr->options;
7472 }
7473
7474 static void
7475 create_trace_option_file(struct trace_array *tr,
7476                          struct trace_option_dentry *topt,
7477                          struct tracer_flags *flags,
7478                          struct tracer_opt *opt)
7479 {
7480         struct dentry *t_options;
7481
7482         t_options = trace_options_init_dentry(tr);
7483         if (!t_options)
7484                 return;
7485
7486         topt->flags = flags;
7487         topt->opt = opt;
7488         topt->tr = tr;
7489
7490         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7491                                     &trace_options_fops);
7492
7493 }
7494
7495 static void
7496 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7497 {
7498         struct trace_option_dentry *topts;
7499         struct trace_options *tr_topts;
7500         struct tracer_flags *flags;
7501         struct tracer_opt *opts;
7502         int cnt;
7503         int i;
7504
7505         if (!tracer)
7506                 return;
7507
7508         flags = tracer->flags;
7509
7510         if (!flags || !flags->opts)
7511                 return;
7512
7513         /*
7514          * If this is an instance, only create flags for tracers
7515          * the instance may have.
7516          */
7517         if (!trace_ok_for_array(tracer, tr))
7518                 return;
7519
7520         for (i = 0; i < tr->nr_topts; i++) {
7521                 /* Make sure there are no duplicate flags. */
7522                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7523                         return;
7524         }
7525
7526         opts = flags->opts;
7527
7528         for (cnt = 0; opts[cnt].name; cnt++)
7529                 ;
7530
7531         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7532         if (!topts)
7533                 return;
7534
7535         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7536                             GFP_KERNEL);
7537         if (!tr_topts) {
7538                 kfree(topts);
7539                 return;
7540         }
7541
7542         tr->topts = tr_topts;
7543         tr->topts[tr->nr_topts].tracer = tracer;
7544         tr->topts[tr->nr_topts].topts = topts;
7545         tr->nr_topts++;
7546
7547         for (cnt = 0; opts[cnt].name; cnt++) {
7548                 create_trace_option_file(tr, &topts[cnt], flags,
7549                                          &opts[cnt]);
7550                 WARN_ONCE(topts[cnt].entry == NULL,
7551                           "Failed to create trace option: %s",
7552                           opts[cnt].name);
7553         }
7554 }
7555
7556 static struct dentry *
7557 create_trace_option_core_file(struct trace_array *tr,
7558                               const char *option, long index)
7559 {
7560         struct dentry *t_options;
7561
7562         t_options = trace_options_init_dentry(tr);
7563         if (!t_options)
7564                 return NULL;
7565
7566         return trace_create_file(option, 0644, t_options,
7567                                  (void *)&tr->trace_flags_index[index],
7568                                  &trace_options_core_fops);
7569 }
7570
7571 static void create_trace_options_dir(struct trace_array *tr)
7572 {
7573         struct dentry *t_options;
7574         bool top_level = tr == &global_trace;
7575         int i;
7576
7577         t_options = trace_options_init_dentry(tr);
7578         if (!t_options)
7579                 return;
7580
7581         for (i = 0; trace_options[i]; i++) {
7582                 if (top_level ||
7583                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7584                         create_trace_option_core_file(tr, trace_options[i], i);
7585         }
7586 }
7587
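/*
 * Handlers for the "tracing_on" file: reading reports whether the ring
 * buffer is currently recording; writing 0 stops recording (and calls the
 * tracer's ->stop), any non-zero value turns it back on (and calls ->start).
 */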
7588 static ssize_t
7589 rb_simple_read(struct file *filp, char __user *ubuf,
7590                size_t cnt, loff_t *ppos)
7591 {
7592         struct trace_array *tr = filp->private_data;
7593         char buf[64];
7594         int r;
7595
7596         r = tracer_tracing_is_on(tr);
7597         r = sprintf(buf, "%d\n", r);
7598
7599         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7600 }
7601
7602 static ssize_t
7603 rb_simple_write(struct file *filp, const char __user *ubuf,
7604                 size_t cnt, loff_t *ppos)
7605 {
7606         struct trace_array *tr = filp->private_data;
7607         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7608         unsigned long val;
7609         int ret;
7610
7611         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7612         if (ret)
7613                 return ret;
7614
7615         if (buffer) {
7616                 mutex_lock(&trace_types_lock);
7617                 if (val) {
7618                         tracer_tracing_on(tr);
7619                         if (tr->current_trace->start)
7620                                 tr->current_trace->start(tr);
7621                 } else {
7622                         tracer_tracing_off(tr);
7623                         if (tr->current_trace->stop)
7624                                 tr->current_trace->stop(tr);
7625                 }
7626                 mutex_unlock(&trace_types_lock);
7627         }
7628
7629         (*ppos)++;
7630
7631         return cnt;
7632 }
7633
7634 static const struct file_operations rb_simple_fops = {
7635         .open           = tracing_open_generic_tr,
7636         .read           = rb_simple_read,
7637         .write          = rb_simple_write,
7638         .release        = tracing_release_generic_tr,
7639         .llseek         = default_llseek,
7640 };
7641
7642 struct dentry *trace_instance_dir;
7643
7644 static void
7645 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7646
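/*
 * Allocate the ring buffer and the per-cpu data for one trace_buffer,
 * honoring the instance's overwrite flag when setting up the buffer.
 */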
7647 static int
7648 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7649 {
7650         enum ring_buffer_flags rb_flags;
7651
7652         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7653
7654         buf->tr = tr;
7655
7656         buf->buffer = ring_buffer_alloc(size, rb_flags);
7657         if (!buf->buffer)
7658                 return -ENOMEM;
7659
7660         buf->data = alloc_percpu(struct trace_array_cpu);
7661         if (!buf->data) {
7662                 ring_buffer_free(buf->buffer);
7663                 buf->buffer = NULL;
7664                 return -ENOMEM;
7665         }
7666
7667         /* Allocate the first page for all buffers */
7668         set_buffer_entries(&tr->trace_buffer,
7669                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7670
7671         return 0;
7672 }
7673
7674 static int allocate_trace_buffers(struct trace_array *tr, int size)
7675 {
7676         int ret;
7677
7678         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7679         if (ret)
7680                 return ret;
7681
7682 #ifdef CONFIG_TRACER_MAX_TRACE
7683         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7684                                     allocate_snapshot ? size : 1);
7685         if (WARN_ON(ret)) {
7686                 ring_buffer_free(tr->trace_buffer.buffer);
7687                 tr->trace_buffer.buffer = NULL;
7688                 free_percpu(tr->trace_buffer.data);
7689                 tr->trace_buffer.data = NULL;
7690                 return -ENOMEM;
7691         }
7692         tr->allocated_snapshot = allocate_snapshot;
7693
7694         /*
7695          * Only the top level trace array gets its snapshot allocated
7696          * from the kernel command line.
7697          */
7698         allocate_snapshot = false;
7699 #endif
7700         return 0;
7701 }
7702
7703 static void free_trace_buffer(struct trace_buffer *buf)
7704 {
7705         if (buf->buffer) {
7706                 ring_buffer_free(buf->buffer);
7707                 buf->buffer = NULL;
7708                 free_percpu(buf->data);
7709                 buf->data = NULL;
7710         }
7711 }
7712
7713 static void free_trace_buffers(struct trace_array *tr)
7714 {
7715         if (!tr)
7716                 return;
7717
7718         free_trace_buffer(&tr->trace_buffer);
7719
7720 #ifdef CONFIG_TRACER_MAX_TRACE
7721         free_trace_buffer(&tr->max_buffer);
7722 #endif
7723 }
7724
7725 static void init_trace_flags_index(struct trace_array *tr)
7726 {
7727         int i;
7728
7729         /* Used by the trace options files */
7730         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7731                 tr->trace_flags_index[i] = i;
7732 }
7733
7734 static void __update_tracer_options(struct trace_array *tr)
7735 {
7736         struct tracer *t;
7737
7738         for (t = trace_types; t; t = t->next)
7739                 add_tracer_options(tr, t);
7740 }
7741
7742 static void update_tracer_options(struct trace_array *tr)
7743 {
7744         mutex_lock(&trace_types_lock);
7745         __update_tracer_options(tr);
7746         mutex_unlock(&trace_types_lock);
7747 }
7748
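/*
 * Called by tracefs when a new directory is created under instances/:
 * build a fresh trace_array with its own buffers, event directory and
 * control files, and add it to the ftrace_trace_arrays list.
 */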
7749 static int instance_mkdir(const char *name)
7750 {
7751         struct trace_array *tr;
7752         int ret;
7753
7754         mutex_lock(&event_mutex);
7755         mutex_lock(&trace_types_lock);
7756
7757         ret = -EEXIST;
7758         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7759                 if (tr->name && strcmp(tr->name, name) == 0)
7760                         goto out_unlock;
7761         }
7762
7763         ret = -ENOMEM;
7764         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7765         if (!tr)
7766                 goto out_unlock;
7767
7768         tr->name = kstrdup(name, GFP_KERNEL);
7769         if (!tr->name)
7770                 goto out_free_tr;
7771
7772         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7773                 goto out_free_tr;
7774
7775         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7776
7777         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7778
7779         raw_spin_lock_init(&tr->start_lock);
7780
7781         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7782
7783         tr->current_trace = &nop_trace;
7784
7785         INIT_LIST_HEAD(&tr->systems);
7786         INIT_LIST_HEAD(&tr->events);
7787         INIT_LIST_HEAD(&tr->hist_vars);
7788
7789         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7790                 goto out_free_tr;
7791
7792         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7793         if (!tr->dir)
7794                 goto out_free_tr;
7795
7796         ret = event_trace_add_tracer(tr->dir, tr);
7797         if (ret) {
7798                 tracefs_remove_recursive(tr->dir);
7799                 goto out_free_tr;
7800         }
7801
7802         ftrace_init_trace_array(tr);
7803
7804         init_tracer_tracefs(tr, tr->dir);
7805         init_trace_flags_index(tr);
7806         __update_tracer_options(tr);
7807
7808         list_add(&tr->list, &ftrace_trace_arrays);
7809
7810         mutex_unlock(&trace_types_lock);
7811         mutex_unlock(&event_mutex);
7812
7813         return 0;
7814
7815  out_free_tr:
7816         free_trace_buffers(tr);
7817         free_cpumask_var(tr->tracing_cpumask);
7818         kfree(tr->name);
7819         kfree(tr);
7820
7821  out_unlock:
7822         mutex_unlock(&trace_types_lock);
7823         mutex_unlock(&event_mutex);
7824
7825         return ret;
7826
7827 }
7828
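/*
 * Counterpart of instance_mkdir(): tear down an instance when its directory
 * is removed.  Fails with -EBUSY while the instance or its current tracer
 * still has references.
 */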
7829 static int instance_rmdir(const char *name)
7830 {
7831         struct trace_array *tr;
7832         int found = 0;
7833         int ret;
7834         int i;
7835
7836         mutex_lock(&event_mutex);
7837         mutex_lock(&trace_types_lock);
7838
7839         ret = -ENODEV;
7840         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7841                 if (tr->name && strcmp(tr->name, name) == 0) {
7842                         found = 1;
7843                         break;
7844                 }
7845         }
7846         if (!found)
7847                 goto out_unlock;
7848
7849         ret = -EBUSY;
7850         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7851                 goto out_unlock;
7852
7853         list_del(&tr->list);
7854
7855         /* Disable all the flags that were enabled coming in */
7856         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7857                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7858                         set_tracer_flag(tr, 1 << i, 0);
7859         }
7860
7861         tracing_set_nop(tr);
7862         clear_ftrace_function_probes(tr);
7863         event_trace_del_tracer(tr);
7864         ftrace_clear_pids(tr);
7865         ftrace_destroy_function_files(tr);
7866         tracefs_remove_recursive(tr->dir);
7867         free_trace_buffers(tr);
7868
7869         for (i = 0; i < tr->nr_topts; i++) {
7870                 kfree(tr->topts[i].topts);
7871         }
7872         kfree(tr->topts);
7873
7874         free_cpumask_var(tr->tracing_cpumask);
7875         kfree(tr->name);
7876         kfree(tr);
7877
7878         ret = 0;
7879
7880  out_unlock:
7881         mutex_unlock(&trace_types_lock);
7882         mutex_unlock(&event_mutex);
7883
7884         return ret;
7885 }
7886
7887 static __init void create_trace_instances(struct dentry *d_tracer)
7888 {
7889         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7890                                                          instance_mkdir,
7891                                                          instance_rmdir);
7892         if (WARN_ON(!trace_instance_dir))
7893                 return;
7894 }
7895
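/*
 * Populate a trace array's tracefs directory with the standard control files
 * (current_tracer, trace, trace_pipe, buffer_size_kb, trace_marker,
 * tracing_on, ...), the per-cpu subdirectories and the option files.
 */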
7896 static void
7897 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7898 {
7899         int cpu;
7900
7901         trace_create_file("available_tracers", 0444, d_tracer,
7902                         tr, &show_traces_fops);
7903
7904         trace_create_file("current_tracer", 0644, d_tracer,
7905                         tr, &set_tracer_fops);
7906
7907         trace_create_file("tracing_cpumask", 0644, d_tracer,
7908                           tr, &tracing_cpumask_fops);
7909
7910         trace_create_file("trace_options", 0644, d_tracer,
7911                           tr, &tracing_iter_fops);
7912
7913         trace_create_file("trace", 0644, d_tracer,
7914                           tr, &tracing_fops);
7915
7916         trace_create_file("trace_pipe", 0444, d_tracer,
7917                           tr, &tracing_pipe_fops);
7918
7919         trace_create_file("buffer_size_kb", 0644, d_tracer,
7920                           tr, &tracing_entries_fops);
7921
7922         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7923                           tr, &tracing_total_entries_fops);
7924
7925         trace_create_file("free_buffer", 0200, d_tracer,
7926                           tr, &tracing_free_buffer_fops);
7927
7928         trace_create_file("trace_marker", 0220, d_tracer,
7929                           tr, &tracing_mark_fops);
7930
7931         trace_create_file("trace_marker_raw", 0220, d_tracer,
7932                           tr, &tracing_mark_raw_fops);
7933
7934         trace_create_file("trace_clock", 0644, d_tracer, tr,
7935                           &trace_clock_fops);
7936
7937         trace_create_file("tracing_on", 0644, d_tracer,
7938                           tr, &rb_simple_fops);
7939
7940         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7941                           &trace_time_stamp_mode_fops);
7942
7943         create_trace_options_dir(tr);
7944
7945 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7946         trace_create_file("tracing_max_latency", 0644, d_tracer,
7947                         &tr->max_latency, &tracing_max_lat_fops);
7948 #endif
7949
7950         if (ftrace_create_function_files(tr, d_tracer))
7951                 WARN(1, "Could not allocate function filter files");
7952
7953 #ifdef CONFIG_TRACER_SNAPSHOT
7954         trace_create_file("snapshot", 0644, d_tracer,
7955                           tr, &snapshot_fops);
7956 #endif
7957
7958         for_each_tracing_cpu(cpu)
7959                 tracing_init_tracefs_percpu(tr, cpu);
7960
7961         ftrace_init_tracefs(tr, d_tracer);
7962 }
7963
7964 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7965 {
7966         struct vfsmount *mnt;
7967         struct file_system_type *type;
7968
7969         /*
7970          * To maintain backward compatibility for tools that mount
7971          * debugfs to get to the tracing facility, tracefs is automatically
7972          * mounted to the debugfs/tracing directory.
7973          */
7974         type = get_fs_type("tracefs");
7975         if (!type)
7976                 return NULL;
7977         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7978         put_filesystem(type);
7979         if (IS_ERR(mnt))
7980                 return NULL;
7981         mntget(mnt);
7982
7983         return mnt;
7984 }
7985
7986 /**
7987  * tracing_init_dentry - initialize top level trace array
7988  *
7989  * This is called when creating files or directories in the tracing
7990  * directory. It is called via fs_initcall() by the boot-up code and
7991  * returns the dentry of the top level tracing directory.
7992  */
7993 struct dentry *tracing_init_dentry(void)
7994 {
7995         struct trace_array *tr = &global_trace;
7996
7997         /* The top level trace array uses NULL as parent */
7998         if (tr->dir)
7999                 return NULL;
8000
8001         if (WARN_ON(!tracefs_initialized()) ||
8002                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8003                  WARN_ON(!debugfs_initialized())))
8004                 return ERR_PTR(-ENODEV);
8005
8006         /*
8007          * As there may still be users that expect the tracing
8008          * files to exist in debugfs/tracing, we must automount
8009          * the tracefs file system there, so older tools still
8010          * work with the newer kernel.
8011          */
8012         tr->dir = debugfs_create_automount("tracing", NULL,
8013                                            trace_automount, NULL);
8014         if (!tr->dir) {
8015                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8016                 return ERR_PTR(-ENOMEM);
8017         }
8018
8019         return NULL;
8020 }
8021
8022 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8023 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8024
8025 static void __init trace_eval_init(void)
8026 {
8027         int len;
8028
8029         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8030         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8031 }
8032
8033 #ifdef CONFIG_MODULES
8034 static void trace_module_add_evals(struct module *mod)
8035 {
8036         if (!mod->num_trace_evals)
8037                 return;
8038
8039         /*
8040          * Modules with bad taint do not have events created; do
8041          * not bother with their eval maps either.
8042          */
8043         if (trace_module_has_bad_taint(mod))
8044                 return;
8045
8046         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8047 }
8048
8049 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8050 static void trace_module_remove_evals(struct module *mod)
8051 {
8052         union trace_eval_map_item *map;
8053         union trace_eval_map_item **last = &trace_eval_maps;
8054
8055         if (!mod->num_trace_evals)
8056                 return;
8057
8058         mutex_lock(&trace_eval_mutex);
8059
8060         map = trace_eval_maps;
8061
8062         while (map) {
8063                 if (map->head.mod == mod)
8064                         break;
8065                 map = trace_eval_jmp_to_tail(map);
8066                 last = &map->tail.next;
8067                 map = map->tail.next;
8068         }
8069         if (!map)
8070                 goto out;
8071
8072         *last = trace_eval_jmp_to_tail(map)->tail.next;
8073         kfree(map);
8074  out:
8075         mutex_unlock(&trace_eval_mutex);
8076 }
8077 #else
8078 static inline void trace_module_remove_evals(struct module *mod) { }
8079 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8080
8081 static int trace_module_notify(struct notifier_block *self,
8082                                unsigned long val, void *data)
8083 {
8084         struct module *mod = data;
8085
8086         switch (val) {
8087         case MODULE_STATE_COMING:
8088                 trace_module_add_evals(mod);
8089                 break;
8090         case MODULE_STATE_GOING:
8091                 trace_module_remove_evals(mod);
8092                 break;
8093         }
8094
8095         return 0;
8096 }
8097
8098 static struct notifier_block trace_module_nb = {
8099         .notifier_call = trace_module_notify,
8100         .priority = 0,
8101 };
8102 #endif /* CONFIG_MODULES */
8103
8104 static __init int tracer_init_tracefs(void)
8105 {
8106         struct dentry *d_tracer;
8107
8108         trace_access_lock_init();
8109
8110         d_tracer = tracing_init_dentry();
8111         if (IS_ERR(d_tracer))
8112                 return 0;
8113
8114         init_tracer_tracefs(&global_trace, d_tracer);
8115         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8116
8117         trace_create_file("tracing_thresh", 0644, d_tracer,
8118                         &global_trace, &tracing_thresh_fops);
8119
8120         trace_create_file("README", 0444, d_tracer,
8121                         NULL, &tracing_readme_fops);
8122
8123         trace_create_file("saved_cmdlines", 0444, d_tracer,
8124                         NULL, &tracing_saved_cmdlines_fops);
8125
8126         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8127                           NULL, &tracing_saved_cmdlines_size_fops);
8128
8129         trace_create_file("saved_tgids", 0444, d_tracer,
8130                         NULL, &tracing_saved_tgids_fops);
8131
8132         trace_eval_init();
8133
8134         trace_create_eval_file(d_tracer);
8135
8136 #ifdef CONFIG_MODULES
8137         register_module_notifier(&trace_module_nb);
8138 #endif
8139
8140 #ifdef CONFIG_DYNAMIC_FTRACE
8141         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8142                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8143 #endif
8144
8145         create_trace_instances(d_tracer);
8146
8147         update_tracer_options(&global_trace);
8148
8149         return 0;
8150 }
8151
8152 static int trace_panic_handler(struct notifier_block *this,
8153                                unsigned long event, void *unused)
8154 {
8155         if (ftrace_dump_on_oops)
8156                 ftrace_dump(ftrace_dump_on_oops);
8157         return NOTIFY_OK;
8158 }
8159
8160 static struct notifier_block trace_panic_notifier = {
8161         .notifier_call  = trace_panic_handler,
8162         .next           = NULL,
8163         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8164 };
8165
8166 static int trace_die_handler(struct notifier_block *self,
8167                              unsigned long val,
8168                              void *data)
8169 {
8170         switch (val) {
8171         case DIE_OOPS:
8172                 if (ftrace_dump_on_oops)
8173                         ftrace_dump(ftrace_dump_on_oops);
8174                 break;
8175         default:
8176                 break;
8177         }
8178         return NOTIFY_OK;
8179 }
8180
8181 static struct notifier_block trace_die_notifier = {
8182         .notifier_call = trace_die_handler,
8183         .priority = 200
8184 };
8185
8186 /*
8187  * printk is set to a max of 1024; we really don't need it that big.
8188  * Nothing should be printing 1000 characters anyway.
8189  */
8190 #define TRACE_MAX_PRINT         1000
8191
8192 /*
8193  * Define here KERN_TRACE so that we have one place to modify
8194  * it if we decide to change what log level the ftrace dump
8195  * should be at.
8196  */
8197 #define KERN_TRACE              KERN_EMERG
8198
8199 void
8200 trace_printk_seq(struct trace_seq *s)
8201 {
8202         /* Probably should print a warning here. */
8203         if (s->seq.len >= TRACE_MAX_PRINT)
8204                 s->seq.len = TRACE_MAX_PRINT;
8205
8206         /*
8207          * More paranoid code. Although the buffer size is set to
8208          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8209          * an extra layer of protection.
8210          */
8211         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8212                 s->seq.len = s->seq.size - 1;
8213
8214         /* should be zero terminated, but we are paranoid. */
8215         s->buffer[s->seq.len] = 0;
8216
8217         printk(KERN_TRACE "%s", s->buffer);
8218
8219         trace_seq_init(s);
8220 }
8221
8222 void trace_init_global_iter(struct trace_iterator *iter)
8223 {
8224         iter->tr = &global_trace;
8225         iter->trace = iter->tr->current_trace;
8226         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8227         iter->trace_buffer = &global_trace.trace_buffer;
8228
8229         if (iter->trace && iter->trace->open)
8230                 iter->trace->open(iter);
8231
8232         /* Annotate start of buffers if we had overruns */
8233         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8234                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8235
8236         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8237         if (trace_clocks[iter->tr->clock_id].in_ns)
8238                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8239 }
8240
8241 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8242 {
8243         /* use static because iter can be a bit big for the stack */
8244         static struct trace_iterator iter;
8245         static atomic_t dump_running;
8246         struct trace_array *tr = &global_trace;
8247         unsigned int old_userobj;
8248         unsigned long flags;
8249         int cnt = 0, cpu;
8250
8251         /* Only allow one dump user at a time. */
8252         if (atomic_inc_return(&dump_running) != 1) {
8253                 atomic_dec(&dump_running);
8254                 return;
8255         }
8256
8257         /*
8258          * Always turn off tracing when we dump.
8259          * We don't need to show trace output of what happens
8260          * between multiple crashes.
8261          *
8262          * If the user does a sysrq-z, then they can re-enable
8263          * tracing with echo 1 > tracing_on.
8264          */
8265         tracing_off();
8266
8267         local_irq_save(flags);
8268
8269         /* Simulate the iterator */
8270         trace_init_global_iter(&iter);
8271
8272         for_each_tracing_cpu(cpu) {
8273                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8274         }
8275
8276         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8277
8278         /* don't look at user memory in panic mode */
8279         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8280
8281         switch (oops_dump_mode) {
8282         case DUMP_ALL:
8283                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8284                 break;
8285         case DUMP_ORIG:
8286                 iter.cpu_file = raw_smp_processor_id();
8287                 break;
8288         case DUMP_NONE:
8289                 goto out_enable;
8290         default:
8291                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8292                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8293         }
8294
8295         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8296
8297         /* Did function tracer already get disabled? */
8298         if (ftrace_is_dead()) {
8299                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8300                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8301         }
8302
8303         /*
8304          * We need to stop all tracing on all CPUs to read
8305          * the next buffer. This is a bit expensive, but is
8306          * not done often. We read all that we can,
8307          * and then release the locks again.
8308          */
8309
8310         while (!trace_empty(&iter)) {
8311
8312                 if (!cnt)
8313                         printk(KERN_TRACE "---------------------------------\n");
8314
8315                 cnt++;
8316
8317                 /* reset all but tr, trace, and overruns */
8318                 memset(&iter.seq, 0,
8319                        sizeof(struct trace_iterator) -
8320                        offsetof(struct trace_iterator, seq));
8321                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8322                 iter.pos = -1;
8323
8324                 if (trace_find_next_entry_inc(&iter) != NULL) {
8325                         int ret;
8326
8327                         ret = print_trace_line(&iter);
8328                         if (ret != TRACE_TYPE_NO_CONSUME)
8329                                 trace_consume(&iter);
8330                 }
8331                 touch_nmi_watchdog();
8332
8333                 trace_printk_seq(&iter.seq);
8334         }
8335
8336         if (!cnt)
8337                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8338         else
8339                 printk(KERN_TRACE "---------------------------------\n");
8340
8341  out_enable:
8342         tr->trace_flags |= old_userobj;
8343
8344         for_each_tracing_cpu(cpu) {
8345                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8346         }
8347         atomic_dec(&dump_running);
8348         local_irq_restore(flags);
8349 }
8350 EXPORT_SYMBOL_GPL(ftrace_dump);
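/*
 * Example (illustrative sketch only): since ftrace_dump() is exported,
 * a debugging module may dump the ftrace ring buffers from its own
 * error path. The trigger condition below is hypothetical.
 *
 *	if (fatal_condition_detected)
 *		ftrace_dump(DUMP_ALL);
 */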
8351
8352 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8353 {
8354         char **argv;
8355         int argc, ret;
8356
8357         argc = 0;
8358         ret = 0;
8359         argv = argv_split(GFP_KERNEL, buf, &argc);
8360         if (!argv)
8361                 return -ENOMEM;
8362
8363         if (argc)
8364                 ret = createfn(argc, argv);
8365
8366         argv_free(argv);
8367
8368         return ret;
8369 }
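/*
 * Example (illustrative sketch only): the createfn callback receives the
 * whitespace-split words of @buf as argc/argv, so a command such as
 * "p myprobe do_sys_open" arrives as argv[0]="p", argv[1]="myprobe",
 * argv[2]="do_sys_open". "example_createfn" is hypothetical.
 *
 *	static int example_createfn(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("command '%s' with %d argument(s)\n",
 *			argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	ret = trace_run_command("p myprobe do_sys_open", example_createfn);
 */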
8370
8371 #define WRITE_BUFSIZE  4096
8372
8373 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8374                                 size_t count, loff_t *ppos,
8375                                 int (*createfn)(int, char **))
8376 {
8377         char *kbuf, *buf, *tmp;
8378         int ret = 0;
8379         size_t done = 0;
8380         size_t size;
8381
8382         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8383         if (!kbuf)
8384                 return -ENOMEM;
8385
8386         while (done < count) {
8387                 size = count - done;
8388
8389                 if (size >= WRITE_BUFSIZE)
8390                         size = WRITE_BUFSIZE - 1;
8391
8392                 if (copy_from_user(kbuf, buffer + done, size)) {
8393                         ret = -EFAULT;
8394                         goto out;
8395                 }
8396                 kbuf[size] = '\0';
8397                 buf = kbuf;
8398                 do {
8399                         tmp = strchr(buf, '\n');
8400                         if (tmp) {
8401                                 *tmp = '\0';
8402                                 size = tmp - buf + 1;
8403                         } else {
8404                                 size = strlen(buf);
8405                                 if (done + size < count) {
8406                                         if (buf != kbuf)
8407                                                 break;
8408                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8409                                         pr_warn("Line length is too long: Should be less than %d\n",
8410                                                 WRITE_BUFSIZE - 2);
8411                                         ret = -EINVAL;
8412                                         goto out;
8413                                 }
8414                         }
8415                         done += size;
8416
8417                         /* Remove comments */
8418                         tmp = strchr(buf, '#');
8419
8420                         if (tmp)
8421                                 *tmp = '\0';
8422
8423                         ret = trace_run_command(buf, createfn);
8424                         if (ret)
8425                                 goto out;
8426                         buf += size;
8427
8428                 } while (done < count);
8429         }
8430         ret = done;
8431
8432 out:
8433         kfree(kbuf);
8434
8435         return ret;
8436 }
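/*
 * Example (illustrative sketch only): a write handler for a dynamic event
 * control file can pass the user buffer straight to
 * trace_parse_run_command(), which splits it into newline separated
 * commands, strips '#' comments and hands each command to the callback.
 * "example_probes_write" and "example_createfn" are hypothetical.
 *
 *	static ssize_t example_probes_write(struct file *file,
 *					    const char __user *buffer,
 *					    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_createfn);
 *	}
 */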
8437
8438 __init static int tracer_alloc_buffers(void)
8439 {
8440         int ring_buf_size;
8441         int ret = -ENOMEM;
8442
8443         /*
8444          * Make sure we don't accidentally add more trace options
8445          * than we have bits for.
8446          */
8447         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8448
8449         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8450                 goto out;
8451
8452         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8453                 goto out_free_buffer_mask;
8454
8455         /* Only allocate trace_printk buffers if a trace_printk exists */
8456         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8457                 /* Must be called before global_trace.buffer is allocated */
8458                 trace_printk_init_buffers();
8459
8460         /* To save memory, keep the ring buffer at its minimum size */
8461         if (ring_buffer_expanded)
8462                 ring_buf_size = trace_buf_size;
8463         else
8464                 ring_buf_size = 1;
8465
8466         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8467         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8468
8469         raw_spin_lock_init(&global_trace.start_lock);
8470
8471         /*
8472          * The prepare callback allocates some memory for the ring buffer. We
8473          * don't free the buffer if the CPU goes down. If we were to free
8474          * the buffer, then the user would lose any trace that was in the
8475          * buffer. The memory will be removed once the "instance" is removed.
8476          */
8477         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8478                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8479                                       NULL);
8480         if (ret < 0)
8481                 goto out_free_cpumask;
8482         /* Used for event triggers */
8483         ret = -ENOMEM;
8484         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8485         if (!temp_buffer)
8486                 goto out_rm_hp_state;
8487
8488         if (trace_create_savedcmd() < 0)
8489                 goto out_free_temp_buffer;
8490
8491         /* TODO: make the number of buffers hot pluggable with CPUs */
8492         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8493                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8494                 WARN_ON(1);
8495                 goto out_free_savedcmd;
8496         }
8497
8498         if (global_trace.buffer_disabled)
8499                 tracing_off();
8500
8501         if (trace_boot_clock) {
8502                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8503                 if (ret < 0)
8504                         pr_warn("Trace clock %s not defined, going back to default\n",
8505                                 trace_boot_clock);
8506         }
8507
8508         /*
8509          * register_tracer() might reference current_trace, so it
8510          * needs to be set before we register anything. This is
8511          * just a bootstrap of current_trace anyway.
8512          */
8513         global_trace.current_trace = &nop_trace;
8514
8515         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8516
8517         ftrace_init_global_array_ops(&global_trace);
8518
8519         init_trace_flags_index(&global_trace);
8520
8521         register_tracer(&nop_trace);
8522
8523         /* Function tracing may start here (via kernel command line) */
8524         init_function_trace();
8525
8526         /* All seems OK, enable tracing */
8527         tracing_disabled = 0;
8528
8529         atomic_notifier_chain_register(&panic_notifier_list,
8530                                        &trace_panic_notifier);
8531
8532         register_die_notifier(&trace_die_notifier);
8533
8534         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8535
8536         INIT_LIST_HEAD(&global_trace.systems);
8537         INIT_LIST_HEAD(&global_trace.events);
8538         INIT_LIST_HEAD(&global_trace.hist_vars);
8539         list_add(&global_trace.list, &ftrace_trace_arrays);
8540
8541         apply_trace_boot_options();
8542
8543         register_snapshot_cmd();
8544
8545         return 0;
8546
8547 out_free_savedcmd:
8548         free_saved_cmdlines_buffer(savedcmd);
8549 out_free_temp_buffer:
8550         ring_buffer_free(temp_buffer);
8551 out_rm_hp_state:
8552         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8553 out_free_cpumask:
8554         free_cpumask_var(global_trace.tracing_cpumask);
8555 out_free_buffer_mask:
8556         free_cpumask_var(tracing_buffer_mask);
8557 out:
8558         return ret;
8559 }
8560
8561 void __init early_trace_init(void)
8562 {
8563         if (tracepoint_printk) {
8564                 tracepoint_print_iter =
8565                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8566                 if (WARN_ON(!tracepoint_print_iter))
8567                         tracepoint_printk = 0;
8568                 else
8569                         static_key_enable(&tracepoint_printk_key.key);
8570         }
8571         tracer_alloc_buffers();
8572 }
8573
8574 void __init trace_init(void)
8575 {
8576         trace_event_init();
8577 }
8578
8579 __init static int clear_boot_tracer(void)
8580 {
8581         /*
8582          * The default bootup tracer name is stored in a buffer that
8583          * lives in an init section. This function is called from a late
8584          * initcall; if the boot tracer was never registered, clear the
8585          * pointer out to prevent later registration from accessing the
8586          * buffer that is about to be freed.
8587          */
8588         if (!default_bootup_tracer)
8589                 return 0;
8590
8591         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8592                default_bootup_tracer);
8593         default_bootup_tracer = NULL;
8594
8595         return 0;
8596 }
8597
8598 fs_initcall(tracer_init_tracefs);
8599 late_initcall_sync(clear_boot_tracer);
8600
8601 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8602 __init static int tracing_set_default_clock(void)
8603 {
8604         /* sched_clock_stable() is determined in late_initcall */
8605         if (!trace_boot_clock && !sched_clock_stable()) {
8606                 printk(KERN_WARNING
8607                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8608                        "If you want to keep using the local clock, then add:\n"
8609                        "  \"trace_clock=local\"\n"
8610                        "on the kernel command line\n");
8611                 tracing_set_clock(&global_trace, "global");
8612         }
8613
8614         return 0;
8615 }
8616 late_initcall_sync(tracing_set_default_clock);
8617 #endif