/*
 * kernel/trace/trace_stack.c
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>

#include <asm/setup.h>

#include "trace.h"

static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
         { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
unsigned stack_trace_index[STACK_TRACE_ENTRIES];

/*
 * Reserve one entry for the passed in ip. This will allow
 * us to remove most or all of the stack size overhead
 * added by the stack tracer itself.
 */
struct stack_trace stack_trace_max = {
        .max_entries            = STACK_TRACE_ENTRIES - 1,
        .entries                = &stack_dump_trace[0],
};

unsigned long stack_trace_max_size;
arch_spinlock_t stack_trace_max_lock =
        (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

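/*
 * Per-CPU recursion guard.  stack_trace_call() increments this and only
 * records a trace when it is the sole user; it is also raised around
 * code that takes stack_trace_max_lock so the ftrace callback cannot
 * deadlock against it.
 */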
DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);

int stack_tracer_enabled;
static int last_stack_tracer_enabled;

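/*
 * Print the currently recorded maximum stack usage, one line per frame.
 * Called below when the stack canary at the end of the task's stack is
 * found to be corrupted, just before calling BUG().
 */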
void stack_trace_print(void)
{
        long i;
        int size;

        pr_emerg("        Depth    Size   Location    (%d entries)\n"
                           "        -----    ----   --------\n",
                           stack_trace_max.nr_entries);

        for (i = 0; i < stack_trace_max.nr_entries; i++) {
                if (stack_dump_trace[i] == ULONG_MAX)
                        break;
                if (i+1 == stack_trace_max.nr_entries ||
                                stack_dump_trace[i+1] == ULONG_MAX)
                        size = stack_trace_index[i];
                else
                        size = stack_trace_index[i] - stack_trace_index[i+1];

                pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_trace_index[i],
                                size, (void *)stack_dump_trace[i]);
        }
}

/*
 * When arch-specific code overrides this function, the following
 * data should be filled in, with stack_trace_max_lock held to
 * prevent concurrent updates:
 *     stack_trace_index[]
 *     stack_trace_max
 *     stack_trace_max_size
 */
void __weak
check_stack(unsigned long ip, unsigned long *stack)
{
        unsigned long this_size, flags;
        unsigned long *p, *top, *start;
        static int tracer_frame;
        int frame_size = ACCESS_ONCE(tracer_frame);
        int i, x;

        this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
        this_size = THREAD_SIZE - this_size;
        /* Remove the frame of the tracer */
        this_size -= frame_size;

        if (this_size <= stack_trace_max_size)
                return;

        /* we do not handle interrupt stacks yet */
        if (!object_is_on_stack(stack))
                return;

        /* Can't do this from NMI context (can cause deadlocks) */
        if (in_nmi())
                return;

        /*
         * There's a slight chance that we are tracing inside the
         * RCU infrastructure, and rcu_irq_enter() will not work
         * as expected.
         */
        if (unlikely(rcu_irq_enter_disabled()))
                return;

        local_irq_save(flags);
        arch_spin_lock(&stack_trace_max_lock);

        /*
         * RCU may not be watching, make it see us.
         * The stack trace code uses rcu_sched.
         */
        rcu_irq_enter();

        /* In case another CPU set the tracer_frame on us */
        if (unlikely(!frame_size))
                this_size -= tracer_frame;

        /* a race could have already updated it */
        if (this_size <= stack_trace_max_size)
                goto out;

        stack_trace_max_size = this_size;

        stack_trace_max.nr_entries = 0;
        stack_trace_max.skip = 3;

        save_stack_trace(&stack_trace_max);

        /* Skip over the overhead of the stack tracer itself */
        for (i = 0; i < stack_trace_max.nr_entries; i++) {
                if (stack_dump_trace[i] == ip)
                        break;
        }

        /*
         * Some archs may not have the passed in ip in the dump.
         * If that happens, we need to show everything.
         */
        if (i == stack_trace_max.nr_entries)
                i = 0;

        /*
         * Now find where in the stack these are.
         */
        x = 0;
        start = stack;
        top = (unsigned long *)
                (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);

        /*
         * Loop through all the entries. One of the entries may
         * for some reason be missing from the stack, so we may
         * have to account for that. If they are all present, this
         * loop will only run once. This code only takes place
         * on a new max, so it is far from a fast path.
         */
        while (i < stack_trace_max.nr_entries) {
                int found = 0;

                stack_trace_index[x] = this_size;
                p = start;

                for (; p < top && i < stack_trace_max.nr_entries; p++) {
                        if (stack_dump_trace[i] == ULONG_MAX)
                                break;
                        /*
                         * The READ_ONCE_NOCHECK is used to let KASAN know that
                         * this is not a stack-out-of-bounds error.
                         */
                        if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
                                stack_dump_trace[x] = stack_dump_trace[i++];
                                this_size = stack_trace_index[x++] =
                                        (top - p) * sizeof(unsigned long);
                                found = 1;
                                /* Start the search from here */
                                start = p + 1;
                                /*
                                 * We do not want to show the overhead
                                 * of the stack tracer stack in the
                                 * max stack. If we haven't figured
                                 * out what that is, then figure it out
                                 * now.
                                 */
                                if (unlikely(!tracer_frame)) {
                                        tracer_frame = (p - stack) *
                                                sizeof(unsigned long);
                                        stack_trace_max_size -= tracer_frame;
                                }
                        }
                }

                if (!found)
                        i++;
        }

        stack_trace_max.nr_entries = x;
        for (; x < i; x++)
                stack_dump_trace[x] = ULONG_MAX;

        if (task_stack_end_corrupted(current)) {
                stack_trace_print();
                BUG();
        }

 out:
        rcu_irq_exit();
        arch_spin_unlock(&stack_trace_max_lock);
        local_irq_restore(flags);
}

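/*
 * The ftrace callback.  This runs on (nearly) every traced function
 * entry: it guards against recursion with the per-CPU counter above,
 * adjusts the ip past the mcount call site, and lets check_stack()
 * decide whether a new maximum was hit.
 */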
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
                 struct ftrace_ops *op, struct pt_regs *pt_regs)
{
        unsigned long stack;

        preempt_disable_notrace();

        /* no atomic needed, this variable is only modified by this CPU */
        __this_cpu_inc(disable_stack_tracer);
        if (__this_cpu_read(disable_stack_tracer) != 1)
                goto out;

        ip += MCOUNT_INSN_SIZE;

        check_stack(ip, &stack);

 out:
        __this_cpu_dec(disable_stack_tracer);
        /* prevent recursion in schedule */
        preempt_enable_notrace();
}

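/* Hooks stack_trace_call() into the function tracer */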
static struct ftrace_ops trace_ops __read_mostly =
{
        .func = stack_trace_call,
        .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};

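/*
 * Handlers for the "stack_max_size" tracefs file.  Reading returns the
 * recorded maximum stack size; writing replaces it (typically with 0 to
 * reset it) under stack_trace_max_lock with the callback locked out on
 * this CPU.
 */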
static ssize_t
stack_max_size_read(struct file *filp, char __user *ubuf,
                    size_t count, loff_t *ppos)
{
        unsigned long *ptr = filp->private_data;
        char buf[64];
        int r;

        r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
        if (r > sizeof(buf))
                r = sizeof(buf);
        return simple_read_from_buffer(ubuf, count, ppos, buf, r);
}

static ssize_t
stack_max_size_write(struct file *filp, const char __user *ubuf,
                     size_t count, loff_t *ppos)
{
        long *ptr = filp->private_data;
        unsigned long val, flags;
        int ret;

        ret = kstrtoul_from_user(ubuf, count, 10, &val);
        if (ret)
                return ret;

        local_irq_save(flags);

        /*
         * In case we trace inside arch_spin_lock() or after it (NMI),
         * we would cause a circular lock, so we also need to increase
         * the percpu disable_stack_tracer here.
         */
        __this_cpu_inc(disable_stack_tracer);

        arch_spin_lock(&stack_trace_max_lock);
        *ptr = val;
        arch_spin_unlock(&stack_trace_max_lock);

        __this_cpu_dec(disable_stack_tracer);
        local_irq_restore(flags);

        return count;
}

static const struct file_operations stack_max_size_fops = {
        .open           = tracing_open_generic,
        .read           = stack_max_size_read,
        .write          = stack_max_size_write,
        .llseek         = default_llseek,
};

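/*
 * seq_file iterator for the "stack_trace" file.  t_start() disables
 * interrupts, bumps the per-CPU recursion counter and takes
 * stack_trace_max_lock so the recorded snapshot cannot change while it
 * is being printed; t_stop() undoes that in reverse order.
 */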
static void *
__next(struct seq_file *m, loff_t *pos)
{
        long n = *pos - 1;

        if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
                return NULL;

        m->private = (void *)n;
        return &m->private;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
        (*pos)++;
        return __next(m, pos);
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
        local_irq_disable();

        __this_cpu_inc(disable_stack_tracer);

        arch_spin_lock(&stack_trace_max_lock);

        if (*pos == 0)
                return SEQ_START_TOKEN;

        return __next(m, pos);
}

static void t_stop(struct seq_file *m, void *p)
{
        arch_spin_unlock(&stack_trace_max_lock);

        __this_cpu_dec(disable_stack_tracer);

        local_irq_enable();
}

static void trace_lookup_stack(struct seq_file *m, long i)
{
        unsigned long addr = stack_dump_trace[i];

        seq_printf(m, "%pS\n", (void *)addr);
}

static void print_disabled(struct seq_file *m)
{
        seq_puts(m, "#\n"
                 "#  Stack tracer disabled\n"
                 "#\n"
                 "# To enable the stack tracer, either add 'stacktrace' to the\n"
                 "# kernel command line\n"
                 "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
                 "#\n");
}

static int t_show(struct seq_file *m, void *v)
{
        long i;
        int size;

        if (v == SEQ_START_TOKEN) {
                seq_printf(m, "        Depth    Size   Location"
                           "    (%d entries)\n"
                           "        -----    ----   --------\n",
                           stack_trace_max.nr_entries);

                if (!stack_tracer_enabled && !stack_trace_max_size)
                        print_disabled(m);

                return 0;
        }

        i = *(long *)v;

        if (i >= stack_trace_max.nr_entries ||
            stack_dump_trace[i] == ULONG_MAX)
                return 0;

        if (i+1 == stack_trace_max.nr_entries ||
            stack_dump_trace[i+1] == ULONG_MAX)
                size = stack_trace_index[i];
        else
                size = stack_trace_index[i] - stack_trace_index[i+1];

        seq_printf(m, "%3ld) %8d   %5d   ", i, stack_trace_index[i], size);

        trace_lookup_stack(m, i);

        return 0;
}

static const struct seq_operations stack_trace_seq_ops = {
        .start          = t_start,
        .next           = t_next,
        .stop           = t_stop,
        .show           = t_show,
};

static int stack_trace_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &stack_trace_seq_ops);
}

static const struct file_operations stack_trace_fops = {
        .open           = stack_trace_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

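/*
 * With dynamic ftrace, the "stack_trace_filter" file lets the user limit
 * which functions the stack tracer hooks, using the usual ftrace filter
 * syntax.
 */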
#ifdef CONFIG_DYNAMIC_FTRACE

static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
        struct ftrace_ops *ops = inode->i_private;

        return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
                                 inode, file);
}

static const struct file_operations stack_trace_filter_fops = {
        .open = stack_trace_filter_open,
        .read = seq_read,
        .write = ftrace_filter_write,
        .llseek = tracing_lseek,
        .release = ftrace_regex_release,
};

#endif /* CONFIG_DYNAMIC_FTRACE */

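/*
 * sysctl handler for /proc/sys/kernel/stack_tracer_enabled.  Registers
 * or unregisters the ftrace callback only when the value actually
 * changes.
 */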
int
stack_trace_sysctl(struct ctl_table *table, int write,
                   void __user *buffer, size_t *lenp,
                   loff_t *ppos)
{
        int ret;

        mutex_lock(&stack_sysctl_mutex);

        ret = proc_dointvec(table, write, buffer, lenp, ppos);

        if (ret || !write ||
            (last_stack_tracer_enabled == !!stack_tracer_enabled))
                goto out;

        last_stack_tracer_enabled = !!stack_tracer_enabled;

        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);
        else
                unregister_ftrace_function(&trace_ops);

 out:
        mutex_unlock(&stack_sysctl_mutex);
        return ret;
}

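/*
 * Boot-time setup: "stacktrace" on the kernel command line enables the
 * tracer early, and "stacktrace_filter=<funcs>" additionally saves a
 * filter string to be applied once ftrace is initialized.
 */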
static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;

static __init int enable_stacktrace(char *str)
{
        if (strncmp(str, "_filter=", 8) == 0)
                strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);

        stack_tracer_enabled = 1;
        last_stack_tracer_enabled = 1;
        return 1;
}
__setup("stacktrace", enable_stacktrace);

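/*
 * Late initialization: create the tracefs files, apply any boot-time
 * filter, and register the callback if the tracer was enabled on the
 * command line.
 *
 * Example usage (assuming tracefs is mounted at /sys/kernel/tracing):
 *   echo 1 > /proc/sys/kernel/stack_tracer_enabled
 *   cat /sys/kernel/tracing/stack_trace
 */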
static __init int stack_trace_init(void)
{
        struct dentry *d_tracer;

        d_tracer = tracing_init_dentry();
        if (IS_ERR(d_tracer))
                return 0;

        trace_create_file("stack_max_size", 0644, d_tracer,
                        &stack_trace_max_size, &stack_max_size_fops);

        trace_create_file("stack_trace", 0444, d_tracer,
                        NULL, &stack_trace_fops);

#ifdef CONFIG_DYNAMIC_FTRACE
        trace_create_file("stack_trace_filter", 0444, d_tracer,
                          &trace_ops, &stack_trace_filter_fops);
#endif

        if (stack_trace_filter_buf[0])
                ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);

        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);

        return 0;
}

device_initcall(stack_trace_init);