Merge branch 'spectre' of git://git.armlinux.org.uk/~rmk/linux-arm
[sfrench/cifs-2.6.git] / kernel / trace / trace_uprobe.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * uprobes-based tracing events
4  *
5  * Copyright (C) IBM Corporation, 2010-2012
6  * Author:      Srikar Dronamraju <srikar@linux.vnet.ibm.com>
7  */
/*
 * Prefix applied to every pr_*() message emitted from this file.  This is
 * the uprobe tracer, so the prefix names trace_uprobe (it used to say
 * "trace_kprobe", which mis-attributed the messages).
 */
#define pr_fmt(fmt)     "trace_uprobe: " fmt
9
10 #include <linux/module.h>
11 #include <linux/uaccess.h>
12 #include <linux/uprobes.h>
13 #include <linux/namei.h>
14 #include <linux/string.h>
15 #include <linux/rculist.h>
16
17 #include "trace_probe.h"
18 #include "trace_probe_tmpl.h"
19
/* Group (system) name used when a probe definition does not give one. */
#define UPROBE_EVENT_SYSTEM "uprobes"
21
22 struct uprobe_trace_entry_head {
23         struct trace_entry      ent;
24         unsigned long           vaddr[];
25 };
26
/*
 * Size of the record header including its vaddr[] slots: two slots for a
 * return probe, one otherwise.  The argument is parenthesized so that any
 * expression may be passed without being re-associated by ?:.
 */
#define SIZEOF_TRACE_ENTRY(is_return)                   \
        (sizeof(struct uprobe_trace_entry_head) +       \
         sizeof(unsigned long) * ((is_return) ? 2 : 1))

/* Address of the argument data that follows the header inside @entry. */
#define DATAOF_TRACE_ENTRY(entry, is_return)            \
        ((void*)(entry) + SIZEOF_TRACE_ENTRY((is_return)))
33
34 struct trace_uprobe_filter {
35         rwlock_t                rwlock;
36         int                     nr_systemwide;
37         struct list_head        perf_events;
38 };
39
40 /*
41  * uprobe event core functions
42  */
43 struct trace_uprobe {
44         struct list_head                list;
45         struct trace_uprobe_filter      filter;
46         struct uprobe_consumer          consumer;
47         struct path                     path;
48         struct inode                    *inode;
49         char                            *filename;
50         unsigned long                   offset;
51         unsigned long                   ref_ctr_offset;
52         unsigned long                   nhit;
53         struct trace_probe              tp;
54 };
55
/* Allocation size of a trace_uprobe that carries @n probe arguments. */
#define SIZEOF_TRACE_UPROBE(n) \
        (offsetof(struct trace_uprobe, tp.args) + (sizeof(struct probe_arg) * (n)))
59
/* Defined later in this file. */
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

/* All registered trace_uprobes; the list is walked and modified under uprobe_lock. */
static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);
65
/*
 * Per-hit context; translate_user_vaddr() reads it back through
 * current->utask->vaddr.
 */
struct uprobe_dispatch_data {
        struct trace_uprobe *tu;        /* probe that was hit */
        unsigned long bp_addr;          /* user virtual address of the breakpoint */
};
70
/* Consumer callbacks installed by alloc_trace_uprobe(). */
static int uprobe_dispatcher(struct uprobe_consumer *con,
                             struct pt_regs *regs);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
                                unsigned long func,
                                struct pt_regs *regs);
74
/*
 * Return the address of the @n-th long-sized slot counted from @addr.
 * The step direction depends on CONFIG_STACK_GROWSUP.
 */
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
{
        unsigned long delta = n * sizeof(long);

#ifdef CONFIG_STACK_GROWSUP
        return addr - delta;
#else
        return addr + delta;
#endif
}
86
87 static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
88 {
89         unsigned long ret;
90         unsigned long addr = user_stack_pointer(regs);
91
92         addr = adjust_stack_addr(addr, n);
93
94         if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
95                 return 0;
96
97         return ret;
98 }
99
100 /*
101  * Uprobes-specific fetch functions
102  */
103 static nokprobe_inline int
104 probe_mem_read(void *dest, void *src, size_t size)
105 {
106         void __user *vaddr = (void __force __user *)src;
107
108         return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
109 }
110 /*
111  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
112  * length and relative data location.
113  */
114 static nokprobe_inline int
115 fetch_store_string(unsigned long addr, void *dest, void *base)
116 {
117         long ret;
118         u32 loc = *(u32 *)dest;
119         int maxlen  = get_loc_len(loc);
120         u8 *dst = get_loc_data(dest, base);
121         void __user *src = (void __force __user *) addr;
122
123         if (unlikely(!maxlen))
124                 return -ENOMEM;
125
126         ret = strncpy_from_user(dst, src, maxlen);
127         if (ret >= 0) {
128                 if (ret == maxlen)
129                         dst[ret - 1] = '\0';
130                 *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
131         }
132
133         return ret;
134 }
135
136 /* Return the length of string -- including null terminal byte */
137 static nokprobe_inline int
138 fetch_store_strlen(unsigned long addr)
139 {
140         int len;
141         void __user *vaddr = (void __force __user *) addr;
142
143         len = strnlen_user(vaddr, MAX_STRING_SIZE);
144
145         return (len > MAX_STRING_SIZE) ? 0 : len;
146 }
147
148 static unsigned long translate_user_vaddr(unsigned long file_offset)
149 {
150         unsigned long base_addr;
151         struct uprobe_dispatch_data *udd;
152
153         udd = (void *) current->utask->vaddr;
154
155         base_addr = udd->bp_addr - udd->tu->offset;
156         return base_addr + file_offset;
157 }
158
159 /* Note that we don't verify it, since the code does not come from user space */
160 static int
161 process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
162                    void *base)
163 {
164         unsigned long val;
165
166         /* 1st stage: get value from context */
167         switch (code->op) {
168         case FETCH_OP_REG:
169                 val = regs_get_register(regs, code->param);
170                 break;
171         case FETCH_OP_STACK:
172                 val = get_user_stack_nth(regs, code->param);
173                 break;
174         case FETCH_OP_STACKP:
175                 val = user_stack_pointer(regs);
176                 break;
177         case FETCH_OP_RETVAL:
178                 val = regs_return_value(regs);
179                 break;
180         case FETCH_OP_IMM:
181                 val = code->immediate;
182                 break;
183         case FETCH_OP_FOFFS:
184                 val = translate_user_vaddr(code->immediate);
185                 break;
186         default:
187                 return -EILSEQ;
188         }
189         code++;
190
191         return process_fetch_insn_bottom(code, val, dest, base);
192 }
193 NOKPROBE_SYMBOL(process_fetch_insn)
194
195 static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
196 {
197         rwlock_init(&filter->rwlock);
198         filter->nr_systemwide = 0;
199         INIT_LIST_HEAD(&filter->perf_events);
200 }
201
202 static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
203 {
204         return !filter->nr_systemwide && list_empty(&filter->perf_events);
205 }
206
207 static inline bool is_ret_probe(struct trace_uprobe *tu)
208 {
209         return tu->consumer.ret_handler != NULL;
210 }
211
212 /*
213  * Allocate new trace_uprobe and initialize it (including uprobes).
214  */
215 static struct trace_uprobe *
216 alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
217 {
218         struct trace_uprobe *tu;
219
220         if (!event || !is_good_name(event))
221                 return ERR_PTR(-EINVAL);
222
223         if (!group || !is_good_name(group))
224                 return ERR_PTR(-EINVAL);
225
226         tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
227         if (!tu)
228                 return ERR_PTR(-ENOMEM);
229
230         tu->tp.call.class = &tu->tp.class;
231         tu->tp.call.name = kstrdup(event, GFP_KERNEL);
232         if (!tu->tp.call.name)
233                 goto error;
234
235         tu->tp.class.system = kstrdup(group, GFP_KERNEL);
236         if (!tu->tp.class.system)
237                 goto error;
238
239         INIT_LIST_HEAD(&tu->list);
240         INIT_LIST_HEAD(&tu->tp.files);
241         tu->consumer.handler = uprobe_dispatcher;
242         if (is_ret)
243                 tu->consumer.ret_handler = uretprobe_dispatcher;
244         init_trace_uprobe_filter(&tu->filter);
245         return tu;
246
247 error:
248         kfree(tu->tp.call.name);
249         kfree(tu);
250
251         return ERR_PTR(-ENOMEM);
252 }
253
254 static void free_trace_uprobe(struct trace_uprobe *tu)
255 {
256         int i;
257
258         for (i = 0; i < tu->tp.nr_args; i++)
259                 traceprobe_free_probe_arg(&tu->tp.args[i]);
260
261         path_put(&tu->path);
262         kfree(tu->tp.call.class->system);
263         kfree(tu->tp.call.name);
264         kfree(tu->filename);
265         kfree(tu);
266 }
267
268 static struct trace_uprobe *find_probe_event(const char *event, const char *group)
269 {
270         struct trace_uprobe *tu;
271
272         list_for_each_entry(tu, &uprobe_list, list)
273                 if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
274                     strcmp(tu->tp.call.class->system, group) == 0)
275                         return tu;
276
277         return NULL;
278 }
279
280 /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
281 static int unregister_trace_uprobe(struct trace_uprobe *tu)
282 {
283         int ret;
284
285         ret = unregister_uprobe_event(tu);
286         if (ret)
287                 return ret;
288
289         list_del(&tu->list);
290         free_trace_uprobe(tu);
291         return 0;
292 }
293
294 /*
295  * Uprobe with multiple reference counter is not allowed. i.e.
296  * If inode and offset matches, reference counter offset *must*
297  * match as well. Though, there is one exception: If user is
298  * replacing old trace_uprobe with new one(same group/event),
299  * then we allow same uprobe with new reference counter as far
300  * as the new one does not conflict with any other existing
301  * ones.
302  */
303 static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new)
304 {
305         struct trace_uprobe *tmp, *old = NULL;
306         struct inode *new_inode = d_real_inode(new->path.dentry);
307
308         old = find_probe_event(trace_event_name(&new->tp.call),
309                                 new->tp.call.class->system);
310
311         list_for_each_entry(tmp, &uprobe_list, list) {
312                 if ((old ? old != tmp : true) &&
313                     new_inode == d_real_inode(tmp->path.dentry) &&
314                     new->offset == tmp->offset &&
315                     new->ref_ctr_offset != tmp->ref_ctr_offset) {
316                         pr_warn("Reference counter offset mismatch.");
317                         return ERR_PTR(-EINVAL);
318                 }
319         }
320         return old;
321 }
322
323 /* Register a trace_uprobe and probe_event */
324 static int register_trace_uprobe(struct trace_uprobe *tu)
325 {
326         struct trace_uprobe *old_tu;
327         int ret;
328
329         mutex_lock(&uprobe_lock);
330
331         /* register as an event */
332         old_tu = find_old_trace_uprobe(tu);
333         if (IS_ERR(old_tu)) {
334                 ret = PTR_ERR(old_tu);
335                 goto end;
336         }
337
338         if (old_tu) {
339                 /* delete old event */
340                 ret = unregister_trace_uprobe(old_tu);
341                 if (ret)
342                         goto end;
343         }
344
345         ret = register_uprobe_event(tu);
346         if (ret) {
347                 pr_warn("Failed to register probe event(%d)\n", ret);
348                 goto end;
349         }
350
351         list_add_tail(&tu->list, &uprobe_list);
352
353 end:
354         mutex_unlock(&uprobe_lock);
355
356         return ret;
357 }
358
359 /*
360  * Argument syntax:
361  *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
362  *
363  *  - Remove uprobe: -:[GRP/]EVENT
364  */
365 static int create_trace_uprobe(int argc, char **argv)
366 {
367         struct trace_uprobe *tu;
368         char *arg, *event, *group, *filename, *rctr, *rctr_end;
369         char buf[MAX_EVENT_NAME_LEN];
370         struct path path;
371         unsigned long offset, ref_ctr_offset;
372         bool is_delete, is_return;
373         int i, ret;
374
375         ret = 0;
376         is_delete = false;
377         is_return = false;
378         event = NULL;
379         group = NULL;
380         ref_ctr_offset = 0;
381
382         /* argc must be >= 1 */
383         if (argv[0][0] == '-')
384                 is_delete = true;
385         else if (argv[0][0] == 'r')
386                 is_return = true;
387         else if (argv[0][0] != 'p') {
388                 pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
389                 return -EINVAL;
390         }
391
392         if (argv[0][1] == ':') {
393                 event = &argv[0][2];
394                 arg = strchr(event, '/');
395
396                 if (arg) {
397                         group = event;
398                         event = arg + 1;
399                         event[-1] = '\0';
400
401                         if (strlen(group) == 0) {
402                                 pr_info("Group name is not specified\n");
403                                 return -EINVAL;
404                         }
405                 }
406                 if (strlen(event) == 0) {
407                         pr_info("Event name is not specified\n");
408                         return -EINVAL;
409                 }
410         }
411         if (!group)
412                 group = UPROBE_EVENT_SYSTEM;
413
414         if (is_delete) {
415                 int ret;
416
417                 if (!event) {
418                         pr_info("Delete command needs an event name.\n");
419                         return -EINVAL;
420                 }
421                 mutex_lock(&uprobe_lock);
422                 tu = find_probe_event(event, group);
423
424                 if (!tu) {
425                         mutex_unlock(&uprobe_lock);
426                         pr_info("Event %s/%s doesn't exist.\n", group, event);
427                         return -ENOENT;
428                 }
429                 /* delete an event */
430                 ret = unregister_trace_uprobe(tu);
431                 mutex_unlock(&uprobe_lock);
432                 return ret;
433         }
434
435         if (argc < 2) {
436                 pr_info("Probe point is not specified.\n");
437                 return -EINVAL;
438         }
439         /* Find the last occurrence, in case the path contains ':' too. */
440         arg = strrchr(argv[1], ':');
441         if (!arg)
442                 return -EINVAL;
443
444         *arg++ = '\0';
445         filename = argv[1];
446         ret = kern_path(filename, LOOKUP_FOLLOW, &path);
447         if (ret)
448                 return ret;
449
450         if (!d_is_reg(path.dentry)) {
451                 ret = -EINVAL;
452                 goto fail_address_parse;
453         }
454
455         /* Parse reference counter offset if specified. */
456         rctr = strchr(arg, '(');
457         if (rctr) {
458                 rctr_end = strchr(rctr, ')');
459                 if (rctr > rctr_end || *(rctr_end + 1) != 0) {
460                         ret = -EINVAL;
461                         pr_info("Invalid reference counter offset.\n");
462                         goto fail_address_parse;
463                 }
464
465                 *rctr++ = '\0';
466                 *rctr_end = '\0';
467                 ret = kstrtoul(rctr, 0, &ref_ctr_offset);
468                 if (ret) {
469                         pr_info("Invalid reference counter offset.\n");
470                         goto fail_address_parse;
471                 }
472         }
473
474         /* Parse uprobe offset. */
475         ret = kstrtoul(arg, 0, &offset);
476         if (ret)
477                 goto fail_address_parse;
478
479         argc -= 2;
480         argv += 2;
481
482         /* setup a probe */
483         if (!event) {
484                 char *tail;
485                 char *ptr;
486
487                 tail = kstrdup(kbasename(filename), GFP_KERNEL);
488                 if (!tail) {
489                         ret = -ENOMEM;
490                         goto fail_address_parse;
491                 }
492
493                 ptr = strpbrk(tail, ".-_");
494                 if (ptr)
495                         *ptr = '\0';
496
497                 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
498                 event = buf;
499                 kfree(tail);
500         }
501
502         tu = alloc_trace_uprobe(group, event, argc, is_return);
503         if (IS_ERR(tu)) {
504                 pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
505                 ret = PTR_ERR(tu);
506                 goto fail_address_parse;
507         }
508         tu->offset = offset;
509         tu->ref_ctr_offset = ref_ctr_offset;
510         tu->path = path;
511         tu->filename = kstrdup(filename, GFP_KERNEL);
512
513         if (!tu->filename) {
514                 pr_info("Failed to allocate filename.\n");
515                 ret = -ENOMEM;
516                 goto error;
517         }
518
519         /* parse arguments */
520         ret = 0;
521         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
522                 struct probe_arg *parg = &tu->tp.args[i];
523
524                 /* Increment count for freeing args in error case */
525                 tu->tp.nr_args++;
526
527                 /* Parse argument name */
528                 arg = strchr(argv[i], '=');
529                 if (arg) {
530                         *arg++ = '\0';
531                         parg->name = kstrdup(argv[i], GFP_KERNEL);
532                 } else {
533                         arg = argv[i];
534                         /* If argument name is omitted, set "argN" */
535                         snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
536                         parg->name = kstrdup(buf, GFP_KERNEL);
537                 }
538
539                 if (!parg->name) {
540                         pr_info("Failed to allocate argument[%d] name.\n", i);
541                         ret = -ENOMEM;
542                         goto error;
543                 }
544
545                 if (!is_good_name(parg->name)) {
546                         pr_info("Invalid argument[%d] name: %s\n", i, parg->name);
547                         ret = -EINVAL;
548                         goto error;
549                 }
550
551                 if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) {
552                         pr_info("Argument[%d] name '%s' conflicts with "
553                                 "another field.\n", i, argv[i]);
554                         ret = -EINVAL;
555                         goto error;
556                 }
557
558                 /* Parse fetch argument */
559                 ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
560                                         is_return ? TPARG_FL_RETURN : 0);
561                 if (ret) {
562                         pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
563                         goto error;
564                 }
565         }
566
567         ret = register_trace_uprobe(tu);
568         if (ret)
569                 goto error;
570         return 0;
571
572 error:
573         free_trace_uprobe(tu);
574         return ret;
575
576 fail_address_parse:
577         path_put(&path);
578
579         pr_info("Failed to parse address or file.\n");
580
581         return ret;
582 }
583
584 static int cleanup_all_probes(void)
585 {
586         struct trace_uprobe *tu;
587         int ret = 0;
588
589         mutex_lock(&uprobe_lock);
590         while (!list_empty(&uprobe_list)) {
591                 tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
592                 ret = unregister_trace_uprobe(tu);
593                 if (ret)
594                         break;
595         }
596         mutex_unlock(&uprobe_lock);
597         return ret;
598 }
599
600 /* Probes listing interfaces */
601 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
602 {
603         mutex_lock(&uprobe_lock);
604         return seq_list_start(&uprobe_list, *pos);
605 }
606
607 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
608 {
609         return seq_list_next(v, &uprobe_list, pos);
610 }
611
612 static void probes_seq_stop(struct seq_file *m, void *v)
613 {
614         mutex_unlock(&uprobe_lock);
615 }
616
617 static int probes_seq_show(struct seq_file *m, void *v)
618 {
619         struct trace_uprobe *tu = v;
620         char c = is_ret_probe(tu) ? 'r' : 'p';
621         int i;
622
623         seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system,
624                         trace_event_name(&tu->tp.call), tu->filename,
625                         (int)(sizeof(void *) * 2), tu->offset);
626
627         if (tu->ref_ctr_offset)
628                 seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
629
630         for (i = 0; i < tu->tp.nr_args; i++)
631                 seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
632
633         seq_putc(m, '\n');
634         return 0;
635 }
636
637 static const struct seq_operations probes_seq_op = {
638         .start  = probes_seq_start,
639         .next   = probes_seq_next,
640         .stop   = probes_seq_stop,
641         .show   = probes_seq_show
642 };
643
644 static int probes_open(struct inode *inode, struct file *file)
645 {
646         int ret;
647
648         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
649                 ret = cleanup_all_probes();
650                 if (ret)
651                         return ret;
652         }
653
654         return seq_open(file, &probes_seq_op);
655 }
656
657 static ssize_t probes_write(struct file *file, const char __user *buffer,
658                             size_t count, loff_t *ppos)
659 {
660         return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe);
661 }
662
663 static const struct file_operations uprobe_events_ops = {
664         .owner          = THIS_MODULE,
665         .open           = probes_open,
666         .read           = seq_read,
667         .llseek         = seq_lseek,
668         .release        = seq_release,
669         .write          = probes_write,
670 };
671
672 /* Probes profiling interfaces */
673 static int probes_profile_seq_show(struct seq_file *m, void *v)
674 {
675         struct trace_uprobe *tu = v;
676
677         seq_printf(m, "  %s %-44s %15lu\n", tu->filename,
678                         trace_event_name(&tu->tp.call), tu->nhit);
679         return 0;
680 }
681
682 static const struct seq_operations profile_seq_op = {
683         .start  = probes_seq_start,
684         .next   = probes_seq_next,
685         .stop   = probes_seq_stop,
686         .show   = probes_profile_seq_show
687 };
688
689 static int profile_open(struct inode *inode, struct file *file)
690 {
691         return seq_open(file, &profile_seq_op);
692 }
693
694 static const struct file_operations uprobe_profile_ops = {
695         .owner          = THIS_MODULE,
696         .open           = profile_open,
697         .read           = seq_read,
698         .llseek         = seq_lseek,
699         .release        = seq_release,
700 };
701
702 struct uprobe_cpu_buffer {
703         struct mutex mutex;
704         void *buf;
705 };
706 static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
707 static int uprobe_buffer_refcnt;
708
709 static int uprobe_buffer_init(void)
710 {
711         int cpu, err_cpu;
712
713         uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
714         if (uprobe_cpu_buffer == NULL)
715                 return -ENOMEM;
716
717         for_each_possible_cpu(cpu) {
718                 struct page *p = alloc_pages_node(cpu_to_node(cpu),
719                                                   GFP_KERNEL, 0);
720                 if (p == NULL) {
721                         err_cpu = cpu;
722                         goto err;
723                 }
724                 per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p);
725                 mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex);
726         }
727
728         return 0;
729
730 err:
731         for_each_possible_cpu(cpu) {
732                 if (cpu == err_cpu)
733                         break;
734                 free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf);
735         }
736
737         free_percpu(uprobe_cpu_buffer);
738         return -ENOMEM;
739 }
740
741 static int uprobe_buffer_enable(void)
742 {
743         int ret = 0;
744
745         BUG_ON(!mutex_is_locked(&event_mutex));
746
747         if (uprobe_buffer_refcnt++ == 0) {
748                 ret = uprobe_buffer_init();
749                 if (ret < 0)
750                         uprobe_buffer_refcnt--;
751         }
752
753         return ret;
754 }
755
756 static void uprobe_buffer_disable(void)
757 {
758         int cpu;
759
760         BUG_ON(!mutex_is_locked(&event_mutex));
761
762         if (--uprobe_buffer_refcnt == 0) {
763                 for_each_possible_cpu(cpu)
764                         free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer,
765                                                              cpu)->buf);
766
767                 free_percpu(uprobe_cpu_buffer);
768                 uprobe_cpu_buffer = NULL;
769         }
770 }
771
772 static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
773 {
774         struct uprobe_cpu_buffer *ucb;
775         int cpu;
776
777         cpu = raw_smp_processor_id();
778         ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);
779
780         /*
781          * Use per-cpu buffers for fastest access, but we might migrate
782          * so the mutex makes sure we have sole access to it.
783          */
784         mutex_lock(&ucb->mutex);
785
786         return ucb;
787 }
788
789 static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
790 {
791         mutex_unlock(&ucb->mutex);
792 }
793
794 static void __uprobe_trace_func(struct trace_uprobe *tu,
795                                 unsigned long func, struct pt_regs *regs,
796                                 struct uprobe_cpu_buffer *ucb, int dsize,
797                                 struct trace_event_file *trace_file)
798 {
799         struct uprobe_trace_entry_head *entry;
800         struct ring_buffer_event *event;
801         struct ring_buffer *buffer;
802         void *data;
803         int size, esize;
804         struct trace_event_call *call = &tu->tp.call;
805
806         WARN_ON(call != trace_file->event_call);
807
808         if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE))
809                 return;
810
811         if (trace_trigger_soft_disabled(trace_file))
812                 return;
813
814         esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
815         size = esize + tu->tp.size + dsize;
816         event = trace_event_buffer_lock_reserve(&buffer, trace_file,
817                                                 call->event.type, size, 0, 0);
818         if (!event)
819                 return;
820
821         entry = ring_buffer_event_data(event);
822         if (is_ret_probe(tu)) {
823                 entry->vaddr[0] = func;
824                 entry->vaddr[1] = instruction_pointer(regs);
825                 data = DATAOF_TRACE_ENTRY(entry, true);
826         } else {
827                 entry->vaddr[0] = instruction_pointer(regs);
828                 data = DATAOF_TRACE_ENTRY(entry, false);
829         }
830
831         memcpy(data, ucb->buf, tu->tp.size + dsize);
832
833         event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0);
834 }
835
836 /* uprobe handler */
837 static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
838                              struct uprobe_cpu_buffer *ucb, int dsize)
839 {
840         struct event_file_link *link;
841
842         if (is_ret_probe(tu))
843                 return 0;
844
845         rcu_read_lock();
846         list_for_each_entry_rcu(link, &tu->tp.files, list)
847                 __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
848         rcu_read_unlock();
849
850         return 0;
851 }
852
853 static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
854                                  struct pt_regs *regs,
855                                  struct uprobe_cpu_buffer *ucb, int dsize)
856 {
857         struct event_file_link *link;
858
859         rcu_read_lock();
860         list_for_each_entry_rcu(link, &tu->tp.files, list)
861                 __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
862         rcu_read_unlock();
863 }
864
865 /* Event entry printers */
866 static enum print_line_t
867 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
868 {
869         struct uprobe_trace_entry_head *entry;
870         struct trace_seq *s = &iter->seq;
871         struct trace_uprobe *tu;
872         u8 *data;
873
874         entry = (struct uprobe_trace_entry_head *)iter->ent;
875         tu = container_of(event, struct trace_uprobe, tp.call.event);
876
877         if (is_ret_probe(tu)) {
878                 trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
879                                  trace_event_name(&tu->tp.call),
880                                  entry->vaddr[1], entry->vaddr[0]);
881                 data = DATAOF_TRACE_ENTRY(entry, true);
882         } else {
883                 trace_seq_printf(s, "%s: (0x%lx)",
884                                  trace_event_name(&tu->tp.call),
885                                  entry->vaddr[0]);
886                 data = DATAOF_TRACE_ENTRY(entry, false);
887         }
888
889         if (print_probe_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0)
890                 goto out;
891
892         trace_seq_putc(s, '\n');
893
894  out:
895         return trace_handle_return(s);
896 }
897
898 typedef bool (*filter_func_t)(struct uprobe_consumer *self,
899                                 enum uprobe_filter_ctx ctx,
900                                 struct mm_struct *mm);
901
902 static int
903 probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
904                    filter_func_t filter)
905 {
906         bool enabled = trace_probe_is_enabled(&tu->tp);
907         struct event_file_link *link = NULL;
908         int ret;
909
910         if (file) {
911                 if (tu->tp.flags & TP_FLAG_PROFILE)
912                         return -EINTR;
913
914                 link = kmalloc(sizeof(*link), GFP_KERNEL);
915                 if (!link)
916                         return -ENOMEM;
917
918                 link->file = file;
919                 list_add_tail_rcu(&link->list, &tu->tp.files);
920
921                 tu->tp.flags |= TP_FLAG_TRACE;
922         } else {
923                 if (tu->tp.flags & TP_FLAG_TRACE)
924                         return -EINTR;
925
926                 tu->tp.flags |= TP_FLAG_PROFILE;
927         }
928
929         WARN_ON(!uprobe_filter_is_empty(&tu->filter));
930
931         if (enabled)
932                 return 0;
933
934         ret = uprobe_buffer_enable();
935         if (ret)
936                 goto err_flags;
937
938         tu->consumer.filter = filter;
939         tu->inode = d_real_inode(tu->path.dentry);
940         if (tu->ref_ctr_offset) {
941                 ret = uprobe_register_refctr(tu->inode, tu->offset,
942                                 tu->ref_ctr_offset, &tu->consumer);
943         } else {
944                 ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
945         }
946
947         if (ret)
948                 goto err_buffer;
949
950         return 0;
951
952  err_buffer:
953         uprobe_buffer_disable();
954
955  err_flags:
956         if (file) {
957                 list_del(&link->list);
958                 kfree(link);
959                 tu->tp.flags &= ~TP_FLAG_TRACE;
960         } else {
961                 tu->tp.flags &= ~TP_FLAG_PROFILE;
962         }
963         return ret;
964 }
965
966 static void
967 probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
968 {
969         if (!trace_probe_is_enabled(&tu->tp))
970                 return;
971
972         if (file) {
973                 struct event_file_link *link;
974
975                 link = find_event_file_link(&tu->tp, file);
976                 if (!link)
977                         return;
978
979                 list_del_rcu(&link->list);
980                 /* synchronize with u{,ret}probe_trace_func */
981                 synchronize_rcu();
982                 kfree(link);
983
984                 if (!list_empty(&tu->tp.files))
985                         return;
986         }
987
988         WARN_ON(!uprobe_filter_is_empty(&tu->filter));
989
990         uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
991         tu->inode = NULL;
992         tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
993
994         uprobe_buffer_disable();
995 }
996
997 static int uprobe_event_define_fields(struct trace_event_call *event_call)
998 {
999         int ret, size;
1000         struct uprobe_trace_entry_head field;
1001         struct trace_uprobe *tu = event_call->data;
1002
1003         if (is_ret_probe(tu)) {
1004                 DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
1005                 DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
1006                 size = SIZEOF_TRACE_ENTRY(true);
1007         } else {
1008                 DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
1009                 size = SIZEOF_TRACE_ENTRY(false);
1010         }
1011
1012         return traceprobe_define_arg_fields(event_call, size, &tu->tp);
1013 }
1014
1015 #ifdef CONFIG_PERF_EVENTS
1016 static bool
1017 __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
1018 {
1019         struct perf_event *event;
1020
1021         if (filter->nr_systemwide)
1022                 return true;
1023
1024         list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
1025                 if (event->hw.target->mm == mm)
1026                         return true;
1027         }
1028
1029         return false;
1030 }
1031
1032 static inline bool
1033 uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
1034 {
1035         return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
1036 }
1037
1038 static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
1039 {
1040         bool done;
1041
1042         write_lock(&tu->filter.rwlock);
1043         if (event->hw.target) {
1044                 list_del(&event->hw.tp_list);
1045                 done = tu->filter.nr_systemwide ||
1046                         (event->hw.target->flags & PF_EXITING) ||
1047                         uprobe_filter_event(tu, event);
1048         } else {
1049                 tu->filter.nr_systemwide--;
1050                 done = tu->filter.nr_systemwide;
1051         }
1052         write_unlock(&tu->filter.rwlock);
1053
1054         if (!done)
1055                 return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
1056
1057         return 0;
1058 }
1059
1060 static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
1061 {
1062         bool done;
1063         int err;
1064
1065         write_lock(&tu->filter.rwlock);
1066         if (event->hw.target) {
1067                 /*
1068                  * event->parent != NULL means copy_process(), we can avoid
1069                  * uprobe_apply(). current->mm must be probed and we can rely
1070                  * on dup_mmap() which preserves the already installed bp's.
1071                  *
1072                  * attr.enable_on_exec means that exec/mmap will install the
1073                  * breakpoints we need.
1074                  */
1075                 done = tu->filter.nr_systemwide ||
1076                         event->parent || event->attr.enable_on_exec ||
1077                         uprobe_filter_event(tu, event);
1078                 list_add(&event->hw.tp_list, &tu->filter.perf_events);
1079         } else {
1080                 done = tu->filter.nr_systemwide;
1081                 tu->filter.nr_systemwide++;
1082         }
1083         write_unlock(&tu->filter.rwlock);
1084
1085         err = 0;
1086         if (!done) {
1087                 err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
1088                 if (err)
1089                         uprobe_perf_close(tu, event);
1090         }
1091         return err;
1092 }
1093
1094 static bool uprobe_perf_filter(struct uprobe_consumer *uc,
1095                                 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
1096 {
1097         struct trace_uprobe *tu;
1098         int ret;
1099
1100         tu = container_of(uc, struct trace_uprobe, consumer);
1101         read_lock(&tu->filter.rwlock);
1102         ret = __uprobe_perf_filter(&tu->filter, mm);
1103         read_unlock(&tu->filter.rwlock);
1104
1105         return ret;
1106 }
1107
1108 static void __uprobe_perf_func(struct trace_uprobe *tu,
1109                                unsigned long func, struct pt_regs *regs,
1110                                struct uprobe_cpu_buffer *ucb, int dsize)
1111 {
1112         struct trace_event_call *call = &tu->tp.call;
1113         struct uprobe_trace_entry_head *entry;
1114         struct hlist_head *head;
1115         void *data;
1116         int size, esize;
1117         int rctx;
1118
1119         if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
1120                 return;
1121
1122         esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
1123
1124         size = esize + tu->tp.size + dsize;
1125         size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
1126         if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
1127                 return;
1128
1129         preempt_disable();
1130         head = this_cpu_ptr(call->perf_events);
1131         if (hlist_empty(head))
1132                 goto out;
1133
1134         entry = perf_trace_buf_alloc(size, NULL, &rctx);
1135         if (!entry)
1136                 goto out;
1137
1138         if (is_ret_probe(tu)) {
1139                 entry->vaddr[0] = func;
1140                 entry->vaddr[1] = instruction_pointer(regs);
1141                 data = DATAOF_TRACE_ENTRY(entry, true);
1142         } else {
1143                 entry->vaddr[0] = instruction_pointer(regs);
1144                 data = DATAOF_TRACE_ENTRY(entry, false);
1145         }
1146
1147         memcpy(data, ucb->buf, tu->tp.size + dsize);
1148
1149         if (size - esize > tu->tp.size + dsize) {
1150                 int len = tu->tp.size + dsize;
1151
1152                 memset(data + len, 0, size - esize - len);
1153         }
1154
1155         perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1156                               head, NULL);
1157  out:
1158         preempt_enable();
1159 }
1160
1161 /* uprobe profile handler */
1162 static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs,
1163                             struct uprobe_cpu_buffer *ucb, int dsize)
1164 {
1165         if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
1166                 return UPROBE_HANDLER_REMOVE;
1167
1168         if (!is_ret_probe(tu))
1169                 __uprobe_perf_func(tu, 0, regs, ucb, dsize);
1170         return 0;
1171 }
1172
/*
 * uretprobe profile handler: forwards to __uprobe_perf_func() with @func
 * as the address recorded for the probed function.
 */
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
                                struct pt_regs *regs,
                                struct uprobe_cpu_buffer *ucb, int dsize)
{
        __uprobe_perf_func(tu, func, regs, ucb, dsize);
}
1179
1180 int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
1181                         const char **filename, u64 *probe_offset,
1182                         bool perf_type_tracepoint)
1183 {
1184         const char *pevent = trace_event_name(event->tp_event);
1185         const char *group = event->tp_event->class->system;
1186         struct trace_uprobe *tu;
1187
1188         if (perf_type_tracepoint)
1189                 tu = find_probe_event(pevent, group);
1190         else
1191                 tu = event->tp_event->data;
1192         if (!tu)
1193                 return -EINVAL;
1194
1195         *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
1196                                     : BPF_FD_TYPE_UPROBE;
1197         *filename = tu->filename;
1198         *probe_offset = tu->offset;
1199         return 0;
1200 }
1201 #endif  /* CONFIG_PERF_EVENTS */
1202
1203 static int
1204 trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
1205                       void *data)
1206 {
1207         struct trace_uprobe *tu = event->data;
1208         struct trace_event_file *file = data;
1209
1210         switch (type) {
1211         case TRACE_REG_REGISTER:
1212                 return probe_event_enable(tu, file, NULL);
1213
1214         case TRACE_REG_UNREGISTER:
1215                 probe_event_disable(tu, file);
1216                 return 0;
1217
1218 #ifdef CONFIG_PERF_EVENTS
1219         case TRACE_REG_PERF_REGISTER:
1220                 return probe_event_enable(tu, NULL, uprobe_perf_filter);
1221
1222         case TRACE_REG_PERF_UNREGISTER:
1223                 probe_event_disable(tu, NULL);
1224                 return 0;
1225
1226         case TRACE_REG_PERF_OPEN:
1227                 return uprobe_perf_open(tu, data);
1228
1229         case TRACE_REG_PERF_CLOSE:
1230                 return uprobe_perf_close(tu, data);
1231
1232 #endif
1233         default:
1234                 return 0;
1235         }
1236         return 0;
1237 }
1238
1239 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
1240 {
1241         struct trace_uprobe *tu;
1242         struct uprobe_dispatch_data udd;
1243         struct uprobe_cpu_buffer *ucb;
1244         int dsize, esize;
1245         int ret = 0;
1246
1247
1248         tu = container_of(con, struct trace_uprobe, consumer);
1249         tu->nhit++;
1250
1251         udd.tu = tu;
1252         udd.bp_addr = instruction_pointer(regs);
1253
1254         current->utask->vaddr = (unsigned long) &udd;
1255
1256         if (WARN_ON_ONCE(!uprobe_cpu_buffer))
1257                 return 0;
1258
1259         dsize = __get_data_size(&tu->tp, regs);
1260         esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
1261
1262         ucb = uprobe_buffer_get();
1263         store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
1264
1265         if (tu->tp.flags & TP_FLAG_TRACE)
1266                 ret |= uprobe_trace_func(tu, regs, ucb, dsize);
1267
1268 #ifdef CONFIG_PERF_EVENTS
1269         if (tu->tp.flags & TP_FLAG_PROFILE)
1270                 ret |= uprobe_perf_func(tu, regs, ucb, dsize);
1271 #endif
1272         uprobe_buffer_put(ucb);
1273         return ret;
1274 }
1275
1276 static int uretprobe_dispatcher(struct uprobe_consumer *con,
1277                                 unsigned long func, struct pt_regs *regs)
1278 {
1279         struct trace_uprobe *tu;
1280         struct uprobe_dispatch_data udd;
1281         struct uprobe_cpu_buffer *ucb;
1282         int dsize, esize;
1283
1284         tu = container_of(con, struct trace_uprobe, consumer);
1285
1286         udd.tu = tu;
1287         udd.bp_addr = func;
1288
1289         current->utask->vaddr = (unsigned long) &udd;
1290
1291         if (WARN_ON_ONCE(!uprobe_cpu_buffer))
1292                 return 0;
1293
1294         dsize = __get_data_size(&tu->tp, regs);
1295         esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
1296
1297         ucb = uprobe_buffer_get();
1298         store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
1299
1300         if (tu->tp.flags & TP_FLAG_TRACE)
1301                 uretprobe_trace_func(tu, func, regs, ucb, dsize);
1302
1303 #ifdef CONFIG_PERF_EVENTS
1304         if (tu->tp.flags & TP_FLAG_PROFILE)
1305                 uretprobe_perf_func(tu, func, regs, ucb, dsize);
1306 #endif
1307         uprobe_buffer_put(ucb);
1308         return 0;
1309 }
1310
1311 static struct trace_event_functions uprobe_funcs = {
1312         .trace          = print_uprobe_event
1313 };
1314
1315 static inline void init_trace_event_call(struct trace_uprobe *tu,
1316                                          struct trace_event_call *call)
1317 {
1318         INIT_LIST_HEAD(&call->class->fields);
1319         call->event.funcs = &uprobe_funcs;
1320         call->class->define_fields = uprobe_event_define_fields;
1321
1322         call->flags = TRACE_EVENT_FL_UPROBE;
1323         call->class->reg = trace_uprobe_register;
1324         call->data = tu;
1325 }
1326
1327 static int register_uprobe_event(struct trace_uprobe *tu)
1328 {
1329         struct trace_event_call *call = &tu->tp.call;
1330         int ret = 0;
1331
1332         init_trace_event_call(tu, call);
1333
1334         if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
1335                 return -ENOMEM;
1336
1337         ret = register_trace_event(&call->event);
1338         if (!ret) {
1339                 kfree(call->print_fmt);
1340                 return -ENODEV;
1341         }
1342
1343         ret = trace_add_event_call(call);
1344
1345         if (ret) {
1346                 pr_info("Failed to register uprobe event: %s\n",
1347                         trace_event_name(call));
1348                 kfree(call->print_fmt);
1349                 unregister_trace_event(&call->event);
1350         }
1351
1352         return ret;
1353 }
1354
1355 static int unregister_uprobe_event(struct trace_uprobe *tu)
1356 {
1357         int ret;
1358
1359         /* tu->event is unregistered in trace_remove_event_call() */
1360         ret = trace_remove_event_call(&tu->tp.call);
1361         if (ret)
1362                 return ret;
1363         kfree(tu->tp.call.print_fmt);
1364         tu->tp.call.print_fmt = NULL;
1365         return 0;
1366 }
1367
1368 #ifdef CONFIG_PERF_EVENTS
1369 struct trace_event_call *
1370 create_local_trace_uprobe(char *name, unsigned long offs,
1371                           unsigned long ref_ctr_offset, bool is_return)
1372 {
1373         struct trace_uprobe *tu;
1374         struct path path;
1375         int ret;
1376
1377         ret = kern_path(name, LOOKUP_FOLLOW, &path);
1378         if (ret)
1379                 return ERR_PTR(ret);
1380
1381         if (!d_is_reg(path.dentry)) {
1382                 path_put(&path);
1383                 return ERR_PTR(-EINVAL);
1384         }
1385
1386         /*
1387          * local trace_kprobes are not added to probe_list, so they are never
1388          * searched in find_trace_kprobe(). Therefore, there is no concern of
1389          * duplicated name "DUMMY_EVENT" here.
1390          */
1391         tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0,
1392                                 is_return);
1393
1394         if (IS_ERR(tu)) {
1395                 pr_info("Failed to allocate trace_uprobe.(%d)\n",
1396                         (int)PTR_ERR(tu));
1397                 path_put(&path);
1398                 return ERR_CAST(tu);
1399         }
1400
1401         tu->offset = offs;
1402         tu->path = path;
1403         tu->ref_ctr_offset = ref_ctr_offset;
1404         tu->filename = kstrdup(name, GFP_KERNEL);
1405         init_trace_event_call(tu, &tu->tp.call);
1406
1407         if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
1408                 ret = -ENOMEM;
1409                 goto error;
1410         }
1411
1412         return &tu->tp.call;
1413 error:
1414         free_trace_uprobe(tu);
1415         return ERR_PTR(ret);
1416 }
1417
1418 void destroy_local_trace_uprobe(struct trace_event_call *event_call)
1419 {
1420         struct trace_uprobe *tu;
1421
1422         tu = container_of(event_call, struct trace_uprobe, tp.call);
1423
1424         kfree(tu->tp.call.print_fmt);
1425         tu->tp.call.print_fmt = NULL;
1426
1427         free_trace_uprobe(tu);
1428 }
1429 #endif /* CONFIG_PERF_EVENTS */
1430
1431 /* Make a trace interface for controling probe points */
1432 static __init int init_uprobe_trace(void)
1433 {
1434         struct dentry *d_tracer;
1435
1436         d_tracer = tracing_init_dentry();
1437         if (IS_ERR(d_tracer))
1438                 return 0;
1439
1440         trace_create_file("uprobe_events", 0644, d_tracer,
1441                                     NULL, &uprobe_events_ops);
1442         /* Profile interface */
1443         trace_create_file("uprobe_profile", 0444, d_tracer,
1444                                     NULL, &uprobe_profile_ops);
1445         return 0;
1446 }
1447
1448 fs_initcall(init_uprobe_trace);