Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / kernel / events / hw_breakpoint.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2007 Alan Stern
4  * Copyright (C) IBM Corporation, 2009
5  * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
6  *
7  * Thanks to Ingo Molnar for his many suggestions.
8  *
9  * Authors: Alan Stern <stern@rowland.harvard.edu>
10  *          K.Prasad <prasad@linux.vnet.ibm.com>
11  *          Frederic Weisbecker <fweisbec@gmail.com>
12  */
13
14 /*
15  * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
16  * using the CPU's debug registers.
17  * This file contains the arch-independent routines.
18  */
19
20 #include <linux/irqflags.h>
21 #include <linux/kallsyms.h>
22 #include <linux/notifier.h>
23 #include <linux/kprobes.h>
24 #include <linux/kdebug.h>
25 #include <linux/kernel.h>
26 #include <linux/module.h>
27 #include <linux/percpu.h>
28 #include <linux/sched.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/list.h>
32 #include <linux/cpu.h>
33 #include <linux/smp.h>
34 #include <linux/bug.h>
35
36 #include <linux/hw_breakpoint.h>
/*
 * Constraints data: slot usage bookkeeping, kept per cpu and per
 * breakpoint type.
 */
struct bp_cpuinfo {
	/* Number of pinned cpu breakpoints in a cpu */
	unsigned int	cpu_pinned;
	/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
	unsigned int	*tsk_pinned;
	/* Number of non-pinned cpu/task breakpoints in a cpu */
	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
};
48
/* Per-cpu constraint table, one entry per breakpoint type */
static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
/* Number of slots per breakpoint type, filled in by init_hw_breakpoint() */
static int nr_slots[TYPE_MAX];
51
52 static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
53 {
54         return per_cpu_ptr(bp_cpuinfo + type, cpu);
55 }
56
/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

/*
 * Set to 1 by init_hw_breakpoint() once the constraint tables are
 * allocated; __reserve_bp_slot() fails with -ENOMEM while it is 0.
 */
static int constraints_initialized;
61
/*
 * Gather the total number of pinned and un-pinned breakpoints in a set
 * of cpus (see fetch_bp_busy_slots()).
 */
struct bp_busy_slots {
	unsigned int pinned;
	unsigned int flexible;
};
67
/* Serialize accesses to the constraint tables and the task breakpoint list */
static DEFINE_MUTEX(nr_bp_mutex);
70
/*
 * Weight of a breakpoint, i.e. the amount it adds to the slot counters.
 * This weak default can be overridden by an architecture.
 */
__weak int hw_breakpoint_weight(struct perf_event *bp)
{
	/* Generic case: every breakpoint counts for exactly one slot */
	return 1;
}
75
76 static inline enum bp_type_idx find_slot_idx(u64 bp_type)
77 {
78         if (bp_type & HW_BREAKPOINT_RW)
79                 return TYPE_DATA;
80
81         return TYPE_INST;
82 }
83
84 /*
85  * Report the maximum number of pinned breakpoints a task
86  * have in this cpu
87  */
88 static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
89 {
90         unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
91         int i;
92
93         for (i = nr_slots[type] - 1; i >= 0; i--) {
94                 if (tsk_pinned[i] > 0)
95                         return i + 1;
96         }
97
98         return 0;
99 }
100
101 /*
102  * Count the number of breakpoints of the same type and same task.
103  * The given event must be not on the list.
104  */
105 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
106 {
107         struct task_struct *tsk = bp->hw.target;
108         struct perf_event *iter;
109         int count = 0;
110
111         list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
112                 if (iter->hw.target == tsk &&
113                     find_slot_idx(iter->attr.bp_type) == type &&
114                     (iter->cpu < 0 || cpu == iter->cpu))
115                         count += hw_breakpoint_weight(iter);
116         }
117
118         return count;
119 }
120
121 static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
122 {
123         if (bp->cpu >= 0)
124                 return cpumask_of(bp->cpu);
125         return cpu_possible_mask;
126 }
127
128 /*
129  * Report the number of pinned/un-pinned breakpoints we have in
130  * a given cpu (cpu > -1) or in all of them (cpu = -1).
131  */
132 static void
133 fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
134                     enum bp_type_idx type)
135 {
136         const struct cpumask *cpumask = cpumask_of_bp(bp);
137         int cpu;
138
139         for_each_cpu(cpu, cpumask) {
140                 struct bp_cpuinfo *info = get_bp_info(cpu, type);
141                 int nr;
142
143                 nr = info->cpu_pinned;
144                 if (!bp->hw.target)
145                         nr += max_task_bp_pinned(cpu, type);
146                 else
147                         nr += task_bp_pinned(cpu, bp, type);
148
149                 if (nr > slots->pinned)
150                         slots->pinned = nr;
151
152                 nr = info->flexible;
153                 if (nr > slots->flexible)
154                         slots->flexible = nr;
155         }
156 }
157
/*
 * Account for the breakpoint being added, of the given weight, in @slots.
 *
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * on the same cpu.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
	slots->pinned += weight;
}
168
169 /*
170  * Add a pinned breakpoint for the given task in our constraint table
171  */
172 static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
173                                 enum bp_type_idx type, int weight)
174 {
175         unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
176         int old_idx, new_idx;
177
178         old_idx = task_bp_pinned(cpu, bp, type) - 1;
179         new_idx = old_idx + weight;
180
181         if (old_idx >= 0)
182                 tsk_pinned[old_idx]--;
183         if (new_idx >= 0)
184                 tsk_pinned[new_idx]++;
185 }
186
187 /*
188  * Add/remove the given breakpoint in our constraint table
189  */
190 static void
191 toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
192                int weight)
193 {
194         const struct cpumask *cpumask = cpumask_of_bp(bp);
195         int cpu;
196
197         if (!enable)
198                 weight = -weight;
199
200         /* Pinned counter cpu profiling */
201         if (!bp->hw.target) {
202                 get_bp_info(bp->cpu, type)->cpu_pinned += weight;
203                 return;
204         }
205
206         /* Pinned counter task profiling */
207         for_each_cpu(cpu, cpumask)
208                 toggle_bp_task_slot(bp, cpu, type, weight);
209
210         if (enable)
211                 list_add_tail(&bp->hw.bp_list, &bp_task_head);
212         else
213                 list_del(&bp->hw.bp_list);
214 }
215
/*
 * Function to perform processor-specific cleanup during unregistration.
 * Called from release_bp_slot() before the slot is given back.
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
	/*
	 * A weak stub function here for those archs that don't define
	 * it inside arch/.../kernel/hw_breakpoint.c
	 */
}
226
227 /*
228  * Constraints to check before allowing this new breakpoint counter:
229  *
230  *  == Non-pinned counter == (Considered as pinned for now)
231  *
232  *   - If attached to a single cpu, check:
233  *
234  *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
235  *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
236  *
237  *       -> If there are already non-pinned counters in this cpu, it means
238  *          there is already a free slot for them.
239  *          Otherwise, we check that the maximum number of per task
240  *          breakpoints (for this cpu) plus the number of per cpu breakpoint
241  *          (for this cpu) doesn't cover every registers.
242  *
243  *   - If attached to every cpus, check:
244  *
245  *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
246  *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
247  *
248  *       -> This is roughly the same, except we check the number of per cpu
249  *          bp for every cpu and we keep the max one. Same for the per tasks
250  *          breakpoints.
251  *
252  *
253  * == Pinned counter ==
254  *
255  *   - If attached to a single cpu, check:
256  *
257  *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
258  *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
259  *
260  *       -> Same checks as before. But now the info->flexible, if any, must keep
261  *          one register at least (or they will never be fed).
262  *
263  *   - If attached to every cpus, check:
264  *
265  *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
266  *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
267  */
268 static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
269 {
270         struct bp_busy_slots slots = {0};
271         enum bp_type_idx type;
272         int weight;
273
274         /* We couldn't initialize breakpoint constraints on boot */
275         if (!constraints_initialized)
276                 return -ENOMEM;
277
278         /* Basic checks */
279         if (bp_type == HW_BREAKPOINT_EMPTY ||
280             bp_type == HW_BREAKPOINT_INVALID)
281                 return -EINVAL;
282
283         type = find_slot_idx(bp_type);
284         weight = hw_breakpoint_weight(bp);
285
286         fetch_bp_busy_slots(&slots, bp, type);
287         /*
288          * Simulate the addition of this breakpoint to the constraints
289          * and see the result.
290          */
291         fetch_this_slot(&slots, weight);
292
293         /* Flexible counters need to keep at least one slot */
294         if (slots.pinned + (!!slots.flexible) > nr_slots[type])
295                 return -ENOSPC;
296
297         toggle_bp_slot(bp, true, type, weight);
298
299         return 0;
300 }
301
302 int reserve_bp_slot(struct perf_event *bp)
303 {
304         int ret;
305
306         mutex_lock(&nr_bp_mutex);
307
308         ret = __reserve_bp_slot(bp, bp->attr.bp_type);
309
310         mutex_unlock(&nr_bp_mutex);
311
312         return ret;
313 }
314
315 static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
316 {
317         enum bp_type_idx type;
318         int weight;
319
320         type = find_slot_idx(bp_type);
321         weight = hw_breakpoint_weight(bp);
322         toggle_bp_slot(bp, false, type, weight);
323 }
324
325 void release_bp_slot(struct perf_event *bp)
326 {
327         mutex_lock(&nr_bp_mutex);
328
329         arch_unregister_hw_breakpoint(bp);
330         __release_bp_slot(bp, bp->attr.bp_type);
331
332         mutex_unlock(&nr_bp_mutex);
333 }
334
335 static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
336 {
337         int err;
338
339         __release_bp_slot(bp, old_type);
340
341         err = __reserve_bp_slot(bp, new_type);
342         if (err) {
343                 /*
344                  * Reserve the old_type slot back in case
345                  * there's no space for the new type.
346                  *
347                  * This must succeed, because we just released
348                  * the old_type slot in the __release_bp_slot
349                  * call above. If not, something is broken.
350                  */
351                 WARN_ON(__reserve_bp_slot(bp, old_type));
352         }
353
354         return err;
355 }
356
357 static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
358 {
359         int ret;
360
361         mutex_lock(&nr_bp_mutex);
362         ret = __modify_bp_slot(bp, old_type, new_type);
363         mutex_unlock(&nr_bp_mutex);
364         return ret;
365 }
366
367 /*
368  * Allow the kernel debugger to reserve breakpoint slots without
369  * taking a lock using the dbg_* variant of for the reserve and
370  * release breakpoint slots.
371  */
372 int dbg_reserve_bp_slot(struct perf_event *bp)
373 {
374         if (mutex_is_locked(&nr_bp_mutex))
375                 return -1;
376
377         return __reserve_bp_slot(bp, bp->attr.bp_type);
378 }
379
380 int dbg_release_bp_slot(struct perf_event *bp)
381 {
382         if (mutex_is_locked(&nr_bp_mutex))
383                 return -1;
384
385         __release_bp_slot(bp, bp->attr.bp_type);
386
387         return 0;
388 }
389
390 static int hw_breakpoint_parse(struct perf_event *bp,
391                                const struct perf_event_attr *attr,
392                                struct arch_hw_breakpoint *hw)
393 {
394         int err;
395
396         err = hw_breakpoint_arch_parse(bp, attr, hw);
397         if (err)
398                 return err;
399
400         if (arch_check_bp_in_kernelspace(hw)) {
401                 if (attr->exclude_kernel)
402                         return -EINVAL;
403                 /*
404                  * Don't let unprivileged users set a breakpoint in the trap
405                  * path to avoid trap recursion attacks.
406                  */
407                 if (!capable(CAP_SYS_ADMIN))
408                         return -EPERM;
409         }
410
411         return 0;
412 }
413
414 int register_perf_hw_breakpoint(struct perf_event *bp)
415 {
416         struct arch_hw_breakpoint hw;
417         int err;
418
419         err = reserve_bp_slot(bp);
420         if (err)
421                 return err;
422
423         err = hw_breakpoint_parse(bp, &bp->attr, &hw);
424         if (err) {
425                 release_bp_slot(bp);
426                 return err;
427         }
428
429         bp->hw.info = hw;
430
431         return 0;
432 }
433
/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: opaque pointer handed to perf_event_create_kernel_counter()
 *	     together with @triggered
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 *
 * Return: whatever perf_event_create_kernel_counter() returns for a
 * counter attached to @tsk and not bound to a specific cpu (cpu = -1).
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context,
			    struct task_struct *tsk)
{
	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
						context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
450
451 static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
452                                     struct perf_event_attr *from)
453 {
454         to->bp_addr = from->bp_addr;
455         to->bp_type = from->bp_type;
456         to->bp_len  = from->bp_len;
457         to->disabled = from->disabled;
458 }
459
460 int
461 modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
462                                 bool check)
463 {
464         struct arch_hw_breakpoint hw;
465         int err;
466
467         err = hw_breakpoint_parse(bp, attr, &hw);
468         if (err)
469                 return err;
470
471         if (check) {
472                 struct perf_event_attr old_attr;
473
474                 old_attr = bp->attr;
475                 hw_breakpoint_copy_attr(&old_attr, attr);
476                 if (memcmp(&old_attr, attr, sizeof(*attr)))
477                         return -EINVAL;
478         }
479
480         if (bp->attr.bp_type != attr->bp_type) {
481                 err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
482                 if (err)
483                         return err;
484         }
485
486         hw_breakpoint_copy_attr(&bp->attr, attr);
487         bp->hw.info = hw;
488
489         return 0;
490 }
491
492 /**
493  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
494  * @bp: the breakpoint structure to modify
495  * @attr: new breakpoint attributes
496  */
497 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
498 {
499         int err;
500
501         /*
502          * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
503          * will not be possible to raise IPIs that invoke __perf_event_disable.
504          * So call the function directly after making sure we are targeting the
505          * current task.
506          */
507         if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
508                 perf_event_disable_local(bp);
509         else
510                 perf_event_disable(bp);
511
512         err = modify_user_hw_breakpoint_check(bp, attr, false);
513
514         if (!bp->attr.disabled)
515                 perf_event_enable(bp);
516
517         return err;
518 }
519 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
520
/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister, may be NULL
 *
 * A NULL @bp is silently ignored.
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (bp)
		perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
532
533 /**
534  * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
535  * @attr: breakpoint attributes
536  * @triggered: callback to trigger when we hit the breakpoint
537  *
538  * @return a set of per_cpu pointers to perf events
539  */
540 struct perf_event * __percpu *
541 register_wide_hw_breakpoint(struct perf_event_attr *attr,
542                             perf_overflow_handler_t triggered,
543                             void *context)
544 {
545         struct perf_event * __percpu *cpu_events, *bp;
546         long err = 0;
547         int cpu;
548
549         cpu_events = alloc_percpu(typeof(*cpu_events));
550         if (!cpu_events)
551                 return (void __percpu __force *)ERR_PTR(-ENOMEM);
552
553         get_online_cpus();
554         for_each_online_cpu(cpu) {
555                 bp = perf_event_create_kernel_counter(attr, cpu, NULL,
556                                                       triggered, context);
557                 if (IS_ERR(bp)) {
558                         err = PTR_ERR(bp);
559                         break;
560                 }
561
562                 per_cpu(*cpu_events, cpu) = bp;
563         }
564         put_online_cpus();
565
566         if (likely(!err))
567                 return cpu_events;
568
569         unregister_wide_hw_breakpoint(cpu_events);
570         return (void __percpu __force *)ERR_PTR(err);
571 }
572 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
573
574 /**
575  * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
576  * @cpu_events: the per cpu set of events to unregister
577  */
578 void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
579 {
580         int cpu;
581
582         for_each_possible_cpu(cpu)
583                 unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
584
585         free_percpu(cpu_events);
586 }
587 EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
588
/* Die notifier block, registered by init_hw_breakpoint() */
static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};
594
/*
 * Destroy callback installed by hw_breakpoint_event_init(): gives the
 * breakpoint slot of @event back.
 */
static void bp_perf_event_destroy(struct perf_event *event)
{
	release_bp_slot(event);
}
599
600 static int hw_breakpoint_event_init(struct perf_event *bp)
601 {
602         int err;
603
604         if (bp->attr.type != PERF_TYPE_BREAKPOINT)
605                 return -ENOENT;
606
607         /*
608          * no branch sampling for breakpoint events
609          */
610         if (has_branch_stack(bp))
611                 return -EOPNOTSUPP;
612
613         err = register_perf_hw_breakpoint(bp);
614         if (err)
615                 return err;
616
617         bp->destroy = bp_perf_event_destroy;
618
619         return 0;
620 }
621
622 static int hw_breakpoint_add(struct perf_event *bp, int flags)
623 {
624         if (!(flags & PERF_EF_START))
625                 bp->hw.state = PERF_HES_STOPPED;
626
627         if (is_sampling_event(bp)) {
628                 bp->hw.last_period = bp->hw.sample_period;
629                 perf_swevent_set_period(bp);
630         }
631
632         return arch_install_hw_breakpoint(bp);
633 }
634
/* pmu del callback: uninstall the breakpoint; @flags is not used */
static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
	arch_uninstall_hw_breakpoint(bp);
}
639
/* pmu start callback: clear the event state flags; @flags is not used */
static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
	bp->hw.state = 0;
}
644
/* pmu stop callback: mark the event as stopped; @flags is not used */
static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
	bp->hw.state = PERF_HES_STOPPED;
}
649
/*
 * The breakpoint pmu, registered under the name "breakpoint" by
 * init_hw_breakpoint().
 */
static struct pmu perf_breakpoint = {
	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */

	.event_init	= hw_breakpoint_event_init,
	.add		= hw_breakpoint_add,
	.del		= hw_breakpoint_del,
	.start		= hw_breakpoint_start,
	.stop		= hw_breakpoint_stop,
	.read		= hw_breakpoint_pmu_read,
};
660
661 int __init init_hw_breakpoint(void)
662 {
663         int cpu, err_cpu;
664         int i;
665
666         for (i = 0; i < TYPE_MAX; i++)
667                 nr_slots[i] = hw_breakpoint_slots(i);
668
669         for_each_possible_cpu(cpu) {
670                 for (i = 0; i < TYPE_MAX; i++) {
671                         struct bp_cpuinfo *info = get_bp_info(cpu, i);
672
673                         info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
674                                                         GFP_KERNEL);
675                         if (!info->tsk_pinned)
676                                 goto err_alloc;
677                 }
678         }
679
680         constraints_initialized = 1;
681
682         perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
683
684         return register_die_notifier(&hw_breakpoint_exceptions_nb);
685
686  err_alloc:
687         for_each_possible_cpu(err_cpu) {
688                 for (i = 0; i < TYPE_MAX; i++)
689                         kfree(get_bp_info(err_cpu, i)->tsk_pinned);
690                 if (err_cpu == cpu)
691                         break;
692         }
693
694         return -ENOMEM;
695 }
696
697