arch/x86/kernel/kprobes/opt.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
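/*
 * An optimized kprobe (optprobe) replaces the int3 breakpoint at the probe
 * address with a 5-byte relative jump to an out-of-line buffer that saves
 * registers, calls the probe handler, runs the displaced original
 * instructions, and jumps back. This avoids the int3 trap and single-step
 * overhead on the probe hit path.
 */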
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

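/*
 * Recover the original instruction bytes for an address that falls inside
 * the region overwritten by an optimized kprobe's relative jump, using the
 * bytes saved in op->optinsn.copied_insn. Returns @addr if no optimized
 * kprobe covers it, the address of @buf holding the recovered bytes
 * otherwise, or 0 if the kernel text cannot be read.
 */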
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is still being optimized */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe has been optimized, the original bytes may have been
	 * overwritten by the jump destination address. In that case, the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	if (probe_kernel_read(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	/* REX.W prefix + MOV opcode: movabs $val, %rdi */
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	/* MOV opcode: movl $val, %eax */
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}

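/*
 * Template for the out-of-line execution buffer. It builds a pt_regs-style
 * frame in the same layout as the int3 path (SAVE_REGS_STRING), loads the
 * address of the optimized_kprobe into the first argument register, calls
 * optimized_callback(), and then restores the registers. The copied
 * original instructions and a jump back to the probed function are
 * appended after optprobe_template_end by arch_prepare_optimized_kprobe().
 */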
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %esp\n"
			"	pushfl\n"
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags into esp */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addl $4, %esp\n"
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

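/*
 * The template is data in .rodata, not a real C function with a regular
 * stack frame, so tell objtool to skip stack validation for it.
 */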
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

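/* Byte offsets of the arg1-load slot, the call slot, and the template end. */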
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is pending delayed unoptimization */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->cs |= get_kernel_rpl();
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

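/*
 * Copy instructions from @src into @dest one at a time until at least
 * RELATIVEJUMP_SIZE bytes are covered, verifying that every copied
 * instruction can be executed out of line (boosted). @real is the address
 * at which the copy will eventually run, so relative operands can be fixed
 * up. Returns the copied length, or a negative error code.
 */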
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Indirect jump (FF /4, FF /5) */
		insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * A jump to an x86_indirect_thunk_* trampoline is treated as an
	 * indirect jump. Note that even with CONFIG_RETPOLINE=y, a kernel
	 * compiled with an older gcc may still emit plain indirect jumps,
	 * so this check is added on top of the indirect-jump check rather
	 * than replacing it.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}

/* Decode the whole function to ensure no instruction jumps into the target range */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and register setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)) ||
	    ((paddr >= (unsigned long)__irqentry_text_start) &&
	     (paddr <  (unsigned long)__irqentry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code may jump into this function,
			 * we can't optimize kprobes in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that no instruction jumps into the target range */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

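/*
 * An optprobe displaces op->optinsn.size bytes of original code; a kprobe
 * registered anywhere inside that range (other than at the first byte)
 * would be bypassed by the detour, so such a combination is rejected below.
 */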
/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the target instructions that the jump will replace.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify that the address gap is within the 2GB range reachable by
	 * a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy arch-dep-instance from template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += RELATIVEJUMP_SIZE;

	/* We have to use text_poke() for instruction buffer because it is RO */
	text_poke(slot, buf, len);
	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must hold kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[RELATIVEJUMP_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Back up the instructions that will be replaced by the jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
		       RELATIVE_ADDR_SIZE);

		insn_buff[0] = RELATIVEJUMP_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);

		list_del_init(&op->list);
	}
}

/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 insn_buff[RELATIVEJUMP_SIZE];
	u8 emulate_buff[RELATIVEJUMP_SIZE];

	/* Set int3 to the first byte for kprobes */
	insn_buff[0] = BREAKPOINT_INSTRUCTION;
	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);

	/*
	 * Rebuild the old relative jump so that text_poke_bp() can emulate
	 * it for any CPU that hits the transient int3 while patching.
	 */
	emulate_buff[0] = RELATIVEJUMP_OPCODE;
	*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
		     emulate_buff);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

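/*
 * If the hit kprobe is optimized, divert execution directly to the copied
 * instructions in the out-of-line buffer (skipping the register-saving
 * template) instead of single-stepping, and return 1; otherwise return 0.
 */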
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);