2 * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
8 * This is an implementation of a DWARF unwinder. Its main purpose is
9 * for generating stacktrace information. Based on the DWARF 3
10 * specification from http://www.dwarfstd.org.
13 * - DWARF64 doesn't work.
14 * - Registers with DWARF_VAL_OFFSET rules aren't handled properly.
18 #include <linux/kernel.h>
20 #include <linux/list.h>
21 #include <linux/mempool.h>
23 #include <linux/ftrace.h>
24 #include <asm/dwarf.h>
25 #include <asm/unwinder.h>
26 #include <asm/sections.h>
27 #include <asm/unaligned.h>
28 #include <asm/stacktrace.h>
30 /* Reserve enough memory for two stack frames */
31 #define DWARF_FRAME_MIN_REQ 2
32 /* ... with 4 registers per frame. */
33 #define DWARF_REG_MIN_REQ (DWARF_FRAME_MIN_REQ * 4)
35 static struct kmem_cache *dwarf_frame_cachep;
36 static mempool_t *dwarf_frame_pool;
38 static struct kmem_cache *dwarf_reg_cachep;
39 static mempool_t *dwarf_reg_pool;
41 static LIST_HEAD(dwarf_cie_list);
42 static DEFINE_SPINLOCK(dwarf_cie_lock);
44 static LIST_HEAD(dwarf_fde_list);
45 static DEFINE_SPINLOCK(dwarf_fde_lock);
47 static struct dwarf_cie *cached_cie;
50 * dwarf_frame_alloc_reg - allocate memory for a DWARF register
51 * @frame: the DWARF frame whose list of registers we insert on
52 * @reg_num: the register number
54 * Allocate space for, and initialise, a dwarf reg from
55 * dwarf_reg_pool and insert it onto the (unsorted) linked-list of
56 * dwarf registers for @frame.
58 * Return the initialised DWARF reg.
60 static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
63 struct dwarf_reg *reg;
65 reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
67 printk(KERN_WARNING "Unable to allocate a DWARF register\n");
69 * Let's just bomb hard here, we have no way to
75 reg->number = reg_num;
79 list_add(&reg->link, &frame->reg_list);
84 static void dwarf_frame_free_regs(struct dwarf_frame *frame)
86 struct dwarf_reg *reg, *n;
88 list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
90 mempool_free(reg, dwarf_reg_pool);
95 * dwarf_frame_reg - return a DWARF register
96 * @frame: the DWARF frame to search in for @reg_num
97 * @reg_num: the register number to search for
99 * Lookup and return the dwarf reg @reg_num for this frame. Return
100 * NULL if @reg_num is an invalid register number.
102 static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
103 unsigned int reg_num)
105 struct dwarf_reg *reg;
107 list_for_each_entry(reg, &frame->reg_list, link) {
108 if (reg->number == reg_num)
116 * dwarf_read_addr - read dwarf data
117 * @src: source address of data
118 * @dst: destination address to store the data to
120 * Read 'n' bytes from @src, where 'n' is the size of an address on
121 * the native machine. We return the number of bytes read, which
122 * should always be 'n'. We also have to be careful when reading
123 * from @src and writing to @dst, because they can be arbitrarily
124 * aligned. Return 'n' - the number of bytes read.
126 static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
128 u32 val = get_unaligned(src);
129 put_unaligned(val, dst);
130 return sizeof(unsigned long *);
134 * dwarf_read_uleb128 - read unsigned LEB128 data
135 * @addr: the address where the ULEB128 data is stored
136 * @ret: address to store the result
138 * Decode an unsigned LEB128 encoded datum. The algorithm is taken
139 * from Appendix C of the DWARF 3 spec. For information on the
140 * encodings refer to section "7.6 - Variable Length Data". Return
141 * the number of bytes read.
143 static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
154 byte = __raw_readb(addr);
158 result |= (byte & 0x7f) << shift;
171 * dwarf_read_leb128 - read signed LEB128 data
172 * @addr: the address of the LEB128 encoded data
173 * @ret: address to store the result
175 * Decode signed LEB128 data. The algorithm is taken from Appendix
176 * C of the DWARF 3 spec. Return the number of bytes read.
178 static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
190 byte = __raw_readb(addr);
192 result |= (byte & 0x7f) << shift;
200 /* The number of bits in a signed integer. */
201 num_bits = 8 * sizeof(result);
203 if ((shift < num_bits) && (byte & 0x40))
204 result |= (-1 << shift);
212 * dwarf_read_encoded_value - return the decoded value at @addr
213 * @addr: the address of the encoded value
214 * @val: where to write the decoded value
215 * @encoding: the encoding with which we can decode @addr
217 * GCC emits encoded addresses in the .eh_frame FDE entries. Decode
218 * the value at @addr using @encoding. The decoded value is written
219 * to @val and the number of bytes read is returned.
221 static int dwarf_read_encoded_value(char *addr, unsigned long *val,
224 unsigned long decoded_addr = 0;
227 switch (encoding & 0x70) {
228 case DW_EH_PE_absptr:
231 decoded_addr = (unsigned long)addr;
234 pr_debug("encoding=0x%x\n", (encoding & 0x70));
238 if ((encoding & 0x07) == 0x00)
239 encoding |= DW_EH_PE_udata4;
241 switch (encoding & 0x0f) {
242 case DW_EH_PE_sdata4:
243 case DW_EH_PE_udata4:
245 decoded_addr += get_unaligned((u32 *)addr);
246 __raw_writel(decoded_addr, val);
249 pr_debug("encoding=0x%x\n", encoding);
257 * dwarf_entry_len - return the length of an FDE or CIE
258 * @addr: the address of the entry
259 * @len: the length of the entry
261 * Read the initial_length field of the entry and store the size of
262 * the entry in @len. We return the number of bytes read. Return a
263 * count of 0 on error.
265 static inline int dwarf_entry_len(char *addr, unsigned long *len)
270 initial_len = get_unaligned((u32 *)addr);
274 * An initial length field value in the range DW_LEN_EXT_LO -
275 * DW_LEN_EXT_HI indicates an extension, and should not be
276 * interpreted as a length. The only extension that we currently
277 * understand is the use of DWARF64 addresses.
279 if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
281 * The 64-bit length field immediately follows the
282 * compulsory 32-bit length field.
284 if (initial_len == DW_EXT_DWARF64) {
285 *len = get_unaligned((u64 *)addr + 4);
288 printk(KERN_WARNING "Unknown DWARF extension\n");
298 * dwarf_lookup_cie - locate the cie
299 * @cie_ptr: the cie_pointer value of the CIE to look up
301 static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
303 struct dwarf_cie *cie;
306 spin_lock_irqsave(&dwarf_cie_lock, flags);
309 * We've cached the last CIE we looked up because chances are
310 * that the FDE wants this CIE.
312 if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
317 list_for_each_entry(cie, &dwarf_cie_list, link) {
318 if (cie->cie_pointer == cie_ptr) {
324 /* Couldn't find the entry in the list. */
325 if (&cie->link == &dwarf_cie_list)
328 spin_unlock_irqrestore(&dwarf_cie_lock, flags);
333 * dwarf_lookup_fde - locate the FDE that covers pc
334 * @pc: the program counter
336 struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
338 struct dwarf_fde *fde;
341 spin_lock_irqsave(&dwarf_fde_lock, flags);
343 list_for_each_entry(fde, &dwarf_fde_list, link) {
344 unsigned long start, end;
346 start = fde->initial_location;
347 end = fde->initial_location + fde->address_range;
349 if (pc >= start && pc < end)
353 /* Couldn't find the entry in the list. */
354 if (&fde->link == &dwarf_fde_list)
357 spin_unlock_irqrestore(&dwarf_fde_lock, flags);
363 * dwarf_cfa_execute_insns - execute instructions to calculate a CFA
364 * @insn_start: address of the first instruction
365 * @insn_end: address of the last instruction
366 * @cie: the CIE for this function
367 * @fde: the FDE for this function
368 * @frame: the instructions calculate the CFA for this frame
369 * @pc: the program counter of the address we're interested in
371 * Execute the Call Frame instruction sequence starting at
372 * @insn_start and ending at @insn_end. The instructions describe
373 * how to calculate the Canonical Frame Address of a stackframe.
374 * Store the results in @frame.
376 static int dwarf_cfa_execute_insns(unsigned char *insn_start,
377 unsigned char *insn_end,
378 struct dwarf_cie *cie,
379 struct dwarf_fde *fde,
380 struct dwarf_frame *frame,
384 unsigned char *current_insn;
385 unsigned int count, delta, reg, expr_len, offset;
386 struct dwarf_reg *regp;
388 current_insn = insn_start;
390 while (current_insn < insn_end && frame->pc <= pc) {
391 insn = __raw_readb(current_insn++);
394 * Firstly, handle the opcodes that embed their operands
395 * in the instructions.
397 switch (DW_CFA_opcode(insn)) {
398 case DW_CFA_advance_loc:
399 delta = DW_CFA_operand(insn);
400 delta *= cie->code_alignment_factor;
405 reg = DW_CFA_operand(insn);
406 count = dwarf_read_uleb128(current_insn, &offset);
407 current_insn += count;
408 offset *= cie->data_alignment_factor;
409 regp = dwarf_frame_alloc_reg(frame, reg);
411 regp->flags |= DWARF_REG_OFFSET;
415 reg = DW_CFA_operand(insn);
421 * Secondly, handle the opcodes that don't embed their
422 * operands in the instruction.
427 case DW_CFA_advance_loc1:
428 delta = *current_insn++;
429 frame->pc += delta * cie->code_alignment_factor;
431 case DW_CFA_advance_loc2:
432 delta = get_unaligned((u16 *)current_insn);
434 frame->pc += delta * cie->code_alignment_factor;
436 case DW_CFA_advance_loc4:
437 delta = get_unaligned((u32 *)current_insn);
439 frame->pc += delta * cie->code_alignment_factor;
441 case DW_CFA_offset_extended:
442 count = dwarf_read_uleb128(current_insn, &reg);
443 current_insn += count;
444 count = dwarf_read_uleb128(current_insn, &offset);
445 current_insn += count;
446 offset *= cie->data_alignment_factor;
448 case DW_CFA_restore_extended:
449 count = dwarf_read_uleb128(current_insn, &reg);
450 current_insn += count;
452 case DW_CFA_undefined:
453 count = dwarf_read_uleb128(current_insn, &reg);
454 current_insn += count;
455 regp = dwarf_frame_alloc_reg(frame, reg);
456 regp->flags |= DWARF_UNDEFINED;
459 count = dwarf_read_uleb128(current_insn,
460 &frame->cfa_register);
461 current_insn += count;
462 count = dwarf_read_uleb128(current_insn,
464 current_insn += count;
466 frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
468 case DW_CFA_def_cfa_register:
469 count = dwarf_read_uleb128(current_insn,
470 &frame->cfa_register);
471 current_insn += count;
472 frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
474 case DW_CFA_def_cfa_offset:
475 count = dwarf_read_uleb128(current_insn, &offset);
476 current_insn += count;
477 frame->cfa_offset = offset;
479 case DW_CFA_def_cfa_expression:
480 count = dwarf_read_uleb128(current_insn, &expr_len);
481 current_insn += count;
483 frame->cfa_expr = current_insn;
484 frame->cfa_expr_len = expr_len;
485 current_insn += expr_len;
487 frame->flags |= DWARF_FRAME_CFA_REG_EXP;
489 case DW_CFA_offset_extended_sf:
490 count = dwarf_read_uleb128(current_insn, &reg);
491 current_insn += count;
492 count = dwarf_read_leb128(current_insn, &offset);
493 current_insn += count;
494 offset *= cie->data_alignment_factor;
495 regp = dwarf_frame_alloc_reg(frame, reg);
496 regp->flags |= DWARF_REG_OFFSET;
499 case DW_CFA_val_offset:
500 count = dwarf_read_uleb128(current_insn, &reg);
501 current_insn += count;
502 count = dwarf_read_leb128(current_insn, &offset);
503 offset *= cie->data_alignment_factor;
504 regp = dwarf_frame_alloc_reg(frame, reg);
505 regp->flags |= DWARF_VAL_OFFSET;
508 case DW_CFA_GNU_args_size:
509 count = dwarf_read_uleb128(current_insn, &offset);
510 current_insn += count;
512 case DW_CFA_GNU_negative_offset_extended:
513 count = dwarf_read_uleb128(current_insn, &reg);
514 current_insn += count;
515 count = dwarf_read_uleb128(current_insn, &offset);
516 offset *= cie->data_alignment_factor;
518 regp = dwarf_frame_alloc_reg(frame, reg);
519 regp->flags |= DWARF_REG_OFFSET;
520 regp->addr = -offset;
523 pr_debug("unhandled DWARF instruction 0x%x\n", insn);
533 * dwarf_unwind_stack - recursively unwind the stack
534 * @pc: address of the function to unwind
535 * @prev: struct dwarf_frame of the previous stackframe on the callstack
537 * Return a struct dwarf_frame representing the most recent frame
538 * on the callstack. Each of the lower (older) stack frames is
539 * linked via the "prev" member.
541 struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
542 struct dwarf_frame *prev)
544 struct dwarf_frame *frame;
545 struct dwarf_cie *cie;
546 struct dwarf_fde *fde;
547 struct dwarf_reg *reg;
551 * If this is the first invocation of this recursive function we
552 * need to get the contents of a physical register to get the CFA
553 * in order to begin the virtual unwinding of the stack.
555 * NOTE: the return address is guaranteed to be setup by the
556 * time this function makes its first function call.
559 pc = (unsigned long)current_text_addr();
561 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
563 * If our stack has been patched by the function graph tracer
564 * then we might see the address of return_to_handler() where we
565 * expected to find the real return address.
567 if (pc == (unsigned long)&return_to_handler) {
568 int index = current->curr_ret_stack;
571 * We currently have no way of tracking how many
572 * return_to_handler()'s we've seen. If there is more
573 * than one patched return address on our stack,
578 pc = current->ret_stack[index].ret;
582 frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
584 printk(KERN_ERR "Unable to allocate a dwarf frame\n");
588 INIT_LIST_HEAD(&frame->reg_list);
591 frame->return_addr = 0;
593 fde = dwarf_lookup_fde(pc);
596 * This is our normal exit path - the one that stops the
597 * recursion. There are two reasons why we might exit
600 * a) pc has no associated DWARF frame info and so
601 * we don't know how to unwind this frame. This is
602 * usually the case when we're trying to unwind a
603 * frame that was called from some assembly code
604 * that has no DWARF info, e.g. syscalls.
606 * b) the DEBUG info for pc is bogus. There's
607 * really no way to distinguish this case from the
608 * case above, which sucks because we could print a
614 cie = dwarf_lookup_cie(fde->cie_pointer);
616 frame->pc = fde->initial_location;
618 /* CIE initial instructions */
619 dwarf_cfa_execute_insns(cie->initial_instructions,
620 cie->instructions_end, cie, fde,
623 /* FDE instructions */
624 dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
627 /* Calculate the CFA */
628 switch (frame->flags) {
629 case DWARF_FRAME_CFA_REG_OFFSET:
631 reg = dwarf_frame_reg(prev, frame->cfa_register);
632 UNWINDER_BUG_ON(!reg);
633 UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
635 addr = prev->cfa + reg->addr;
636 frame->cfa = __raw_readl(addr);
640 * Again, this is the first invocation of this
641 * recursive function. We need to physically
642 * read the contents of a register in order to
643 * get the Canonical Frame Address for this
646 frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
649 frame->cfa += frame->cfa_offset;
655 reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);
658 * If we haven't seen the return address register or the return
659 * address column is undefined then we must assume that this is
660 * the end of the callstack.
662 if (!reg || reg->flags == DWARF_UNDEFINED)
665 UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
667 addr = frame->cfa + reg->addr;
668 frame->return_addr = __raw_readl(addr);
673 dwarf_frame_free_regs(frame);
674 mempool_free(frame, dwarf_frame_pool);
678 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
681 struct dwarf_cie *cie;
685 cie = kzalloc(sizeof(*cie), GFP_KERNEL);
692 * Record the offset into the .eh_frame section
693 * for this CIE. It allows this CIE to be
694 * quickly and easily looked up from the
697 cie->cie_pointer = (unsigned long)entry;
699 cie->version = *(char *)p++;
700 UNWINDER_BUG_ON(cie->version != 1);
702 cie->augmentation = p;
703 p += strlen(cie->augmentation) + 1;
705 count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
708 count = dwarf_read_leb128(p, &cie->data_alignment_factor);
712 * Which column in the rule table contains the
715 if (cie->version == 1) {
716 cie->return_address_reg = __raw_readb(p);
719 count = dwarf_read_uleb128(p, &cie->return_address_reg);
723 if (cie->augmentation[0] == 'z') {
724 unsigned int length, count;
725 cie->flags |= DWARF_CIE_Z_AUGMENTATION;
727 count = dwarf_read_uleb128(p, &length);
730 UNWINDER_BUG_ON((unsigned char *)p > end);
732 cie->initial_instructions = p + length;
736 while (*cie->augmentation) {
738 * "L" indicates a byte showing how the
739 * LSDA pointer is encoded. Skip it.
741 if (*cie->augmentation == 'L') {
744 } else if (*cie->augmentation == 'R') {
746 * "R" indicates a byte showing
747 * how FDE addresses are
750 cie->encoding = *(char *)p++;
752 } else if (*cie->augmentation == 'P') {
754 * "P" indicates a personality
759 } else if (*cie->augmentation == 'S') {
763 * Unknown augmentation. Assume
766 p = cie->initial_instructions;
772 cie->initial_instructions = p;
773 cie->instructions_end = end;
776 spin_lock_irqsave(&dwarf_cie_lock, flags);
777 list_add_tail(&cie->link, &dwarf_cie_list);
778 spin_unlock_irqrestore(&dwarf_cie_lock, flags);
783 static int dwarf_parse_fde(void *entry, u32 entry_type,
784 void *start, unsigned long len,
787 struct dwarf_fde *fde;
788 struct dwarf_cie *cie;
793 fde = kzalloc(sizeof(*fde), GFP_KERNEL);
800 * In a .eh_frame section the CIE pointer is the
801 * delta between the address within the FDE
803 fde->cie_pointer = (unsigned long)(p - entry_type - 4);
805 cie = dwarf_lookup_cie(fde->cie_pointer);
809 count = dwarf_read_encoded_value(p, &fde->initial_location,
812 count = dwarf_read_addr(p, &fde->initial_location);
817 count = dwarf_read_encoded_value(p, &fde->address_range,
818 cie->encoding & 0x0f);
820 count = dwarf_read_addr(p, &fde->address_range);
824 if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
826 count = dwarf_read_uleb128(p, &length);
830 /* Call frame instructions. */
831 fde->instructions = p;
835 spin_lock_irqsave(&dwarf_fde_lock, flags);
836 list_add_tail(&fde->link, &dwarf_fde_list);
837 spin_unlock_irqrestore(&dwarf_fde_lock, flags);
842 static void dwarf_unwinder_dump(struct task_struct *task,
843 struct pt_regs *regs,
845 const struct stacktrace_ops *ops,
848 struct dwarf_frame *frame, *_frame;
849 unsigned long return_addr;
855 frame = dwarf_unwind_stack(return_addr, _frame);
858 dwarf_frame_free_regs(_frame);
859 mempool_free(_frame, dwarf_frame_pool);
864 if (!frame || !frame->return_addr)
867 return_addr = frame->return_addr;
868 ops->address(data, return_addr, 1);
872 static struct unwinder dwarf_unwinder = {
873 .name = "dwarf-unwinder",
874 .dump = dwarf_unwinder_dump,
878 static void dwarf_unwinder_cleanup(void)
880 struct dwarf_cie *cie;
881 struct dwarf_fde *fde;
884 * Deallocate all the memory allocated for the DWARF unwinder.
885 * Traverse all the FDE/CIE lists and remove and free all the
886 * memory associated with those data structures.
888 list_for_each_entry(cie, &dwarf_cie_list, link)
891 list_for_each_entry(fde, &dwarf_fde_list, link)
894 kmem_cache_destroy(dwarf_reg_cachep);
895 kmem_cache_destroy(dwarf_frame_cachep);
899 * dwarf_unwinder_init - initialise the dwarf unwinder
901 * Build the data structures describing the .eh_frame section to
902 * make it easier to lookup CIE and FDE entries. Because the
903 * .eh_frame section is packed as tightly as possible it is not
904 * easy to lookup the FDE for a given PC, so we build a list of FDE
905 * and CIE entries that make it easier.
907 static int __init dwarf_unwinder_init(void)
913 unsigned int c_entries, f_entries;
915 INIT_LIST_HEAD(&dwarf_cie_list);
916 INIT_LIST_HEAD(&dwarf_fde_list);
920 entry = &__start_eh_frame;
922 dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
923 sizeof(struct dwarf_frame), 0,
924 SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
926 dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
927 sizeof(struct dwarf_reg), 0,
928 SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
930 dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
935 dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
940 while ((char *)entry < __stop_eh_frame) {
943 count = dwarf_entry_len(p, &len);
946 * We read a bogus length field value. There is
947 * nothing we can do here apart from disabling
948 * the DWARF unwinder. We can't even skip this
949 * entry and move to the next one because 'len'
950 * tells us where our next entry is.
956 /* initial length does not include itself */
959 entry_type = get_unaligned((u32 *)p);
962 if (entry_type == DW_EH_FRAME_CIE) {
963 err = dwarf_parse_cie(entry, p, len, end);
969 err = dwarf_parse_fde(entry, entry_type, p, len, end);
976 entry = (char *)entry + len + 4;
979 printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
980 c_entries, f_entries);
982 err = unwinder_register(&dwarf_unwinder);
989 printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
990 dwarf_unwinder_cleanup();
993 early_initcall(dwarf_unwinder_init);