2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
5 /*--- priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Valgrind, a dynamic binary instrumentation
12 Copyright (C) 2000-2010 Julian Seward
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 The GNU General Public License is contained in the file COPYING.
33 Stabs reader greatly improved by Nick Nethercote, Apr 02.
34 This module was also extensively hacked on by Jeremy Fitzhardinge
37 /* See comment at top of debuginfo.c for explanation of
38 the _svma / _avma / _image / _bias naming scheme.
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41 priv_tytypes.h to be included before it. */
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
46 /* --------------------- SYMBOLS --------------------- */
48 /* A structure to hold an ELF/XCOFF symbol (very crudely). */
51 Addr addr; /* lowest address of entity */
52 Addr tocptr; /* ppc64-linux only: value that R2 should have */
53 UChar *name; /* name */
54 // XXX: this could be shrunk (on 32-bit platforms) by using 31 bits for
55 // the size and 1 bit for the isText. If you do this, make sure that
56 // all assignments to isText use 0 or 1 (or True or False), and that a
57 // positive number larger than 1 is never used to represent True.
58 UInt size; /* size in bytes */
60 Bool isIFunc; /* symbol is an indirect function? */
64 /* --------------------- SRCLOCS --------------------- */
66 /* Line count at which overflow happens, due to line numbers being
67 stored as shorts in `struct nlist' in a.out.h. Evaluates to 1
   shifted left by the number of bits in a short (assuming 8-bit
   bytes). */
68 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
/* Bit-field widths used by the addr-to-source line record for its
   line number (LINENO_BITS) and its size in bytes (LOC_SIZE_BITS);
   the two widths add up to 32. */
70 #define LINENO_BITS 20
71 #define LOC_SIZE_BITS (32 - LINENO_BITS)
/* Largest line number that fits in LINENO_BITS bits. */
72 #define MAX_LINENO ((1 << LINENO_BITS) - 1)
74 /* Largest size that fits in LOC_SIZE_BITS bits (4095 with the widths
   above). Unlikely to have any lines with instruction ranges larger
   than that. */
75 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1)
77 /* Number used to detect line number overflows; if one line is
78 60000-odd smaller than the previous, it was probably an overflow.
80 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
82 /* A structure to hold addr-to-source info for a single line. There
83 can be a lot of these, hence the dense packing. */
87 Addr addr; /* lowest address for this line */
89 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
90 UInt lineno:LINENO_BITS; /* source line number, or zero */
92 UChar* filename; /* source filename */
94 UChar* dirname; /* source directory name */
98 /* --------------------- CF INFO --------------------- */
100 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
101 address range [base .. base+len-1].
103 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
104 some point and {e,r}ip is in the range [base .. base+len-1], it
105 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
106 current frame and also ra, the return address of the current frame.
108 First off, calculate CFA, the Canonical Frame Address, thusly:
110 cfa = case cfa_how of
111 CFIC_IA_SPREL -> {e,r}sp + cfa_off
112 CFIC_IA_BPREL -> {e,r}bp + cfa_off
113 CFIC_IA_EXPR -> expr whose index is in cfa_off
115 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
116 this frame's {e,r}ra value can be calculated like this:
118 old_{e,r}sp/{e,r}bp/ra
119 = case {e,r}sp/{e,r}bp/ra_how of
120 CFIR_UNKNOWN -> we don't know, sorry
121 CFIR_SAME -> same as it was before (sp/fp only)
122 CFIR_CFAREL -> cfa + sp/bp/ra_off
123 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
124 CFIR_EXPR -> expr whose index is in sp/bp/ra_off
126 On ARM it's pretty much the same, except we have more registers to
129 cfa = case cfa_how of
130 CFIC_ARM_R13REL -> r13 + cfa_off
131 CFIC_ARM_R12REL -> r12 + cfa_off
132 CFIC_ARM_R11REL -> r11 + cfa_off
133 CFIC_ARM_R7REL -> r7 + cfa_off
134 CFIC_EXPR -> expr whose index is in cfa_off
136 old_r14/r13/r12/r11/r7/ra
137 = case r14/r13/r12/r11/r7/ra_how of
138 CFIR_UNKNOWN -> we don't know, sorry
139 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
140 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
141 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
142 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
144 On s390x we have a similar logic as x86 or amd64. We need the stack pointer
145 (r15), the frame pointer r11 (like BP) and together with the instruction
146 address in the PSW we can calculate the previous values:
147 cfa = case cfa_how of
148 CFIC_IA_SPREL -> r15 + cfa_off
149 CFIC_IA_BPREL -> r11 + cfa_off
150 CFIC_IA_EXPR -> expr whose index is in cfa_off
153 = case sp/fp/ra_how of
154 CFIR_UNKNOWN -> we don't know, sorry
155 CFIR_SAME -> same as it was before (sp/fp only)
156 CFIR_CFAREL -> cfa + sp/fp/ra_off
157 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
158 CFIR_EXPR -> expr whose index is in sp/fp/ra_off
/* CFIC_ values: stored in a DiCfSI's cfa_how field; they select how
   the CFA (Canonical Frame Address) is computed. */
161 #define CFIC_IA_SPREL ((UChar)1) /* cfa = stack pointer + cfa_off ({e,r}sp; r15 on s390x) */
162 #define CFIC_IA_BPREL ((UChar)2) /* cfa = frame pointer + cfa_off ({e,r}bp; r11 on s390x) */
163 #define CFIC_IA_EXPR ((UChar)3) /* cfa = expr whose index is in cfa_off */
164 #define CFIC_ARM_R13REL ((UChar)4) /* cfa = r13 + cfa_off */
165 #define CFIC_ARM_R12REL ((UChar)5) /* cfa = r12 + cfa_off */
166 #define CFIC_ARM_R11REL ((UChar)6) /* cfa = r11 + cfa_off */
167 #define CFIC_ARM_R7REL ((UChar)7) /* cfa = r7 + cfa_off */
168 #define CFIC_EXPR ((UChar)8) /* all targets; presumably cfa = expr whose index is in cfa_off -- confirm */
/* CFIR_ values: stored in the per-register *_how fields (ra_how,
   sp_how, ...); they select how the caller's value of that register
   is recovered. "<reg>_off" below is the matching offset field. */
170 #define CFIR_UNKNOWN ((UChar)64) /* value cannot be determined */
171 #define CFIR_SAME ((UChar)65) /* same as it was before (not used for ra) */
172 #define CFIR_CFAREL ((UChar)66) /* cfa + <reg>_off */
173 #define CFIR_MEMCFAREL ((UChar)67) /* *( cfa + <reg>_off ) */
174 #define CFIR_EXPR ((UChar)68) /* expr whose index is in <reg>_off */
176 #if defined(VGA_x86) || defined(VGA_amd64)
181 UChar cfa_how; /* a CFIC_IA value */
182 UChar ra_how; /* a CFIR_ value */
183 UChar sp_how; /* a CFIR_ value */
184 UChar bp_how; /* a CFIR_ value */
191 #elif defined(VGA_arm)
196 UChar cfa_how; /* a CFIC_ value */
197 UChar ra_how; /* a CFIR_ value */
198 UChar r14_how; /* a CFIR_ value */
199 UChar r13_how; /* a CFIR_ value */
200 UChar r12_how; /* a CFIR_ value */
201 UChar r11_how; /* a CFIR_ value */
202 UChar r7_how; /* a CFIR_ value */
212 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
213 /* Just have a struct with the common fields in, so that code that
214 processes the common fields doesn't have to be ifdef'd against
215 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux
221 UChar cfa_how; /* a CFIC_ value */
222 UChar ra_how; /* a CFIR_ value */
227 #elif defined(VGA_s390x)
232 UChar cfa_how; /* a CFIC_ value */
233 UChar sp_how; /* a CFIR_ value */
234 UChar ra_how; /* a CFIR_ value */
235 UChar fp_how; /* a CFIR_ value */
243 # error "Unknown arch"
/* CfiExpr helpers.
   NOTE(review): the definitions are not visible here. Each
   ML_(CfiExpr_*) function presumably appends one expression node of
   the named kind to 'dst' (an XArray of CfiExpr) and returns that
   node's index in 'dst'; ixAddr, ixL and ixR look like indices of
   nodes already present in the same array -- confirm against the
   definitions. */
308 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
309 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
310 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
311 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR );
312 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
313 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
/* Presumably prints the expression held at index 'ix' of 'src' --
   confirm against the definition. */
315 extern void ML_(ppCfiExpr)( XArray* src, Int ix );
317 /* ---------------- FPO INFO (Windows PE) -------------- */
319 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
322 struct _FPO_DATA { /* 16 bytes */
323 UInt ulOffStart; /* offset of 1st byte of function code */
324 UInt cbProcSize; /* # bytes in function */
325 UInt cdwLocals; /* # bytes/4 in locals */
326 UShort cdwParams; /* # bytes/4 in params */
327 UChar cbProlog; /* # bytes in prolog */
328 UChar cbRegs :3; /* # regs saved */
329 UChar fHasSEH:1; /* Structured Exception Handling */
330 UChar fUseBP :1; /* EBP has been used */
332 UChar cbFrame:2; /* frame type */
/* NOTE(review): presumably the possible values of the 2-bit cbFrame
   ("frame type") field of struct _FPO_DATA -- confirm. */
336 #define PDB_FRAME_FPO 0
337 #define PDB_FRAME_TRAP 1
338 #define PDB_FRAME_TSS 2
340 /* --------------------- VARIABLES --------------------- */
346 XArray* /* of DiVariable */ vars;
352 UChar* name; /* in DebugInfo.strchunks */
353 UWord typeR; /* a cuOff */
354 GExpr* gexpr; /* on DebugInfo.gexprs list */
355 GExpr* fbGX; /* SHARED. */
356 UChar* fileName; /* where declared; may be NULL. in
357 DebugInfo.strchunks */
358 Int lineNo; /* where declared; may be zero. */
363 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
365 /* --------------------- DEBUGINFO --------------------- */
367 /* This is the top-level data type. It's a structure which contains
368 information pertaining to one mapped ELF object. This type is
369 exported only abstractly - in pub_tool_debuginfo.h. */
/* Number of UChar elements in the strtab array of each string-table
   chunk (struct strchunk). */
371 #define SEGINFO_STRCHUNKSIZE (64*1024)
377 struct _DebugInfo* next; /* list of DebugInfos */
378 Bool mark; /* marked for deletion? */
380 /* An abstract handle, which can be used by entities outside of
381 m_debuginfo to (in an abstract datatype sense) refer to this
382 struct _DebugInfo. A .handle of zero is invalid; valid handles
383 are 1 and above. The same handle is never issued twice (in any
384 given run of Valgrind), so a handle becomes invalid when the
385 associated struct _DebugInfo is discarded, and remains invalid
386 forever thereafter. The .handle field is set as soon as this
387 structure is allocated. */
390 /* Used for debugging only - indicate what stuff to dump whilst
391 reading stuff into the seginfo. Are computed as early in the
392 lifetime of the DebugInfo as possible -- at the point when it is
393 created. Use these when deciding what to spew out; do not use
394 the global VG_(clo_blah) flags. */
396 Bool trace_symtab; /* symbols, our style */
397 Bool trace_cfi; /* dwarf frame unwind, our style */
398 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */
399 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */
400 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
402 /* Fields that must be filled in before we can start reading
403 anything from the ELF file. These fields are filled in by
404 VG_(di_notify_mmap) and its immediate helpers. */
406 UChar* filename; /* in mallocville (VG_AR_DINFO) */
407 UChar* memname; /* also in VG_AR_DINFO. AIX5 only: .a member name */
409 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */
410 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */
412 Addr rx_map_avma; /* these fields record the file offset, length */
413 SizeT rx_map_size; /* and map address of the r?x mapping we believe */
414 OffT rx_map_foff; /* is the .text segment mapping */
416 Addr rw_map_avma; /* ditto, for the rw? mapping we believe is the */
417 SizeT rw_map_size; /* .data segment mapping */
420 /* Once both a rw? and r?x mapping for .filename have been
421 observed, we can go on to read the symbol tables and debug info.
422 .have_dinfo flags when that has happened. */
423 /* If have_dinfo is False, then all fields except "*rx_map*" and
424 "*rw_map*" are invalid and should not be consulted. */
425 Bool have_dinfo; /* initially False */
427 /* All the rest of the fields in this structure are filled in once
428 we have committed to reading the symbols and debug info (that
429 is, at the point where .have_dinfo is set to True). */
431 /* The file's soname. FIXME: ensure this is always allocated in
435 /* Description of some important mapped segments. The presence or
436 absence of the mapping is denoted by the _present field, since
437 in some obscure circumstances (to do with data/sdata/bss) it is
438 possible for the mapping to be present but have zero size.
439 Certainly text_ is mandatory on all platforms; not sure about
442 --------------------------------------------------------
444 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
446 either (rx_map_size == 0 && cfsi == NULL) (the degenerate case)
448 or the normal case, which is the AND of the following:
450 (1) no two DebugInfos with rx_map_size > 0
451 have overlapping [rx_map_avma,+rx_map_size)
452 (2) [cfsi_minavma,cfsi_maxavma] does not extend
453 beyond [rx_map_avma,+rx_map_size); that is, the former is a
454 subrange or equal to the latter.
455 (3) all DiCfSI in the cfsi array all have ranges that fall within
456 [rx_map_avma,+rx_map_size).
457 (4) all DiCfSI in the cfsi array are non-overlapping
459 The cumulative effect of these restrictions is to ensure that
460 all the DiCfSI records in the entire system are non overlapping.
461 Hence any address falls into either exactly one DiCfSI record,
462 or none. Hence it is safe to cache the results of searches for
463 DiCfSI records. This is the whole point of these restrictions.
464 The caching of DiCfSI searches is done in VG_(use_CF_info). The
465 cache is flushed after any change to debugInfo_list. DiCfSI
466 searches are cached because they are central to stack unwinding
469 Where are these invariants imposed and checked?
471 They are checked after a successful read of debuginfo into
472 a DebugInfo*, in check_CFSI_related_invariants.
474 (1) is not really imposed anywhere. We simply assume that the
475 kernel will not map the text segments from two different objects
476 into the same space. Sounds reasonable.
478 (2) follows from (4) and (3). It is ensured by canonicaliseCFI.
479 (3) is ensured by ML_(addDiCfSI).
480 (4) is ensured by canonicaliseCFI.
482 --------------------------------------------------------
484 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
486 The _debug_{svma,bias} fields were added as part of a fix to
487 #185816. The problem encompassed in that bug report was that it
488 wasn't correct to apply the bias values deduced for a
489 primary object to its associated debuginfo object, because the
490 debuginfo object (or the primary) could have been prelinked to a
491 different SVMA. Hence debuginfo and primary objects need to
492 have their own biases.
494 ------ JRS: (referring to r9329): ------
495 Let me see if I understand the workings correctly. Initially
496 the _debug_ values are set to the same values as the "normal"
497 ones, as there's a bunch of bits of code like this (in
500 di->text_svma = svma;
502 di->text_bias = rx_bias;
503 di->text_debug_svma = svma;
504 di->text_debug_bias = rx_bias;
506 If a debuginfo object subsequently shows up then the
507 _debug_svma/bias are set for the debuginfo object. Result is
508 that if there's no debuginfo object then the values are the same
509 as the primary-object values, and if there is a debuginfo object
510 then they will (or at least may) be different.
512 Then when we need to actually bias something, we'll have to
513 decide whether to use the primary bias or the debuginfo bias.
514 And the strategy is to use the primary bias for ELF symbols but
515 the debuginfo bias for anything pulled out of Dwarf.
518 Correct - the debug_svma and bias values apply to any address
519 read from the debug data regardless of where that debug data is
520 stored and the other values are used for addresses from other
521 places (primarily the symbol table).
524 Ok; so this was my only area of concern. Are there any
525 corner-case scenarios where this wouldn't be right? It sounds
526 like we're assuming the ELF symbols come from the primary object
527 and, if there is a debug object, then all the Dwarf comes from
528 there. But what if (eg) both symbols and Dwarf come from the
529 debug object? Is that even possible or allowable?
532 You may have a point...
534 The current logic is to try and take any one set of data from
535 either the base object or the debug object. There are four sets
543 If we see the primary section for a given set in the base object
544 then we ignore all sections relating to that set in the debug
547 Now in principle if we saw a secondary section (like debug_line
548 say) in the base object, but not the main section (debug_info in
549 this case) then we would take debug_info from the debug object
550 but would use the debug_line from the base object unless we saw
551 a replacement copy in the debug object. That's probably unlikely
554 A bigger issue might be, as you say, the symbol table as we will
555 pick that up from the debug object if it isn't in the base. The
556 dynamic symbol table will always have to be in the base object
557 though so we will have to be careful when processing symbols to
558 know which table we are reading in that case.
560 What we probably need to do is tell read_elf_symtab which object
561 the symbols it is being asked to read came from.
563 (A followup patch to deal with this was committed in r9469).
571 Addr text_debug_svma;
572 PtrdiffT text_debug_bias;
579 Addr data_debug_svma;
580 PtrdiffT data_debug_bias;
587 Addr sdata_debug_svma;
588 PtrdiffT sdata_debug_bias;
594 PtrdiffT rodata_bias;
595 Addr rodata_debug_svma;
596 PtrdiffT rodata_debug_bias;
604 PtrdiffT bss_debug_bias;
611 Addr sbss_debug_svma;
612 PtrdiffT sbss_debug_bias;
625 /* .opd -- needed on ppc64-linux for finding symbols */
629 /* .ehframe -- needed on amd64-linux for stack unwinding */
630 Bool ehframe_present;
634 /* Sorted tables of stuff we snarfed from the file. This is the
635 eventual product of reading the debug info. All this stuff
636 lives in VG_AR_DINFO. */
638 /* An expandable array of symbols. */
642 /* An expandable array of locations. */
646 /* An expandable array of CFI summary info records. Also includes
647 summary address bounds, showing the min and max address covered
648 by any of the records, as an aid to fast searching. And, if the
649 records require any expression nodes, they are stored in
656 XArray* cfsi_exprs; /* XArray of CfiExpr */
658 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
659 data. Non-expandable array, hence .size == .used. */
665 /* Expandable arrays of characters -- the string table. Pointers
666 into this are stable (the arrays are not reallocated). */
669 struct strchunk* next;
670 UChar strtab[SEGINFO_STRCHUNKSIZE];
673 /* Variable scope information, as harvested from Dwarf3 files.
677 array of (array of PC address ranges and variables)
679 The outer array indexes over scopes, with Entry 0 containing
680 information on variables which exist for any value of the program
681 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
682 etc contain information on increasingly deeply nested variables.
684 Each inner array is an array of (an address range, and a set
685 of variables that are in scope over that address range).
687 The address ranges may not overlap.
689 Since Entry 0 in the outer array holds information on variables
690 that exist for any value of the PC (that is, global vars), it
691 follows that Entry 0's inner array can only have one address
692 range pair, one that covers the entire address space.
694 XArray* /* of OSet of DiAddrRange */varinfo;
696 /* These are arrays of the relevant typed objects, held here
697 partially for the purposes of visiting each object exactly once
698 when we need to delete them. */
700 /* An array of TyEnts. These are needed to make sense of any types
701 in the .varinfo. Also, when deleting this DebugInfo, we must
702 first traverse this array and throw away malloc'd stuff hanging
703 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
704 XArray* /* of TyEnt */ admin_tyents;
706 /* An array of guarded DWARF3 expressions. */
707 XArray* admin_gexprs;
710 /* --------------------- functions --------------------- */
712 /* ------ Adding ------ */
714 /* Add the symbol 'sym' to di's symbol table. */
715 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
717 /* Add a line-number record to a DebugInfo. */
719 void ML_(addLineInfo) ( struct _DebugInfo* di,
721 UChar* dirname, /* NULL is allowable */
722 Addr this, Addr next, Int lineno, Int entry);
724 /* Shrink completed tables to save memory. */
726 void ML_(shrinkSym) ( struct _DebugInfo *di );
728 void ML_(shrinkLineInfo) ( struct _DebugInfo *di );
730 /* Add the CFI summary record 'cfsi' to 'di'. The supplied DiCfSI is
   copied. */
731 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
733 /* Add the string 'str' to the string table of 'di'. If len==-1,
734 ML_(addStr) will itself measure the length of the string.
   NOTE(review): the returned pointer is presumably the address of
   the stored copy inside the string table -- confirm. */
735 extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len );
737 extern void ML_(addVar)( struct _DebugInfo* di,
742 UWord typeR, /* a cuOff */
744 GExpr* fbGX, /* SHARED. */
745 UChar* fileName, /* where decl'd - may be NULL */
746 Int lineNo, /* where decl'd - may be zero */
749 /* Canonicalise the tables held by 'di', in preparation for use. Call
750 this after finishing adding entries to these tables. */
751 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
753 /* Canonicalise the call-frame-info table held by 'di', in preparation
754 for use. This is called by ML_(canonicaliseTables) but can also be
755 called on its own to sort just this table. */
756 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
758 /* ------ Searching ------ */
760 /* Find a symbol-table index containing the specified pointer, or -1
761 if not found. Binary search. */
762 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
763 Bool match_anywhere_in_sym,
766 /* Find the index of the location-table entry of 'di' containing the
767 specified pointer 'ptr', or -1 if not found. Binary search. */
768 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
770 /* Find the index of the CFI-table entry of 'di' containing the
771 specified pointer 'ptr', or -1 if not found. Binary search. */
772 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
774 /* Find the index of the FPO-table entry of 'di' containing the
775 specified pointer 'ptr', or -1 if not found. Binary search. */
776 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
778 /* ------ Misc ------ */
780 /* Show a non-fatal debug info reading error. Use vg_panic if
781 terminal. 'serious' errors are always shown; non-'serious' ones
782 are shown only at verbosity level 2 and above. */
784 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg );
786 /* Print the symbol 'sym'. NOTE(review): 'idx' is presumably the
   symbol's table index, shown alongside it -- confirm. */
787 extern void ML_(ppSym) ( Int idx, DiSym* sym );
789 /* Print the call-frame-info summary 'si'. 'exprs' is the XArray of
   CfiExpr; presumably the one that the expression indices stored in
   'si' refer to -- confirm. */
790 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
/* Print a symbol-table trace message (printf-style 'format' plus
   arguments) when tracing is enabled. Relies on a variable named 'di',
   with a 'trace_symtab' member, being in scope at the point of use.
   The arguments are evaluated only when di->trace_symtab is set.

   The body is wrapped in do { ... } while (0) so that the expansion is
   a single statement: 'if (c) TRACE_SYMTAB(...); else ...' then parses
   as intended instead of failing to compile or capturing the 'else'.
   Invoke it as a statement, followed by a semicolon. */
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (di->trace_symtab) { VG_(printf)(format, ## args); } \
   } while (0)
797 #endif /* ndef __PRIV_STORAGE_H */
799 /*--------------------------------------------------------------------*/
801 /*--------------------------------------------------------------------*/