2 /*--------------------------------------------------------------------*/
3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
4 /*--- readdwarf3.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2008-2010 OpenWorks LLP
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
31 Neither the names of the U.S. Department of Energy nor the
32 University of California nor the names of its contributors may be
33 used to endorse or promote products derived from this software
34 without prior written permission.
37 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_l4re)
39 /* REFERENCE (without which this code will not make much sense):
41 DWARF Debugging Information Format, Version 3,
42 dated 20 December 2005 (the "D3 spec").
44 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
45 .doc (MS Word) version, but for some reason the section numbers
46 between the Word and PDF versions differ by 1 in the first digit.
47 All section references in this code are to the PDF version.
51 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
52 assumed to mean "const void" or "volatile void" respectively.
53 GDB appears to interpret them like this, anyway.
55 In many cases it is important to know the svma of a CU (the "base
56 address of the CU", as the D3 spec calls it). There are some
57 situations in which the spec implies this value is unknown, but the
58 Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
59 merely zero when not explicitly stated. So we too have to make
62 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
63 unitary_range_list() bias the resulting range list in the same way
64 that its more general cousin, get_range_list(), does? I don't
69 get rid of cu_svma_known and document the assumed-zero svma hack.
71 ML_(sizeOfType): differentiate between zero sized types and types
72 for which the size is unknown. Is this important? I don't know.
74 DW_TAG_array_type: deal with explicit sizes (currently we compute
75 the size from the bounds and the element size, although that's
76 fragile, if the bounds are incompletely specified, or completely
79 Document reason for difference (by 1) of stack preening depth in
80 parse_var_DIE vs parse_type_DIE.
82 Don't hand to ML_(addVars), vars whose locations are entirely in
83 registers (DW_OP_reg*). This is merely a space-saving
84 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
85 expressions correctly, by failing to evaluate them and hence
86 effectively ignoring the variable with which they are associated.
88 Deal with DW_TAG_array_type DIEs which have element size != stride
90 In some cases, the info for a variable is split between two
91 different DIEs (generally a declarer and a definer). We punt on
92 these. Could do better here.
94 The 'data_bias' argument passed to the expression evaluator
95 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
96 MaybeUWord, to make it clear when we do vs don't know what it is
97 for the evaluation of an expression. At the moment zero is passed
98 for this parameter in the don't know case. That's a bit fragile
99 and obscure; using a MaybeUWord would be clearer.
101 POTENTIAL PERFORMANCE IMPROVEMENTS:
103 Currently, duplicate removal and all other queries for the type
104 entities array is done using cuOffset-based pointing, which
105 involves a binary search (VG_(lookupXA)) for each access. This is
106 wildly inefficient, although simple. It would be better to
107 translate all the cuOffset-based references (iow, all the "R" and
108 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
109 'tyents' right at the start of dedup_types(), and use direct
110 indexing (VG_(indexXA)) wherever possible after that.
112 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
113 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
114 points, and possibly also make an _UNCHECKED version which skips
115 the range checks in performance-critical situations such as this.
117 Handle interaction between read_DIE and parse_{var,type}_DIE
118 better. Currently read_DIE reads the entire DIE just to find where
119 the end is (and for debug printing), so that it can later reliably
120 move the cursor to the end regardless of what parse_{var,type}_DIE
121 do. This means many DIEs (most, even?) are read twice. It would
122 be smarter to make parse_{var,type}_DIE return a Bool indicating
123 whether or not they advanced the DIE cursor, and only if they
124 didn't should read_DIE itself read through the DIE.
126 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
127 zero variables in their .vars XArray. Rather than have an XArray
128 with zero elements (which uses 2 malloc'd blocks), allow the .vars
129 pointer to be NULL in this case.
131 More generally, reduce the amount of memory allocated and freed
132 while reading Dwarf3 type/variable information. Even modest (20MB)
133 objects cause this module to allocate and free hundreds of
134 thousands of small blocks, and ML_(arena_malloc) and its various
135 groupies always show up at the top of performance profiles. */
137 #include "pub_core_basics.h"
138 #include "pub_core_debuginfo.h"
139 #include "pub_core_libcbase.h"
140 #include "pub_core_libcassert.h"
141 #include "pub_core_libcprint.h"
142 #include "pub_core_libcsetjmp.h" // setjmp facilities
143 #include "pub_core_options.h"
144 #include "pub_core_tooliface.h" /* VG_(needs) */
145 #include "pub_core_xarray.h"
146 #include "pub_core_wordfm.h"
147 #include "priv_misc.h" /* dinfo_zalloc/free */
148 #include "priv_tytypes.h"
149 #include "priv_d3basics.h"
150 #include "priv_storage.h"
151 #include "priv_readdwarf3.h" /* self */
154 /*------------------------------------------------------------*/
156 /*--- Basic machinery for parsing DIEs. ---*/
158 /*------------------------------------------------------------*/
/* Debug tracing: forwards its arguments to VG_(printf), but only when a
   variable called 'td3' that is in scope at the point of use is true.
   NOTE(review): the expansion is a bare 'if' statement rather than a
   do { } while (0) wrapper, so it does not compose safely with an
   unbraced if/else at the call site; keep using it as a standalone
   statement. */
160 #define TRACE_D3(format, args...) \
161 if (td3) { VG_(printf)(format, ## args); }
/* Reserved offset values: the largest and second-largest UWord values.
   Presumably "no valid cuOffset" and a stand-in offset for a fake
   'void' type respectively -- confirm against their users. */
163 #define D3_INVALID_CUOFF ((UWord)(-1UL))
164 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
/* Base address of the region being parsed; the readers index it with
   the current read offset (region_next). */
168 UChar* region_start_img;
/* Failure callback.  Declared noreturn, so a call to it does not come
   back to the caller. */
171 void (*barf)( HChar* ) __attribute__((noreturn));
/* Cheap validity test used by the assertions in the cursor helpers:
   rejects a NULL cursor, and any cursor lacking a failure callback
   (barf) or a failure message (barfstr). */
176 static inline Bool is_sane_Cursor ( Cursor* c ) {
177 if (!c) return False;
178 if (!c->barf) return False;
179 if (!c->barfstr) return False;
/* Set up *c to read the region that starts at region_start_img and is
   region_szB bytes long, with the read offset initially at
   region_next.  The struct is zeroed before the fields are filled in,
   and the result must pass is_sane_Cursor, so the failure callback
   and failure message are expected to be non-NULL. */
183 static void init_Cursor ( Cursor* c,
184 UChar* region_start_img,
187 __attribute__((noreturn)) void (*barf)( HChar* ),
191 VG_(memset)(c, 0, sizeof(*c));
192 c->region_start_img = region_start_img;
193 c->region_szB = region_szB;
194 c->region_next = region_next;
196 c->barfstr = barfstr;
197 vg_assert(is_sane_Cursor(c));
/* True once the read offset has reached (or gone past) the region
   size, i.e. there is nothing left to read. */
200 static Bool is_at_end_Cursor ( Cursor* c ) {
201 vg_assert(is_sane_Cursor(c));
202 return c->region_next >= c->region_szB;
/* Return the current read offset, counted from the start of the
   region. */
205 static inline UWord get_position_of_Cursor ( Cursor* c ) {
206 vg_assert(is_sane_Cursor(c));
207 return c->region_next;
/* Move the read offset to 'pos'.  The visible code stores 'pos'
   without range-checking it against the region size; an out-of-range
   offset is only noticed by the bounds test in a later get_* read. */
209 static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
210 c->region_next = pos;
211 vg_assert(is_sane_Cursor(c));
/* Number of bytes between the read offset and the end of the region.
   The difference is returned as a signed Word, so it can come out
   negative if the offset has been moved beyond the region size. */
214 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
215 vg_assert(is_sane_Cursor(c));
216 return c->region_szB - c->region_next;
/* Address, inside the region, of the byte at the current read offset.
   No bounds check is made here. */
219 static UChar* get_address_of_Cursor ( Cursor* c ) {
220 vg_assert(is_sane_Cursor(c));
221 return &c->region_start_img[ c->region_next ];
224 /* FIXME: document assumptions on endianness for
225 get_UShort/UInt/ULong. */
/* Read the byte at the read offset and advance the offset by one.
   The read is first bounds-tested against region_szB; the failure
   branch is not visible here (presumably it reports through c->barf
   -- TODO confirm). */
226 static inline UChar get_UChar ( Cursor* c ) {
/* sanity assertion deliberately left disabled in this reader */
228 /* vg_assert(is_sane_Cursor(c)); */
229 if (c->region_next + sizeof(UChar) > c->region_szB) {
234 r = * (UChar*) &c->region_start_img[ c->region_next ];
235 c->region_next += sizeof(UChar);
/* Read a UShort at the read offset and advance past it, after a
   bounds test against region_szB.  The value is loaded directly
   through a cast pointer, so it is interpreted in host byte order and
   the load is made at whatever alignment the offset happens to have. */
238 static UShort get_UShort ( Cursor* c ) {
240 vg_assert(is_sane_Cursor(c));
241 if (c->region_next + sizeof(UShort) > c->region_szB) {
246 r = * (UShort*) &c->region_start_img[ c->region_next ];
247 c->region_next += sizeof(UShort);
/* Read a UInt at the read offset and advance past it, after a bounds
   test against region_szB.  The value is loaded directly through a
   cast pointer, so it is interpreted in host byte order and the load
   is made at whatever alignment the offset happens to have. */
250 static UInt get_UInt ( Cursor* c ) {
252 vg_assert(is_sane_Cursor(c));
253 if (c->region_next + sizeof(UInt) > c->region_szB) {
258 r = * (UInt*) &c->region_start_img[ c->region_next ];
259 c->region_next += sizeof(UInt);
/* Read a ULong at the read offset and advance past it, after a bounds
   test against region_szB.  The value is loaded directly through a
   cast pointer, so it is interpreted in host byte order and the load
   is made at whatever alignment the offset happens to have. */
262 static ULong get_ULong ( Cursor* c ) {
264 vg_assert(is_sane_Cursor(c));
265 if (c->region_next + sizeof(ULong) > c->region_szB) {
270 r = * (ULong*) &c->region_start_img[ c->region_next ];
271 c->region_next += sizeof(ULong);
/* Decode an unsigned LEB128 number at the read offset and advance past
   it.  Each byte supplies its low 7 bits, least-significant group
   first; bit 7 set means another byte follows.  The single-byte case
   is handled up front as the expected common path. */
274 static inline ULong get_ULEB128 ( Cursor* c ) {
278 /* unroll first iteration */
279 byte = get_UChar( c );
280 result = (ULong)(byte & 0x7f);
281 if (LIKELY(!(byte & 0x80))) return result;
283 /* end unroll first iteration */
/* remaining bytes: merge each 7-bit group in at the current shift */
285 byte = get_UChar( c );
286 result |= ((ULong)(byte & 0x7f)) << shift;
288 } while (byte & 0x80);
/* Decode a signed LEB128 number at the read offset.  7-bit groups are
   accumulated while bit 7 of the byte is set. */
291 static Long get_SLEB128 ( Cursor* c ) {
297 result |= ((ULong)(byte & 0x7f)) << shift;
299 } while (byte & 0x80);
/* Sign-extend: if the last byte had bit 6 set and fewer than 64 bits
   have been filled, set every bit from position 'shift' upwards. */
300 if (shift < 64 && (byte & 0x40))
301 result |= -(1ULL << shift);
305 /* Assume 'c' points to the start of a string. Return the absolute
306 address of whatever it points at, and advance it past the
307 terminating zero. This makes it safe for the caller to then copy
308 the string with ML_(addStr), since (w.r.t. image overruns) the
309 process of advancing past the terminating zero will already have
310 "vetted" the string. */
311 static UChar* get_AsciiZ ( Cursor* c ) {
/* remember where the string starts before consuming it */
313 UChar* res = get_address_of_Cursor(c);
/* consume bytes up to and including the zero terminator; each byte
   goes through get_UChar and hence its bounds test */
314 do { uc = get_UChar(c); } while (uc != 0);
/* Decode the unsigned LEB128 number at the read offset without
   consuming it: the offset is saved before the read and restored
   afterwards. */
318 static ULong peek_ULEB128 ( Cursor* c ) {
319 Word here = c->region_next;
320 ULong r = get_ULEB128( c );
321 c->region_next = here;
/* Read the byte at the read offset without consuming it: the offset
   is saved before the read and restored afterwards. */
324 static UChar peek_UChar ( Cursor* c ) {
325 Word here = c->region_next;
326 UChar r = get_UChar( c );
327 c->region_next = here;
/* Read an offset-sized field: a ULong when is_dw64 is true, otherwise
   a UInt widened to ULong. */
331 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
332 return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
/* Read one host-word-sized value: dispatches to get_UInt or get_ULong
   according to sizeof(UWord), which is asserted to equal the host
   pointer size. */
335 static UWord get_UWord ( Cursor* c ) {
336 vg_assert(sizeof(UWord) == sizeof(void*));
337 if (sizeof(UWord) == 4) return get_UInt(c);
338 if (sizeof(UWord) == 8) return get_ULong(c);
342 /* Read a DWARF3 'Initial Length' field */
343 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
/* first word in 0xFFFFFFF0 .. 0xFFFFFFFE: handled separately from an
   ordinary 32-bit length (the handling itself is not visible here;
   presumably these are rejected as reserved -- TODO confirm) */
351 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
/* first word 0xFFFFFFFF: a further 64-bit word is read */
354 else if (w32 == 0xFFFFFFFF) {
356 w64 = get_ULong( c );
365 /*------------------------------------------------------------*/
367 /*--- "CUConst" structure ---*/
369 /*------------------------------------------------------------*/
371 #define N_ABBV_CACHE 32
373 /* Holds information that is constant through the parsing of a
374 Compilation Unit. This is basically plumbed through to
378 /* Call here if anything goes wrong */
379 void (*barf)( HChar* ) __attribute__((noreturn));
380 /* Is this 64-bit DWARF ? */
382 /* Which DWARF version ? (2, 3 or 4) */
384 /* Length of this Compilation Unit, as stated in the
385 .unit_length :: InitialLength field of the CU Header.
386 However, this size (as specified by the D3 spec) does not
387 include the size of the .unit_length field itself, which is
388 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
389 can be obtained through the expression ".is_dw64 ? 12 : 4". */
391 /* Offset of start of this unit in .debug_info */
392 UWord cu_start_offset;
393 /* SVMA for this CU. In the D3 spec, this is known as the "base
394 address of the compilation unit" (last para sec 3.1.1).
395 Needed for (amongst things) interpretation of location-list
399 /* The debug_abbreviations table to be used for this Unit */
401 /* Upper bound on size thereof (an overestimate, in general) */
402 UWord debug_abbv_maxszB;
403 /* Where is .debug_str ? */
404 UChar* debug_str_img;
406 /* Where is .debug_ranges ? */
407 UChar* debug_ranges_img;
408 UWord debug_ranges_sz;
409 /* Where is .debug_loc ? */
410 UChar* debug_loc_img;
412 /* Where is .debug_line? */
413 UChar* debug_line_img;
415 /* Where is .debug_info? */
416 UChar* debug_info_img;
418 /* --- Needed so we can add stuff to the string table. --- */
419 struct _DebugInfo* di;
420 /* --- a cache for set_abbv_Cursor --- */
421 /* abbv_code == (ULong)-1 for an unused entry. */
/* posn is the cursor position, within this unit's abbreviation
   table, that set_abbv_Cursor recorded for abbv_code. */
422 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
/* Statistics, incremented by set_abbv_Cursor: lookups and misses. */
423 UWord saC_cache_queries;
424 UWord saC_cache_misses;
429 /*------------------------------------------------------------*/
431 /*--- Helper functions for Guarded Expressions ---*/
433 /*------------------------------------------------------------*/
435 /* Parse the location list starting at img-offset 'debug_loc_offset'
436 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
437 and so I believe are correct SVMAs for the object as a whole. This
438 function allocates the UChar*, and the caller must deallocate it.
439 The resulting block is in so-called Guarded-Expression format.
441 Guarded-Expression format is similar but not identical to the DWARF3
442 location-list format. The format of each returned block is:
446 followed by zero or more of
448 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
450 '..bytes..' is a standard DWARF3 location expression which is
451 valid when aMin <= pc <= aMax (possibly after suitable biasing).
453 The number of bytes in '..bytes..' is nbytes.
455 The end of the sequence is marked by an isEnd == 1 value. All
456 previous isEnd values must be zero.
458 biasMe is 1 if the aMin/aMax fields need this DebugInfo's
459 text_bias added before use, and 0 if this is not
460 necessary (the GX is ready to go).
462 Hence the block can be quickly parsed and is self-describing. Note
463 that aMax is 1 less than the corresponding value in a DWARF3
464 location list. Zero length ranges, with aMax == aMin-1, are not
467 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
468 it more logically belongs. */
471 /* Apply a text bias to a GX. */
/* Walks gx->payload in place: the leading biasMe byte is read and then
   cleared, and di->text_debug_bias is added to address fields found
   in the payload (presumably the aMin and aMax of each guard -- the
   loop structure is not visible here). */
472 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
476 UChar* p = &gx->payload[0];
478 uc = *p++; /*biasMe*/
482 p[-1] = 0; /* mark it as done */
490 *pA += di->text_debug_bias;
494 *pA += di->text_debug_bias;
496 /* nbytes, and actual expression */
/* the 16-bit expression length is loaded directly from the payload */
497 nbytes = * (UShort*)p; p += sizeof(UShort);
/* Build a GX containing exactly one guarded expression whose guard
   spans the whole address space (aMin = 0, aMax = all-ones) and whose
   expression bytes are a copy of block[0 .. nbytes-1].  The biasMe
   byte is written as 0.  nbytes must fit in the 16-bit length field.
   The result is allocated with ML_(dinfo_zalloc). */
502 __attribute__((noinline))
503 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
509 vg_assert(sizeof(UWord) == sizeof(Addr));
510 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
/* payload size: one byte per flag, two words for the guard, the
   16-bit length, the expression itself, and the end marker */
512 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/
513 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/
514 + sizeof(UShort) /*nbytes*/ + nbytes
515 + sizeof(UChar); /*isEnd*/
517 gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
518 sizeof(GExpr) + bytesReqd );
521 p = pstart = &gx->payload[0];
/* fields are stored back to back through cast pointers, in the same
   order as the size computation above */
523 * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar);
524 * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar);
525 * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr);
526 * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr);
527 * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
528 VG_(memcpy)(p, block, nbytes); p += nbytes;
529 * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar);
/* cross-check: bytes written match the computed payload size */
531 vg_assert( (SizeT)(p - pstart) == bytesReqd);
532 vg_assert( &gx->payload[bytesReqd]
533 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
/* Build a GX from the location list found at offset debug_loc_offset
   in .debug_loc.  Fails via cc->barf if .debug_loc has zero size.
   The payload is assembled in a temporary XArray of bytes, starting
   with a biasMe byte of 1 and ending with an isEnd byte of 1, and is
   then copied into a block allocated with ML_(dinfo_zalloc).  Guard
   bounds are stored with svma_of_referencing_CU (and the current
   'base') added, and with an inclusive upper bound (w2 - 1). */
538 __attribute__((noinline))
539 static GExpr* make_general_GX ( CUConst* cc,
541 UWord debug_loc_offset,
542 Addr svma_of_referencing_CU )
546 XArray* xa; /* XArray of UChar */
550 vg_assert(sizeof(UWord) == sizeof(Addr));
551 if (cc->debug_loc_sz == 0)
552 cc->barf("make_general_GX: .debug_loc is empty/missing");
554 init_Cursor( &loc, cc->debug_loc_img,
555 cc->debug_loc_sz, 0, cc->barf,
556 "Overrun whilst reading .debug_loc section(2)" );
557 set_position_of_Cursor( &loc, debug_loc_offset );
559 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
560 debug_loc_offset, get_address_of_Cursor( &loc ) );
562 /* Who frees this xa? It is freed before this fn exits. */
563 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
567 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
573 /* Read a (host-)word pair. This is something of a hack since
574 the word size to read is really dictated by the ELF file;
575 however, we assume we're reading a file with the same
576 word-sizeness as the host. Reasonably enough. */
577 UWord w1 = get_UWord( &loc );
578 UWord w2 = get_UWord( &loc );
580 TRACE_D3(" %08lx %08lx\n", w1, w2);
581 if (w1 == 0 && w2 == 0)
582 break; /* end of list */
585 /* new value for 'base' */
590 /* else a location expression follows */
591 /* else enumerate [w1+base, w2+base) */
592 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
595 TRACE_D3("negative range is for .debug_loc expr at "
598 cc->barf( "negative range in .debug_loc section" );
601 /* ignore zero length ranges */
603 len = (UWord)get_UShort( &loc );
/* append one guard to the payload: flag byte, biased aMin, biased
   inclusive aMax, the 16-bit length, then the expression bytes */
610 VG_(addBytesToXA)( xa, &c, sizeof(c) );
611 w = w1 + base + svma_of_referencing_CU;
612 VG_(addBytesToXA)( xa, &w, sizeof(w) );
613 w = w2 -1 + base + svma_of_referencing_CU;
614 VG_(addBytesToXA)( xa, &w, sizeof(w) );
616 VG_(addBytesToXA)( xa, &s, sizeof(s) );
620 UChar byte = get_UChar( &loc );
621 TRACE_D3("%02x", (UInt)byte);
623 VG_(addBytesToXA)( xa, &byte, 1 );
629 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
/* copy the assembled bytes into a right-sized GExpr */
631 nbytes = VG_(sizeXA)( xa );
632 vg_assert(nbytes >= 1);
634 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
636 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
637 vg_assert( &gx->payload[nbytes]
638 == ((UChar*)gx) + sizeof(GExpr) + nbytes );
648 /*------------------------------------------------------------*/
650 /*--- Helper functions for range lists and CU headers ---*/
652 /*------------------------------------------------------------*/
654 /* Denotes an address range. Both aMin and aMax are included in the
655 range; hence a complete range is (0, ~0) and an empty range is any
656 (X, X-1) for X > 0.*/
/* Because aMax is inclusive, get_range_list stores "w2 - 1" when it
   converts a half-open [w1, w2) pair into one of these. */
658 struct { Addr aMin; Addr aMax; }
662 /* Generate an arbitrary structural total ordering on
663 XArray* of AddrRange. */
/* Arrays are ordered first by element count; arrays of equal length
   are then compared element by element, on aMin and then aMax.
   Returns -1 or 1 at the first difference found.
   NOTE(review): this uses tl_assert whereas the rest of the visible
   code uses vg_assert -- confirm which is intended here. */
664 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
667 tl_assert(rngs1 && rngs2);
668 n1 = VG_(sizeXA)( rngs1 );
669 n2 = VG_(sizeXA)( rngs2 );
670 if (n1 < n2) return -1;
671 if (n1 > n2) return 1;
/* same length: the first differing field decides */
672 for (i = 0; i < n1; i++) {
673 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
674 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
675 if (rng1->aMin < rng2->aMin) return -1;
676 if (rng1->aMin > rng2->aMin) return 1;
677 if (rng1->aMax < rng2->aMax) return -1;
678 if (rng1->aMax > rng2->aMax) return 1;
/* Create a new XArray of AddrRange via VG_(newXA), allocated through
   ML_(dinfo_zalloc); no ranges are added in the code visible here. */
684 __attribute__((noinline))
685 static XArray* /* of AddrRange */ empty_range_list ( void )
687 XArray* xa; /* XArray of AddrRange */
688 /* Who frees this xa? varstack_preen() does. */
689 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
/* Create a new XArray of AddrRange and add one range to it.  Both
   bounds are inclusive, and aMin must not exceed aMax. */
696 __attribute__((noinline))
697 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
701 vg_assert(aMin <= aMax);
702 /* Who frees this xa? varstack_preen() does. */
703 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1",
708 VG_(addToXA)( xa, &pair );
713 /* Enumerate the address ranges starting at img-offset
714 'debug_ranges_offset' in .debug_ranges. Results are biased with
715 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
716 object as a whole. This function allocates the XArray, and the
717 caller must deallocate it. */
/* Fails via cc->barf if .debug_ranges has zero size.  The list ends at
   a (0,0) word pair.  Each stored range has an inclusive upper bound
   (w2 - 1) and has the current 'base' added as well as the CU svma. */
718 __attribute__((noinline))
719 static XArray* /* of AddrRange */
720 get_range_list ( CUConst* cc,
722 UWord debug_ranges_offset,
723 Addr svma_of_referencing_CU )
727 XArray* xa; /* XArray of AddrRange */
730 if (cc->debug_ranges_sz == 0)
731 cc->barf("get_range_list: .debug_ranges is empty/missing");
733 init_Cursor( &ranges, cc->debug_ranges_img,
734 cc->debug_ranges_sz, 0, cc->barf,
735 "Overrun whilst reading .debug_ranges section(2)" );
736 set_position_of_Cursor( &ranges, debug_ranges_offset );
738 /* Who frees this xa? varstack_preen() does. */
739 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
743 /* Read a (host-)word pair. This is something of a hack since
744 the word size to read is really dictated by the ELF file;
745 however, we assume we're reading a file with the same
746 word-sizeness as the host. Reasonably enough. */
747 UWord w1 = get_UWord( &ranges );
748 UWord w2 = get_UWord( &ranges );
750 if (w1 == 0 && w2 == 0)
751 break; /* end of list. */
754 /* new value for 'base' */
759 /* else enumerate [w1+base, w2+base) */
760 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
763 cc->barf( "negative range in .debug_ranges section" );
765 pair.aMin = w1 + base + svma_of_referencing_CU;
766 pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
767 vg_assert(pair.aMin <= pair.aMax);
768 VG_(addToXA)( xa, &pair );
775 /* Parse the Compilation Unit header indicated at 'c' and
776 initialise 'cc' accordingly. */
/* *cc is zeroed before anything is filled in, so fields this function
   does not set are left as zero for the caller to supply.  The header
   fields are read in order: initial length, version (2, 3 or 4 only),
   abbreviation-table offset (must lie inside .debug_abbrev) and
   address size (must equal the host pointer size).  Any violation is
   reported through cc->barf. */
777 static __attribute__((noinline))
778 void parse_CU_Header ( /*OUT*/CUConst* cc,
781 UChar* debug_abbv_img, UWord debug_abbv_sz )
784 UWord debug_abbrev_offset;
787 VG_(memset)(cc, 0, sizeof(*cc));
788 vg_assert(c && c->barf);
791 /* initial_length field */
793 = get_Initial_Length( &cc->is_dw64, c,
794 "parse_CU_Header: invalid initial-length field" );
796 TRACE_D3(" Length: %lld\n", cc->unit_length );
799 cc->version = get_UShort( c );
800 if (cc->version != 2 && cc->version != 3 && cc->version != 4)
801 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
802 TRACE_D3(" Version: %d\n", (Int)cc->version );
804 /* debug_abbrev_offset */
805 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
806 if (debug_abbrev_offset >= debug_abbv_sz)
807 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
808 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset );
810 /* address size. If this isn't equal to the host word size, just
811 give up. This makes it safe to assume elsewhere that
812 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
814 address_size = get_UChar( c );
815 if (address_size != sizeof(void*))
816 cc->barf( "parse_CU_Header: invalid address_size" );
817 TRACE_D3(" Pointer Size: %d\n", (Int)address_size );
819 /* Set up so that cc->debug_abbv points to the relevant table for
820 this CU. Set the szB so that at least we can't read off the end
821 of the debug_abbrev section -- potentially (and quite likely)
822 too big, if this isn't the last table in the section, but at
824 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset;
825 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset;
826 /* and empty out the set_abbv_Cursor cache */
827 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
/* (ULong)-1 is the marker for an empty cache slot */
828 for (i = 0; i < N_ABBV_CACHE; i++) {
829 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
830 cc->saC_cache[i].posn = 0;
832 cc->saC_cache_queries = 0;
833 cc->saC_cache_misses = 0;
837 /* Set up 'c' so it is ready to parse the abbv table entry code
838 'abbv_code' for this compilation unit. */
/* A small per-CU cache maps abbreviation codes to cursor positions.
   On a hit the cursor is initialised at the cached position and the
   entry is written back at slot 0.  On a miss the abbreviation table
   is scanned from its start, and the position found is inserted at
   the middle slot of the cache.  An abbv_code of zero, or one that is
   absent from the table, is fatal via cc->barf. */
839 static __attribute__((noinline))
840 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
841 CUConst* cc, ULong abbv_code )
847 cc->barf("set_abbv_Cursor: abbv_code == 0" );
849 /* (ULong)-1 is used to represent an empty cache slot. So we can't
850 allow it. In any case no valid DWARF3 should make a reference
851 to a negative abbreviation code. [at least, they always seem to
852 be numbered upwards from zero as far as I have seen] */
853 vg_assert(abbv_code != (ULong)-1);
855 /* First search the cache. */
856 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
857 cc->saC_cache_queries++;
858 for (i = 0; i < N_ABBV_CACHE; i++) {
859 /* No need to test the cached abbv_codes for -1 (empty), since
860 we just asserted that abbv_code is not -1. */
861 if (cc->saC_cache[i].abbv_code == abbv_code) {
862 /* Found it. Cool. Set up the parser using the cached
863 position, and move this cache entry 1 step closer to the
865 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
866 init_Cursor( c, cc->debug_abbv,
867 cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
869 "Overrun whilst parsing .debug_abbrev section(1)" );
871 ULong t_abbv_code = cc->saC_cache[i].abbv_code;
872 UWord t_posn = cc->saC_cache[i].posn;
874 cc->saC_cache[i] = cc->saC_cache[i-1];
875 cc->saC_cache[0].abbv_code = t_abbv_code;
876 cc->saC_cache[0].posn = t_posn;
884 /* No. It's not in the cache. We have to search through
885 .debug_abbrev, of course taking care to update the cache
888 cc->saC_cache_misses++;
889 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
890 "Overrun whilst parsing .debug_abbrev section(2)" );
892 /* Now iterate through the table until we find the requested
897 acode = get_ULEB128( c );
898 if (acode == 0) break; /* end of the table */
899 if (acode == abbv_code) break; /* found it */
900 /*atag = */ get_ULEB128( c );
901 /*has_children = */ get_UChar( c );
902 //TRACE_D3(" %llu %s [%s]\n",
903 // acode, pp_DW_TAG(atag), pp_DW_children(has_children));
/* skip this entry's (name, form) pairs up to the (0,0) terminator */
905 ULong at_name = get_ULEB128( c );
906 ULong at_form = get_ULEB128( c );
907 if (at_name == 0 && at_form == 0) break;
908 //TRACE_D3(" %18s %s\n",
909 // pp_DW_AT(at_name), pp_DW_FORM(at_form));
914 /* Not found. This is fatal. */
915 cc->barf("set_abbv_Cursor: abbv_code not found");
918 /* Otherwise, 'c' is now set correctly to parse the relevant entry,
919 starting from the abbreviation entry's tag. So just cache
920 the result, and return. */
/* slide the back half of the cache down by one slot (dropping the
   last entry) and place the new entry at the midpoint */
921 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
922 cc->saC_cache[i] = cc->saC_cache[i-1];
924 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
925 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
926 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
/* Decode one attribute value of the given 'form' at cursor 'c',
   advancing the cursor past it.  'td3' is the tracing flag consumed
   by TRACE_D3.  DW_FORM_indirect reads the real form code from the
   stream and recurses.  A form with no handler is fatal via c->barf. */
930 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
932 If *cts itself contains the entire result, then *ctsSzB is set to
933 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
935 Alternatively, the result can be a block of data (in the
936 transiently mapped-in object, so-called "image" space). If so then
937 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
938 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
940 Unfortunately this means it is impossible to represent a zero-size
941 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
942 and so is ambiguous (which case it is?)
944 Invariant on successful return:
945 (*ctsSzB > 0 && *ctsMemSzB == 0)
946 || (*ctsSzB == 0 && *ctsMemSzB > 0)
949 void get_Form_contents ( /*OUT*/ULong* cts,
951 /*OUT*/UWord* ctsMemSzB,
952 CUConst* cc, Cursor* c,
953 Bool td3, DW_FORM form )
/* fixed-size unsigned reads: the value is zero-extended into *cts */
960 *cts = (ULong)(UChar)get_UChar(c);
962 TRACE_D3("%u", (UInt)*cts);
965 *cts = (ULong)(UShort)get_UShort(c);
967 TRACE_D3("%u", (UInt)*cts);
970 *cts = (ULong)(UInt)get_UInt(c);
972 TRACE_D3("%u", (UInt)*cts);
977 TRACE_D3("%llu", *cts);
/* offset-sized value: 8 bytes in 64-bit DWARF, otherwise 4 */
979 case DW_FORM_sec_offset:
980 *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
981 *ctsSzB = cc->is_dw64 ? 8 : 4;
982 TRACE_D3("%llu", *cts);
/* signed LEB128 value, kept in *cts as its 64-bit bit pattern */
985 *cts = (ULong)(Long)get_SLEB128(c);
987 TRACE_D3("%lld", (Long)*cts);
/* NOTE(review): this value is unsigned; the (Long) casts on the next
   two lines do not match the unsigned read or the %llu format */
990 *cts = (ULong)(Long)get_ULEB128(c);
992 TRACE_D3("%llu", (Long)*cts);
995 /* note, this is a hack. DW_FORM_addr is defined as getting
996 a word the size of the target machine as defined by the
997 address_size field in the CU Header. However,
998 parse_CU_Header() rejects all inputs except those for
999 which address_size == sizeof(Word), hence we can just
1000 treat it as a (host) Word. */
1001 *cts = (ULong)(UWord)get_UWord(c);
1002 *ctsSzB = sizeof(UWord);
1003 TRACE_D3("0x%lx", (UWord)*cts);
1006 case DW_FORM_ref_addr:
1007 /* We make the same word-size assumption as DW_FORM_addr. */
1008 /* What does this really mean? From D3 Sec 7.5.4,
1009 description of "reference", it would appear to reference
1010 some other DIE, by specifying the offset from the
1011 beginning of a .debug_info section. The D3 spec mentions
1012 that this might be in some other shared object and
1013 executable. But I don't see how the name of the other
1014 object/exe is specified.
1016 At least for the DW_FORM_ref_addrs created by icc11, the
1017 references seem to be within the same object/executable.
1018 So for the moment we merely range-check, to see that they
1019 actually do specify a plausible offset within this
1020 object's .debug_info, and return the value unchanged.
1022 *cts = (ULong)(UWord)get_UWord(c);
1023 *ctsSzB = sizeof(UWord);
1024 TRACE_D3("0x%lx", (UWord)*cts);
1025 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
1026 if (/* the following 2 are surely impossible, but ... */
1027 cc->debug_info_img == NULL || cc->debug_info_sz == 0
1028 || *cts >= (ULong)cc->debug_info_sz) {
1029 /* Hmm. Offset is nonsensical for this object's .debug_info
1030 section. Be safe and reject it. */
1031 cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1032 "outside .debug_info");
1036 case DW_FORM_strp: {
1037 /* this is an offset into .debug_str */
1039 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1040 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
1041 cc->barf("get_Form_contents: DW_FORM_strp "
1042 "points outside .debug_str");
1043 /* FIXME: check the entire string lies inside debug_str,
1044 not just the first byte of it. */
1045 str = (UChar*)cc->debug_str_img + uw;
1046 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
1047 *cts = (ULong)(UWord)str;
/* block size counts the terminating zero as well */
1048 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1051 case DW_FORM_string: {
1052 UChar* str = get_AsciiZ(c);
1053 TRACE_D3("%s", str);
1054 *cts = (ULong)(UWord)str;
1055 /* strlen is safe because get_AsciiZ already 'vetted' the
1057 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1060 case DW_FORM_ref1: {
1061 UChar u8 = get_UChar(c);
1062 UWord res = cc->cu_start_offset + (UWord)u8;
1064 *ctsSzB = sizeof(UWord);
1065 TRACE_D3("<%lx>", res);
1068 case DW_FORM_ref2: {
1069 UShort u16 = get_UShort(c);
1070 UWord res = cc->cu_start_offset + (UWord)u16;
1072 *ctsSzB = sizeof(UWord);
1073 TRACE_D3("<%lx>", res);
1076 case DW_FORM_ref4: {
1077 UInt u32 = get_UInt(c);
1078 UWord res = cc->cu_start_offset + (UWord)u32;
1080 *ctsSzB = sizeof(UWord);
1081 TRACE_D3("<%lx>", res);
1084 case DW_FORM_ref8: {
1085 ULong u64 = get_ULong(c);
1086 UWord res = cc->cu_start_offset + (UWord)u64;
1088 *ctsSzB = sizeof(UWord);
1089 TRACE_D3("<%lx>", res);
1092 case DW_FORM_ref_udata: {
1093 ULong u64 = get_ULEB128(c);
1094 UWord res = cc->cu_start_offset + (UWord)u64;
1096 *ctsSzB = sizeof(UWord);
1097 TRACE_D3("<%lx>", res);
1100 case DW_FORM_flag: {
1101 UChar u8 = get_UChar(c);
1102 TRACE_D3("%u", (UInt)u8);
1107 case DW_FORM_flag_present:
1112 case DW_FORM_block1: {
1114 ULong u64 = (ULong)get_UChar(c);
1115 UChar* block = get_address_of_Cursor(c);
1116 TRACE_D3("%llu byte block: ", u64);
1117 for (u64b = u64; u64b > 0; u64b--) {
1118 UChar u8 = get_UChar(c);
1119 TRACE_D3("%x ", (UInt)u8);
1121 *cts = (ULong)(UWord)block;
1122 *ctsMemSzB = (UWord)u64;
1125 case DW_FORM_block2: {
1127 ULong u64 = (ULong)get_UShort(c);
1128 UChar* block = get_address_of_Cursor(c);
1129 TRACE_D3("%llu byte block: ", u64);
1130 for (u64b = u64; u64b > 0; u64b--) {
1131 UChar u8 = get_UChar(c);
1132 TRACE_D3("%x ", (UInt)u8);
1134 *cts = (ULong)(UWord)block;
1135 *ctsMemSzB = (UWord)u64;
1138 case DW_FORM_block4: {
1140 ULong u64 = (ULong)get_UInt(c);
1141 UChar* block = get_address_of_Cursor(c);
1142 TRACE_D3("%llu byte block: ", u64);
1143 for (u64b = u64; u64b > 0; u64b--) {
1144 UChar u8 = get_UChar(c);
1145 TRACE_D3("%x ", (UInt)u8);
1147 *cts = (ULong)(UWord)block;
1148 *ctsMemSzB = (UWord)u64;
1151 case DW_FORM_exprloc:
1152 case DW_FORM_block: {
1154 ULong u64 = (ULong)get_ULEB128(c);
1155 UChar* block = get_address_of_Cursor(c);
1156 TRACE_D3("%llu byte block: ", u64);
1157 for (u64b = u64; u64b > 0; u64b--) {
1158 UChar u8 = get_UChar(c);
1159 TRACE_D3("%x ", (UInt)u8);
1161 *cts = (ULong)(UWord)block;
1162 *ctsMemSzB = (UWord)u64;
1165 case DW_FORM_ref_sig8: {
1167 UChar* block = get_address_of_Cursor(c);
1168 TRACE_D3("8 byte signature: ");
1169 for (u64b = 8; u64b > 0; u64b--) {
1170 UChar u8 = get_UChar(c);
1171 TRACE_D3("%x ", (UInt)u8);
1173 *cts = (ULong)(UWord)block;
1177 case DW_FORM_indirect:
1178 get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
1179 (DW_FORM)get_ULEB128(c));
1184 "get_Form_contents: unhandled %d (%s) at <%lx>\n",
1185 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1186 c->barf("get_Form_contents: unhandled DW_FORM");
1191 /*------------------------------------------------------------*/
1193 /*--- Parsing of variable-related DIEs ---*/
1195 /*------------------------------------------------------------*/
/* Fields of the temporary record describing one variable or formal
   parameter DIE.  parse_var_DIE allocates one of these (a TempVar)
   for each such DIE it decides to keep, fills it in, and appends it
   to tempvars. */
1199 UChar* name; /* in DebugInfo's .strchunks */
1200 /* Represent ranges economically. nRanges is the number of
1202 0: .rngOneMin .rngOneMax .rngMany are all zero
1203 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1204 2 or more: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1205 This is merely an optimisation to avoid having to allocate
1206 and free the XArray in the common (98% of) cases where there
1207 are zero or one address ranges. */
1211 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
1212 /* Do not free .rngMany, since many TempVars will have the same
1213 value. Instead the associated storage is to be freed by
1214 deleting 'rangestree', which stores a single copy of each
1218 UWord typeR; /* a cuOff */
1219 GExpr* gexpr; /* for this variable */
1220 GExpr* fbGX; /* to find the frame base of the enclosing fn, if
1222 UChar* fName; /* declaring file name, or NULL */
1223 Int fLine; /* declaring file line number, or zero */
1224 /* offset in .debug_info, so that abstract instances can be
1225 found to satisfy references from concrete instances. */
1227 UWord absOri; /* so the absOri fields refer to dioff fields
1228 in some other, related TempVar. */
/* Capacity of the variable parser scope stack (the fixed-size arrays
   below).  varstack_push barfs rather than exceed it. */
1232 #define N_D3_VAR_STACK 48
1236 /* Contains the range stack: a stack of address ranges, one
1237 stack entry for each nested scope.
1239 Some scope entries are created by function definitions
1240 (DW_TAG_subprogram), and for those, we also note the GExpr
1241 derived from its DW_AT_frame_base attribute, if any.
1242 Consequently it should be possible to find, for any
1243 variable's DIE, the GExpr for the containing function's
1244 DW_AT_frame_base by scanning back through the stack to find
1245 the nearest entry associated with a function. This somewhat
1246 elaborate scheme is provided so as to make it possible to
1247 obtain the correct DW_AT_frame_base expression even in the
1248 presence of nested functions (or to be more precise, in the
1249 presence of nested DW_TAG_subprogram DIEs).
1251 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
1253 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1254 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */
1255 Bool isFunc[N_D3_VAR_STACK]; /* from DW_TAG_subprogram? */
1256 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB
1258 /* The file name table. Is a mapping from integer index to the
1259 (permanent) copy of the string, iow a non-img area. */
1260 XArray* /* of UChar* */ filenameTable;
/* Debug printing: dump the variable scope stack, entries 0 up to and
   including parser->sp.  For each entry this prints its index and
   DIE level, the frame-base GExpr pointer when the entry is marked
   isFunc, and its PC ranges (or a marker when the range array is
   empty).  str is a caller-supplied label shown in the heading. */
1264 static void varstack_show ( D3VarParser* parser, HChar* str ) {
1266 VG_(printf)(" varstack (%s) {\n", str);
1267 for (i = 0; i <= parser->sp; i++) {
1268 XArray* xa = parser->ranges[i];
1270 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]);
1271 if (parser->isFunc[i]) {
1272 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1274 vg_assert(parser->fbGX[i] == NULL);
1277 if (VG_(sizeXA)( xa ) == 0) {
1278 VG_(printf)("** empty PC range array **");
1280 for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1281 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1283 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1288 VG_(printf)(" }\n");
1291 /* Remove from the stack, all entries with .level > 'level'.
   Popping stops when the stack is empty (sp == -1) or when the top
   entry has a level <= level.  Each popped entry has its range
   XArray deleted and its slot reset to NULL / 0 / False, which is
   the state varstack_push asserts for a slot before filling it.
   td3 is passed for TRACE_D3 (assumed to gate its output; confirm
   against the macro definition). */
1293 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1295 Bool changed = False;
1296 vg_assert(parser->sp < N_D3_VAR_STACK);
1298 vg_assert(parser->sp >= -1);
1299 if (parser->sp == -1) break;
1300 if (parser->level[parser->sp] <= level) break;
1302 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1303 vg_assert(parser->ranges[parser->sp]);
1304 /* Who allocated this xa? get_range_list() or
1305 unitary_range_list(). */
1306 VG_(deleteXA)( parser->ranges[parser->sp] );
1307 parser->ranges[parser->sp] = NULL;
1308 parser->level[parser->sp] = 0;
1309 parser->isFunc[parser->sp] = False;
1310 parser->fbGX[parser->sp] = NULL;
1315 varstack_show( parser, "after preen" );
/* Push a new scope entry for DIE level level onto the variable
   scope stack.  Existing entries at that level or deeper are first
   removed with varstack_preen.  ranges must be non-NULL; the stack
   takes it over, since varstack_preen deletes it when the entry is
   popped.  fbGX must be NULL unless isFunc is set.  Barfs via
   cc->barf if the stack is already full (N_D3_VAR_STACK entries). */
1318 static void varstack_push ( CUConst* cc,
1319 D3VarParser* parser,
1321 XArray* ranges, Int level,
1322 Bool isFunc, GExpr* fbGX ) {
1324 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
1325 parser->sp+1, level, ranges);
1327 /* First we need to zap everything >= 'level', as we are about to
1328 replace any previous entry at 'level', so .. */
1329 varstack_preen(parser, /*td3*/False, level-1);
1331 vg_assert(parser->sp >= -1);
1332 vg_assert(parser->sp < N_D3_VAR_STACK);
1333 if (parser->sp == N_D3_VAR_STACK-1)
1334 cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1335 "increase and recompile");
1336 if (parser->sp >= 0)
1337 vg_assert(parser->level[parser->sp] < level);
1339 vg_assert(parser->ranges[parser->sp] == NULL);
1340 vg_assert(parser->level[parser->sp] == 0);
1341 vg_assert(parser->isFunc[parser->sp] == False);
1342 vg_assert(parser->fbGX[parser->sp] == NULL);
1343 vg_assert(ranges != NULL);
1344 if (!isFunc) vg_assert(fbGX == NULL);
1345 parser->ranges[parser->sp] = ranges;
1346 parser->level[parser->sp] = level;
1347 parser->isFunc[parser->sp] = isFunc;
1348 parser->fbGX[parser->sp] = fbGX;
1350 varstack_show( parser, "after push" );
1354 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1355 refer either to a location expression or to a location list.
1356 Figure out which, and in both cases bundle the expression or
1357 location list into a so-called GExpr (guarded expression).
   parse_var_DIE also calls this for DW_AT_frame_base values.
   The two cases are told apart as follows:
     ctsMemSzB > 0 and ctsSzB == 0: in-line location expression,
        wrapped with make_singleton_GX;
     ctsMemSzB == 0 and ctsSzB > 0: location list, built with
        make_general_GX using cc->cu_svma; barfs via cc->barf if
        the CU svma is not known.
   Callers are expected to pass only one of these two shapes; the
   vg_assert(0) at the end marks anything else as a caller bug. */
1358 __attribute__((noinline))
1359 static GExpr* get_GX ( CUConst* cc, Bool td3,
1360 ULong cts, Int ctsSzB, UWord ctsMemSzB )
1362 GExpr* gexpr = NULL;
1363 if (ctsMemSzB > 0 && ctsSzB == 0) {
1364 /* represents an in-line location expression, and cts points
1366 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1369 if (ctsMemSzB == 0 && ctsSzB > 0) {
1370 /* represents location list. cts is the offset of it in
1372 if (!cc->cu_svma_known)
1373 cc->barf("get_GX: location list, but CU svma is unknown");
1374 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1377 vg_assert(0); /* else caller is bogus */
/* Read the file names table from the .debug_line header found at
   offset debug_line_offset within cc->debug_line_img, and append
   the names to parser->filenameTable, which must be empty on entry.
   Index 0 gets a dummy unknown-file entry; the names read from the
   header follow it in order.  Every name stored is a copy made with
   ML_(addStr).  The header fields before the table, the opcode
   lengths and the directory names table are read only to step over
   them.  Barfs via cc->barf if .debug_line is absent, if the offset
   lies at or beyond its end, or if the line info version is not
   2, 3 or 4. */
1384 void read_filename_table( /*MOD*/D3VarParser* parser,
1385 CUConst* cc, UWord debug_line_offset,
1395 vg_assert(parser && cc && cc->barf);
1396 if ((!cc->debug_line_img)
1397 || cc->debug_line_sz <= debug_line_offset)
1398 cc->barf("read_filename_table: .debug_line is missing?");
1400 init_Cursor( &c, cc->debug_line_img,
1401 cc->debug_line_sz, debug_line_offset, cc->barf,
1402 "Overrun whilst reading .debug_line section(1)" );
1405 get_Initial_Length( &is_dw64, &c,
1406 "read_filename_table: invalid initial-length field" );
1407 version = get_UShort( &c );
1408 if (version != 2 && version != 3 && version != 4)
1409 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1410 "is currently supported.");
1411 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1412 /*minimum_instruction_length = */ get_UChar( &c );
1414 /*maximum_operations_per_insn = */ get_UChar( &c );
1415 /*default_is_stmt = */ get_UChar( &c );
1416 /*line_base = (Char)*/ get_UChar( &c );
1417 /*line_range = */ get_UChar( &c );
1418 opcode_base = get_UChar( &c );
1419 /* skip over "standard_opcode_lengths" */
1420 for (i = 1; i < (Word)opcode_base; i++)
1421 (void)get_UChar( &c );
1423 /* skip over the directory names table */
1424 while (peek_UChar(&c) != 0) {
1425 (void)get_AsciiZ(&c);
1427 (void)get_UChar(&c); /* skip terminating zero */
1429 /* Read and record the file names table */
1430 vg_assert(parser->filenameTable);
1431 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1432 /* Add a dummy index-zero entry. DWARF3 numbers its files
1433 from 1, for some reason. */
1434 str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1435 VG_(addToXA)( parser->filenameTable, &str );
1436 while (peek_UChar(&c) != 0) {
1437 str = get_AsciiZ(&c);
1438 TRACE_D3(" read_filename_table: %ld %s\n",
1439 VG_(sizeXA)(parser->filenameTable), str);
1440 str = ML_(addStr)( cc->di, str, -1 );
1441 VG_(addToXA)( parser->filenameTable, &str );
1442 (void)get_ULEB128( &c ); /* skip directory index # */
1443 (void)get_ULEB128( &c ); /* skip last mod time */
1444 (void)get_ULEB128( &c ); /* file size */
1446 /* We're done! The rest of it is not interesting. */
/* Examine one DIE with tag dtag at nesting depth level, and update
   the variable-parsing state accordingly.  The scope stack is first
   preened down to level-1.  Then, depending on dtag:
   - DW_TAG_compile_unit: notes DW_AT_low_pc / DW_AT_high_pc /
     DW_AT_ranges, reads the file name table named by
     DW_AT_stmt_list, establishes cc->cu_svma, and pushes a scope
     for the CU.
   - DW_TAG_lexical_block / DW_TAG_subprogram: pushes a scope
     carrying the PC range(s) of the block; a GExpr built from
     DW_AT_frame_base (if any) is added to gexprs and passed along
     with the pushed scope.
   - DW_TAG_variable / DW_TAG_formal_parameter: gathers name, type,
     location, abstract origin and declaring file/line, and if the
     DIE looks usable appends a new TempVar to tempvars.  Range
     lists with more than one entry are shared via rangestree.
   If a DIE cannot be made sense of, its attributes are printed and
   cc->barf is called. */
1450 __attribute__((noinline))
1451 static void parse_var_DIE (
1452 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1453 /*MOD*/XArray* /* of TempVar* */ tempvars,
1454 /*MOD*/XArray* /* of GExpr* */ gexprs,
1455 /*MOD*/D3VarParser* parser,
1469 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
1470 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1472 varstack_preen( parser, td3, level-1 );
1474 if (dtag == DW_TAG_compile_unit) {
1475 Bool have_lo = False;
1476 Bool have_hi1 = False;
1477 Bool have_range = False;
1482 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1483 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1484 if (attr == 0 && form == 0) break;
1485 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1486 cc, c_die, False/*td3*/, form );
1487 if (attr == DW_AT_low_pc && ctsSzB > 0) {
1491 if (attr == DW_AT_high_pc && ctsSzB > 0) {
1495 if (attr == DW_AT_ranges && ctsSzB > 0) {
1499 if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1500 read_filename_table( parser, cc, (UWord)cts, td3 );
1503 /* Now, does this give us an opportunity to find this
1506 if (level == 0 && have_lo) {
1507 vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1508 because we've already seen a DW_TAG_compile_unit DIE at level
1509 0. But that can't happen, because DWARF3 only allows exactly
1510 one top level DIE per CU. */
1511 cc->cu_svma_known = True;
1512 cc->cu_svma = ip_lo;
1514 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1515 /* Now, it may be that this DIE doesn't tell us the CU's
1516 SVMA, by way of not having a DW_AT_low_pc. That's OK --
1517 the CU doesn't *have* to have its SVMA specified.
1519 But as per last para D3 spec sec 3.1.1 ("Normal and
1520 Partial Compilation Unit Entries", "If the base address
1521 (viz, the SVMA) is undefined, then any DWARF entry or
1522 structure defined in terms of the base address of that
1523 compilation unit is not valid.". So that means, if whilst
1524 processing the children of this top level DIE (or their
1525 children, etc) we see a DW_AT_ranges, and cu_svma_known is
1526 False, then the DIE that contains it is (per the spec)
1527 invalid, and we can legitimately stop and complain. */
1530 /* .. whereas The Reality is, simply assume the SVMA is zero
1531 if it isn't specified. */
1533 vg_assert(!cc->cu_svma_known);
1534 cc->cu_svma_known = True;
1536 cc->cu_svma = ip_lo;
1541 /* Do we have something that looks sane? */
1542 if (have_lo && have_hi1 && (!have_range)) {
1544 varstack_push( cc, parser, td3,
1545 unitary_range_list(ip_lo, ip_hi1 - 1),
1547 False/*isFunc*/, NULL/*fbGX*/ );
1549 if ((!have_lo) && (!have_hi1) && have_range) {
1550 varstack_push( cc, parser, td3,
1551 get_range_list( cc, td3,
1552 rangeoff, cc->cu_svma ),
1554 False/*isFunc*/, NULL/*fbGX*/ );
1556 if ((!have_lo) && (!have_hi1) && (!have_range)) {
1557 /* CU has no code, presumably? */
1558 varstack_push( cc, parser, td3,
1561 False/*isFunc*/, NULL/*fbGX*/ );
1563 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1564 /* broken DIE created by gcc-4.3.X ? Ignore the
1565 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1567 varstack_push( cc, parser, td3,
1568 get_range_list( cc, td3,
1569 rangeoff, cc->cu_svma ),
1571 False/*isFunc*/, NULL/*fbGX*/ );
1573 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1574 (Int)have_lo, (Int)have_hi1, (Int)have_range);
1579 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1580 Bool have_lo = False;
1581 Bool have_hi1 = False;
1582 Bool have_range = False;
1586 Bool isFunc = dtag == DW_TAG_subprogram;
1589 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1590 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1591 if (attr == 0 && form == 0) break;
1592 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1593 cc, c_die, False/*td3*/, form );
1594 if (attr == DW_AT_low_pc && ctsSzB > 0) {
1598 if (attr == DW_AT_high_pc && ctsSzB > 0) {
1602 if (attr == DW_AT_ranges && ctsSzB > 0) {
1607 && attr == DW_AT_frame_base
1608 && ((ctsMemSzB > 0 && ctsSzB == 0)
1609 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1610 fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1612 VG_(addToXA)(gexprs, &fbGX);
1615 /* Do we have something that looks sane? */
1616 if (dtag == DW_TAG_subprogram
1617 && (!have_lo) && (!have_hi1) && (!have_range)) {
1618 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1619 representing a subroutine declaration that is not also a
1620 definition does not have code address or range
1623 if (dtag == DW_TAG_lexical_block
1624 && (!have_lo) && (!have_hi1) && (!have_range)) {
1625 /* I believe this is legit, and means the lexical block
1626 contains no insns (whatever that might mean). Ignore. */
1628 if (have_lo && have_hi1 && (!have_range)) {
1629 /* This scope supplies just a single address range. */
1631 varstack_push( cc, parser, td3,
1632 unitary_range_list(ip_lo, ip_hi1 - 1),
1633 level, isFunc, fbGX );
1635 if ((!have_lo) && (!have_hi1) && have_range) {
1636 /* This scope supplies multiple address ranges via the use of
1638 varstack_push( cc, parser, td3,
1639 get_range_list( cc, td3,
1640 rangeoff, cc->cu_svma ),
1641 level, isFunc, fbGX );
1643 if (have_lo && (!have_hi1) && (!have_range)) {
1644 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
1645 Entries) says fairly clearly that a scope must have either
1646 _range or (_low_pc and _high_pc). */
1647 /* The spec is a bit ambiguous though. Perhaps a single byte
1648 range is intended? See sec 2.17 (Code Addresses And Ranges) */
1649 /* This case is here because icc9 produced this:
1650 <2><13bd>: DW_TAG_lexical_block
1651 DW_AT_decl_line : 5229
1652 DW_AT_decl_column : 37
1654 DW_AT_low_pc : 0x401b03
1656 /* Ignore (seems safer than pushing a single byte range) */
1661 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1663 UWord typeR = D3_INVALID_CUOFF;
1664 Bool external = False;
1665 GExpr* gexpr = NULL;
1667 UWord abs_ori = (UWord)D3_INVALID_CUOFF;
1669 UChar* fileName = NULL;
1671 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1672 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1673 if (attr == 0 && form == 0) break;
1674 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1675 cc, c_die, False/*td3*/, form );
1677 if (attr == DW_AT_name && ctsMemSzB > 0) {
1678 name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1680 if (attr == DW_AT_location
1681 && ((ctsMemSzB > 0 && ctsSzB == 0)
1682 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1683 gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1685 VG_(addToXA)(gexprs, &gexpr);
1687 if (attr == DW_AT_type && ctsSzB > 0) {
1690 if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1693 if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1694 abs_ori = (UWord)cts;
1696 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1697 /*declaration = True;*/
1699 if (attr == DW_AT_decl_line && ctsSzB > 0) {
1702 if (attr == DW_AT_decl_file && ctsSzB > 0) {
1703 Int ftabIx = (Int)cts;
1705 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1706 fileName = *(UChar**)
1707 VG_(indexXA)( parser->filenameTable, ftabIx );
1708 vg_assert(fileName);
1710 if (0) VG_(printf)("XXX filename = %s\n", fileName);
1713 /* We'll collect it under if one of the following three
1715 (1) has location and type -> completed
1716 (2) has type only -> is an abstract instance
1717 (3) has location and abs_ori -> is a concrete instance
1718 Name, filename and line number are all optional frills.
1720 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1721 /* 2 */ || (typeR != D3_INVALID_CUOFF)
1722 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1724 /* Add this variable to the list of interesting looking
1725 variables. Crucially, note along with it the address
1726 range(s) associated with the variable, which for locals
1727 will be the address ranges at the top of the varparser's
1731 XArray* /* of AddrRange */ xa;
1733 /* Stack can't be empty; we put a dummy entry on it for the
1734 entire address range before starting with the DIEs for
1736 vg_assert(parser->sp >= 0);
1738 /* If this is a local variable (non-external), try to find
1739 the GExpr for the DW_AT_frame_base of the containing
1740 function. It should have been pushed on the stack at the
1741 time we encountered its DW_TAG_subprogram DIE, so the way
1742 to find it is to scan back down the stack looking for it.
1743 If there isn't an enclosing stack entry marked 'isFunc'
1744 then we must be seeing variable or formal param DIEs
1745 outside of a function, so we deem the Dwarf to be
1746 malformed if that happens. Note that the fbGX may be NULL
1747 if the containing DW_TAG_subprogram didn't supply a
1748 DW_AT_frame_base -- that's OK, but there must actually be
1749 a containing DW_TAG_subprogram. */
1752 for (i = parser->sp; i >= 0; i--) {
1753 if (parser->isFunc[i]) {
1754 fbGX = parser->fbGX[i];
1760 if (0 && VG_(clo_verbosity) >= 0) {
1761 VG_(message)(Vg_DebugMsg,
1762 "warning: parse_var_DIE: non-external variable "
1763 "outside DW_TAG_subprogram\n");
1766 /* This seems to happen a lot. Just ignore it -- if,
1767 when we come to evaluation of the location (guarded)
1768 expression, it requires a frame base value, and
1769 there's no expression for that, then evaluation as a
1770 whole will fail. Harmless - a bit of a waste of
1771 cycles but nothing more. */
1775 /* re "external ? 0 : parser->sp" (twice), if the var is
1776 marked 'external' then we must put it at the global scope,
1777 as only the global scope (level 0) covers the entire PC
1778 address space. It is asserted elsewhere that level 0
1779 always covers the entire address space. */
1780 xa = parser->ranges[external ? 0 : parser->sp];
1781 nRanges = VG_(sizeXA)(xa);
1782 vg_assert(nRanges >= 0);
1784 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
1786 tv->level = external ? 0 : parser->sp;
1790 tv->fName = fileName;
1793 tv->absOri = abs_ori;
1795 /* See explanation on definition of type TempVar for the
1796 reason for this elaboration. */
1797 tv->nRanges = nRanges;
1802 AddrRange* range = VG_(indexXA)(xa, 0);
1803 tv->rngOneMin = range->aMin;
1804 tv->rngOneMax = range->aMax;
1806 else if (nRanges > 1) {
1807 /* See if we already have a range list which is
1808 structurally identical. If so, use that; if not, clone
1809 this one, and add it to our collection. */
1811 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
1812 XArray* old = (XArray*)keyW;
1813 tl_assert(valW == 0);
1814 tl_assert(old != xa);
1817 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
1818 tv->rngMany = cloned;
1819 VG_(addToFM)( rangestree, (UWord)cloned, 0 );
1823 VG_(addToXA)( tempvars, &tv );
1825 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
1827 /* collect stats on how effective the ->ranges special
1830 static Int ntot=0, ngt=0;
1832 if (tv->rngMany) ngt++;
1833 if (0 == (ntot % 100000))
1834 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
1839 /* Here are some other weird cases seen in the wild:
1841 We have a variable with a name and a type, but no
1842 location. I guess that's a sign that it has been
1843 optimised away. Ignore it. Here's an example:
1845 static Int lc_compar(void* n1, void* n2) {
1846 MC_Chunk* mc1 = *(MC_Chunk**)n1;
1847 MC_Chunk* mc2 = *(MC_Chunk**)n2;
1848 return (mc1->data < mc2->data ? -1 : 1);
1851 Both mc1 and mc2 are like this
1852 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
1855 DW_AT_decl_line : 216
1858 whereas n1 and n2 do have locations specified.
1860 ---------------------------------------------
1862 We see a DW_TAG_formal_parameter with a type, but
1863 no name and no location. It's probably part of a function type
1864 construction, thusly, hence ignore it:
1865 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
1866 DW_AT_sibling : <2c9>
1867 DW_AT_prototyped : 1
1869 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1871 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1874 ---------------------------------------------
1876 Is very minimal, like this:
1877 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
1878 DW_AT_abstract_origin: <7ba>
1879 What that signifies I have no idea. Ignore.
1881 ----------------------------------------------
1883 Is very minimal, like this:
1884 <200f>: DW_TAG_formal_parameter
1885 DW_AT_abstract_ori: <1f4c>
1886 DW_AT_location : 13440
1887 What that signifies I have no idea. Ignore.
1888 It might be significant, though: the variable at least
1889 has a location and so might exist somewhere.
1890 Maybe we should handle this.
1892 ---------------------------------------------
1894 <22407>: DW_TAG_variable
1895 DW_AT_name : (indirect string, offset: 0x6579):
1896 vgPlain_trampoline_stuff_start
1897 DW_AT_decl_file : 29
1898 DW_AT_decl_line : 56
1900 DW_AT_declaration : 1
1902 Nameless and typeless variable that has a location? Who
1904 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
1905 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
1906 (DW_OP_addr: 3813c7c0)
1908 No, really. Check it out. gcc is quite simply borked.
1909 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
1910 // followed by no attributes, and the next DIE is a sibling,
1917 set_position_of_Cursor( c_die, saved_die_c_offset );
1918 set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
1919 VG_(printf)("\nparse_var_DIE: confused by:\n");
1920 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
1922 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1923 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1924 if (attr == 0 && form == 0) break;
1925 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
1926 /* Get the form contents, so as to print them */
1927 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1928 cc, c_die, True, form );
1929 VG_(printf)("\t\n");
1932 cc->barf("parse_var_DIE: confused by the above DIE");
1937 /*------------------------------------------------------------*/
1939 /*--- Parsing of type-related DIEs ---*/
1941 /*------------------------------------------------------------*/
/* Capacity of the type parser stack (the qparentE and qlevel arrays
   below).  typestack_push barfs rather than exceed it. */
1943 #define N_D3_TYPE_STACK 16
1947 /* What source language? 'A'=Ada83/95,
1951 Established once per compilation unit. */
1953 /* A stack of types which are currently under construction */
1954 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
1956 /* Note that the TyEnts in qparentE are temporary copies of the
1957 ones accumulating in the main tyent array. So it is not safe
1958 to free up anything on them when popping them off the stack
1959 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just
1960 memset them to zero when done. */
1961 TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
1962 Int qlevel[N_D3_TYPE_STACK];
/* Debug printing: dump the type stack, entries 0 up to and including
   parser->sp.  Each entry is shown with its index, its DIE level
   (qlevel) and the TyEnt as rendered by ML_(pp_TyEnt).  str is a
   caller-supplied label shown in the heading. */
1967 static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1969 VG_(printf)(" typestack (%s) {\n", str);
1970 for (i = 0; i <= parser->sp; i++) {
1971 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]);
1972 ML_(pp_TyEnt)( &parser->qparentE[i] );
1975 VG_(printf)(" }\n");
1978 /* Remove from the stack, all entries with .level > 'level'.
   Popping stops when the stack is empty (sp == -1) or when the top
   entry has qlevel <= level.  A popped slot is not freed: it is
   zeroed and then marked empty (cuOff = D3_INVALID_CUOFF,
   tag = Te_EMPTY, qlevel = 0), because qparentE only holds
   temporary copies of the TyEnts. */
1980 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1982 Bool changed = False;
1983 vg_assert(parser->sp < N_D3_TYPE_STACK);
1985 vg_assert(parser->sp >= -1);
1986 if (parser->sp == -1) break;
1987 if (parser->qlevel[parser->sp] <= level) break;
1989 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1990 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
1991 VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
1992 parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
1993 parser->qparentE[parser->sp].tag = Te_EMPTY;
1994 parser->qlevel[parser->sp] = 0;
1999 typestack_show( parser, "after preen" );
/* True iff the type stack holds no entries (sp == -1).  Also asserts
   that sp lies within -1 .. N_D3_TYPE_STACK-1. */
2002 static Bool typestack_is_empty ( D3TypeParser* parser ) {
2003 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2004 return parser->sp == -1;
/* Push a copy of *parentE onto the type stack as the parent type for
   DIE level level.  Existing entries at that level or deeper are
   first removed with typestack_preen.  parentE must satisfy
   ML_(TyEnt__is_type) and carry a valid cuOff (both asserted).
   Barfs via cc->barf if the stack is already full
   (N_D3_TYPE_STACK entries). */
2007 static void typestack_push ( CUConst* cc,
2008 D3TypeParser* parser,
2010 TyEnt* parentE, Int level ) {
2012 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
2013 parser->sp+1, level, parentE->cuOff);
2015 /* First we need to zap everything >= 'level', as we are about to
2016 replace any previous entry at 'level', so .. */
2017 typestack_preen(parser, /*td3*/False, level-1);
2019 vg_assert(parser->sp >= -1);
2020 vg_assert(parser->sp < N_D3_TYPE_STACK);
2021 if (parser->sp == N_D3_TYPE_STACK-1)
2022 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
2023 "increase and recompile");
2024 if (parser->sp >= 0)
2025 vg_assert(parser->qlevel[parser->sp] < level);
2027 vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
2028 vg_assert(parser->qlevel[parser->sp] == 0);
2030 vg_assert(ML_(TyEnt__is_type)(parentE));
2031 vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2032 parser->qparentE[parser->sp] = *parentE;
2033 parser->qlevel[parser->sp] = level;
2035 typestack_show( parser, "after push" );
2038 /* True if the subrange type being parsed gives the bounds of an array.
   dtag must be DW_TAG_subrange_type (asserted).  When the parser
   language is Ada, the answer is True only if the type stack is
   non-empty and the innermost parent under construction is an
   array type (tag Te_TyArray). */
2039 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
2041 vg_assert(dtag == DW_TAG_subrange_type);
2042 /* For most languages, a subrange_type dtag always gives the
2044 For Ada, there are additional conditions as a subrange_type
2045 is also used for other purposes. */
2046 if (parser->language != 'A')
2047 /* not Ada, so it definitely denotes an array bound. */
2050 /* Extra constraints for Ada: it only denotes an array bound if .. */
2051 return (! typestack_is_empty(parser)
2052 && parser->qparentE[parser->sp].tag == Te_TyArray);
2055 /* Parse a type-related DIE. 'parser' holds the current parser state.
2056 'admin' is where the completed types are dumped. 'dtag' is the tag
2057 for this DIE. 'c_die' points to the start of the data fields (FORM
2058 stuff) for the DIE. c_abbv points to the start of the (name,form)
2059 pairs which describe the DIE.
2061 We may find the DIE uninteresting, in which case we should ignore
2064 What happens: the DIE is examined. If uninteresting, it is ignored.
2065 Otherwise, the DIE gives rise to two things:
2067 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2068 (2) a TyAdmin structure, which holds the type, or related stuff
2070 (2) is added at the end of 'tyadmins', at some index, say 'i'.
2072 A pair (cuOffset, i) is added to 'tydict'.
2074 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2075 a mapping from cuOffset to the index of the corresponding entry in
2078 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2079 in the tydict (by binary search). This gives an index into
2080 tyadmins, and the required entity lives in tyadmins at that index.
2082 __attribute__((noinline))
2083 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2084 /*MOD*/D3TypeParser* parser,
2101 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
2102 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2104 VG_(memset)( &typeE, 0xAA, sizeof(typeE) );
2105 VG_(memset)( &atomE, 0xAA, sizeof(atomE) );
2106 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2107 VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2109 /* If we've returned to a level at or above any previously noted
2110 parent, un-note it, so we don't believe we're still collecting
2112 typestack_preen( parser, td3, level-1 );
2114 if (dtag == DW_TAG_compile_unit) {
2115 /* See if we can find DW_AT_language, since it is important for
2116 establishing array bounds (see DW_TAG_subrange_type below in
2119 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2120 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2121 if (attr == 0 && form == 0) break;
2122 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2123 cc, c_die, False/*td3*/, form );
2124 if (attr != DW_AT_language)
2129 case DW_LANG_C89: case DW_LANG_C:
2130 case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2131 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2132 case DW_LANG_Upc: case DW_LANG_C99:
2133 parser->language = 'C'; break;
2134 case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2135 case DW_LANG_Fortran95:
2136 parser->language = 'F'; break;
2137 case DW_LANG_Ada83: case DW_LANG_Ada95:
2138 parser->language = 'A'; break;
2139 case DW_LANG_Cobol74:
2140 case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2141 case DW_LANG_Modula2: case DW_LANG_Java:
2143 case DW_LANG_D: case DW_LANG_Python:
2144 case DW_LANG_Mips_Assembler:
2145 parser->language = '?'; break;
2152 if (dtag == DW_TAG_base_type) {
2153 /* We can pick up a new base type any time. */
2154 VG_(memset)(&typeE, 0, sizeof(typeE));
2155 typeE.cuOff = D3_INVALID_CUOFF;
2156 typeE.tag = Te_TyBase;
2158 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2159 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2160 if (attr == 0 && form == 0) break;
2161 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2162 cc, c_die, False/*td3*/, form );
2163 if (attr == DW_AT_name && ctsMemSzB > 0) {
2164 typeE.Te.TyBase.name
2165 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
2166 (UChar*)(UWord)cts );
2168 if (attr == DW_AT_byte_size && ctsSzB > 0) {
2169 typeE.Te.TyBase.szB = cts;
2171 if (attr == DW_AT_encoding && ctsSzB > 0) {
2173 case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2174 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2175 case DW_ATE_boolean:/* FIXME - is this correct? */
2176 typeE.Te.TyBase.enc = 'U'; break;
2177 case DW_ATE_signed: case DW_ATE_signed_char:
2178 typeE.Te.TyBase.enc = 'S'; break;
2180 typeE.Te.TyBase.enc = 'F'; break;
2181 case DW_ATE_complex_float:
2182 typeE.Te.TyBase.enc = 'C'; break;
2189 /* Invent a name if it doesn't have one. gcc-4.3
2190 -ftree-vectorize is observed to emit nameless base types. */
2191 if (!typeE.Te.TyBase.name)
2192 typeE.Te.TyBase.name
2193 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2194 "<anon_base_type>" );
2196 /* Do we have something that looks sane? */
2197 if (/* must have a name */
2198 typeE.Te.TyBase.name == NULL
2199 /* and a plausible size. Yes, really 32: "complex long
2200 double" apparently has size=32 */
2201 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2202 /* and a plausible encoding */
2203 || (typeE.Te.TyBase.enc != 'U'
2204 && typeE.Te.TyBase.enc != 'S'
2205 && typeE.Te.TyBase.enc != 'F'
2206 && typeE.Te.TyBase.enc != 'C'))
2208 /* Last minute hack: if we see this
2209 <1><515>: DW_TAG_base_type
2213 convert it into a real Void type. */
2214 if (typeE.Te.TyBase.szB == 0
2215 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2216 ML_(TyEnt__make_EMPTY)(&typeE);
2217 typeE.tag = Te_TyVoid;
2218 typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2224 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2225 || dtag == DW_TAG_ptr_to_member_type) {
2226 /* This seems legit for _pointer_type and _reference_type. I
2227 don't know if rolling _ptr_to_member_type in here really is
2228 legit, but it's better than not handling it at all. */
2229 VG_(memset)(&typeE, 0, sizeof(typeE));
2230 typeE.cuOff = D3_INVALID_CUOFF;
2231 typeE.tag = Te_TyPorR;
2232 /* target type defaults to void */
2233 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2234 typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
2235 || dtag == DW_TAG_ptr_to_member_type;
2236 /* These three type kinds don't *have* to specify their size, in
2237 which case we assume it's a machine word. But if they do
2238 specify it, it must be a machine word :-) This probably
2239 assumes that the word size of the Dwarf3 we're reading is the
2240 same size as that on the machine. gcc appears to give a size
2241 whereas icc9 doesn't. */
2242 typeE.Te.TyPorR.szB = sizeof(UWord);
2244 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2245 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2246 if (attr == 0 && form == 0) break;
2247 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2248 cc, c_die, False/*td3*/, form );
2249 if (attr == DW_AT_byte_size && ctsSzB > 0) {
2250 typeE.Te.TyPorR.szB = cts;
2252 if (attr == DW_AT_type && ctsSzB > 0) {
2253 typeE.Te.TyPorR.typeR = (UWord)cts;
2256 /* Do we have something that looks sane? */
2257 if (typeE.Te.TyPorR.szB != sizeof(UWord))
2263 if (dtag == DW_TAG_enumeration_type) {
2264 /* Create a new Type to hold the results. */
2265 VG_(memset)(&typeE, 0, sizeof(typeE));
2267 typeE.tag = Te_TyEnum;
2268 typeE.Te.TyEnum.atomRs
2269 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2273 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2274 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2275 if (attr == 0 && form == 0) break;
2276 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2277 cc, c_die, False/*td3*/, form );
2278 if (attr == DW_AT_name && ctsMemSzB > 0) {
2279 typeE.Te.TyEnum.name
2280 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
2281 (UChar*)(UWord)cts );
2283 if (attr == DW_AT_byte_size && ctsSzB > 0) {
2284 typeE.Te.TyEnum.szB = cts;
2288 if (!typeE.Te.TyEnum.name)
2289 typeE.Te.TyEnum.name
2290 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2291 "<anon_enum_type>" );
2293 /* Do we have something that looks sane? */
2294 if (typeE.Te.TyEnum.szB == 0
2295 /* we must know the size */
2296 /* but not for Ada, which uses such dummy
2297 enumerations as helper for gdb ada mode. */
2298 && parser->language != 'A')
2301 typestack_push( cc, parser, td3, &typeE, level );
2305 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2306 DW_TAG_enumerator with only a DW_AT_name but no
2307 DW_AT_const_value. This is in violation of the Dwarf3 standard,
2308 and appears to be a new "feature" of gcc - versions 4.3.x and
2309 earlier do not appear to do this. So accept DW_TAG_enumerator
2310 which only have a name but no value. An example:
2312 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2313 <181> DW_AT_name : (indirect string, offset: 0xda70):
2315 <185> DW_AT_byte_size : 4
2316 <186> DW_AT_decl_file : 14
2317 <187> DW_AT_decl_line : 1480
2318 <189> DW_AT_sibling : <0x1a7>
2319 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2320 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
2322 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2323 <193> DW_AT_name : (indirect string, offset: 0x1505f):
2325 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2326 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
2328 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2329 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
2331 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2332 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
2335 if (dtag == DW_TAG_enumerator) {
2336 VG_(memset)( &atomE, 0, sizeof(atomE) );
2338 atomE.tag = Te_Atom;
2340 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2341 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2342 if (attr == 0 && form == 0) break;
2343 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2344 cc, c_die, False/*td3*/, form );
2345 if (attr == DW_AT_name && ctsMemSzB > 0) {
2347 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
2348 (UChar*)(UWord)cts );
2350 if (attr == DW_AT_const_value && ctsSzB > 0) {
2351 atomE.Te.Atom.value = cts;
2352 atomE.Te.Atom.valueKnown = True;
2355 /* Do we have something that looks sane? */
2356 if (atomE.Te.Atom.name == NULL)
2358 /* Do we have a plausible parent? */
2359 if (typestack_is_empty(parser)) goto bad_DIE;
2360 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2361 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2362 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2363 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
2364 /* Record this child in the parent */
2365 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2366 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2368 /* And record the child itself */
2372 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
2373 don't know if this is correct, but it at least makes this reader
2374 usable for gcc-4.3 produced Dwarf3. */
2375 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2376 || dtag == DW_TAG_union_type) {
2377 Bool have_szB = False;
2378 Bool is_decl = False;
2379 Bool is_spec = False;
2380 /* Create a new Type to hold the results. */
2381 VG_(memset)(&typeE, 0, sizeof(typeE));
2383 typeE.tag = Te_TyStOrUn;
2384 typeE.Te.TyStOrUn.name = NULL;
2385 typeE.Te.TyStOrUn.fieldRs
2386 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2389 typeE.Te.TyStOrUn.complete = True;
2390 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2391 || dtag == DW_TAG_class_type;
2393 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2394 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2395 if (attr == 0 && form == 0) break;
2396 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2397 cc, c_die, False/*td3*/, form );
2398 if (attr == DW_AT_name && ctsMemSzB > 0) {
2399 typeE.Te.TyStOrUn.name
2400 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
2401 (UChar*)(UWord)cts );
2403 if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2404 typeE.Te.TyStOrUn.szB = cts;
2407 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2410 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2414 /* Do we have something that looks sane? */
2415 if (is_decl && (!is_spec)) {
2416 /* It's a DW_AT_declaration. We require the name but
2418 if (typeE.Te.TyStOrUn.name == NULL)
2420 typeE.Te.TyStOrUn.complete = False;
2421 /* JRS 2009 Aug 10: <possible kludge>? */
2422 /* Push this tyent on the stack, even though it's incomplete.
2423 It appears that gcc-4.4 on Fedora 11 will sometimes create
2424 DW_TAG_member entries for it, and so we need to have a
2425 plausible parent present in order for that to work. See
2426 #200029 comments 8 and 9. */
2427 typestack_push( cc, parser, td3, &typeE, level );
2428 /* </possible kludge> */
2431 if ((!is_decl) /* && (!is_spec) */) {
2432 /* this is the common, ordinary case */
2433 if ((!have_szB) /* we must know the size */
2434 /* But the name can be present, or not */)
2437 typestack_push( cc, parser, td3, &typeE, level );
2441 /* don't know how to handle any other variants just now */
2446 if (dtag == DW_TAG_member) {
2447 /* Acquire member entries for both DW_TAG_structure_type and
2448 DW_TAG_union_type. They differ minorly, in that struct
2449 members must have a DW_AT_data_member_location expression
2450 whereas union members must not. */
2451 Bool parent_is_struct;
2452 VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2453 fieldE.cuOff = posn;
2454 fieldE.tag = Te_Field;
2455 fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2457 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2458 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2459 if (attr == 0 && form == 0) break;
2460 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2461 cc, c_die, False/*td3*/, form );
2462 if (attr == DW_AT_name && ctsMemSzB > 0) {
2463 fieldE.Te.Field.name
2464 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
2465 (UChar*)(UWord)cts );
2467 if (attr == DW_AT_type && ctsSzB > 0) {
2468 fieldE.Te.Field.typeR = (UWord)cts;
2470 /* There are 2 different cases for DW_AT_data_member_location.
2471 If it is a constant class attribute, it contains byte offset
2472 from the beginning of the containing entity.
2473 Otherwise it is a location expression. */
2474 if (attr == DW_AT_data_member_location && ctsSzB > 0) {
2475 fieldE.Te.Field.nLoc = -1;
2476 fieldE.Te.Field.pos.offset = cts;
2477 } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2478 fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
2479 fieldE.Te.Field.pos.loc
2480 = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
2482 (SizeT)fieldE.Te.Field.nLoc );
2485 /* Do we have a plausible parent? */
2486 if (typestack_is_empty(parser)) goto bad_DIE;
2487 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2488 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2489 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2490 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
2491 /* Do we have something that looks sane? If this is a member of a
2492 struct, we must have a location expression; but if a member
2493 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
2494 to reject in the latter case, but some compilers have been
2495 observed to emit constant-zero expressions. So just ignore
2498 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2499 if (!fieldE.Te.Field.name)
2500 fieldE.Te.Field.name
2501 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2503 vg_assert(fieldE.Te.Field.name);
2504 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2506 if (fieldE.Te.Field.nLoc) {
2507 if (!parent_is_struct) {
2508 /* If this is a union type, pretend we haven't seen the data
2509 member location expression, as it is by definition
2510 redundant (it must be zero). */
2511 if (fieldE.Te.Field.nLoc > 0)
2512 ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2513 fieldE.Te.Field.pos.loc = NULL;
2514 fieldE.Te.Field.nLoc = 0;
2516 /* Record this child in the parent */
2517 fieldE.Te.Field.isStruct = parent_is_struct;
2518 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2519 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2521 /* And record the child itself */
2524 /* Member with no location - this can happen with static
2525 const members in C++ code which are compile time constants
2526 that do not exist in the class. They're not of any interest
2527 to us so we ignore them. */
2531 if (dtag == DW_TAG_array_type) {
2532 VG_(memset)(&typeE, 0, sizeof(typeE));
2534 typeE.tag = Te_TyArray;
2535 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2536 typeE.Te.TyArray.boundRs
2537 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2541 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2542 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2543 if (attr == 0 && form == 0) break;
2544 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2545 cc, c_die, False/*td3*/, form );
2546 if (attr == DW_AT_type && ctsSzB > 0) {
2547 typeE.Te.TyArray.typeR = (UWord)cts;
2550 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2553 typestack_push( cc, parser, td3, &typeE, level );
2557 /* this is a subrange type defining the bounds of an array. */
2558 if (dtag == DW_TAG_subrange_type
2559 && subrange_type_denotes_array_bounds(parser, dtag)) {
2560 Bool have_lower = False;
2561 Bool have_upper = False;
2562 Bool have_count = False;
2566 switch (parser->language) {
2567 case 'C': have_lower = True; lower = 0; break;
2568 case 'F': have_lower = True; lower = 1; break;
2569 case '?': have_lower = False; break;
2570 case 'A': have_lower = False; break;
2571 default: vg_assert(0); /* assured us by handling of
2572 DW_TAG_compile_unit in this fn */
2575 VG_(memset)( &boundE, 0, sizeof(boundE) );
2576 boundE.cuOff = D3_INVALID_CUOFF;
2577 boundE.tag = Te_Bound;
2579 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2580 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2581 if (attr == 0 && form == 0) break;
2582 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2583 cc, c_die, False/*td3*/, form );
2584 if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2588 if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2592 if (attr == DW_AT_count && ctsSzB > 0) {
2593 /*count = (Long)cts;*/
2597 /* FIXME: potentially skip the rest if no parent present, since
2598 it could be the case that this subrange type is free-standing
2599 (not being used to describe the bounds of a containing array
2601 /* Do we have a plausible parent? */
2602 if (typestack_is_empty(parser)) goto bad_DIE;
2603 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2604 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2605 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2606 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
2608 /* Figure out if we have a definite range or not */
2609 if (have_lower && have_upper && (!have_count)) {
2610 boundE.Te.Bound.knownL = True;
2611 boundE.Te.Bound.knownU = True;
2612 boundE.Te.Bound.boundL = lower;
2613 boundE.Te.Bound.boundU = upper;
2615 else if (have_lower && (!have_upper) && (!have_count)) {
2616 boundE.Te.Bound.knownL = True;
2617 boundE.Te.Bound.knownU = False;
2618 boundE.Te.Bound.boundL = lower;
2619 boundE.Te.Bound.boundU = 0;
2621 else if ((!have_lower) && have_upper && (!have_count)) {
2622 boundE.Te.Bound.knownL = False;
2623 boundE.Te.Bound.knownU = True;
2624 boundE.Te.Bound.boundL = 0;
2625 boundE.Te.Bound.boundU = upper;
2627 else if ((!have_lower) && (!have_upper) && (!have_count)) {
2628 boundE.Te.Bound.knownL = False;
2629 boundE.Te.Bound.knownU = False;
2630 boundE.Te.Bound.boundL = 0;
2631 boundE.Te.Bound.boundU = 0;
2633 /* FIXME: handle more cases */
2637 /* Record this bound in the parent */
2638 boundE.cuOff = posn;
2639 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2640 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2642 /* And record the child itself */
2646 /* typedef or subrange_type other than array bounds. */
2647 if (dtag == DW_TAG_typedef
2648 || (dtag == DW_TAG_subrange_type
2649 && !subrange_type_denotes_array_bounds(parser, dtag))) {
2650 /* subrange_type other than array bound is only for Ada. */
2651 vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
2652 /* We can pick up a new typedef/subrange_type any time. */
2653 VG_(memset)(&typeE, 0, sizeof(typeE));
2654 typeE.cuOff = D3_INVALID_CUOFF;
2655 typeE.tag = Te_TyTyDef;
2656 typeE.Te.TyTyDef.name = NULL;
2657 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2659 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2660 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2661 if (attr == 0 && form == 0) break;
2662 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2663 cc, c_die, False/*td3*/, form );
2664 if (attr == DW_AT_name && ctsMemSzB > 0) {
2665 typeE.Te.TyTyDef.name
2666 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
2667 (UChar*)(UWord)cts );
2669 if (attr == DW_AT_type && ctsSzB > 0) {
2670 typeE.Te.TyTyDef.typeR = (UWord)cts;
2673 /* Do we have something that looks sane? */
2674 if (/* must have a name */
2675 typeE.Te.TyTyDef.name == NULL
2676 /* However gcc gnat Ada generates minimal typedef
2677 such as the below => accept no name for Ada.
2678 <6><91cc>: DW_TAG_typedef
2679 DW_AT_abstract_ori: <9066>
2681 && parser->language != 'A'
2682 /* but the referred-to type can be absent */)
2688 if (dtag == DW_TAG_subroutine_type) {
2689 /* function type? just record that one fact and ask no
2690 further questions. */
2691 VG_(memset)(&typeE, 0, sizeof(typeE));
2692 typeE.cuOff = D3_INVALID_CUOFF;
2693 typeE.tag = Te_TyFn;
2697 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2699 VG_(memset)(&typeE, 0, sizeof(typeE));
2700 typeE.cuOff = D3_INVALID_CUOFF;
2701 typeE.tag = Te_TyQual;
2702 typeE.Te.TyQual.qual
2703 = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2704 /* target type defaults to 'void' */
2705 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2707 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2708 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2709 if (attr == 0 && form == 0) break;
2710 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2711 cc, c_die, False/*td3*/, form );
2712 if (attr == DW_AT_type && ctsSzB > 0) {
2713 typeE.Te.TyQual.typeR = (UWord)cts;
2717 /* gcc sometimes generates DW_TAG_const/volatile_type without
2718 DW_AT_type and GDB appears to interpret the type as 'const
2719 void' (resp. 'volatile void'). So just allow it .. */
2720 if (have_ty == 1 || have_ty == 0)
2726 /* else ignore this DIE */
2731 if (0) VG_(printf)("YYYY Acquire Type\n");
2732 vg_assert(ML_(TyEnt__is_type)( &typeE ));
2733 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
2735 VG_(addToXA)( tyents, &typeE );
2740 if (0) VG_(printf)("YYYY Acquire Atom\n");
2741 vg_assert(atomE.tag == Te_Atom);
2742 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
2744 VG_(addToXA)( tyents, &atomE );
2749 /* For union members, Expr should be absent */
2750 if (0) VG_(printf)("YYYY Acquire Field\n");
2751 vg_assert(fieldE.tag == Te_Field);
2752 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
2753 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
2754 if (fieldE.Te.Field.isStruct) {
2755 vg_assert(fieldE.Te.Field.nLoc != 0);
2757 vg_assert(fieldE.Te.Field.nLoc == 0);
2759 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
2760 fieldE.cuOff = posn;
2761 VG_(addToXA)( tyents, &fieldE );
2766 if (0) VG_(printf)("YYYY Acquire Bound\n");
2767 vg_assert(boundE.tag == Te_Bound);
2768 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
2769 boundE.cuOff = posn;
2770 VG_(addToXA)( tyents, &boundE );
2775 set_position_of_Cursor( c_die, saved_die_c_offset );
2776 set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2777 VG_(printf)("\nparse_type_DIE: confused by:\n");
2778 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2780 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2781 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2782 if (attr == 0 && form == 0) break;
2783 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
2784 /* Get the form contents, so as to print them */
2785 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2786 cc, c_die, True, form );
2787 VG_(printf)("\t\n");
2790 cc->barf("parse_type_DIE: confused by the above DIE");
2795 /*------------------------------------------------------------*/
2797 /*--- Compression of type DIE information ---*/
2799 /*------------------------------------------------------------*/
/* Follow at most one Te_INDIR hop for the reference 'cuOff'.
   The entry for 'cuOff' is looked up in 'ents' through 'ents_cache';
   when that entry is a Te_INDIR, the cuOff it redirects to is
   returned.
   NOTE(review): callers test the Bool written through 'changed' right
   after the call, so it presumably reports whether the returned value
   differs from 'cuOff'.  The stores to *changed and the remaining
   return paths are not visible here -- confirm. */
2801 static UWord chase_cuOff ( Bool* changed,
2802 XArray* /* of TyEnt */ ents,
2803 TyEntIndexCache* ents_cache,
/* Locate the entry recorded for this cuOff. */
2807 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
/* Diagnostic: per its message text, reports a cuOff for which no
   entry was found. */
2810 VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
/* An entry that was found must not be an emptied-out slot. */
2815 vg_assert(ent->tag != Te_EMPTY);
2816 if (ent->tag != Te_INDIR) {
/* An indirection must redirect to a strictly smaller cuOff. */
2820 vg_assert(ent->Te.INDIR.indR < cuOff);
/* Hand back the target of the indirection. */
2822 return ent->Te.INDIR.indR;
/* Apply chase_cuOff to every element of 'cuOffs' (an XArray of
   UWord), overwriting each element in place with the value
   chase_cuOff returns for it.  'ents' and 'ents_cache' are passed
   straight through to chase_cuOff.
   NOTE(review): 'changed' is presumably set if any element was
   altered; the store to *changed is not visible here -- confirm. */
2827 void chase_cuOffs_in_XArray ( Bool* changed,
2828 XArray* /* of TyEnt */ ents,
2829 TyEntIndexCache* ents_cache,
2830 /*MOD*/XArray* /* of UWord */ cuOffs )
/* Element count is sampled once, before the loop starts. */
2833 Word i, n = VG_(sizeXA)( cuOffs );
2834 for (i = 0; i < n; i++) {
/* 'p' addresses element i inside the array, so the store below
   updates the array itself. */
2836 UWord* p = VG_(indexXA)( cuOffs, i );
2837 *p = chase_cuOff( &b, ents, ents_cache, *p );
/* Rewrite the reference ('R' / 'Rs') fields of the entity 'te' by
   passing each one through chase_cuOff (single references) or
   chase_cuOffs_in_XArray (arrays of references).  'changed' collects
   whether any of those calls reported a change via 'b'.
   NOTE(review): the declaration of 'te', the per-tag dispatch and the
   final return are not visible here; presumably 'changed' is the
   return value -- confirm. */
2844 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
2845 TyEntIndexCache* ents_cache,
/* 'b' receives the per-call result; 'changed' is the running OR. */
2848 Bool b, changed = False;
/* Indirection: chase the indirection's own target. */
2854 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
2855 if (b) changed = True;
/* Field: chase the field's type reference. */
2863 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
2864 if (b) changed = True;
/* Pointer-or-reference type: chase the target type. */
2872 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
2873 if (b) changed = True;
/* Typedef: chase the aliased type. */
2876 te->Te.TyTyDef.typeR
2877 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
2878 if (b) changed = True;
/* Struct or union: chase every entry of the field list. */
2881 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
2882 if (b) changed = True;
/* Enum: chase every entry of the atom list. */
2885 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
2886 if (b) changed = True;
/* Array: chase both the element type and every bound. */
2889 te->Te.TyArray.typeR
2890 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
2891 if (b) changed = True;
2892 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
2893 if (b) changed = True;
/* Qualified (const/volatile) type: chase the underlying type. */
2899 = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
2900 if (b) changed = True;
2911 /* Make a pass over 'ents'. For each tyent, inspect the target of any
2912 'R' or 'Rs' fields (those which refer to other tyents), and replace
2913 any which point to INDIR nodes with the target of the indirection
2914 (which should not itself be an indirection). In summary, this
2915 routine shorts out all references to indirection nodes. */
/* Parameters: 'ents' is the array of TyEnt to rewrite in place;
   'ents_cache' is handed to the lookups done by the helper.
   NOTE(review): presumably returns 'nChanged', the number of entries
   for which TyEnt__subst_R_fields reported a change (dedup_types
   stores the result in 'nSubst' and asserts it is >= 0); the update
   of 'nChanged' and the return are not visible here -- confirm. */
2917 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
2918 TyEntIndexCache* ents_cache )
2920 Word i, n, nChanged = 0;
/* Visit every entry once, in array order. */
2922 n = VG_(sizeXA)( ents );
2923 for (i = 0; i < n; i++) {
2924 TyEnt* ent = VG_(indexXA)( ents, i );
/* No emptied-out slots are expected in the array at this point. */
2925 vg_assert(ent->tag != Te_EMPTY);
2926 /* We have to substitute everything, even indirections, so as to
2927 ensure that chains of indirections don't build up. */
2928 b = TyEnt__subst_R_fields( ents, ents_cache, ent );
2937 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
2938 Look up each new tyent in the dictionary in turn. If it is already
2939 in the dictionary, replace this tyent with an indirection to the
2940 existing one, and delete any malloc'd stuff hanging off this one.
2941 In summary, this routine commons up all tyents that are identical
2942 as defined by TyEnt__cmp_by_all_except_cuOff. */
/* Parameter: 'ents' is the array of TyEnt, modified in place --
   duplicate entries are turned into Te_INDIR nodes that refer to the
   earlier, identical entry.
   NOTE(review): presumably returns 'nDeleted' (dedup_types stores the
   result in 'nDel'); its updates and the return are not visible
   here -- confirm. */
2944 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
2946 Word n, i, nDeleted;
2947 WordFM* dict; /* TyEnt* -> void */
/* Arguments used when building 'dict': the allocator, its cost-centre
   string, and the comparison that treats two entries as equal when
   everything except cuOff matches. */
2952 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
2954 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
/* Walk the array in index order. */
2958 n = VG_(sizeXA)( ents );
2959 for (i = 0; i < n; i++) {
2960 ent = VG_(indexXA)( ents, i );
2961 vg_assert(ent->tag != Te_EMPTY);
2963 /* Ignore indirections, although check that they are
2964 not forming a cycle. */
2965 if (ent->tag == Te_INDIR) {
2966 vg_assert(ent->Te.INDIR.indR < ent->cuOff);
/* Look for an equal entry already in the dictionary; on a hit the
   stored key is returned in keyW. */
2971 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
2972 /* it's already in the dictionary. */
2973 TyEnt* old = (TyEnt*)keyW;
2974 vg_assert(valW == 0);
2975 vg_assert(old != ent);
2976 vg_assert(old->tag != Te_INDIR);
2977 /* since we are traversing the array in increasing order of
2979 vg_assert(old->cuOff < ent->cuOff);
2980 /* So anyway, dump this entry and replace it with an
2981 indirection to the one in the dictionary. Note that the
2982 assertion above guarantees that we cannot create cycles of
2983 indirections, since we are always creating an indirection
2984 to a tyent with a cuOff lower than this one. */
2985 ML_(TyEnt__make_EMPTY)( ent );
2986 ent->tag = Te_INDIR;
2987 ent->Te.INDIR.indR = old->cuOff;
2990 /* not in dictionary; add it and keep going. */
2991 VG_(addToFM)( dict, (UWord)ent, 0 );
/* The dictionary is scratch state for this pass only; NULL is passed
   for both trailing arguments of the delete call. */
2995 VG_(deleteFM)( dict, NULL, NULL );
/* Compress the type-entity array 'ents' by alternating commoning and
   substitution passes, then sanity-check the remaining Te_INDIR
   nodes.  'td3' is the trace flag; 'ents_cache' is used for the
   cuOff lookups.
   NOTE(review): the assignments to 'nThresh' and 'm' are not visible
   here -- confirm their values before relying on the comments
   below. */
3002 void dedup_types ( Bool td3,
3003 /*MOD*/XArray* /* of TyEnt */ ents,
3004 TyEntIndexCache* ents_cache )
3006 Word m, n, i, nDel, nSubst, nThresh;
3009 n = VG_(sizeXA)( ents );
3011 /* If a commoning pass and a substitution pass both make fewer than
3012 this many changes, just stop. It's pointless to burn up CPU
3013 time trying to compress the last 1% or so out of the array. */
3016 /* First we must sort .ents by its .cuOff fields, so we
3017 can index into it. */
3020 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3022 VG_(sortXA)( ents );
3024 /* Now repeatedly do commoning and substitution passes over
3025 the array, until there are no more changes. */
3027 nDel = dedup_types_commoning_pass ( ents );
3028 nSubst = dedup_types_substitution_pass ( ents, ents_cache );
3029 vg_assert(nDel >= 0 && nSubst >= 0);
3030 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst);
3031 } while (nDel > nThresh || nSubst > nThresh);
/* Note on the exit test above: iteration stops as soon as both
   counts are at or below nThresh, which is not necessarily the point
   at which a pass makes no changes at all. */
3033 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3034 In fact this should be true at the end of every loop iteration
3035 above (a commoning pass followed by a substitution pass), but
3036 checking it on every iteration is excessively expensive. Note,
3037 this loop also computes 'm' for the stats printing below it. */
3039 n = VG_(sizeXA)( ents );
3040 for (i = 0; i < n; i++) {
3042 ent = VG_(indexXA)( ents, i );
/* Only indirection nodes are of interest to this check. */
3043 if (ent->tag != Te_INDIR) continue;
/* Fetch the entry the indirection refers to ... */
3045 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3046 ent->Te.INDIR.indR );
/* ... and require that it is not itself an indirection. */
3048 vg_assert(ind->tag != Te_INDIR);
/* Summary line: total entries, and total minus 'm'. */
3051 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3055 /*------------------------------------------------------------*/
3057 /*--- Resolution of references to type DIEs ---*/
3059 /*------------------------------------------------------------*/
3061 /* Make a pass through the (temporary) variables array. Examine the
3062 type of each variable, check is it found, and chase any Te_INDIRs.
3063 Postcondition is: each variable has a typeR field that refers to a
3064 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3065 not to refer to a Te_INDIR. (This is so that we can throw all the
3066 Te_INDIRs away later). */
/* Parameters:
     barf       - non-returning error reporter, called when a
                  variable's type offset cannot be accepted
     ents       - the type-entity array (read only)
     ents_cache - lookup cache used with 'ents'
     vars       - array of TempVar*; each var->typeR is rewritten to
                  the cuOff of the entry it resolves to */
3068 __attribute__((noinline))
3069 static void resolve_variable_types (
3070 void (*barf)( HChar* ) __attribute__((noreturn)),
3071 /*R-O*/XArray* /* of TyEnt */ ents,
3072 /*MOD*/TyEntIndexCache* ents_cache,
3073 /*MOD*/XArray* /* of TempVar* */ vars
3077 n = VG_(sizeXA)( vars );
3078 for (i = 0; i < n; i++) {
/* The array holds pointers, hence the extra dereference. */
3079 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3080 /* This is the stated type of the variable. But it might be
3081 an indirection, so be careful. */
3082 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
/* Follow a single indirection hop; the result must not be another
   indirection. */
3084 if (ent && ent->tag == Te_INDIR) {
3085 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3086 ent->Te.INDIR.indR );
3088 vg_assert(ent->tag != Te_INDIR);
3091 /* Deal first with "normal" cases */
3092 if (ent && ML_(TyEnt__is_type)(ent)) {
3093 var->typeR = ent->cuOff;
3097 /* If there's no ent, we probably did not manage to read a
3098 type at the cuOffset which is stated as being this variable's
3099 type. Maybe a deficiency in parse_type_DIE. Complain. */
3101 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3102 barf("resolve_variable_types: "
3103 "cuOff does not refer to a known type");
3106 /* If ent has any other tag, something bad happened, along the
3107 lines of var->typeR not referring to a type at all. */
3108 vg_assert(ent->tag == Te_UNKNOWN);
3109 /* Just accept it; the type will be useless, but at least keep
3111 var->typeR = ent->cuOff;
3116 /*------------------------------------------------------------*/
3118 /*--- Parsing of Compilation Units ---*/
3120 /*------------------------------------------------------------*/
/* Ordering function over elements of an array of TempVar*: v1 and v2
   each point at a TempVar* slot, and the two variables are ordered
   by their 'dioff' field.  Returns -1 when the first is smaller and
   1 when it is larger.
   NOTE(review): the result for equal dioff values is not visible
   here (presumably 0) -- confirm. */
3122 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
/* Unwrap the slot pointers to reach the TempVar objects. */
3123 TempVar* t1 = *(TempVar**)v1;
3124 TempVar* t2 = *(TempVar**)v2;
3125 if (t1->dioff < t2->dioff) return -1;
3126 if (t1->dioff > t2->dioff) return 1;
/* Read the DIE at the current position of cursor 'c', offer it to
   the type parser and the variable parser, and then recurse into its
   children.  'level' is the nesting depth used in trace output and
   is passed as level+1 to the recursive calls; 'td3' enables
   tracing; 'cc' supplies per-compilation-unit state, including the
   error reporter cc->barf.  The remaining parameters are handed on
   to the parsers and to the recursive calls. */
3130 static void read_DIE (
3131 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3132 /*MOD*/XArray* /* of TyEnt */ tyents,
3133 /*MOD*/XArray* /* of TempVar* */ tempvars,
3134 /*MOD*/XArray* /* of GExpr* */ gexprs,
3135 /*MOD*/D3TypeParser* typarser,
3136 /*MOD*/D3VarParser* varparser,
3137 Cursor* c, Bool td3, CUConst* cc, Int level
3141 ULong atag, abbv_code;
/* Saved cursor positions: 'start_*' mark the first attribute of this
   DIE, 'after_*' mark the point just past its last attribute. */
3144 UWord start_die_c_offset, start_abbv_c_offset;
3145 UWord after_die_c_offset, after_abbv_c_offset;
3147 /* --- Deal with this DIE --- */
/* Remember where the DIE starts, read its abbreviation code, point
   the abbrev cursor at that abbreviation and read the tag from it. */
3148 posn = get_position_of_Cursor( c );
3149 abbv_code = get_ULEB128( c );
3150 set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3151 atag = get_ULEB128( &abbv );
3153 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3154 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
/* Per the message, a zero tag is rejected (the guarding test is not
   visible here). */
3157 cc->barf("read_DIE: invalid zero tag on DIE");
/* The children flag comes next in the abbreviation and must be one
   of the two defined values. */
3159 has_children = get_UChar( &abbv );
3160 if (has_children != DW_children_no && has_children != DW_children_yes)
3161 cc->barf("read_DIE: invalid has_children value");
3163 /* We're set up to look at the fields of this DIE. Hand it off to
3164 any parser(s) that want to see it. Since they will in general
3165 advance both the DIE and abbrev cursors, remember their current
3166 settings so that we can then back up and do one final pass over
3167 the DIE, to print out its contents. */
3169 start_die_c_offset = get_position_of_Cursor( c );
3170 start_abbv_c_offset = get_position_of_Cursor( &abbv );
/* Step over the (name, form) pairs of this DIE; a 0,0 pair ends the
   list. */
3176 ULong at_name = get_ULEB128( &abbv );
3177 ULong at_form = get_ULEB128( &abbv );
3178 if (at_name == 0 && at_form == 0) break;
3179 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name));
3180 /* Get the form contents, but ignore them; the only purpose is
3181 to print them, if td3 is True */
3182 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3183 cc, c, td3, (DW_FORM)at_form );
/* Both cursors are now past this DIE's attributes; record that. */
3188 after_die_c_offset = get_position_of_Cursor( c );
3189 after_abbv_c_offset = get_position_of_Cursor( &abbv );
/* Rewind to the first attribute and let the type parser look. */
3191 set_position_of_Cursor( c, start_die_c_offset );
3192 set_position_of_Cursor( &abbv, start_abbv_c_offset );
3194 parse_type_DIE( tyents,
3200 &abbv, /* abbrev cursor */
/* Rewind again and let the variable parser look. */
3204 set_position_of_Cursor( c, start_die_c_offset );
3205 set_position_of_Cursor( &abbv, start_abbv_c_offset );
3207 parse_var_DIE( rangestree,
3215 &abbv, /* abbrev cursor */
/* Whatever the parsers consumed, continue from just past this DIE. */
3219 set_position_of_Cursor( c, after_die_c_offset );
3220 set_position_of_Cursor( &abbv, after_abbv_c_offset );
3222 /* --- Now recurse into its children, if any --- */
3223 if (has_children == DW_children_yes) {
3224 if (0) TRACE_D3("BEGIN children of level %d\n", level);
/* Peek without consuming: a zero here ends the list of children. */
3226 atag = peek_ULEB128( c );
3227 if (atag == 0) break;
3228 read_DIE( rangestree, tyents, tempvars, gexprs,
3229 typarser, varparser,
3230 c, td3, cc, level+1 );
3232 /* Now we need to eat the terminating zero */
3233 atag = get_ULEB128( c );
3234 vg_assert(atag == 0);
3235 if (0) TRACE_D3("END children of level %d\n", level);
/* Worker that does the actual reading of the DWARF3 .debug_info
   section.

   Parameters:
     di     the DebugInfo being populated.  On normal return
            di->admin_tyents holds the retained type entities and
            di->admin_gexprs holds the location expressions; each
            field is asserted to be unset just before it is stored.
     barf   noreturn error handler.  It is handed to the init_Cursor
            calls below and is also called directly when a CU runs
            past the end of .debug_info or uses more space than its
            header claims.
     debug_{info,abbv,line,str,ranges,loc}_{img,sz}
            start and size of each section image (sizes presumably in
            bytes -- confirm against the Cursor implementation).

   Outline, in the order the code appears:
     - trace dumps of .debug_loc, .debug_ranges and .debug_abbrev;
     - creation of the working containers: 'tyents' (seeded with a
       fake void entry and an UNKNOWN entry), 'rangestree',
       'tempvars' and 'gexprs', plus zeroed type/var parsers;
     - for each compilation unit: sanity-check the parser stacks,
       parse the CU header, fill in the remaining fields of 'cc',
       push a catch-all address range, create the file name table,
       read the top-level DIE, skip any bytes the CU claims but did
       not use, preen both parser stacks and delete the file name
       table;
     - post-processing: dedup_types, resolve_variable_types, copy the
       non-Te_INDIR entries into 'tyents_to_keep' and hand that to
       di, bias every GExpr, then walk the TempVars, filling in
       missing fields from their abstract origin and passing the
       usable ones on once per address range;
     - release of the temporary containers.

   NOTE(review): this listing is missing lines (loop headers, closing
   braces, some call lines and comment terminators).  Check it against
   a complete copy of the file before editing the logic. */
3242 void new_dwarf3_reader_wrk (
3243 struct _DebugInfo* di,
3244 __attribute__((noreturn)) void (*barf)( HChar* ),
3245 UChar* debug_info_img, SizeT debug_info_sz,
3246 UChar* debug_abbv_img, SizeT debug_abbv_sz,
3247 UChar* debug_line_img, SizeT debug_line_sz,
3248 UChar* debug_str_img, SizeT debug_str_sz,
3249 UChar* debug_ranges_img, SizeT debug_ranges_sz,
3250 UChar* debug_loc_img, SizeT debug_loc_sz
3253 XArray* /* of TyEnt */ tyents;
3254 XArray* /* of TyEnt */ tyents_to_keep;
3255 XArray* /* of GExpr* */ gexprs;
3256 XArray* /* of TempVar* */ tempvars;
3257 WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3258 TyEntIndexCache* tyents_cache = NULL;
3259 TyEntIndexCache* tyents_to_keep_cache = NULL;
3260 TempVar *varp, *varp2;
3262 Cursor abbv; /* for showing .debug_abbrev */
3263 Cursor info; /* primary cursor for parsing .debug_info */
3264 Cursor ranges; /* for showing .debug_ranges */
3265 D3TypeParser typarser;
3266 D3VarParser varparser;
3270 Bool td3 = di->trace_symtab;
3271 XArray* /* of TempVar* */ dioff_lookup_tab;
3273 /* This doesn't work properly because it assumes all entries are
3274 packed end to end, with no holes. But that doesn't always
3275 appear to be the case, so it loses sync. And the D3 spec
3276 doesn't appear to require a no-hole situation either. */
3277 /* Display .debug_loc */
3280 Cursor loc; /* for showing .debug_loc */
3282 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3283 TRACE_SYMTAB(" Offset Begin End Expression\n");
3284 init_Cursor( &loc, debug_loc_img,
3285 debug_loc_sz, 0, barf,
3286 "Overrun whilst reading .debug_loc section(1)" );
3292 if (is_at_end_Cursor( &loc ))
3295 /* Read a (host-)word pair. This is something of a hack since
3296 the word size to read is really dictated by the ELF file;
3297 however, we assume we're reading a file with the same
3298 word-sizeness as the host. Reasonably enough. */
3299 w1 = get_UWord( &loc );
3300 w2 = get_UWord( &loc );
3302 if (w1 == 0 && w2 == 0) {
3303 /* end of list. reset 'base' */
3304 TRACE_D3(" %08lx <End of list>\n", dl_offset);
3306 dl_offset = get_position_of_Cursor( &loc );
3311 /* new value for 'base' */
3312 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
3318 /* else a location expression follows */
3319 TRACE_D3(" %08lx %08lx %08lx ",
3320 dl_offset, w1 + dl_base, w2 + dl_base);
3321 len = (UWord)get_UShort( &loc );
3323 UChar byte = get_UChar( &loc );
3324 TRACE_D3("%02x", (UInt)byte);
3331 /* Display .debug_ranges */
3333 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3334 TRACE_SYMTAB(" Offset Begin End\n");
3335 init_Cursor( &ranges, debug_ranges_img,
3336 debug_ranges_sz, 0, barf,
3337 "Overrun whilst reading .debug_ranges section(1)" );
3343 if (is_at_end_Cursor( &ranges ))
3346 /* Read a (host-)word pair. This is something of a hack since
3347 the word size to read is really dictated by the ELF file;
3348 however, we assume we're reading a file with the same
3349 word-sizeness as the host. Reasonably enough. */
3350 w1 = get_UWord( &ranges );
3351 w2 = get_UWord( &ranges );
3353 if (w1 == 0 && w2 == 0) {
3354 /* end of list. reset 'base' */
3355 TRACE_D3(" %08lx <End of list>\n", dr_offset);
3357 dr_offset = get_position_of_Cursor( &ranges );
3362 /* new value for 'base' */
3363 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
3369 /* else a range [w1+base, w2+base) is denoted */
3370 TRACE_D3(" %08lx %08lx %08lx\n",
3371 dr_offset, w1 + dr_base, w2 + dr_base);
3374 /* Display .debug_abbrev */
3375 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
3376 "Overrun whilst reading .debug_abbrev section" );
3378 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3380 if (is_at_end_Cursor( &abbv ))
3382 /* Read one abbreviation table */
3383 TRACE_D3(" Number TAG\n");
3387 ULong acode = get_ULEB128( &abbv );
3388 if (acode == 0) break; /* end of the table */
3389 atag = get_ULEB128( &abbv );
3390 has_children = get_UChar( &abbv );
3391 TRACE_D3(" %llu %s [%s]\n",
3392 acode, ML_(pp_DW_TAG)(atag),
3393 ML_(pp_DW_children)(has_children));
3395 ULong at_name = get_ULEB128( &abbv );
3396 ULong at_form = get_ULEB128( &abbv );
3397 if (at_name == 0 && at_form == 0) break;
3398 TRACE_D3(" %18s %s\n",
3399 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3405 /* Now loop over the Compilation Units listed in the .debug_info
3406 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
3407 unit contains a Compilation Unit Header followed by precisely
3408 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3409 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
3410 "Overrun whilst reading .debug_info section" );
3412 /* We'll park the harvested type information in here. Also create
3413 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3414 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
3415 huge and presumably will not occur in any valid DWARF3 file --
3416 it would need to have a .debug_info section 4GB long for that to
3417 happen. These type entries end up in the DebugInfo. */
3418 tyents = VG_(newXA)( ML_(dinfo_zalloc),
3419 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3420 ML_(dinfo_free), sizeof(TyEnt) );
3422 VG_(memset)(&tyent, 0, sizeof(tyent));
3423 tyent.tag = Te_TyVoid;
3424 tyent.cuOff = D3_FAKEVOID_CUOFF;
3425 tyent.Te.TyVoid.isFake = True;
3426 VG_(addToXA)( tyents, &tyent );
/* Second seed entry: a Te_UNKNOWN entity at D3_INVALID_CUOFF. */
3429 VG_(memset)(&tyent, 0, sizeof(tyent));
3430 tyent.tag = Te_UNKNOWN;
3431 tyent.cuOff = D3_INVALID_CUOFF;
3432 VG_(addToXA)( tyents, &tyent );
3435 /* This is a tree used to unique-ify the range lists that are
3436 manufactured by parse_var_DIE. References to the keys in the
3437 tree wind up in .rngMany fields in TempVars. We'll need to
3438 delete this tree, and the XArrays attached to it, at the end of
3440 rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3441 "di.readdwarf3.ndrw.2 (rangestree)",
3443 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3445 /* List of variables we're accumulating. These don't end up in the
3446 DebugInfo; instead their contents are handed to ML_(addVar) and
3447 the list elements are then deleted. */
3448 tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3449 "di.readdwarf3.ndrw.3 (TempVar*s array)",
3453 /* List of GExprs we're accumulating. These wind up in the
3455 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3456 ML_(dinfo_free), sizeof(GExpr*) );
3458 /* We need a D3TypeParser to keep track of partially constructed
3459 types. It'll be discarded as soon as we've completed the CU,
3460 since the resulting information is tipped in to 'tyents' as it
3462 VG_(memset)( &typarser, 0, sizeof(typarser) );
3464 typarser.language = '?';
3465 for (i = 0; i < N_D3_TYPE_STACK; i++) {
3466 typarser.qparentE[i].tag = Te_EMPTY;
3467 typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3470 VG_(memset)( &varparser, 0, sizeof(varparser) );
3473 TRACE_D3("\n------ Parsing .debug_info section ------\n");
3475 UWord cu_start_offset, cu_offset_now;
3477 /* It may be that the stated size of this CU is larger than the
3478 amount of stuff actually in it. icc9 seems to generate CUs
3479 thusly. We use these variables to figure out if this is
3480 indeed the case, and if so how many bytes we need to skip to
3481 get to the start of the next CU. Not skipping those bytes
3482 causes us to misidentify the start of the next CU, and it all
3483 goes badly wrong after that (not surprisingly). */
3484 UWord cu_size_including_IniLen, cu_amount_used;
3486 /* It seems icc9 finishes the DIE info before debug_info_sz
3487 bytes have been used up. So be flexible, and declare the
3488 sequence complete if there is not enough remaining bytes to
3489 hold even the smallest conceivable CU header. (11 bytes I
3491 /* JRS 23Jan09: I suspect this is no longer necessary now that
3492 the code below contains a 'while (cu_amount_used <
3493 cu_size_including_IniLen ...' style loop, which skips over
3494 any leftover bytes at the end of a CU in the case where the
3495 CU's stated size is larger than its actual size (as
3496 determined by reading all its DIEs). However, for prudence,
3497 I'll leave the following test in place. I can't see that a
3498 CU header can be smaller than 11 bytes, so I don't think
3499 there's any harm possible through the test -- it just adds
3501 Word avail = get_remaining_length_Cursor( &info );
3504 TRACE_D3("new_dwarf3_reader_wrk: warning: "
3505 "%ld unused bytes after end of DIEs\n", avail);
3509 /* Check the varparser's stack is in a sane state. */
3510 vg_assert(varparser.sp == -1);
3511 for (i = 0; i < N_D3_VAR_STACK; i++) {
3512 vg_assert(varparser.ranges[i] == NULL);
3513 vg_assert(varparser.level[i] == 0);
3515 for (i = 0; i < N_D3_TYPE_STACK; i++) {
3516 vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3517 vg_assert(typarser.qparentE[i].tag == Te_EMPTY);
3518 vg_assert(typarser.qlevel[i] == 0);
/* Remember where this CU begins; it is stored in cc below and later
   used to work out how many bytes of the CU were consumed. */
3521 cu_start_offset = get_position_of_Cursor( &info );
3523 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3524 /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3526 parse_CU_Header( &cc, td3, &info,
3527 (UChar*)debug_abbv_img, debug_abbv_sz );
3528 cc.debug_str_img = debug_str_img;
3529 cc.debug_str_sz = debug_str_sz;
3530 cc.debug_ranges_img = debug_ranges_img;
3531 cc.debug_ranges_sz = debug_ranges_sz;
3532 cc.debug_loc_img = debug_loc_img;
3533 cc.debug_loc_sz = debug_loc_sz;
3534 cc.debug_line_img = debug_line_img;
3535 cc.debug_line_sz = debug_line_sz;
3536 cc.debug_info_img = debug_info_img;
3537 cc.debug_info_sz = debug_info_sz;
3538 cc.cu_start_offset = cu_start_offset;
3540 /* The CU's svma can be deduced by looking at the AT_low_pc
3541 value in the top level TAG_compile_unit, which is the topmost
3542 DIE. We'll leave it for the 'varparser' to acquire that info
3543 and fill it in -- since it is the only party to want to know
3545 cc.cu_svma_known = False;
3548 /* Create a fake outermost-level range covering the entire
3549 address range. So we always have *something* to catch all
3550 variable declarations. */
3551 varstack_push( &cc, &varparser, td3,
3552 unitary_range_list(0UL, ~0UL),
3553 -1, False/*isFunc*/, NULL/*fbGX*/ );
3555 /* And set up the file name table. When we come across the top
3556 level DIE for this CU (which is what the next call to
3557 read_DIE should process) we will copy all the file names out
3558 of the .debug_line img area and use this table to look up the
3559 copies when we later see filename numbers in DW_TAG_variables
3561 vg_assert(!varparser.filenameTable );
3562 varparser.filenameTable
3563 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3566 vg_assert(varparser.filenameTable);
3568 /* Now read the one-and-only top-level DIE for this CU. */
3569 vg_assert(varparser.sp == 0);
3570 read_DIE( rangestree,
3571 tyents, tempvars, gexprs,
3572 &typarser, &varparser,
3573 &info, td3, &cc, 0 );
/* Where the cursor ended up once read_DIE returned. */
3575 cu_offset_now = get_position_of_Cursor( &info );
3577 if (0) VG_(printf)("Travelled: %lu size %llu\n",
3578 cu_offset_now - cc.cu_start_offset,
3579 cc.unit_length + (cc.is_dw64 ? 12 : 4));
3581 /* How big the CU claims it is .. */
3582 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3583 /* .. vs how big we have found it to be */
3584 cu_amount_used = cu_offset_now - cc.cu_start_offset;
/* NOTE(review): cu_offset_now is a UWord and debug_info_sz a SizeT,
   yet both are printed with the signed %ld conversion. */
3586 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3587 cu_offset_now, debug_info_sz);
3588 if (cu_offset_now > debug_info_sz)
3589 barf("toplevel DIEs beyond end of CU");
3591 /* If the CU is bigger than it claims to be, we've got a serious
3593 if (cu_amount_used > cu_size_including_IniLen)
3594 barf("CU's actual size appears to be larger than it claims it is");
3596 /* If the CU is smaller than it claims to be, we need to skip some
3597 bytes. Loop updates cu_offset_now and cu_amount_used. */
3598 while (cu_amount_used < cu_size_including_IniLen
3599 && get_remaining_length_Cursor( &info ) > 0) {
3600 if (0) VG_(printf)("SKIP\n");
3601 (void)get_UChar( &info );
3602 cu_offset_now = get_position_of_Cursor( &info );
3603 cu_amount_used = cu_offset_now - cc.cu_start_offset;
3606 if (cu_offset_now == debug_info_sz)
3609 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
3610 anywhere else at all. Our fake the-entire-address-space
3611 range is at level -1, so preening to -2 should completely
3612 empty the stack out. */
3614 varstack_preen( &varparser, td3, -2 );
3615 /* Similarly, empty the type stack out. */
3616 typestack_preen( &typarser, td3, -2 );
3617 /* else keep going */
3619 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3620 cc.saC_cache_queries, cc.saC_cache_misses);
3622 vg_assert(varparser.filenameTable );
3623 VG_(deleteXA)( varparser.filenameTable );
3624 varparser.filenameTable = NULL;
3627 /* From here on we're post-processing the stuff we got
3628 out of the .debug_info section. */
3631 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
3633 TRACE_D3("------ Compressing type entries ------\n");
3636 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
3637 sizeof(TyEntIndexCache) );
3638 ML_(TyEntIndexCache__invalidate)( tyents_cache );
3639 dedup_types( td3, tyents, tyents_cache );
3642 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
3646 TRACE_D3("------ Resolving the types of variables ------\n" );
3647 resolve_variable_types( barf, tyents, tyents_cache, tempvars );
3649 /* Copy all the non-INDIR tyents into a new table. For large
3650 .so's, about 90% of the tyents will by now have been resolved to
3651 INDIRs, and we no longer need them, and so don't need to store
3654 = VG_(newXA)( ML_(dinfo_zalloc),
3655 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
3656 ML_(dinfo_free), sizeof(TyEnt) );
3657 n = VG_(sizeXA)( tyents );
3658 for (i = 0; i < n; i++) {
3659 TyEnt* ent = VG_(indexXA)( tyents, i );
3660 if (ent->tag != Te_INDIR)
3661 VG_(addToXA)( tyents_to_keep, ent );
3664 VG_(deleteXA)( tyents );
3666 ML_(dinfo_free)( tyents_cache );
3667 tyents_cache = NULL;
3669 /* Sort tyents_to_keep so we can lookup in it. A complete (if
3670 minor) waste of time, since tyents itself is sorted, but
3671 necessary since VG_(lookupXA) refuses to cooperate if we
3675 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3677 VG_(sortXA)( tyents_to_keep );
3679 /* Enable cacheing on tyents_to_keep */
3680 tyents_to_keep_cache
3681 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
3682 sizeof(TyEntIndexCache) );
3683 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
3685 /* And record the tyents in the DebugInfo. We do this before
3686 starting to hand variables to ML_(addVar), since if ML_(addVar)
3687 wants to do debug printing (of the types of said vars) then it
3688 will need the tyents.*/
3689 vg_assert(!di->admin_tyents);
3690 di->admin_tyents = tyents_to_keep;
3692 /* Bias all the location expressions. */
3694 TRACE_D3("------ Biasing the location expressions ------\n" );
3696 n = VG_(sizeXA)( gexprs );
3697 for (i = 0; i < n; i++) {
3698 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
3699 bias_GX( gexpr, di );
3703 TRACE_D3("------ Acquired the following variables: ------\n\n");
3705 /* Park (pointers to) all the vars in an XArray, so we can look up
3706 abstract origins quickly. The array is sorted (hence, looked-up
3707 by) the .dioff fields. Since the .dioffs should be in strictly
3708 ascending order, there is no need to sort the array after
3709 construction. The ascendingness is however asserted for. */
3711 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
3714 vg_assert(dioff_lookup_tab);
3716 n = VG_(sizeXA)( tempvars );
3717 for (i = 0; i < n; i++) {
3718 varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3720 varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
3721 /* why should this hold? Only, I think, because we've
3722 constructed the array by reading .debug_info sequentially,
3723 and so the array .dioff fields should reflect that, and be
3724 strictly ascending. */
3725 vg_assert(varp2->dioff < varp->dioff);
3727 VG_(addToXA)( dioff_lookup_tab, &varp );
/* Install the by-.dioff comparator; the sort just below uses it, and
   presumably so does the VG_(lookupXA) call further down. */
3729 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
3730 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
3732 /* Now visit each var. Collect up as much info as possible for
3733 each var and hand it to ML_(addVar). */
3734 n = VG_(sizeXA)( tempvars );
/* The outer index is j because the per-range loop further down
   reuses i. */
3735 for (j = 0; j < n; j++) {
3737 varp = *(TempVar**)VG_(indexXA)( tempvars, j );
3739 /* Possibly show .. */
3741 VG_(printf)("<%lx> addVar: level %d: %s :: ",
3744 varp->name ? varp->name : (UChar*)"<anon_var>" );
3746 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
3748 VG_(printf)("NULL");
3750 VG_(printf)("\n Loc=");
3752 ML_(pp_GX)(varp->gexpr);
3754 VG_(printf)("NULL");
3758 VG_(printf)(" FrB=");
3759 ML_(pp_GX)( varp->fbGX );
3762 VG_(printf)(" FrB=none\n");
3764 VG_(printf)(" declared at: %s:%d\n",
3765 varp->fName ? varp->fName : (UChar*)"NULL",
/* An .absOri equal to D3_INVALID_CUOFF means the variable names no
   abstract origin. */
3767 if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3768 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri);
3771 /* Skip variables which have no location. These must be
3772 abstract instances; they are useless as-is since with no
3773 location they have no specified memory location. They will
3774 presumably be referred to via the absOri fields of other
3777 TRACE_D3(" SKIP (no location)\n\n");
3781 /* So it has a location, at least. If it refers to some other
3782 entry through its absOri field, pull in further info through
3784 if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3786 Word ixFirst, ixLast;
3788 TempVar* keyp = &key;
3790 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3791 key.dioff = varp->absOri; /* this is what we want to find */
3792 found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3793 &ixFirst, &ixLast );
3795 /* barf("DW_AT_abstract_origin can't be resolved"); */
3796 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
3799 /* If the following fails, there is more than one entry with
3800 the same dioff. Which can't happen. */
3801 vg_assert(ixFirst == ixLast);
3802 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3805 vg_assert(varAI->dioff == varp->absOri);
3807 /* Copy what useful info we can. */
3808 if (varAI->typeR && !varp->typeR)
3809 varp->typeR = varAI->typeR;
3810 if (varAI->name && !varp->name)
3811 varp->name = varAI->name;
3812 if (varAI->fName && !varp->fName)
3813 varp->fName = varAI->fName;
3814 if (varAI->fLine > 0 && varp->fLine == 0)
3815 varp->fLine = varAI->fLine;
3818 /* Give it a name if it doesn't have one. */
3820 varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3822 /* So now does it have enough info to be useful? */
3823 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
3824 the type didn't get resolved. Really, in that case
3825 something's broken earlier on, and should be fixed, rather
3826 than just skipping the variable. */
3827 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
3828 tyents_to_keep_cache,
3830 /* The next two assertions should be guaranteed by
3831 our previous call to resolve_variable_types. */
3833 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
3835 if (ent->tag == Te_UNKNOWN) continue;
3837 vg_assert(varp->gexpr);
3838 vg_assert(varp->name);
3839 vg_assert(varp->typeR);
3840 vg_assert(varp->level >= 0);
3842 /* Ok. So we're going to keep it. Call ML_(addVar) once for
3843 each address range in which the variable exists. */
3844 TRACE_D3(" ACQUIRE for range(s) ");
3845 { AddrRange oneRange;
3846 AddrRange* varPcRanges;
3848 /* Set up to iterate over address ranges, however
3850 if (varp->nRanges == 0 || varp->nRanges == 1) {
3851 vg_assert(!varp->rngMany);
3852 if (varp->nRanges == 0) {
3853 vg_assert(varp->rngOneMin == 0);
3854 vg_assert(varp->rngOneMax == 0);
3856 nVarPcRanges = varp->nRanges;
3857 oneRange.aMin = varp->rngOneMin;
3858 oneRange.aMax = varp->rngOneMax;
3859 varPcRanges = &oneRange;
3861 vg_assert(varp->rngMany);
3862 vg_assert(varp->rngOneMin == 0);
3863 vg_assert(varp->rngOneMax == 0);
3864 nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3865 vg_assert(nVarPcRanges >= 2);
3866 vg_assert(nVarPcRanges == (Word)varp->nRanges);
3867 varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3869 if (varp->level == 0)
3870 vg_assert( nVarPcRanges == 1 );
3872 for (i = 0; i < nVarPcRanges; i++) {
3873 Addr pcMin = varPcRanges[i].aMin;
3874 Addr pcMax = varPcRanges[i].aMax;
3875 vg_assert(pcMin <= pcMax);
3876 /* Level 0 is the global address range. So at level 0 we
3877 don't want to bias pcMin/pcMax; but at all other levels
3878 we do since those are derived from svmas in the Dwarf
3879 we're reading. Be paranoid ... */
3880 if (varp->level == 0) {
3881 vg_assert(pcMin == (Addr)0);
3882 vg_assert(pcMax == ~(Addr)0);
3884 /* vg_assert(pcMin > (Addr)0);
3885 No .. we can legitimately expect to see ranges like
3886 0x0-0x11D (pre-biasing, of course). */
3887 vg_assert(pcMax < ~(Addr)0);
3890 /* Apply text biasing, for non-global variables. */
3891 if (varp->level > 0) {
3892 pcMin += di->text_debug_bias;
3893 pcMax += di->text_debug_bias;
3896 if (i > 0 && (i%2) == 0)
3898 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
3903 varp->name, varp->typeR,
3904 varp->gexpr, varp->fbGX,
3905 varp->fName, varp->fLine, td3
3911 /* and move on to the next var */
3914 /* Now free all the TempVars */
3915 n = VG_(sizeXA)( tempvars );
3916 for (i = 0; i < n; i++) {
3917 varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3918 ML_(dinfo_free)(varp);
3920 VG_(deleteXA)( tempvars );
3923 /* and the temp lookup table */
3924 VG_(deleteXA)( dioff_lookup_tab );
3926 /* and the ranges tree. Note that we need to also free the XArrays
3927 which constitute the keys, hence pass VG_(deleteXA) as a
3929 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
3931 /* and the tyents_to_keep cache */
3932 ML_(dinfo_free)( tyents_to_keep_cache );
3933 tyents_to_keep_cache = NULL;
3935 /* and the file name table (just the array, not the entries
3936 themselves). (Apparently, 2008-Oct-23, varparser.filenameTable
3937 can be NULL here, for icc9 generated Dwarf3. Not sure what that
3938 signifies (a deeper problem with the reader?)) */
3939 if (varparser.filenameTable) {
3940 VG_(deleteXA)( varparser.filenameTable );
3941 varparser.filenameTable = NULL;
3944 /* record the GExprs in di so they can be freed later */
3945 vg_assert(!di->admin_gexprs);
3946 di->admin_gexprs = gexprs;
3950 /*------------------------------------------------------------*/
3952 /*--- The "new" DWARF3 reader -- top level control logic ---*/
3954 /*------------------------------------------------------------*/
/* State shared between ML_(new_dwarf3_reader) and barf:
     d3rd_jmpbuf_valid   set to True just before the jump buffer is
                         armed and back to False afterwards; barf
                         asserts it before jumping.
     d3rd_jmpbuf_reason  message stored by barf before it jumps; NULL
                         otherwise.
     d3rd_jmpbuf         the jump buffer itself. */
3956 static Bool d3rd_jmpbuf_valid = False;
3957 static HChar* d3rd_jmpbuf_reason = NULL;
3958 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
/* Error exit for the DWARF3 reader.  Asserts that the jump buffer
   has been marked valid, records 'reason' in d3rd_jmpbuf_reason and
   then longjmps to d3rd_jmpbuf.  Declared noreturn. */
3960 static __attribute__((noreturn)) void barf ( HChar* reason ) {
3961 vg_assert(d3rd_jmpbuf_valid);
3962 d3rd_jmpbuf_reason = reason;
3963 VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
/* Top-level entry point for reading .debug_info.  Arms d3rd_jmpbuf,
   runs new_dwarf3_reader_wrk with barf as its error handler, and
   marks the buffer invalid again afterwards.  If the worker longjmps
   back, the reason it recorded is reported through ML_(symerr).
   Before returning, d3rd_jmpbuf_valid and d3rd_jmpbuf_reason are
   reset, which is what the assertions at the top of this function
   require on the next call.

   'di' and the six section image/size pairs are passed to the worker
   unchanged. */
3970 ML_(new_dwarf3_reader) (
3971 struct _DebugInfo* di,
3972 UChar* debug_info_img, SizeT debug_info_sz,
3973 UChar* debug_abbv_img, SizeT debug_abbv_sz,
3974 UChar* debug_line_img, SizeT debug_line_sz,
3975 UChar* debug_str_img, SizeT debug_str_sz,
3976 UChar* debug_ranges_img, SizeT debug_ranges_sz,
3977 UChar* debug_loc_img, SizeT debug_loc_sz
/* NOTE(review): these locals are volatile presumably so that their
   values are still reliable after a longjmp back into this frame --
   confirm. */
3980 volatile Int jumped;
3981 volatile Bool td3 = di->trace_symtab;
3983 /* Run the _wrk function to read the dwarf3. If it succeeds, it
3984 just returns normally. If there is any failure, it longjmp's
3985 back here, having first set d3rd_jmpbuf_reason to something
3987 vg_assert(d3rd_jmpbuf_valid == False);
3988 vg_assert(d3rd_jmpbuf_reason == NULL);
3990 d3rd_jmpbuf_valid = True;
3991 jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
3994 new_dwarf3_reader_wrk( di, barf,
3995 debug_info_img, debug_info_sz,
3996 debug_abbv_img, debug_abbv_sz,
3997 debug_line_img, debug_line_sz,
3998 debug_str_img, debug_str_sz,
3999 debug_ranges_img, debug_ranges_sz,
4000 debug_loc_img, debug_loc_sz );
4001 d3rd_jmpbuf_valid = False;
4002 TRACE_D3("\n------ .debug_info reading was successful ------\n");
4005 d3rd_jmpbuf_valid = False;
4006 /* Can't longjump without giving some sort of reason. */
4007 vg_assert(d3rd_jmpbuf_reason != NULL);
4009 TRACE_D3("\n------ .debug_info reading failed ------\n");
4011 ML_(symerr)(di, True, d3rd_jmpbuf_reason);
4014 d3rd_jmpbuf_valid = False;
4015 d3rd_jmpbuf_reason = NULL;
4020 /* --- Unused code fragments which might be useful one day. --- */
/* The fragment below traces the contents of .debug_aranges: for each
   set it prints the header fields and then the (address, length)
   pairs.
   NOTE(review): it refers to names (aranges, debug_aranges_img,
   debug_aranges_sz, is64, version) whose declarations are not
   visible in this part of the file. */
4023 /* Read the arange tables */
4025 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
4026 init_Cursor( &aranges, debug_aranges_img,
4027 debug_aranges_sz, 0, barf,
4028 "Overrun whilst reading .debug_aranges section" );
4030 ULong len, d_i_offset;
4033 UChar asize, segsize;
4035 if (is_at_end_Cursor( &aranges ))
4037 /* Read one arange thingy */
4038 /* initial_length field */
4039 len = get_Initial_Length( &is64, &aranges,
4040 "in .debug_aranges: invalid initial-length field" );
4041 version = get_UShort( &aranges );
4042 d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
4043 asize = get_UChar( &aranges );
4044 segsize = get_UChar( &aranges );
4045 TRACE_D3(" Length: %llu\n", len);
4046 TRACE_D3(" Version: %d\n", (Int)version);
4047 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset);
4048 TRACE_D3(" Pointer Size: %d\n", (Int)asize);
4049 TRACE_D3(" Segment Size: %d\n", (Int)segsize);
4051 TRACE_D3(" Address Length\n");
/* Skip bytes until the cursor position is a multiple of 2*asize. */
4053 while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
4054 (void)get_UChar( & aranges );
/* (address, length) pairs follow; a pair of zeroes ends the set. */
4057 ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
4058 ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
4059 TRACE_D3(" 0x%016llx 0x%llx\n", address, length);
4060 if (address == 0 && length == 0) break;
4066 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_l4re)
4068 /*--------------------------------------------------------------------*/
4070 /*--------------------------------------------------------------------*/