l4/pkg/valgrind/src/valgrind-3.6.0-svn/coregrind/m_debuginfo/readdwarf3.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
   4 /*---                                                 readdwarf3.c ---*/
   5 /*--------------------------------------------------------------------*/
   6
   7 /*
   8    This file is part of Valgrind, a dynamic binary instrumentation
   9    framework.
  10
  11    Copyright (C) 2008-2010 OpenWorks LLP
  12       info@open-works.co.uk
  13
  14    This program is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU General Public License as
  16    published by the Free Software Foundation; either version 2 of the
  17    License, or (at your option) any later version.
  18
  19    This program is distributed in the hope that it will be useful, but
  20    WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    General Public License for more details.
  23
  24    You should have received a copy of the GNU General Public License
  25    along with this program; if not, write to the Free Software
  26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  27    02111-1307, USA.
  28
  29    The GNU General Public License is contained in the file COPYING.
  30
  31    Neither the names of the U.S. Department of Energy nor the
  32    University of California nor the names of its contributors may be
  33    used to endorse or promote products derived from this software
  34    without prior written permission.
  35 */
  36
  37 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_l4re)
  38
  39 /* REFERENCE (without which this code will not make much sense):
  40
  41    DWARF Debugging Information Format, Version 3,
  42    dated 20 December 2005 (the "D3 spec").
  43
  44    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
  45    .doc (MS Word) version, but for some reason the section numbers
  46    between the Word and PDF versions differ by 1 in the first digit.
  47    All section references in this code are to the PDF version.
  48
  49    CURRENT HACKS:
  50
  51    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
  52       assumed to mean "const void" or "volatile void" respectively.
  53       GDB appears to interpret them like this, anyway.
  54
  55    In many cases it is important to know the svma of a CU (the "base
  56    address of the CU", as the D3 spec calls it).  There are some
  57    situations in which the spec implies this value is unknown, but the
   58    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
  59    merely zero when not explicitly stated.  So we too have to make
  60    that assumption.
  61
  62    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
  63    unitary_range_list() bias the resulting range list in the same way
  64    that its more general cousin, get_range_list(), does?  I don't
  65    know.
  66
  67    TODO, 2008 Feb 17:
  68
  69    get rid of cu_svma_known and document the assumed-zero svma hack.
  70
  71    ML_(sizeOfType): differentiate between zero sized types and types
  72    for which the size is unknown.  Is this important?  I don't know.
  73
  74    DW_AT_array_types: deal with explicit sizes (currently we compute
  75    the size from the bounds and the element size, although that's
  76    fragile, if the bounds incompletely specified, or completely
  77    absent)
  78
  79    Document reason for difference (by 1) of stack preening depth in
  80    parse_var_DIE vs parse_type_DIE.
  81
  82    Don't hand to ML_(addVars), vars whose locations are entirely in
  83    registers (DW_OP_reg*).  This is merely a space-saving
  84    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
  85    expressions correctly, by failing to evaluate them and hence
  86    effectively ignoring the variable with which they are associated.
  87
  88    Deal with DW_AT_array_types which have element size != stride
  89
  90    In some cases, the info for a variable is split between two
  91    different DIEs (generally a declarer and a definer).  We punt on
  92    these.  Could do better here.
  93
  94    The 'data_bias' argument passed to the expression evaluator
  95    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
  96    MaybeUWord, to make it clear when we do vs don't know what it is
  97    for the evaluation of an expression.  At the moment zero is passed
  98    for this parameter in the don't know case.  That's a bit fragile
  99    and obscure; using a MaybeUWord would be clearer.
 100
 101    POTENTIAL PERFORMANCE IMPROVEMENTS:
 102
 103    Currently, duplicate removal and all other queries for the type
 104    entities array is done using cuOffset-based pointing, which
 105    involves a binary search (VG_(lookupXA)) for each access.  This is
 106    wildly inefficient, although simple.  It would be better to
 107    translate all the cuOffset-based references (iow, all the "R" and
 108    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
 109    'tyents' right at the start of dedup_types(), and use direct
 110    indexing (VG_(indexXA)) wherever possible after that.
 111
 112    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
 113    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
 114    points, and possibly also make an _UNCHECKED version which skips
 115    the range checks in performance-critical situations such as this.
 116
 117    Handle interaction between read_DIE and parse_{var,type}_DIE
 118    better.  Currently read_DIE reads the entire DIE just to find where
 119    the end is (and for debug printing), so that it can later reliably
 120    move the cursor to the end regardless of what parse_{var,type}_DIE
 121    do.  This means many DIEs (most, even?) are read twice.  It would
 122    be smarter to make parse_{var,type}_DIE return a Bool indicating
 123    whether or not they advanced the DIE cursor, and only if they
 124    didn't should read_DIE itself read through the DIE.
 125
 126    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
 127    zero variables in their .vars XArray.  Rather than have an XArray
 128    with zero elements (which uses 2 malloc'd blocks), allow the .vars
 129    pointer to be NULL in this case.
 130
 131    More generally, reduce the amount of memory allocated and freed
 132    while reading Dwarf3 type/variable information.  Even modest (20MB)
 133    objects cause this module to allocate and free hundreds of
 134    thousands of small blocks, and ML_(arena_malloc) and its various
 135    groupies always show up at the top of performance profiles. */
 136
 137 #include "pub_core_basics.h"
 138 #include "pub_core_debuginfo.h"
 139 #include "pub_core_libcbase.h"
 140 #include "pub_core_libcassert.h"
 141 #include "pub_core_libcprint.h"
 142 #include "pub_core_libcsetjmp.h"   // setjmp facilities
 143 #include "pub_core_options.h"
 144 #include "pub_core_tooliface.h"    /* VG_(needs) */
 145 #include "pub_core_xarray.h"
 146 #include "pub_core_wordfm.h"
 147 #include "priv_misc.h"             /* dinfo_zalloc/free */
 148 #include "priv_tytypes.h"
 149 #include "priv_d3basics.h"
 150 #include "priv_storage.h"
 151 #include "priv_readdwarf3.h"       /* self */
 152
 153
 154 /*------------------------------------------------------------*/
 155 /*---                                                      ---*/
 156 /*--- Basic machinery for parsing DIEs.                    ---*/
 157 /*---                                                      ---*/
 158 /*------------------------------------------------------------*/
 159
 160 #define TRACE_D3(format, args...) \
 161    if (td3) { VG_(printf)(format, ## args); }
 162
 163 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
 164 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 165
 166 typedef
 167    struct {
 168       UChar* region_start_img;
 169       UWord  region_szB;
 170       UWord  region_next;
 171       void (*barf)( HChar* ) __attribute__((noreturn));
 172       HChar* barfstr;
 173    }
 174    Cursor;
 175
 176 static inline Bool is_sane_Cursor ( Cursor* c ) {
 177    if (!c)                return False;
 178    if (!c->barf)          return False;
 179    if (!c->barfstr)       return False;
 180    return True;
 181 }
 182
 183 static void init_Cursor ( Cursor* c,
 184                           UChar*  region_start_img,
 185                           UWord   region_szB,
 186                           UWord   region_next,
 187                           __attribute__((noreturn)) void (*barf)( HChar* ),
 188                           HChar*  barfstr )
 189 {
 190    vg_assert(c);
 191    VG_(memset)(c, 0, sizeof(*c));
 192    c->region_start_img = region_start_img;
 193    c->region_szB       = region_szB;
 194    c->region_next      = region_next;
 195    c->barf             = barf;
 196    c->barfstr          = barfstr;
 197    vg_assert(is_sane_Cursor(c));
 198 }
 199
 200 static Bool is_at_end_Cursor ( Cursor* c ) {
 201    vg_assert(is_sane_Cursor(c));
 202    return c->region_next >= c->region_szB;
 203 }
 204
 205 static inline UWord get_position_of_Cursor ( Cursor* c ) {
 206    vg_assert(is_sane_Cursor(c));
 207    return c->region_next;
 208 }
 209 static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
 210    c->region_next = pos;
 211    vg_assert(is_sane_Cursor(c));
 212 }
 213
 214 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
 215    vg_assert(is_sane_Cursor(c));
 216    return c->region_szB - c->region_next;
 217 }
 218
 219 static UChar* get_address_of_Cursor ( Cursor* c ) {
 220    vg_assert(is_sane_Cursor(c));
 221    return &c->region_start_img[ c->region_next ];
 222 }
 223
 224 /* FIXME: document assumptions on endianness for
 225    get_UShort/UInt/ULong. */
 226 static inline UChar get_UChar ( Cursor* c ) {
 227    UChar r;
 228    /* vg_assert(is_sane_Cursor(c)); */
 229    if (c->region_next + sizeof(UChar) > c->region_szB) {
 230       c->barf(c->barfstr);
 231       /*NOTREACHED*/
 232       vg_assert(0);
 233    }
 234    r = * (UChar*) &c->region_start_img[ c->region_next ];
 235    c->region_next += sizeof(UChar);
 236    return r;
 237 }
 238 static UShort get_UShort ( Cursor* c ) {
 239    UShort r;
 240    vg_assert(is_sane_Cursor(c));
 241    if (c->region_next + sizeof(UShort) > c->region_szB) {
 242       c->barf(c->barfstr);
 243       /*NOTREACHED*/
 244       vg_assert(0);
 245    }
 246    r = * (UShort*) &c->region_start_img[ c->region_next ];
 247    c->region_next += sizeof(UShort);
 248    return r;
 249 }
 250 static UInt get_UInt ( Cursor* c ) {
 251    UInt r;
 252    vg_assert(is_sane_Cursor(c));
 253    if (c->region_next + sizeof(UInt) > c->region_szB) {
 254       c->barf(c->barfstr);
 255       /*NOTREACHED*/
 256       vg_assert(0);
 257    }
 258    r = * (UInt*) &c->region_start_img[ c->region_next ];
 259    c->region_next += sizeof(UInt);
 260    return r;
 261 }
 262 static ULong get_ULong ( Cursor* c ) {
 263    ULong r;
 264    vg_assert(is_sane_Cursor(c));
 265    if (c->region_next + sizeof(ULong) > c->region_szB) {
 266       c->barf(c->barfstr);
 267       /*NOTREACHED*/
 268       vg_assert(0);
 269    }
 270    r = * (ULong*) &c->region_start_img[ c->region_next ];
 271    c->region_next += sizeof(ULong);
 272    return r;
 273 }
 274 static inline ULong get_ULEB128 ( Cursor* c ) {
 275    ULong result;
 276    Int   shift;
 277    UChar byte;
 278    /* unroll first iteration */
 279    byte = get_UChar( c );
 280    result = (ULong)(byte & 0x7f);
 281    if (LIKELY(!(byte & 0x80))) return result;
 282    shift = 7;
 283    /* end unroll first iteration */
 284    do {
 285       byte = get_UChar( c );
 286       result |= ((ULong)(byte & 0x7f)) << shift;
 287       shift += 7;
 288    } while (byte & 0x80);
 289    return result;
 290 }
 291 static Long get_SLEB128 ( Cursor* c ) {
 292    ULong  result = 0;
 293    Int    shift = 0;
 294    UChar  byte;
 295    do {
 296       byte = get_UChar(c);
 297       result |= ((ULong)(byte & 0x7f)) << shift;
 298       shift += 7;
 299    } while (byte & 0x80);
 300    if (shift < 64 && (byte & 0x40))
 301       result |= -(1ULL << shift);
 302    return result;
 303 }
 304
 305 /* Assume 'c' points to the start of a string.  Return the absolute
 306    address of whatever it points at, and advance it past the
 307    terminating zero.  This makes it safe for the caller to then copy
 308    the string with ML_(addStr), since (w.r.t. image overruns) the
 309    process of advancing past the terminating zero will already have
 310    "vetted" the string. */
 311 static UChar* get_AsciiZ ( Cursor* c ) {
 312    UChar  uc;
 313    UChar* res = get_address_of_Cursor(c);
 314    do { uc = get_UChar(c); } while (uc != 0);
 315    return res;
 316 }
 317
 318 static ULong peek_ULEB128 ( Cursor* c ) {
 319    Word here = c->region_next;
 320    ULong r = get_ULEB128( c );
 321    c->region_next = here;
 322    return r;
 323 }
 324 static UChar peek_UChar ( Cursor* c ) {
 325    Word here = c->region_next;
 326    UChar r = get_UChar( c );
 327    c->region_next = here;
 328    return r;
 329 }
 330
 331 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
 332    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
 333 }
 334
 335 static UWord get_UWord ( Cursor* c ) {
 336    vg_assert(sizeof(UWord) == sizeof(void*));
 337    if (sizeof(UWord) == 4) return get_UInt(c);
 338    if (sizeof(UWord) == 8) return get_ULong(c);
 339    vg_assert(0);
 340 }
 341
 342 /* Read a DWARF3 'Initial Length' field */
 343 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
 344                                   Cursor* c,
 345                                   HChar* barfMsg )
 346 {
 347    ULong w64;
 348    UInt  w32;
 349    *is64 = False;
 350    w32 = get_UInt( c );
 351    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
 352       c->barf( barfMsg );
 353    }
 354    else if (w32 == 0xFFFFFFFF) {
 355       *is64 = True;
 356       w64   = get_ULong( c );
 357    } else {
 358       *is64 = False;
 359       w64 = (ULong)w32;
 360    }
 361    return w64;
 362 }
 363
 364
 365 /*------------------------------------------------------------*/
 366 /*---                                                      ---*/
 367 /*--- "CUConst" structure                                  ---*/
 368 /*---                                                      ---*/
 369 /*------------------------------------------------------------*/
 370
 371 #define N_ABBV_CACHE 32
 372
 373 /* Holds information that is constant through the parsing of a
 374    Compilation Unit.  This is basically plumbed through to
 375    everywhere. */
 376 typedef
 377    struct {
 378       /* Call here if anything goes wrong */
 379       void (*barf)( HChar* ) __attribute__((noreturn));
 380       /* Is this 64-bit DWARF ? */
 381       Bool   is_dw64;
 382       /* Which DWARF version ?  (2, 3 or 4) */
 383       UShort version;
 384       /* Length of this Compilation Unit, as stated in the
 385          .unit_length :: InitialLength field of the CU Header.
 386          However, this size (as specified by the D3 spec) does not
 387          include the size of the .unit_length field itself, which is
 388          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
 389          can be obtained through the expression ".is_dw64 ? 12 : 4". */
 390       ULong  unit_length;
 391       /* Offset of start of this unit in .debug_info */
 392       UWord  cu_start_offset;
 393       /* SVMA for this CU.  In the D3 spec, is known as the "base
 394          address of the compilation unit (last para sec 3.1.1).
 395          Needed for (amongst things) interpretation of location-list
 396          values. */
 397       Addr   cu_svma;
 398       Bool   cu_svma_known;
 399       /* The debug_abbreviations table to be used for this Unit */
 400       UChar* debug_abbv;
 401       /* Upper bound on size thereof (an overestimate, in general) */
 402       UWord  debug_abbv_maxszB;
 403       /* Where is .debug_str ? */
 404       UChar* debug_str_img;
 405       UWord  debug_str_sz;
 406       /* Where is .debug_ranges ? */
 407       UChar* debug_ranges_img;
 408       UWord  debug_ranges_sz;
 409       /* Where is .debug_loc ? */
 410       UChar* debug_loc_img;
 411       UWord  debug_loc_sz;
 412       /* Where is .debug_line? */
 413       UChar* debug_line_img;
 414       UWord  debug_line_sz;
 415       /* Where is .debug_info? */
 416       UChar* debug_info_img;
 417       UWord  debug_info_sz;
 418       /* --- Needed so we can add stuff to the string table. --- */
 419       struct _DebugInfo* di;
 420       /* --- a cache for set_abbv_Cursor --- */
 421       /* abbv_code == (ULong)-1 for an unused entry. */
 422       struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
 423       UWord saC_cache_queries;
 424       UWord saC_cache_misses;
 425    }
 426    CUConst;
 427
 428
 429 /*------------------------------------------------------------*/
 430 /*---                                                      ---*/
 431 /*--- Helper functions for Guarded Expressions             ---*/
 432 /*---                                                      ---*/
 433 /*------------------------------------------------------------*/
 434
 435 /* Parse the location list starting at img-offset 'debug_loc_offset'
 436    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
 437    and so I believe are correct SVMAs for the object as a whole.  This
 438    function allocates the UChar*, and the caller must deallocate it.
 439    The resulting block is in so-called Guarded-Expression format.
 440
 441    Guarded-Expression format is similar but not identical to the DWARF3
 442    location-list format.  The format of each returned block is:
 443
 444       UChar biasMe;
 445       UChar isEnd;
 446       followed by zero or more of
 447
 448       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
 449
  450    '..bytes..' is a standard DWARF3 location expression which is
 451    valid when aMin <= pc <= aMax (possibly after suitable biasing).
 452
 453    The number of bytes in '..bytes..' is nbytes.
 454
 455    The end of the sequence is marked by an isEnd == 1 value.  All
 456    previous isEnd values must be zero.
 457
 458    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
  459    text_bias added before use, and 0 if this is not necessary
  460    (the GX is ready to go).
 461
 462    Hence the block can be quickly parsed and is self-describing.  Note
 463    that aMax is 1 less than the corresponding value in a DWARF3
 464    location list.  Zero length ranges, with aMax == aMin-1, are not
 465    allowed.
 466 */
 467 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
 468    it more logically belongs. */
 469
 470
 471 /* Apply a text bias to a GX. */
 472 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
 473 {
 474    UShort nbytes;
 475    Addr*  pA;
 476    UChar* p = &gx->payload[0];
 477    UChar  uc;
 478    uc = *p++; /*biasMe*/
 479    if (uc == 0)
 480       return;
 481    vg_assert(uc == 1);
 482    p[-1] = 0; /* mark it as done */
 483    while (True) {
 484       uc = *p++;
 485       if (uc == 1)
 486          break; /*isEnd*/
 487       vg_assert(uc == 0);
 488       /* t-bias aMin */
 489       pA = (Addr*)p;
 490       *pA += di->text_debug_bias;
 491       p += sizeof(Addr);
 492       /* t-bias aMax */
 493       pA = (Addr*)p;
 494       *pA += di->text_debug_bias;
 495       p += sizeof(Addr);
 496       /* nbytes, and actual expression */
 497       nbytes = * (UShort*)p; p += sizeof(UShort);
 498       p += nbytes;
 499    }
 500 }
 501
 502 __attribute__((noinline))
 503 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
 504 {
 505    SizeT  bytesReqd;
 506    GExpr* gx;
 507    UChar *p, *pstart;
 508
 509    vg_assert(sizeof(UWord) == sizeof(Addr));
 510    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
 511    bytesReqd
 512       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
 513         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
 514         + sizeof(UShort) /*nbytes*/    + nbytes
 515         + sizeof(UChar); /*isEnd*/
 516
 517    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
 518                            sizeof(GExpr) + bytesReqd );
 519    vg_assert(gx);
 520
 521    p = pstart = &gx->payload[0];
 522
 523    * ((UChar*)p)  = 0;          /*biasMe*/ p += sizeof(UChar);
 524    * ((UChar*)p)  = 0;          /*!isEnd*/ p += sizeof(UChar);
 525    * ((Addr*)p)   = 0;          /*aMin*/   p += sizeof(Addr);
 526    * ((Addr*)p)   = ~((Addr)0); /*aMax */  p += sizeof(Addr);
 527    * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
 528    VG_(memcpy)(p, block, nbytes); p += nbytes;
 529    * ((UChar*)p)  = 1;          /*isEnd*/  p += sizeof(UChar);
 530
 531    vg_assert( (SizeT)(p - pstart) == bytesReqd);
 532    vg_assert( &gx->payload[bytesReqd]
 533               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
 534
 535    return gx;
 536 }
 537
 538 __attribute__((noinline))
 539 static GExpr* make_general_GX ( CUConst* cc,
 540                                 Bool     td3,
 541                                 UWord    debug_loc_offset,
 542                                 Addr     svma_of_referencing_CU )
 543 {
 544    Addr      base;
 545    Cursor    loc;
 546    XArray*   xa; /* XArray of UChar */
 547    GExpr*    gx;
 548    Word      nbytes;
 549
 550    vg_assert(sizeof(UWord) == sizeof(Addr));
 551    if (cc->debug_loc_sz == 0)
 552       cc->barf("make_general_GX: .debug_loc is empty/missing");
 553
 554    init_Cursor( &loc, cc->debug_loc_img,
 555                 cc->debug_loc_sz, 0, cc->barf,
 556                 "Overrun whilst reading .debug_loc section(2)" );
 557    set_position_of_Cursor( &loc, debug_loc_offset );
 558
 559    TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
 560             debug_loc_offset, get_address_of_Cursor( &loc ) );
 561
 562    /* Who frees this xa?  It is freed before this fn exits. */
 563    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
 564                     ML_(dinfo_free),
 565                     sizeof(UChar) );
 566
 567    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 568
 569    base = 0;
 570    while (True) {
 571       Bool  acquire;
 572       UWord len;
 573       /* Read a (host-)word pair.  This is something of a hack since
 574          the word size to read is really dictated by the ELF file;
 575          however, we assume we're reading a file with the same
 576          word-sizeness as the host.  Reasonably enough. */
 577       UWord w1 = get_UWord( &loc );
 578       UWord w2 = get_UWord( &loc );
 579
 580       TRACE_D3("   %08lx %08lx\n", w1, w2);
 581       if (w1 == 0 && w2 == 0)
 582          break; /* end of list */
 583
 584       if (w1 == -1UL) {
 585          /* new value for 'base' */
 586          base = w2;
 587          continue;
 588       }
 589
 590       /* else a location expression follows */
 591       /* else enumerate [w1+base, w2+base) */
 592       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 593          (sec 2.17.2) */
 594       if (w1 > w2) {
 595          TRACE_D3("negative range is for .debug_loc expr at "
 596                   "file offset %lu\n",
 597                   debug_loc_offset);
 598          cc->barf( "negative range in .debug_loc section" );
 599       }
 600
 601       /* ignore zero length ranges */
 602       acquire = w1 < w2;
 603       len     = (UWord)get_UShort( &loc );
 604
 605       if (acquire) {
 606          UWord  w;
 607          UShort s;
 608          UChar  c;
 609          c = 0; /* !isEnd*/
 610          VG_(addBytesToXA)( xa, &c, sizeof(c) );
 611          w = w1    + base + svma_of_referencing_CU;
 612          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 613          w = w2 -1 + base + svma_of_referencing_CU;
 614          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 615          s = (UShort)len;
 616          VG_(addBytesToXA)( xa, &s, sizeof(s) );
 617       }
 618
 619       while (len > 0) {
 620          UChar byte = get_UChar( &loc );
 621          TRACE_D3("%02x", (UInt)byte);
 622          if (acquire)
 623             VG_(addBytesToXA)( xa, &byte, 1 );
 624          len--;
 625       }
 626       TRACE_D3("\n");
 627    }
 628
 629    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 630
 631    nbytes = VG_(sizeXA)( xa );
 632    vg_assert(nbytes >= 1);
 633
 634    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
 635    vg_assert(gx);
 636    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
 637    vg_assert( &gx->payload[nbytes]
 638               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
 639
 640    VG_(deleteXA)( xa );
 641
 642    TRACE_D3("}\n");
 643
 644    return gx;
 645 }
 646
 647
 648 /*------------------------------------------------------------*/
 649 /*---                                                      ---*/
 650 /*--- Helper functions for range lists and CU headers      ---*/
 651 /*---                                                      ---*/
 652 /*------------------------------------------------------------*/
 653
 654 /* Denotes an address range.  Both aMin and aMax are included in the
 655    range; hence a complete range is (0, ~0) and an empty range is any
 656    (X, X-1) for X > 0.*/
 657 typedef
 658    struct { Addr aMin; Addr aMax; }
 659    AddrRange;
 660
 661
 662 /* Generate an arbitrary structural total ordering on
 663    XArray* of AddrRange. */
 664 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
 665 {
 666    Word n1, n2, i;
 667    tl_assert(rngs1 && rngs2);
 668    n1 = VG_(sizeXA)( rngs1 );
 669    n2 = VG_(sizeXA)( rngs2 );
 670    if (n1 < n2) return -1;
 671    if (n1 > n2) return 1;
 672    for (i = 0; i < n1; i++) {
 673       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
 674       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
 675       if (rng1->aMin < rng2->aMin) return -1;
 676       if (rng1->aMin > rng2->aMin) return 1;
 677       if (rng1->aMax < rng2->aMax) return -1;
 678       if (rng1->aMax > rng2->aMax) return 1;
 679    }
 680    return 0;
 681 }
 682
 683
 684 __attribute__((noinline))
 685 static XArray* /* of AddrRange */ empty_range_list ( void )
 686 {
 687    XArray* xa; /* XArray of AddrRange */
 688    /* Who frees this xa?  varstack_preen() does. */
 689    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
 690                     ML_(dinfo_free),
 691                     sizeof(AddrRange) );
 692    return xa;
 693 }
 694
 695
 696 __attribute__((noinline))
 697 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 698 {
 699    XArray*   xa;
 700    AddrRange pair;
 701    vg_assert(aMin <= aMax);
 702    /* Who frees this xa?  varstack_preen() does. */
 703    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
 704                     ML_(dinfo_free),
 705                     sizeof(AddrRange) );
 706    pair.aMin = aMin;
 707    pair.aMax = aMax;
 708    VG_(addToXA)( xa, &pair );
 709    return xa;
 710 }
 711
 712
 713 /* Enumerate the address ranges starting at img-offset
 714    'debug_ranges_offset' in .debug_ranges.  Results are biased with
 715    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
 716    object as a whole.  This function allocates the XArray, and the
 717    caller must deallocate it. */
 718 __attribute__((noinline))
 719 static XArray* /* of AddrRange */
 720        get_range_list ( CUConst* cc,
 721                         Bool     td3,
 722                         UWord    debug_ranges_offset,
 723                         Addr     svma_of_referencing_CU )
 724 {
 725    Addr      base;
 726    Cursor    ranges;
 727    XArray*   xa; /* XArray of AddrRange */
 728    AddrRange pair;
 729
 730    if (cc->debug_ranges_sz == 0)
 731       cc->barf("get_range_list: .debug_ranges is empty/missing");
 732
 733    init_Cursor( &ranges, cc->debug_ranges_img,
 734                 cc->debug_ranges_sz, 0, cc->barf,
 735                 "Overrun whilst reading .debug_ranges section(2)" );
 736    set_position_of_Cursor( &ranges, debug_ranges_offset );
 737
 738    /* Who frees this xa?  varstack_preen() does. */
 739    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
 740                     sizeof(AddrRange) );
 741    base = 0;
 742    while (True) {
 743       /* Read a (host-)word pair.  This is something of a hack since
 744          the word size to read is really dictated by the ELF file;
 745          however, we assume we're reading a file with the same
 746          word-sizeness as the host.  Reasonably enough. */
 747       UWord w1 = get_UWord( &ranges );
 748       UWord w2 = get_UWord( &ranges );
 749
 750       if (w1 == 0 && w2 == 0)
 751          break; /* end of list. */
 752
 753       if (w1 == -1UL) {
 754          /* new value for 'base' */
 755          base = w2;
 756          continue;
 757       }
 758
 759       /* else enumerate [w1+base, w2+base) */
 760       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 761          (sec 2.17.2) */
 762       if (w1 > w2)
 763          cc->barf( "negative range in .debug_ranges section" );
 764       if (w1 < w2) {
 765          pair.aMin = w1     + base + svma_of_referencing_CU;
 766          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
 767          vg_assert(pair.aMin <= pair.aMax);
 768          VG_(addToXA)( xa, &pair );
 769       }
 770    }
 771    return xa;
 772 }
 773
 774
 775 /* Parse the Compilation Unit header indicated at 'c' and
 776    initialise 'cc' accordingly. */
 777 static __attribute__((noinline))
 778 void parse_CU_Header ( /*OUT*/CUConst* cc,
 779                        Bool td3,
 780                        Cursor* c,
 781                        UChar* debug_abbv_img, UWord debug_abbv_sz )
 782 {
 783    UChar  address_size;
 784    UWord  debug_abbrev_offset;
 785    Int    i;
 786
 787    VG_(memset)(cc, 0, sizeof(*cc));
 788    vg_assert(c && c->barf);
 789    cc->barf = c->barf;
 790
 791    /* initial_length field */
 792    cc->unit_length
 793       = get_Initial_Length( &cc->is_dw64, c,
 794            "parse_CU_Header: invalid initial-length field" );
 795
 796    TRACE_D3("   Length:        %lld\n", cc->unit_length );
 797
 798    /* version */
 799    cc->version = get_UShort( c );
 800    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
 801       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
 802    TRACE_D3("   Version:       %d\n", (Int)cc->version );
 803
 804    /* debug_abbrev_offset */
 805    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
 806    if (debug_abbrev_offset >= debug_abbv_sz)
 807       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
 808    TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
 809
 810    /* address size.  If this isn't equal to the host word size, just
 811       give up.  This makes it safe to assume elsewhere that
 812       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
 813       word. */
 814    address_size = get_UChar( c );
 815    if (address_size != sizeof(void*))
 816       cc->barf( "parse_CU_Header: invalid address_size" );
 817    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
 818
 819    /* Set up so that cc->debug_abbv points to the relevant table for
 820       this CU.  Set the szB so that at least we can't read off the end
 821       of the debug_abbrev section -- potentially (and quite likely)
 822       too big, if this isn't the last table in the section, but at
 823       least it's safe. */
 824    cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
 825    cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
 826    /* and empty out the set_abbv_Cursor cache */
 827    if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
 828    for (i = 0; i < N_ABBV_CACHE; i++) {
 829       cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
 830       cc->saC_cache[i].posn = 0;
 831    }
 832    cc->saC_cache_queries = 0;
 833    cc->saC_cache_misses = 0;
 834 }
 835
 836
 837 /* Set up 'c' so it is ready to parse the abbv table entry code
 838    'abbv_code' for this compilation unit.  */
 839 static __attribute__((noinline))
 840 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
 841                        CUConst* cc, ULong abbv_code )
 842 {
 843    Int   i;
 844    ULong acode;
 845
 846    if (abbv_code == 0)
 847       cc->barf("set_abbv_Cursor: abbv_code == 0" );
 848
 849    /* (ULong)-1 is used to represent an empty cache slot.  So we can't
 850       allow it.  In any case no valid DWARF3 should make a reference
 851       to a negative abbreviation code.  [at least, they always seem to
 852       be numbered upwards from zero as far as I have seen] */
 853    vg_assert(abbv_code != (ULong)-1);
 854
 855    /* First search the cache. */
 856    if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
 857    cc->saC_cache_queries++;
 858    for (i = 0; i < N_ABBV_CACHE; i++) {
 859       /* No need to test the cached abbv_codes for -1 (empty), since
 860          we just asserted that abbv_code is not -1. */
 861      if (cc->saC_cache[i].abbv_code == abbv_code) {
 862         /* Found it.  Cool.  Set up the parser using the cached
 863            position, and move this cache entry 1 step closer to the
 864            front. */
 865         if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
 866         init_Cursor( c, cc->debug_abbv,
 867                      cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
 868                      cc->barf,
 869                      "Overrun whilst parsing .debug_abbrev section(1)" );
 870         if (i > 0) {
 871            ULong t_abbv_code = cc->saC_cache[i].abbv_code;
 872            UWord t_posn = cc->saC_cache[i].posn;
 873            while (i > 0) {
 874               cc->saC_cache[i] = cc->saC_cache[i-1];
 875               cc->saC_cache[0].abbv_code = t_abbv_code;
 876               cc->saC_cache[0].posn = t_posn;
 877               i--;
 878            }
 879         }
 880         return;
 881      }
 882    }
 883
 884    /* No.  It's not in the cache.  We have to search through
 885       .debug_abbrev, of course taking care to update the cache
 886       when done. */
 887
 888    cc->saC_cache_misses++;
 889    init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
 890                "Overrun whilst parsing .debug_abbrev section(2)" );
 891
 892    /* Now iterate though the table until we find the requested
 893       entry. */
 894    while (True) {
 895       //ULong atag;
 896       //UInt  has_children;
 897       acode = get_ULEB128( c );
 898       if (acode == 0) break; /* end of the table */
 899       if (acode == abbv_code) break; /* found it */
 900       /*atag         = */ get_ULEB128( c );
 901       /*has_children = */ get_UChar( c );
 902       //TRACE_D3("   %llu      %s    [%s]\n",
 903       //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
 904       while (True) {
 905          ULong at_name = get_ULEB128( c );
 906          ULong at_form = get_ULEB128( c );
 907          if (at_name == 0 && at_form == 0) break;
 908          //TRACE_D3("    %18s %s\n",
 909          //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
 910       }
 911    }
 912
 913    if (acode == 0) {
 914       /* Not found.  This is fatal. */
 915       cc->barf("set_abbv_Cursor: abbv_code not found");
 916    }
 917
 918    /* Otherwise, 'c' is now set correctly to parse the relevant entry,
 919       starting from the abbreviation entry's tag.  So just cache
 920       the result, and return. */
 921    for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
 922       cc->saC_cache[i] = cc->saC_cache[i-1];
 923    }
 924    if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
 925    cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
 926    cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
 927 }
 928
 929
 930 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
 931
 932    If *cts itself contains the entire result, then *ctsSzB is set to
 933    1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
 934
 935    Alternatively, the result can be a block of data (in the
 936    transiently mapped-in object, so-called "image" space).  If so then
 937    the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
 938    image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
 939
 940    Unfortunately this means it is impossible to represent a zero-size
 941    image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
 942    and so is ambiguous (which case it is?)
 943
 944    Invariant on successful return:
 945       (*ctsSzB > 0 && *ctsMemSzB == 0)
 946       || (*ctsSzB == 0 && *ctsMemSzB > 0)
 947 */
 948 static
 949 void get_Form_contents ( /*OUT*/ULong* cts,
 950                          /*OUT*/Int*   ctsSzB,
 951                          /*OUT*/UWord* ctsMemSzB,
 952                          CUConst* cc, Cursor* c,
 953                          Bool td3, DW_FORM form )
 954 {
 955    *cts       = 0;
 956    *ctsSzB    = 0;
 957    *ctsMemSzB = 0;
 958    switch (form) {
 959       case DW_FORM_data1:
 960          *cts = (ULong)(UChar)get_UChar(c);
 961          *ctsSzB = 1;
 962          TRACE_D3("%u", (UInt)*cts);
 963          break;
 964       case DW_FORM_data2:
 965          *cts = (ULong)(UShort)get_UShort(c);
 966          *ctsSzB = 2;
 967          TRACE_D3("%u", (UInt)*cts);
 968          break;
 969       case DW_FORM_data4:
 970          *cts = (ULong)(UInt)get_UInt(c);
 971          *ctsSzB = 4;
 972          TRACE_D3("%u", (UInt)*cts);
 973          break;
 974       case DW_FORM_data8:
 975          *cts = get_ULong(c);
 976          *ctsSzB = 8;
 977          TRACE_D3("%llu", *cts);
 978          break;
 979       case DW_FORM_sec_offset:
 980          *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
 981          *ctsSzB = cc->is_dw64 ? 8 : 4;
 982          TRACE_D3("%llu", *cts);
 983          break;
 984       case DW_FORM_sdata:
 985          *cts = (ULong)(Long)get_SLEB128(c);
 986          *ctsSzB = 8;
 987          TRACE_D3("%lld", (Long)*cts);
 988          break;
 989       case DW_FORM_udata:
 990          *cts = (ULong)(Long)get_ULEB128(c);
 991          *ctsSzB = 8;
 992          TRACE_D3("%llu", (Long)*cts);
 993          break;
 994       case DW_FORM_addr:
 995          /* note, this is a hack.  DW_FORM_addr is defined as getting
 996             a word the size of the target machine as defined by the
 997             address_size field in the CU Header.  However,
 998             parse_CU_Header() rejects all inputs except those for
 999             which address_size == sizeof(Word), hence we can just
1000             treat it as a (host) Word.  */
1001          *cts = (ULong)(UWord)get_UWord(c);
1002          *ctsSzB = sizeof(UWord);
1003          TRACE_D3("0x%lx", (UWord)*cts);
1004          break;
1005
1006       case DW_FORM_ref_addr:
1007          /* We make the same word-size assumption as DW_FORM_addr. */
1008          /* What does this really mean?  From D3 Sec 7.5.4,
1009             description of "reference", it would appear to reference
1010             some other DIE, by specifying the offset from the
1011             beginning of a .debug_info section.  The D3 spec mentions
1012             that this might be in some other shared object and
1013             executable.  But I don't see how the name of the other
1014             object/exe is specified.
1015
1016             At least for the DW_FORM_ref_addrs created by icc11, the
1017             references seem to be within the same object/executable.
1018             So for the moment we merely range-check, to see that they
1019             actually do specify a plausible offset within this
1020             object's .debug_info, and return the value unchanged.
1021          */
1022          *cts = (ULong)(UWord)get_UWord(c);
1023          *ctsSzB = sizeof(UWord);
1024          TRACE_D3("0x%lx", (UWord)*cts);
1025          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
1026          if (/* the following 2 are surely impossible, but ... */
1027              cc->debug_info_img == NULL || cc->debug_info_sz == 0
1028              || *cts >= (ULong)cc->debug_info_sz) {
1029             /* Hmm.  Offset is nonsensical for this object's .debug_info
1030                section.  Be safe and reject it. */
1031             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1032                      "outside .debug_info");
1033          }
1034          break;
1035
1036       case DW_FORM_strp: {
1037          /* this is an offset into .debug_str */
1038          UChar* str;
1039          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1040          if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
1041             cc->barf("get_Form_contents: DW_FORM_strp "
1042                      "points outside .debug_str");
1043          /* FIXME: check the entire string lies inside debug_str,
1044             not just the first byte of it. */
1045          str = (UChar*)cc->debug_str_img + uw;
1046          TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
1047          *cts = (ULong)(UWord)str;
1048          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1049          break;
1050       }
1051       case DW_FORM_string: {
1052          UChar* str = get_AsciiZ(c);
1053          TRACE_D3("%s", str);
1054          *cts = (ULong)(UWord)str;
1055          /* strlen is safe because get_AsciiZ already 'vetted' the
1056             entire string */
1057          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1058          break;
1059       }
1060       case DW_FORM_ref1: {
1061          UChar  u8 = get_UChar(c);
1062          UWord res = cc->cu_start_offset + (UWord)u8;
1063          *cts = (ULong)res;
1064          *ctsSzB = sizeof(UWord);
1065          TRACE_D3("<%lx>", res);
1066          break;
1067       }
1068       case DW_FORM_ref2: {
1069          UShort  u16 = get_UShort(c);
1070          UWord res = cc->cu_start_offset + (UWord)u16;
1071          *cts = (ULong)res;
1072          *ctsSzB = sizeof(UWord);
1073          TRACE_D3("<%lx>", res);
1074          break;
1075       }
1076       case DW_FORM_ref4: {
1077          UInt  u32 = get_UInt(c);
1078          UWord res = cc->cu_start_offset + (UWord)u32;
1079          *cts = (ULong)res;
1080          *ctsSzB = sizeof(UWord);
1081          TRACE_D3("<%lx>", res);
1082          break;
1083       }
1084       case DW_FORM_ref8: {
1085          ULong  u64 = get_ULong(c);
1086          UWord res = cc->cu_start_offset + (UWord)u64;
1087          *cts = (ULong)res;
1088          *ctsSzB = sizeof(UWord);
1089          TRACE_D3("<%lx>", res);
1090          break;
1091       }
1092       case DW_FORM_ref_udata: {
1093          ULong  u64 = get_ULEB128(c);
1094          UWord res = cc->cu_start_offset + (UWord)u64;
1095          *cts = (ULong)res;
1096          *ctsSzB = sizeof(UWord);
1097          TRACE_D3("<%lx>", res);
1098          break;
1099       }
1100       case DW_FORM_flag: {
1101          UChar u8 = get_UChar(c);
1102          TRACE_D3("%u", (UInt)u8);
1103          *cts = (ULong)u8;
1104          *ctsSzB = 1;
1105          break;
1106       }
1107       case DW_FORM_flag_present:
1108          TRACE_D3("1");
1109          *cts = 1;
1110          *ctsSzB = 1;
1111          break;
1112       case DW_FORM_block1: {
1113          ULong  u64b;
1114          ULong  u64 = (ULong)get_UChar(c);
1115          UChar* block = get_address_of_Cursor(c);
1116          TRACE_D3("%llu byte block: ", u64);
1117          for (u64b = u64; u64b > 0; u64b--) {
1118             UChar u8 = get_UChar(c);
1119             TRACE_D3("%x ", (UInt)u8);
1120          }
1121          *cts = (ULong)(UWord)block;
1122          *ctsMemSzB = (UWord)u64;
1123          break;
1124       }
1125       case DW_FORM_block2: {
1126          ULong  u64b;
1127          ULong  u64 = (ULong)get_UShort(c);
1128          UChar* block = get_address_of_Cursor(c);
1129          TRACE_D3("%llu byte block: ", u64);
1130          for (u64b = u64; u64b > 0; u64b--) {
1131             UChar u8 = get_UChar(c);
1132             TRACE_D3("%x ", (UInt)u8);
1133          }
1134          *cts = (ULong)(UWord)block;
1135          *ctsMemSzB = (UWord)u64;
1136          break;
1137       }
1138       case DW_FORM_block4: {
1139          ULong  u64b;
1140          ULong  u64 = (ULong)get_UInt(c);
1141          UChar* block = get_address_of_Cursor(c);
1142          TRACE_D3("%llu byte block: ", u64);
1143          for (u64b = u64; u64b > 0; u64b--) {
1144             UChar u8 = get_UChar(c);
1145             TRACE_D3("%x ", (UInt)u8);
1146          }
1147          *cts = (ULong)(UWord)block;
1148          *ctsMemSzB = (UWord)u64;
1149          break;
1150       }
1151       case DW_FORM_exprloc:
1152       case DW_FORM_block: {
1153          ULong  u64b;
1154          ULong  u64 = (ULong)get_ULEB128(c);
1155          UChar* block = get_address_of_Cursor(c);
1156          TRACE_D3("%llu byte block: ", u64);
1157          for (u64b = u64; u64b > 0; u64b--) {
1158             UChar u8 = get_UChar(c);
1159             TRACE_D3("%x ", (UInt)u8);
1160          }
1161          *cts = (ULong)(UWord)block;
1162          *ctsMemSzB = (UWord)u64;
1163          break;
1164       }
1165       case DW_FORM_ref_sig8: {
1166          ULong  u64b;
1167          UChar* block = get_address_of_Cursor(c);
1168          TRACE_D3("8 byte signature: ");
1169          for (u64b = 8; u64b > 0; u64b--) {
1170             UChar u8 = get_UChar(c);
1171             TRACE_D3("%x ", (UInt)u8);
1172          }
1173          *cts = (ULong)(UWord)block;
1174          *ctsMemSzB = 8;
1175          break;
1176       }
1177       case DW_FORM_indirect:
1178          get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
1179                             (DW_FORM)get_ULEB128(c));
1180          return;
1181
1182       default:
1183          VG_(printf)(
1184             "get_Form_contents: unhandled %d (%s) at <%lx>\n",
1185             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1186          c->barf("get_Form_contents: unhandled DW_FORM");
1187    }
1188 }
1189
1190
1191 /*------------------------------------------------------------*/
1192 /*---                                                      ---*/
1193 /*--- Parsing of variable-related DIEs                     ---*/
1194 /*---                                                      ---*/
1195 /*------------------------------------------------------------*/
1196
1197 typedef
1198    struct _TempVar {
1199       UChar*  name; /* in DebugInfo's .strchunks */
1200       /* Represent ranges economically.  nRanges is the number of
1201          ranges.  Cases:
1202          0: .rngOneMin .rngOneMax .manyRanges are all zero
1203          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1204          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1205          This is merely an optimisation to avoid having to allocate
1206          and free the XArray in the common (98%) of cases where there
1207          is zero or one address ranges. */
1208       UWord   nRanges;
1209       Addr    rngOneMin;
1210       Addr    rngOneMax;
1211       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1212       /* Do not free .rngMany, since many TempVars will have the same
1213          value.  Instead the associated storage is to be freed by
1214          deleting 'rangetree', which stores a single copy of each
1215          range. */
1216       /* --- */
1217       Int     level;
1218       UWord   typeR; /* a cuOff */
1219       GExpr*  gexpr; /* for this variable */
1220       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1221                         any */
1222       UChar*  fName; /* declaring file name, or NULL */
1223       Int     fLine; /* declaring file line number, or zero */
1224       /* offset in .debug_info, so that abstract instances can be
1225          found to satisfy references from concrete instances. */
1226       UWord   dioff;
1227       UWord   absOri; /* so the absOri fields refer to dioff fields
1228                          in some other, related TempVar. */
1229    }
1230    TempVar;
1231
1232 #define N_D3_VAR_STACK 48
1233
1234 typedef
1235    struct {
1236       /* Contains the range stack: a stack of address ranges, one
1237          stack entry for each nested scope.
1238
1239          Some scope entries are created by function definitions
1240          (DW_AT_subprogram), and for those, we also note the GExpr
1241          derived from its DW_AT_frame_base attribute, if any.
1242          Consequently it should be possible to find, for any
1243          variable's DIE, the GExpr for the the containing function's
1244          DW_AT_frame_base by scanning back through the stack to find
1245          the nearest entry associated with a function.  This somewhat
1246          elaborate scheme is provided so as to make it possible to
1247          obtain the correct DW_AT_frame_base expression even in the
1248          presence of nested functions (or to be more precise, in the
1249          presence of nested DW_AT_subprogram DIEs).
1250       */
1251       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1252                      stack */
1253       XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1254       Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
1255       Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1256       GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
1257                                          expr, else NULL */
1258       /* The file name table.  Is a mapping from integer index to the
1259          (permanent) copy of the string, iow a non-img area. */
1260       XArray* /* of UChar* */ filenameTable;
1261    }
1262    D3VarParser;
1263
1264 static void varstack_show ( D3VarParser* parser, HChar* str ) {
1265    Word i, j;
1266    VG_(printf)("  varstack (%s) {\n", str);
1267    for (i = 0; i <= parser->sp; i++) {
1268       XArray* xa = parser->ranges[i];
1269       vg_assert(xa);
1270       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1271       if (parser->isFunc[i]) {
1272          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1273       } else {
1274          vg_assert(parser->fbGX[i] == NULL);
1275       }
1276       VG_(printf)(": ");
1277       if (VG_(sizeXA)( xa ) == 0) {
1278          VG_(printf)("** empty PC range array **");
1279       } else {
1280          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1281             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1282             vg_assert(range);
1283             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1284          }
1285       }
1286       VG_(printf)("\n");
1287    }
1288    VG_(printf)("  }\n");
1289 }
1290
1291 /* Remove from the stack, all entries with .level > 'level' */
1292 static
1293 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1294 {
1295    Bool changed = False;
1296    vg_assert(parser->sp < N_D3_VAR_STACK);
1297    while (True) {
1298       vg_assert(parser->sp >= -1);
1299       if (parser->sp == -1) break;
1300       if (parser->level[parser->sp] <= level) break;
1301       if (0)
1302          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1303       vg_assert(parser->ranges[parser->sp]);
1304       /* Who allocated this xa?  get_range_list() or
1305          unitary_range_list(). */
1306       VG_(deleteXA)( parser->ranges[parser->sp] );
1307       parser->ranges[parser->sp] = NULL;
1308       parser->level[parser->sp]  = 0;
1309       parser->isFunc[parser->sp] = False;
1310       parser->fbGX[parser->sp]   = NULL;
1311       parser->sp--;
1312       changed = True;
1313    }
1314    if (changed && td3)
1315       varstack_show( parser, "after preen" );
1316 }
1317
1318 static void varstack_push ( CUConst* cc,
1319                             D3VarParser* parser,
1320                             Bool td3,
1321                             XArray* ranges, Int level,
1322                             Bool    isFunc, GExpr* fbGX ) {
1323    if (0)
1324    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1325             parser->sp+1, level, ranges);
1326
1327    /* First we need to zap everything >= 'level', as we are about to
1328       replace any previous entry at 'level', so .. */
1329    varstack_preen(parser, /*td3*/False, level-1);
1330
1331    vg_assert(parser->sp >= -1);
1332    vg_assert(parser->sp < N_D3_VAR_STACK);
1333    if (parser->sp == N_D3_VAR_STACK-1)
1334       cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1335                "increase and recompile");
1336    if (parser->sp >= 0)
1337       vg_assert(parser->level[parser->sp] < level);
1338    parser->sp++;
1339    vg_assert(parser->ranges[parser->sp] == NULL);
1340    vg_assert(parser->level[parser->sp]  == 0);
1341    vg_assert(parser->isFunc[parser->sp] == False);
1342    vg_assert(parser->fbGX[parser->sp]   == NULL);
1343    vg_assert(ranges != NULL);
1344    if (!isFunc) vg_assert(fbGX == NULL);
1345    parser->ranges[parser->sp] = ranges;
1346    parser->level[parser->sp]  = level;
1347    parser->isFunc[parser->sp] = isFunc;
1348    parser->fbGX[parser->sp]   = fbGX;
1349    if (td3)
1350       varstack_show( parser, "after push" );
1351 }
1352
1353
1354 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1355    refer either to a location expression or to a location list.
1356    Figure out which, and in both cases bundle the expression or
1357    location list into a so-called GExpr (guarded expression). */
1358 __attribute__((noinline))
1359 static GExpr* get_GX ( CUConst* cc, Bool td3,
1360                        ULong cts, Int ctsSzB, UWord ctsMemSzB )
1361 {
1362    GExpr* gexpr = NULL;
1363    if (ctsMemSzB > 0 && ctsSzB == 0) {
1364       /* represents an in-line location expression, and cts points
1365          right at it */
1366       gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1367    }
1368    else
1369    if (ctsMemSzB == 0 && ctsSzB > 0) {
1370       /* represents location list.  cts is the offset of it in
1371          .debug_loc. */
1372       if (!cc->cu_svma_known)
1373          cc->barf("get_GX: location list, but CU svma is unknown");
1374       gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1375    }
1376    else {
1377       vg_assert(0); /* else caller is bogus */
1378    }
1379    return gexpr;
1380 }
1381
1382
1383 static
1384 void read_filename_table( /*MOD*/D3VarParser* parser,
1385                           CUConst* cc, UWord debug_line_offset,
1386                           Bool td3 )
1387 {
1388    Bool   is_dw64;
1389    Cursor c;
1390    Word   i;
1391    UShort version;
1392    UChar  opcode_base;
1393    UChar* str;
1394
1395    vg_assert(parser && cc && cc->barf);
1396    if ((!cc->debug_line_img)
1397        || cc->debug_line_sz <= debug_line_offset)
1398       cc->barf("read_filename_table: .debug_line is missing?");
1399
1400    init_Cursor( &c, cc->debug_line_img,
1401                 cc->debug_line_sz, debug_line_offset, cc->barf,
1402                 "Overrun whilst reading .debug_line section(1)" );
1403
1404    /* unit_length = */
1405       get_Initial_Length( &is_dw64, &c,
1406            "read_filename_table: invalid initial-length field" );
1407    version = get_UShort( &c );
1408    if (version != 2 && version != 3 && version != 4)
1409      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1410               "is currently supported.");
1411    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1412    /*minimum_instruction_length = */ get_UChar( &c );
1413    if (version >= 4)
1414       /*maximum_operations_per_insn = */ get_UChar( &c );
1415    /*default_is_stmt            = */ get_UChar( &c );
1416    /*line_base                  = (Char)*/ get_UChar( &c );
1417    /*line_range                 = */ get_UChar( &c );
1418    opcode_base                = get_UChar( &c );
1419    /* skip over "standard_opcode_lengths" */
1420    for (i = 1; i < (Word)opcode_base; i++)
1421      (void)get_UChar( &c );
1422
1423    /* skip over the directory names table */
1424    while (peek_UChar(&c) != 0) {
1425      (void)get_AsciiZ(&c);
1426    }
1427    (void)get_UChar(&c); /* skip terminating zero */
1428
1429    /* Read and record the file names table */
1430    vg_assert(parser->filenameTable);
1431    vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1432    /* Add a dummy index-zero entry.  DWARF3 numbers its files
1433       from 1, for some reason. */
1434    str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1435    VG_(addToXA)( parser->filenameTable, &str );
1436    while (peek_UChar(&c) != 0) {
1437       str = get_AsciiZ(&c);
1438       TRACE_D3("  read_filename_table: %ld %s\n",
1439                VG_(sizeXA)(parser->filenameTable), str);
1440       str = ML_(addStr)( cc->di, str, -1 );
1441       VG_(addToXA)( parser->filenameTable, &str );
1442       (void)get_ULEB128( &c ); /* skip directory index # */
1443       (void)get_ULEB128( &c ); /* skip last mod time */
1444       (void)get_ULEB128( &c ); /* file size */
1445    }
1446    /* We're done!  The rest of it is not interesting. */
1447 }
1448
1449
1450 __attribute__((noinline))
1451 static void parse_var_DIE (
1452    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1453    /*MOD*/XArray* /* of TempVar* */ tempvars,
1454    /*MOD*/XArray* /* of GExpr* */ gexprs,
1455    /*MOD*/D3VarParser* parser,
1456    DW_TAG dtag,
1457    UWord posn,
1458    Int level,
1459    Cursor* c_die,
1460    Cursor* c_abbv,
1461    CUConst* cc,
1462    Bool td3
1463 )
1464 {
1465    ULong       cts;
1466    Int         ctsSzB;
1467    UWord       ctsMemSzB;
1468
1469    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1470    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1471
1472    varstack_preen( parser, td3, level-1 );
1473
1474    if (dtag == DW_TAG_compile_unit) {
1475       Bool have_lo    = False;
1476       Bool have_hi1   = False;
1477       Bool have_range = False;
1478       Addr ip_lo    = 0;
1479       Addr ip_hi1   = 0;
1480       Addr rangeoff = 0;
1481       while (True) {
1482          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1483          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1484          if (attr == 0 && form == 0) break;
1485          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1486                             cc, c_die, False/*td3*/, form );
1487          if (attr == DW_AT_low_pc && ctsSzB > 0) {
1488             ip_lo   = cts;
1489             have_lo = True;
1490          }
1491          if (attr == DW_AT_high_pc && ctsSzB > 0) {
1492             ip_hi1   = cts;
1493             have_hi1 = True;
1494          }
1495          if (attr == DW_AT_ranges && ctsSzB > 0) {
1496             rangeoff = cts;
1497             have_range = True;
1498          }
1499          if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1500             read_filename_table( parser, cc, (UWord)cts, td3 );
1501          }
1502       }
1503       /* Now, does this give us an opportunity to find this
1504          CU's svma? */
1505 #if 0
1506       if (level == 0 && have_lo) {
1507          vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1508          because we've already seen a DW_TAG_compile_unit DIE at level
1509          0.  But that can't happen, because DWARF3 only allows exactly
1510          one top level DIE per CU. */
1511          cc->cu_svma_known = True;
1512          cc->cu_svma = ip_lo;
1513          if (1)
1514             TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1515          /* Now, it may be that this DIE doesn't tell us the CU's
1516             SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1517             the CU doesn't *have* to have its SVMA specified.
1518
1519             But as per last para D3 spec sec 3.1.1 ("Normal and
1520             Partial Compilation Unit Entries", "If the base address
1521             (viz, the SVMA) is undefined, then any DWARF entry of
1522             structure defined interms of the base address of that
1523             compilation unit is not valid.".  So that means, if whilst
1524             processing the children of this top level DIE (or their
1525             children, etc) we see a DW_AT_range, and cu_svma_known is
1526             False, then the DIE that contains it is (per the spec)
1527             invalid, and we can legitimately stop and complain. */
1528       }
1529 #else
1530       /* .. whereas The Reality is, simply assume the SVMA is zero
1531          if it isn't specified. */
1532       if (level == 0) {
1533          vg_assert(!cc->cu_svma_known);
1534          cc->cu_svma_known = True;
1535          if (have_lo)
1536             cc->cu_svma = ip_lo;
1537          else
1538             cc->cu_svma = 0;
1539       }
1540 #endif
1541       /* Do we have something that looks sane? */
1542       if (have_lo && have_hi1 && (!have_range)) {
1543          if (ip_lo < ip_hi1)
1544             varstack_push( cc, parser, td3,
1545                            unitary_range_list(ip_lo, ip_hi1 - 1),
1546                            level,
1547                            False/*isFunc*/, NULL/*fbGX*/ );
1548       } else
1549       if ((!have_lo) && (!have_hi1) && have_range) {
1550          varstack_push( cc, parser, td3,
1551                         get_range_list( cc, td3,
1552                                         rangeoff, cc->cu_svma ),
1553                         level,
1554                         False/*isFunc*/, NULL/*fbGX*/ );
1555       } else
1556       if ((!have_lo) && (!have_hi1) && (!have_range)) {
1557          /* CU has no code, presumably? */
1558          varstack_push( cc, parser, td3,
1559                         empty_range_list(),
1560                         level,
1561                         False/*isFunc*/, NULL/*fbGX*/ );
1562       } else
1563       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1564          /* broken DIE created by gcc-4.3.X ?  Ignore the
1565             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1566             instead. */
1567          varstack_push( cc, parser, td3,
1568                         get_range_list( cc, td3,
1569                                         rangeoff, cc->cu_svma ),
1570                         level,
1571                         False/*isFunc*/, NULL/*fbGX*/ );
1572       } else {
1573          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1574                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
1575          goto bad_DIE;
1576       }
1577    }
1578
1579    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1580       Bool   have_lo    = False;
1581       Bool   have_hi1   = False;
1582       Bool   have_range = False;
1583       Addr   ip_lo      = 0;
1584       Addr   ip_hi1     = 0;
1585       Addr   rangeoff   = 0;
1586       Bool   isFunc     = dtag == DW_TAG_subprogram;
1587       GExpr* fbGX       = NULL;
1588       while (True) {
1589          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1590          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1591          if (attr == 0 && form == 0) break;
1592          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1593                             cc, c_die, False/*td3*/, form );
1594          if (attr == DW_AT_low_pc && ctsSzB > 0) {
1595             ip_lo   = cts;
1596             have_lo = True;
1597          }
1598          if (attr == DW_AT_high_pc && ctsSzB > 0) {
1599             ip_hi1   = cts;
1600             have_hi1 = True;
1601          }
1602          if (attr == DW_AT_ranges && ctsSzB > 0) {
1603             rangeoff = cts;
1604             have_range = True;
1605          }
1606          if (isFunc
1607              && attr == DW_AT_frame_base
1608              && ((ctsMemSzB > 0 && ctsSzB == 0)
1609                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
1610             fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1611             vg_assert(fbGX);
1612             VG_(addToXA)(gexprs, &fbGX);
1613          }
1614       }
1615       /* Do we have something that looks sane? */
1616       if (dtag == DW_TAG_subprogram
1617           && (!have_lo) && (!have_hi1) && (!have_range)) {
1618          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1619             representing a subroutine declaration that is not also a
1620             definition does not have code address or range
1621             attributes." */
1622       } else
1623       if (dtag == DW_TAG_lexical_block
1624           && (!have_lo) && (!have_hi1) && (!have_range)) {
1625          /* I believe this is legit, and means the lexical block
1626             contains no insns (whatever that might mean).  Ignore. */
1627       } else
1628       if (have_lo && have_hi1 && (!have_range)) {
1629          /* This scope supplies just a single address range. */
1630          if (ip_lo < ip_hi1)
1631             varstack_push( cc, parser, td3,
1632                            unitary_range_list(ip_lo, ip_hi1 - 1),
1633                            level, isFunc, fbGX );
1634       } else
1635       if ((!have_lo) && (!have_hi1) && have_range) {
1636          /* This scope supplies multiple address ranges via the use of
1637             a range list. */
1638          varstack_push( cc, parser, td3,
1639                         get_range_list( cc, td3,
1640                                         rangeoff, cc->cu_svma ),
1641                         level, isFunc, fbGX );
1642       } else
1643       if (have_lo && (!have_hi1) && (!have_range)) {
1644          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1645             Entries) says fairly clearly that a scope must have either
1646             _range or (_low_pc and _high_pc). */
1647          /* The spec is a bit ambiguous though.  Perhaps a single byte
1648             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1649          /* This case is here because icc9 produced this:
1650          <2><13bd>: DW_TAG_lexical_block
1651             DW_AT_decl_line   : 5229
1652             DW_AT_decl_column : 37
1653             DW_AT_decl_file   : 1
1654             DW_AT_low_pc      : 0x401b03
1655          */
1656          /* Ignore (seems safer than pushing a single byte range) */
1657       } else
1658          goto bad_DIE;
1659    }
1660
1661    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1662       UChar* name        = NULL;
1663       UWord  typeR       = D3_INVALID_CUOFF;
1664       Bool   external    = False;
1665       GExpr* gexpr       = NULL;
1666       Int    n_attrs     = 0;
1667       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1668       Int    lineNo      = 0;
1669       UChar* fileName    = NULL;
1670       while (True) {
1671          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1672          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1673          if (attr == 0 && form == 0) break;
1674          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1675                             cc, c_die, False/*td3*/, form );
1676          n_attrs++;
1677          if (attr == DW_AT_name && ctsMemSzB > 0) {
1678             name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1679          }
1680          if (attr == DW_AT_location
1681              && ((ctsMemSzB > 0 && ctsSzB == 0)
1682                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
1683             gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1684             vg_assert(gexpr);
1685             VG_(addToXA)(gexprs, &gexpr);
1686          }
1687          if (attr == DW_AT_type && ctsSzB > 0) {
1688             typeR = (UWord)cts;
1689          }
1690          if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1691             external = True;
1692          }
1693          if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1694             abs_ori = (UWord)cts;
1695          }
1696          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1697             /*declaration = True;*/
1698          }
1699          if (attr == DW_AT_decl_line && ctsSzB > 0) {
1700             lineNo = (Int)cts;
1701          }
1702          if (attr == DW_AT_decl_file && ctsSzB > 0) {
1703             Int ftabIx = (Int)cts;
1704             if (ftabIx >= 1
1705                 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1706                fileName = *(UChar**)
1707                           VG_(indexXA)( parser->filenameTable, ftabIx );
1708                vg_assert(fileName);
1709             }
1710             if (0) VG_(printf)("XXX filename = %s\n", fileName);
1711          }
1712       }
1713       /* We'll collect it under if one of the following three
1714          conditions holds:
1715          (1) has location and type    -> completed
1716          (2) has type only            -> is an abstract instance
1717          (3) has location and abs_ori -> is a concrete instance
1718          Name, filename and line number are all optional frills.
1719       */
1720       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1721            /* 2 */ || (typeR != D3_INVALID_CUOFF)
1722            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1723
1724          /* Add this variable to the list of interesting looking
1725             variables.  Crucially, note along with it the address
1726             range(s) associated with the variable, which for locals
1727             will be the address ranges at the top of the varparser's
1728             stack. */
1729          GExpr*   fbGX = NULL;
1730          Word     i, nRanges;
1731          XArray*  /* of AddrRange */ xa;
1732          TempVar* tv;
1733          /* Stack can't be empty; we put a dummy entry on it for the
1734             entire address range before starting with the DIEs for
1735             this CU. */
1736          vg_assert(parser->sp >= 0);
1737
1738          /* If this is a local variable (non-external), try to find
1739             the GExpr for the DW_AT_frame_base of the containing
1740             function.  It should have been pushed on the stack at the
1741             time we encountered its DW_TAG_subprogram DIE, so the way
1742             to find it is to scan back down the stack looking for it.
1743             If there isn't an enclosing stack entry marked 'isFunc'
1744             then we must be seeing variable or formal param DIEs
1745             outside of a function, so we deem the Dwarf to be
1746             malformed if that happens.  Note that the fbGX may be NULL
1747             if the containing DW_TAG_subprogram didn't supply a
1748             DW_AT_frame_base -- that's OK, but there must actually be
1749             a containing DW_TAG_subprogram. */
1750          if (!external) {
1751             Bool found = False;
1752             for (i = parser->sp; i >= 0; i--) {
1753                if (parser->isFunc[i]) {
1754                   fbGX = parser->fbGX[i];
1755                   found = True;
1756                   break;
1757                }
1758             }
1759             if (!found) {
1760                if (0 && VG_(clo_verbosity) >= 0) {
1761                   VG_(message)(Vg_DebugMsg,
1762                      "warning: parse_var_DIE: non-external variable "
1763                      "outside DW_TAG_subprogram\n");
1764                }
1765                /* goto bad_DIE; */
1766                /* This seems to happen a lot.  Just ignore it -- if,
1767                   when we come to evaluation of the location (guarded)
1768                   expression, it requires a frame base value, and
1769                   there's no expression for that, then evaluation as a
1770                   whole will fail.  Harmless - a bit of a waste of
1771                   cycles but nothing more. */
1772             }
1773          }
1774
1775          /* re "external ? 0 : parser->sp" (twice), if the var is
1776             marked 'external' then we must put it at the global scope,
1777             as only the global scope (level 0) covers the entire PC
1778             address space.  It is asserted elsewhere that level 0
1779             always covers the entire address space. */
1780          xa = parser->ranges[external ? 0 : parser->sp];
1781          nRanges = VG_(sizeXA)(xa);
1782          vg_assert(nRanges >= 0);
1783
1784          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
1785          tv->name   = name;
1786          tv->level  = external ? 0 : parser->sp;
1787          tv->typeR  = typeR;
1788          tv->gexpr  = gexpr;
1789          tv->fbGX   = fbGX;
1790          tv->fName  = fileName;
1791          tv->fLine  = lineNo;
1792          tv->dioff  = posn;
1793          tv->absOri = abs_ori;
1794
1795          /* See explanation on definition of type TempVar for the
1796             reason for this elaboration. */
1797          tv->nRanges = nRanges;
1798          tv->rngOneMin = 0;
1799          tv->rngOneMax = 0;
1800          tv->rngMany = NULL;
1801          if (nRanges == 1) {
1802             AddrRange* range = VG_(indexXA)(xa, 0);
1803             tv->rngOneMin = range->aMin;
1804             tv->rngOneMax = range->aMax;
1805          }
1806          else if (nRanges > 1) {
1807             /* See if we already have a range list which is
1808                structurally identical.  If so, use that; if not, clone
1809                this one, and add it to our collection. */
1810             UWord keyW, valW;
1811             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
1812                XArray* old = (XArray*)keyW;
1813                tl_assert(valW == 0);
1814                tl_assert(old != xa);
1815                tv->rngMany = old;
1816             } else {
1817                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
1818                tv->rngMany = cloned;
1819                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
1820             }
1821          }
1822
1823          VG_(addToXA)( tempvars, &tv );
1824
1825          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
1826                   VG_(sizeXA)(xa) );
1827          /* collect stats on how effective the ->ranges special
1828             casing is */
1829          if (0) {
1830             static Int ntot=0, ngt=0;
1831             ntot++;
1832             if (tv->rngMany) ngt++;
1833             if (0 == (ntot % 100000))
1834                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
1835          }
1836
1837       }
1838
1839       /* Here are some other weird cases seen in the wild:
1840
1841             We have a variable with a name and a type, but no
1842             location.  I guess that's a sign that it has been
1843             optimised away.  Ignore it.  Here's an example:
1844
1845             static Int lc_compar(void* n1, void* n2) {
1846                MC_Chunk* mc1 = *(MC_Chunk**)n1;
1847                MC_Chunk* mc2 = *(MC_Chunk**)n2;
1848                return (mc1->data < mc2->data ? -1 : 1);
1849             }
1850
1851             Both mc1 and mc2 are like this
1852             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
1853                 DW_AT_name        : mc1
1854                 DW_AT_decl_file   : 1
1855                 DW_AT_decl_line   : 216
1856                 DW_AT_type        : <5d3>
1857
1858             whereas n1 and n2 do have locations specified.
1859
1860             ---------------------------------------------
1861
1862             We see a DW_TAG_formal_parameter with a type, but
1863             no name and no location.  It's probably part of a function type
1864             construction, thusly, hence ignore it:
1865          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
1866              DW_AT_sibling     : <2c9>
1867              DW_AT_prototyped  : 1
1868              DW_AT_type        : <114>
1869          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1870              DW_AT_type        : <13e>
1871          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1872              DW_AT_type        : <133>
1873
1874             ---------------------------------------------
1875
1876             Is very minimal, like this:
1877             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
1878                 DW_AT_abstract_origin: <7ba>
1879             What that signifies I have no idea.  Ignore.
1880
1881             ----------------------------------------------
1882
1883             Is very minimal, like this:
1884             <200f>: DW_TAG_formal_parameter
1885                 DW_AT_abstract_ori: <1f4c>
1886                 DW_AT_location    : 13440
1887             What that signifies I have no idea.  Ignore.
1888             It might be significant, though: the variable at least
1889             has a location and so might exist somewhere.
1890             Maybe we should handle this.
1891
1892             ---------------------------------------------
1893
1894             <22407>: DW_TAG_variable
1895               DW_AT_name        : (indirect string, offset: 0x6579):
1896                                   vgPlain_trampoline_stuff_start
1897               DW_AT_decl_file   : 29
1898               DW_AT_decl_line   : 56
1899               DW_AT_external    : 1
1900               DW_AT_declaration : 1
1901
1902             Nameless and typeless variable that has a location?  Who
1903             knows.  Not me.
1904             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
1905                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
1906                                      (DW_OP_addr: 3813c7c0)
1907
1908             No, really.  Check it out.  gcc is quite simply borked.
1909             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
1910             // followed by no attributes, and the next DIE is a sibling,
1911             // not a child
1912             */
1913    }
1914    return;
1915
1916   bad_DIE:
1917    set_position_of_Cursor( c_die,  saved_die_c_offset );
1918    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
1919    VG_(printf)("\nparse_var_DIE: confused by:\n");
1920    VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
1921    while (True) {
1922       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1923       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1924       if (attr == 0 && form == 0) break;
1925       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
1926       /* Get the form contents, so as to print them */
1927       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1928                          cc, c_die, True, form );
1929       VG_(printf)("\t\n");
1930    }
1931    VG_(printf)("\n");
1932    cc->barf("parse_var_DIE: confused by the above DIE");
1933    /*NOTREACHED*/
1934 }
1935
1936
1937 /*------------------------------------------------------------*/
1938 /*---                                                      ---*/
1939 /*--- Parsing of type-related DIEs                         ---*/
1940 /*---                                                      ---*/
1941 /*------------------------------------------------------------*/
1942
1943 #define N_D3_TYPE_STACK 16  /* capacity of the stack declared below */
1944
1945 /* State carried by the type-DIE parser: the source language, plus a
1946    stack of parent types whose children are still being collected. */
1947 typedef struct {
1948    /* Source language code: 'A' = Ada83/95, 'C' = C/C++,
1949       'F' = Fortran, '?' = other.  Established once per compilation
1950       unit. */
1951    UChar language;
1952
1953    /* Index of the innermost active entry of the stack of types
1954       currently under construction; sp == -1 means it is empty. */
1955    Int   sp;
1956
1957    /* Parent TyEnts.  These are temporary copies of the ones
1958       accumulating in the main tyent array, so it is not safe to
1959       free anything on them when popping (iow, TyEnt__make_EMPTY
1960       must not be used on them).  Just memset them to zero when
1961       done. */
1962    TyEnt qparentE[N_D3_TYPE_STACK];
1963    /* qlevel[i] is the level that qparentE[i] was pushed at. */
1964    Int   qlevel[N_D3_TYPE_STACK];
1965 } D3TypeParser;
1966 /* Debugging aid: print the stack, bottom entry first, tagged 'str'. */
1967 static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1968    Word slot;
1969    VG_(printf)("  typestack (%s) {\n", str);
1970    for (slot = 0; slot <= parser->sp; slot++) {
1971       VG_(printf)("    [%ld] (level %d): ", slot, parser->qlevel[slot]);
1972       ML_(pp_TyEnt)( &parser->qparentE[slot] );
1973       VG_(printf)("\n");
1974    }
1975    VG_(printf)("  }\n");
1976 }
1977
1978 /* Pop all entries whose level exceeds 'level', blanking each vacated
1979    slot; if 'td3' is set and anything was popped, show the stack. */
1980 static void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1981 {
1982    Int n_popped = 0;
1983    vg_assert(parser->sp < N_D3_TYPE_STACK);
1984    vg_assert(parser->sp >= -1);
1985    while (parser->sp != -1 && parser->qlevel[parser->sp] > level) {
1986       TyEnt* top = &parser->qparentE[parser->sp];
1987       if (0)
1988          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1989       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
1990       /* The slot holds only a copy, so wipe it rather than free it. */
1991       VG_(memset)(top, 0, sizeof(TyEnt));
1992       top->cuOff = D3_INVALID_CUOFF;
1993       top->tag   = Te_EMPTY;
1994       parser->qlevel[parser->sp] = 0;
1995       parser->sp--;
1996       n_popped++;
1997    }
1998    if (td3 && n_popped > 0)
1999       typestack_show( parser, "after preen" );
2000 }
2001 /* True iff no parent type is on the stack; also range-checks sp. */
2002 static Bool typestack_is_empty ( D3TypeParser* parser ) {
2003    vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2004    return (parser->sp == -1) ? True : False;
2005 }
2006 /* Push a copy of *parentE as the new innermost parent, at 'level'. */
2007 static void typestack_push ( CUConst* cc,          /* provides barf() */
2008                              D3TypeParser* parser, /* stack pushed onto */
2009                              Bool td3,      /* show the stack afterwards? */
2010                              TyEnt* parentE, Int level ) {
2011    if (0)   /* trace below is permanently disabled */
2012    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2013             parser->sp+1, level, parentE->cuOff);
2014
2015    /* First we need to zap everything >= 'level', as we are about to
2016       replace any previous entry at 'level', so .. */
2017    typestack_preen(parser, /*td3*/False, level-1);
2018    /* Note td3 is not passed on, so preen prints nothing here. */
2019    vg_assert(parser->sp >= -1);
2020    vg_assert(parser->sp < N_D3_TYPE_STACK);
2021    if (parser->sp == N_D3_TYPE_STACK-1)   /* no free slot left */
2022       cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
2023                "increase and recompile");
2024    if (parser->sp >= 0)   /* levels must strictly increase up the stack */
2025       vg_assert(parser->qlevel[parser->sp] < level);
2026    parser->sp++;
2027    vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY); /* new slot  */
2028    vg_assert(parser->qlevel[parser->sp]  == 0);             /* is blank  */
2029    vg_assert(parentE);
2030    vg_assert(ML_(TyEnt__is_type)(parentE));
2031    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2032    parser->qparentE[parser->sp] = *parentE;   /* struct copy, by value */
2033    parser->qlevel[parser->sp]  = level;
2034    if (td3)
2035       typestack_show( parser, "after push" );
2036 }
2037
2038 /* Decide whether a DW_TAG_subrange_type DIE describes the bounds of
2039    an array.  'dtag' must be DW_TAG_subrange_type (asserted). */
2040 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
2041                                                  DW_TAG dtag )
2042 {
2043    vg_assert(dtag == DW_TAG_subrange_type);
2044    /* Outside Ada a subrange_type always gives array bounds. */
2045    if (parser->language != 'A')
2046       return True;
2047
2048    /* Ada also uses subrange_type for other purposes, so there it
2049       only counts when the innermost parent on the stack is an array. */
2050    if (typestack_is_empty(parser))
2051       return False;
2052    return parser->qparentE[parser->sp].tag == Te_TyArray;
2053 }
2054
2055 /* Parse a type-related DIE.  'parser' holds the current parser state.
2056    'admin' is where the completed types are dumped.  'dtag' is the tag
2057    for this DIE.  'c_die' points to the start of the data fields (FORM
2058    stuff) for the DIE.  c_abbv points to the start of the (name,form)
2059    pairs which describe the DIE.
2060
2061    We may find the DIE uninteresting, in which case we should ignore
2062    it.
2063
2064    What happens: the DIE is examined.  If uninteresting, it is ignored.
2065    Otherwise, the DIE gives rise to two things:
2066
2067    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2068    (2) a TyAdmin structure, which holds the type, or related stuff
2069
2070    (2) is added at the end of 'tyadmins', at some index, say 'i'.
2071
2072    A pair (cuOffset, i) is added to 'tydict'.
2073
2074    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2075    a mapping from cuOffset to the index of the corresponding entry in
2076    'tyadmins'.
2077
2078    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2079    in the tydict (by binary search).  This gives an index into
2080    tyadmins, and the required entity lives in tyadmins at that index.
2081 */
2082 __attribute__((noinline))
2083 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2084                              /*MOD*/D3TypeParser* parser,
2085                              DW_TAG dtag,
2086                              UWord posn,
2087                              Int level,
2088                              Cursor* c_die,
2089                              Cursor* c_abbv,
2090                              CUConst* cc,
2091                              Bool td3 )
2092 {
2093    ULong cts;
2094    Int   ctsSzB;
2095    UWord ctsMemSzB;
2096    TyEnt typeE;
2097    TyEnt atomE;
2098    TyEnt fieldE;
2099    TyEnt boundE;
2100
2101    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2102    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2103
2104    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2105    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2106    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2107    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2108
2109    /* If we've returned to a level at or above any previously noted
2110       parent, un-note it, so we don't believe we're still collecting
2111       its children. */
2112    typestack_preen( parser, td3, level-1 );
2113
2114    if (dtag == DW_TAG_compile_unit) {
2115       /* See if we can find DW_AT_language, since it is important for
2116          establishing array bounds (see DW_TAG_subrange_type below in
2117          this fn) */
2118       while (True) {
2119          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2120          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2121          if (attr == 0 && form == 0) break;
2122          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2123                             cc, c_die, False/*td3*/, form );
2124          if (attr != DW_AT_language)
2125             continue;
2126          if (ctsSzB == 0)
2127            goto bad_DIE;
2128          switch (cts) {
2129             case DW_LANG_C89: case DW_LANG_C:
2130             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2131             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2132             case DW_LANG_Upc: case DW_LANG_C99:
2133                parser->language = 'C'; break;
2134             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2135             case DW_LANG_Fortran95:
2136                parser->language = 'F'; break;
2137             case DW_LANG_Ada83: case DW_LANG_Ada95:
2138                parser->language = 'A'; break;
2139             case DW_LANG_Cobol74:
2140             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2141             case DW_LANG_Modula2: case DW_LANG_Java:
2142             case DW_LANG_PLI:
2143             case DW_LANG_D: case DW_LANG_Python:
2144             case DW_LANG_Mips_Assembler:
2145                parser->language = '?'; break;
2146             default:
2147                goto bad_DIE;
2148          }
2149       }
2150    }
2151
2152    if (dtag == DW_TAG_base_type) {
2153       /* We can pick up a new base type any time. */
2154       VG_(memset)(&typeE, 0, sizeof(typeE));
2155       typeE.cuOff = D3_INVALID_CUOFF;
2156       typeE.tag   = Te_TyBase;
2157       while (True) {
2158          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2159          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2160          if (attr == 0 && form == 0) break;
2161          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2162                             cc, c_die, False/*td3*/, form );
2163          if (attr == DW_AT_name && ctsMemSzB > 0) {
2164             typeE.Te.TyBase.name
2165                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
2166                                     (UChar*)(UWord)cts );
2167          }
2168          if (attr == DW_AT_byte_size && ctsSzB > 0) {
2169             typeE.Te.TyBase.szB = cts;
2170          }
2171          if (attr == DW_AT_encoding && ctsSzB > 0) {
2172             switch (cts) {
2173                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2174                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2175                case DW_ATE_boolean:/* FIXME - is this correct? */
2176                   typeE.Te.TyBase.enc = 'U'; break;
2177                case DW_ATE_signed: case DW_ATE_signed_char:
2178                   typeE.Te.TyBase.enc = 'S'; break;
2179                case DW_ATE_float:
2180                   typeE.Te.TyBase.enc = 'F'; break;
2181                case DW_ATE_complex_float:
2182                   typeE.Te.TyBase.enc = 'C'; break;
2183                default:
2184                   goto bad_DIE;
2185             }
2186          }
2187       }
2188
2189       /* Invent a name if it doesn't have one.  gcc-4.3
2190          -ftree-vectorize is observed to emit nameless base types. */
2191       if (!typeE.Te.TyBase.name)
2192          typeE.Te.TyBase.name
2193             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2194                                  "<anon_base_type>" );
2195
2196       /* Do we have something that looks sane? */
2197       if (/* must have a name */
2198           typeE.Te.TyBase.name == NULL
2199           /* and a plausible size.  Yes, really 32: "complex long
2200              double" apparently has size=32 */
2201           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2202           /* and a plausible encoding */
2203           || (typeE.Te.TyBase.enc != 'U'
2204               && typeE.Te.TyBase.enc != 'S'
2205               && typeE.Te.TyBase.enc != 'F'
2206               && typeE.Te.TyBase.enc != 'C'))
2207          goto bad_DIE;
2208       /* Last minute hack: if we see this
2209          <1><515>: DW_TAG_base_type
2210              DW_AT_byte_size   : 0
2211              DW_AT_encoding    : 5
2212              DW_AT_name        : void
2213          convert it into a real Void type. */
2214       if (typeE.Te.TyBase.szB == 0
2215           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2216          ML_(TyEnt__make_EMPTY)(&typeE);
2217          typeE.tag = Te_TyVoid;
2218          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2219       }
2220
2221       goto acquire_Type;
2222    }
2223
2224    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2225        || dtag == DW_TAG_ptr_to_member_type) {
2226       /* This seems legit for _pointer_type and _reference_type.  I
2227          don't know if rolling _ptr_to_member_type in here really is
2228          legit, but it's better than not handling it at all. */
2229       VG_(memset)(&typeE, 0, sizeof(typeE));
2230       typeE.cuOff = D3_INVALID_CUOFF;
2231       typeE.tag   = Te_TyPorR;
2232       /* target type defaults to void */
2233       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2234       typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
2235                               || dtag == DW_TAG_ptr_to_member_type;
2236       /* These three type kinds don't *have* to specify their size, in
2237          which case we assume it's a machine word.  But if they do
2238          specify it, it must be a machine word :-)  This probably
2239          assumes that the word size of the Dwarf3 we're reading is the
2240          same size as that on the machine.  gcc appears to give a size
2241          whereas icc9 doesn't. */
2242       typeE.Te.TyPorR.szB = sizeof(UWord);
2243       while (True) {
2244          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2245          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2246          if (attr == 0 && form == 0) break;
2247          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2248                             cc, c_die, False/*td3*/, form );
2249          if (attr == DW_AT_byte_size && ctsSzB > 0) {
2250             typeE.Te.TyPorR.szB = cts;
2251          }
2252          if (attr == DW_AT_type && ctsSzB > 0) {
2253             typeE.Te.TyPorR.typeR = (UWord)cts;
2254          }
2255       }
2256       /* Do we have something that looks sane? */
2257       if (typeE.Te.TyPorR.szB != sizeof(UWord))
2258          goto bad_DIE;
2259       else
2260          goto acquire_Type;
2261    }
2262
2263    if (dtag == DW_TAG_enumeration_type) {
2264       /* Create a new Type to hold the results. */
2265       VG_(memset)(&typeE, 0, sizeof(typeE));
2266       typeE.cuOff = posn;
2267       typeE.tag   = Te_TyEnum;
2268       typeE.Te.TyEnum.atomRs
2269          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2270                        ML_(dinfo_free),
2271                        sizeof(UWord) );
2272       while (True) {
2273          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2274          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2275          if (attr == 0 && form == 0) break;
2276          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2277                             cc, c_die, False/*td3*/, form );
2278          if (attr == DW_AT_name && ctsMemSzB > 0) {
2279             typeE.Te.TyEnum.name
2280               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
2281                                    (UChar*)(UWord)cts );
2282          }
2283          if (attr == DW_AT_byte_size && ctsSzB > 0) {
2284             typeE.Te.TyEnum.szB = cts;
2285          }
2286       }
2287
2288       if (!typeE.Te.TyEnum.name)
2289          typeE.Te.TyEnum.name
2290             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2291                                  "<anon_enum_type>" );
2292
2293       /* Do we have something that looks sane? */
2294       if (typeE.Te.TyEnum.szB == 0
2295           /* we must know the size */
2296           /* but not for Ada, which uses such dummy
2297              enumerations as helper for gdb ada mode. */
2298           && parser->language != 'A')
2299          goto bad_DIE;
2300       /* On't stack! */
2301       typestack_push( cc, parser, td3, &typeE, level );
2302       goto acquire_Type;
2303    }
2304
2305    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2306       DW_TAG_enumerator with only a DW_AT_name but no
2307       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2308       and appears to be a new "feature" of gcc - versions 4.3.x and
2309       earlier do not appear to do this.  So accept DW_TAG_enumerator
2310       which only have a name but no value.  An example:
2311
2312       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2313          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2314                                      QtMsgType
2315          <185>   DW_AT_byte_size   : 4
2316          <186>   DW_AT_decl_file   : 14
2317          <187>   DW_AT_decl_line   : 1480
2318          <189>   DW_AT_sibling     : <0x1a7>
2319       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2320          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2321                                      QtDebugMsg
2322       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2323          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2324                                      QtWarningMsg
2325       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2326          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2327                                      QtCriticalMsg
2328       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2329          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2330                                      QtFatalMsg
2331       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2332          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2333                                      QtSystemMsg
2334    */
2335    if (dtag == DW_TAG_enumerator) {
2336       VG_(memset)( &atomE, 0, sizeof(atomE) );
2337       atomE.cuOff = posn;
2338       atomE.tag   = Te_Atom;
2339       while (True) {
2340          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2341          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2342          if (attr == 0 && form == 0) break;
2343          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2344                             cc, c_die, False/*td3*/, form );
2345          if (attr == DW_AT_name && ctsMemSzB > 0) {
2346             atomE.Te.Atom.name
2347               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
2348                                    (UChar*)(UWord)cts );
2349          }
2350          if (attr == DW_AT_const_value && ctsSzB > 0) {
2351             atomE.Te.Atom.value = cts;
2352             atomE.Te.Atom.valueKnown = True;
2353          }
2354       }
2355       /* Do we have something that looks sane? */
2356       if (atomE.Te.Atom.name == NULL)
2357          goto bad_DIE;
2358       /* Do we have a plausible parent? */
2359       if (typestack_is_empty(parser)) goto bad_DIE;
2360       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2361       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2362       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2363       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
2364       /* Record this child in the parent */
2365       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2366       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2367                     &atomE );
2368       /* And record the child itself */
2369       goto acquire_Atom;
2370    }
2371
2372    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2373       don't know if this is correct, but it at least makes this reader
2374       usable for gcc-4.3 produced Dwarf3. */
2375    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2376        || dtag == DW_TAG_union_type) {
2377       Bool have_szB = False;
2378       Bool is_decl  = False;
2379       Bool is_spec  = False;
2380       /* Create a new Type to hold the results. */
2381       VG_(memset)(&typeE, 0, sizeof(typeE));
2382       typeE.cuOff = posn;
2383       typeE.tag   = Te_TyStOrUn;
2384       typeE.Te.TyStOrUn.name = NULL;
2385       typeE.Te.TyStOrUn.fieldRs
2386          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2387                        ML_(dinfo_free),
2388                        sizeof(UWord) );
2389       typeE.Te.TyStOrUn.complete = True;
2390       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2391                                    || dtag == DW_TAG_class_type;
2392       while (True) {
2393          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2394          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2395          if (attr == 0 && form == 0) break;
2396          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2397                             cc, c_die, False/*td3*/, form );
2398          if (attr == DW_AT_name && ctsMemSzB > 0) {
2399             typeE.Te.TyStOrUn.name
2400                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
2401                                     (UChar*)(UWord)cts );
2402          }
2403          if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2404             typeE.Te.TyStOrUn.szB = cts;
2405             have_szB = True;
2406          }
2407          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2408             is_decl = True;
2409          }
2410          if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2411             is_spec = True;
2412          }
2413       }
2414       /* Do we have something that looks sane? */
2415       if (is_decl && (!is_spec)) {
2416          /* It's a DW_AT_declaration.  We require the name but
2417             nothing else. */
2418          if (typeE.Te.TyStOrUn.name == NULL)
2419             goto bad_DIE;
2420          typeE.Te.TyStOrUn.complete = False;
2421          /* JRS 2009 Aug 10: <possible kludge>? */
2422          /* Push this tyent on the stack, even though it's incomplete.
2423             It appears that gcc-4.4 on Fedora 11 will sometimes create
2424             DW_TAG_member entries for it, and so we need to have a
2425             plausible parent present in order for that to work.  See
2426             #200029 comments 8 and 9. */
2427          typestack_push( cc, parser, td3, &typeE, level );
2428          /* </possible kludge> */
2429          goto acquire_Type;
2430       }
2431       if ((!is_decl) /* && (!is_spec) */) {
2432          /* this is the common, ordinary case */
2433          if ((!have_szB) /* we must know the size */
2434              /* But the name can be present, or not */)
2435             goto bad_DIE;
2436          /* On't stack! */
2437          typestack_push( cc, parser, td3, &typeE, level );
2438          goto acquire_Type;
2439       }
2440       else {
2441          /* don't know how to handle any other variants just now */
2442          goto bad_DIE;
2443       }
2444    }
2445
2446    if (dtag == DW_TAG_member) {
2447       /* Acquire member entries for both DW_TAG_structure_type and
2448          DW_TAG_union_type.  They differ minorly, in that struct
2449          members must have a DW_AT_data_member_location expression
2450          whereas union members must not. */
2451       Bool parent_is_struct;
2452       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2453       fieldE.cuOff = posn;
2454       fieldE.tag   = Te_Field;
2455       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2456       while (True) {
2457          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2458          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2459          if (attr == 0 && form == 0) break;
2460          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2461                             cc, c_die, False/*td3*/, form );
2462          if (attr == DW_AT_name && ctsMemSzB > 0) {
2463             fieldE.Te.Field.name
2464                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
2465                                     (UChar*)(UWord)cts );
2466          }
2467          if (attr == DW_AT_type && ctsSzB > 0) {
2468             fieldE.Te.Field.typeR = (UWord)cts;
2469          }
2470          /* There are 2 different cases for DW_AT_data_member_location.
2471             If it is a constant class attribute, it contains byte offset
2472             from the beginning of the containing entity.
2473             Otherwise it is a location expression.  */
2474          if (attr == DW_AT_data_member_location && ctsSzB > 0) {
2475             fieldE.Te.Field.nLoc = -1;
2476             fieldE.Te.Field.pos.offset = cts;
2477          } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2478             fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
2479             fieldE.Te.Field.pos.loc
2480                = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
2481                                     (UChar*)(UWord)cts,
2482                                     (SizeT)fieldE.Te.Field.nLoc );
2483          }
2484       }
2485       /* Do we have a plausible parent? */
2486       if (typestack_is_empty(parser)) goto bad_DIE;
2487       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2488       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2489       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2490       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
2491       /* Do we have something that looks sane?  If this is a member of a
2492          struct, we must have a location expression; but if a member
2493          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2494          to reject in the latter case, but some compilers have been
2495          observed to emit constant-zero expressions.  So just ignore
2496          them. */
2497       parent_is_struct
2498          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2499       if (!fieldE.Te.Field.name)
2500          fieldE.Te.Field.name
2501             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2502                                  "<anon_field>" );
2503       vg_assert(fieldE.Te.Field.name);
2504       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2505          goto bad_DIE;
2506       if (fieldE.Te.Field.nLoc) {
2507          if (!parent_is_struct) {
2508             /* If this is a union type, pretend we haven't seen the data
2509                member location expression, as it is by definition
2510                redundant (it must be zero). */
2511             if (fieldE.Te.Field.nLoc > 0)
2512                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2513             fieldE.Te.Field.pos.loc = NULL;
2514             fieldE.Te.Field.nLoc = 0;
2515          }
2516          /* Record this child in the parent */
2517          fieldE.Te.Field.isStruct = parent_is_struct;
2518          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2519          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2520                        &posn );
2521          /* And record the child itself */
2522          goto acquire_Field;
2523       } else {
2524          /* Member with no location - this can happen with static
2525             const members in C++ code which are compile time constants
2526             that do not exist in the class. They're not of any interest
2527             to us so we ignore them. */
2528       }
2529    }
2530
2531    if (dtag == DW_TAG_array_type) {
2532       VG_(memset)(&typeE, 0, sizeof(typeE));
2533       typeE.cuOff = posn;
2534       typeE.tag   = Te_TyArray;
2535       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2536       typeE.Te.TyArray.boundRs
2537          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2538                        ML_(dinfo_free),
2539                        sizeof(UWord) );
2540       while (True) {
2541          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2542          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2543          if (attr == 0 && form == 0) break;
2544          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2545                             cc, c_die, False/*td3*/, form );
2546          if (attr == DW_AT_type && ctsSzB > 0) {
2547             typeE.Te.TyArray.typeR = (UWord)cts;
2548          }
2549       }
2550       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2551          goto bad_DIE;
2552       /* On't stack! */
2553       typestack_push( cc, parser, td3, &typeE, level );
2554       goto acquire_Type;
2555    }
2556
2557    /* this is a subrange type defining the bounds of an array. */
2558    if (dtag == DW_TAG_subrange_type
2559        && subrange_type_denotes_array_bounds(parser, dtag)) {
2560       Bool have_lower = False;
2561       Bool have_upper = False;
2562       Bool have_count = False;
2563       Long lower = 0;
2564       Long upper = 0;
2565
2566       switch (parser->language) {
2567          case 'C': have_lower = True;  lower = 0; break;
2568          case 'F': have_lower = True;  lower = 1; break;
2569          case '?': have_lower = False; break;
2570          case 'A': have_lower = False; break;
2571          default:  vg_assert(0); /* assured us by handling of
2572                                     DW_TAG_compile_unit in this fn */
2573       }
2574
2575       VG_(memset)( &boundE, 0, sizeof(boundE) );
2576       boundE.cuOff = D3_INVALID_CUOFF;
2577       boundE.tag   = Te_Bound;
2578       while (True) {
2579          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2580          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2581          if (attr == 0 && form == 0) break;
2582          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2583                             cc, c_die, False/*td3*/, form );
2584          if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2585             lower      = (Long)cts;
2586             have_lower = True;
2587          }
2588          if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2589             upper      = (Long)cts;
2590             have_upper = True;
2591          }
2592          if (attr == DW_AT_count && ctsSzB > 0) {
2593             /*count    = (Long)cts;*/
2594             have_count = True;
2595          }
2596       }
2597       /* FIXME: potentially skip the rest if no parent present, since
2598          it could be the case that this subrange type is free-standing
2599          (not being used to describe the bounds of a containing array
2600          type) */
2601       /* Do we have a plausible parent? */
2602       if (typestack_is_empty(parser)) goto bad_DIE;
2603       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2604       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2605       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2606       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
2607
2608       /* Figure out if we have a definite range or not */
2609       if (have_lower && have_upper && (!have_count)) {
2610          boundE.Te.Bound.knownL = True;
2611          boundE.Te.Bound.knownU = True;
2612          boundE.Te.Bound.boundL = lower;
2613          boundE.Te.Bound.boundU = upper;
2614       }
2615       else if (have_lower && (!have_upper) && (!have_count)) {
2616          boundE.Te.Bound.knownL = True;
2617          boundE.Te.Bound.knownU = False;
2618          boundE.Te.Bound.boundL = lower;
2619          boundE.Te.Bound.boundU = 0;
2620       }
2621       else if ((!have_lower) && have_upper && (!have_count)) {
2622          boundE.Te.Bound.knownL = False;
2623          boundE.Te.Bound.knownU = True;
2624          boundE.Te.Bound.boundL = 0;
2625          boundE.Te.Bound.boundU = upper;
2626       }
2627       else if ((!have_lower) && (!have_upper) && (!have_count)) {
2628          boundE.Te.Bound.knownL = False;
2629          boundE.Te.Bound.knownU = False;
2630          boundE.Te.Bound.boundL = 0;
2631          boundE.Te.Bound.boundU = 0;
2632       } else {
2633          /* FIXME: handle more cases */
2634          goto bad_DIE;
2635       }
2636
2637       /* Record this bound in the parent */
2638       boundE.cuOff = posn;
2639       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2640       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2641                     &boundE );
2642       /* And record the child itself */
2643       goto acquire_Bound;
2644    }
2645
2646    /* typedef or subrange_type other than array bounds. */
2647    if (dtag == DW_TAG_typedef
2648        || (dtag == DW_TAG_subrange_type
2649            && !subrange_type_denotes_array_bounds(parser, dtag))) {
2650       /* subrange_type other than array bound is only for Ada. */
2651       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
2652       /* We can pick up a new typedef/subrange_type any time. */
2653       VG_(memset)(&typeE, 0, sizeof(typeE));
2654       typeE.cuOff = D3_INVALID_CUOFF;
2655       typeE.tag   = Te_TyTyDef;
2656       typeE.Te.TyTyDef.name = NULL;
2657       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2658       while (True) {
2659          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2660          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2661          if (attr == 0 && form == 0) break;
2662          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2663                             cc, c_die, False/*td3*/, form );
2664          if (attr == DW_AT_name && ctsMemSzB > 0) {
2665             typeE.Te.TyTyDef.name
2666                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
2667                                     (UChar*)(UWord)cts );
2668          }
2669          if (attr == DW_AT_type && ctsSzB > 0) {
2670             typeE.Te.TyTyDef.typeR = (UWord)cts;
2671          }
2672       }
2673       /* Do we have something that looks sane? */
2674       if (/* must have a name */
2675           typeE.Te.TyTyDef.name == NULL
2676           /* However gcc gnat Ada generates minimal typedef
2677              such as the below => accept no name for Ada.
2678              <6><91cc>: DW_TAG_typedef
2679                 DW_AT_abstract_ori: <9066>
2680           */
2681           && parser->language != 'A'
2682           /* but the referred-to type can be absent */)
2683          goto bad_DIE;
2684       else
2685          goto acquire_Type;
2686    }
2687
2688    if (dtag == DW_TAG_subroutine_type) {
2689       /* function type? just record that one fact and ask no
2690          further questions. */
2691       VG_(memset)(&typeE, 0, sizeof(typeE));
2692       typeE.cuOff = D3_INVALID_CUOFF;
2693       typeE.tag   = Te_TyFn;
2694       goto acquire_Type;
2695    }
2696
2697    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2698       Int have_ty = 0;
2699       VG_(memset)(&typeE, 0, sizeof(typeE));
2700       typeE.cuOff = D3_INVALID_CUOFF;
2701       typeE.tag   = Te_TyQual;
2702       typeE.Te.TyQual.qual
2703          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2704       /* target type defaults to 'void' */
2705       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2706       while (True) {
2707          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2708          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2709          if (attr == 0 && form == 0) break;
2710          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2711                             cc, c_die, False/*td3*/, form );
2712          if (attr == DW_AT_type && ctsSzB > 0) {
2713             typeE.Te.TyQual.typeR = (UWord)cts;
2714             have_ty++;
2715          }
2716       }
2717       /* gcc sometimes generates DW_TAG_const/volatile_type without
2718          DW_AT_type and GDB appears to interpret the type as 'const
2719          void' (resp. 'volatile void').  So just allow it .. */
2720       if (have_ty == 1 || have_ty == 0)
2721          goto acquire_Type;
2722       else
2723          goto bad_DIE;
2724    }
2725
2726    /* else ignore this DIE */
2727    return;
2728    /*NOTREACHED*/
2729
2730   acquire_Type:
2731    if (0) VG_(printf)("YYYY Acquire Type\n");
2732    vg_assert(ML_(TyEnt__is_type)( &typeE ));
2733    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
2734    typeE.cuOff = posn;
2735    VG_(addToXA)( tyents, &typeE );
2736    return;
2737    /*NOTREACHED*/
2738
2739   acquire_Atom:
2740    if (0) VG_(printf)("YYYY Acquire Atom\n");
2741    vg_assert(atomE.tag == Te_Atom);
2742    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
2743    atomE.cuOff = posn;
2744    VG_(addToXA)( tyents, &atomE );
2745    return;
2746    /*NOTREACHED*/
2747
2748   acquire_Field:
2749    /* For union members, Expr should be absent */
2750    if (0) VG_(printf)("YYYY Acquire Field\n");
2751    vg_assert(fieldE.tag == Te_Field);
2752    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
2753    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
2754    if (fieldE.Te.Field.isStruct) {
2755       vg_assert(fieldE.Te.Field.nLoc != 0);
2756    } else {
2757       vg_assert(fieldE.Te.Field.nLoc == 0);
2758    }
2759    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
2760    fieldE.cuOff = posn;
2761    VG_(addToXA)( tyents, &fieldE );
2762    return;
2763    /*NOTREACHED*/
2764
2765   acquire_Bound:
2766    if (0) VG_(printf)("YYYY Acquire Bound\n");
2767    vg_assert(boundE.tag == Te_Bound);
2768    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
2769    boundE.cuOff = posn;
2770    VG_(addToXA)( tyents, &boundE );
2771    return;
2772    /*NOTREACHED*/
2773
2774   bad_DIE:
2775    set_position_of_Cursor( c_die,  saved_die_c_offset );
2776    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2777    VG_(printf)("\nparse_type_DIE: confused by:\n");
2778    VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2779    while (True) {
2780       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2781       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2782       if (attr == 0 && form == 0) break;
2783       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2784       /* Get the form contents, so as to print them */
2785       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2786                          cc, c_die, True, form );
2787       VG_(printf)("\t\n");
2788    }
2789    VG_(printf)("\n");
2790    cc->barf("parse_type_DIE: confused by the above DIE");
2791    /*NOTREACHED*/
2792 }
2793
2794
2795 /*------------------------------------------------------------*/
2796 /*---                                                      ---*/
2797 /*--- Compression of type DIE information                  ---*/
2798 /*---                                                      ---*/
2799 /*------------------------------------------------------------*/
2800
2801 static UWord chase_cuOff ( Bool* changed,
2802                            XArray* /* of TyEnt */ ents,
2803                            TyEntIndexCache* ents_cache,
2804                            UWord cuOff )
2805 {
2806    TyEnt* ent;
2807    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
2808
2809    if (!ent) {
2810       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
2811       *changed = False;
2812       return cuOff;
2813    }
2814
2815    vg_assert(ent->tag != Te_EMPTY);
2816    if (ent->tag != Te_INDIR) {
2817       *changed = False;
2818       return cuOff;
2819    } else {
2820       vg_assert(ent->Te.INDIR.indR < cuOff);
2821       *changed = True;
2822       return ent->Te.INDIR.indR;
2823    }
2824 }
2825
2826 static
2827 void chase_cuOffs_in_XArray ( Bool* changed,
2828                               XArray* /* of TyEnt */ ents,
2829                               TyEntIndexCache* ents_cache,
2830                               /*MOD*/XArray* /* of UWord */ cuOffs )
2831 {
2832    Bool b2 = False;
2833    Word i, n = VG_(sizeXA)( cuOffs );
2834    for (i = 0; i < n; i++) {
2835       Bool   b = False;
2836       UWord* p = VG_(indexXA)( cuOffs, i );
2837       *p = chase_cuOff( &b, ents, ents_cache, *p );
2838       if (b)
2839          b2 = True;
2840    }
2841    *changed = b2;
2842 }
2843
2844 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
2845                                     TyEntIndexCache* ents_cache,
2846                                     /*MOD*/TyEnt* te )
2847 {
2848    Bool b, changed = False;
2849    switch (te->tag) {
2850       case Te_EMPTY:
2851          break;
2852       case Te_INDIR:
2853          te->Te.INDIR.indR
2854             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
2855          if (b) changed = True;
2856          break;
2857       case Te_UNKNOWN:
2858          break;
2859       case Te_Atom:
2860          break;
2861       case Te_Field:
2862          te->Te.Field.typeR
2863             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
2864          if (b) changed = True;
2865          break;
2866       case Te_Bound:
2867          break;
2868       case Te_TyBase:
2869          break;
2870       case Te_TyPorR:
2871          te->Te.TyPorR.typeR
2872             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
2873          if (b) changed = True;
2874          break;
2875       case Te_TyTyDef:
2876          te->Te.TyTyDef.typeR
2877             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
2878          if (b) changed = True;
2879          break;
2880       case Te_TyStOrUn:
2881          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
2882          if (b) changed = True;
2883          break;
2884       case Te_TyEnum:
2885          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
2886          if (b) changed = True;
2887          break;
2888       case Te_TyArray:
2889          te->Te.TyArray.typeR
2890             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
2891          if (b) changed = True;
2892          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
2893          if (b) changed = True;
2894          break;
2895       case Te_TyFn:
2896          break;
2897       case Te_TyQual:
2898          te->Te.TyQual.typeR
2899             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
2900          if (b) changed = True;
2901          break;
2902       case Te_TyVoid:
2903          break;
2904       default:
2905          ML_(pp_TyEnt)(te);
2906          vg_assert(0);
2907    }
2908    return changed;
2909 }
2910
2911 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
2912    'R' or 'Rs' fields (those which refer to other tyents), and replace
2913    any which point to INDIR nodes with the target of the indirection
2914    (which should not itself be an indirection).  In summary, this
2915    routine shorts out all references to indirection nodes. */
2916 static
2917 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
2918                                      TyEntIndexCache* ents_cache )
2919 {
2920    Word i, n, nChanged = 0;
2921    Bool b;
2922    n = VG_(sizeXA)( ents );
2923    for (i = 0; i < n; i++) {
2924       TyEnt* ent = VG_(indexXA)( ents, i );
2925       vg_assert(ent->tag != Te_EMPTY);
2926       /* We have to substitute everything, even indirections, so as to
2927          ensure that chains of indirections don't build up. */
2928       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
2929       if (b)
2930          nChanged++;
2931    }
2932
2933    return nChanged;
2934 }
2935
2936
2937 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
2938    Look up each new tyent in the dictionary in turn.  If it is already
2939    in the dictionary, replace this tyent with an indirection to the
2940    existing one, and delete any malloc'd stuff hanging off this one.
2941    In summary, this routine commons up all tyents that are identical
2942    as defined by TyEnt__cmp_by_all_except_cuOff. */
2943 static
2944 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
2945 {
2946    Word    n, i, nDeleted;
2947    WordFM* dict; /* TyEnt* -> void */
2948    TyEnt*  ent;
2949    UWord   keyW, valW;
2950
2951    dict = VG_(newFM)(
2952              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
2953              ML_(dinfo_free),
2954              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
2955           );
2956
2957    nDeleted = 0;
2958    n = VG_(sizeXA)( ents );
2959    for (i = 0; i < n; i++) {
2960       ent = VG_(indexXA)( ents, i );
2961       vg_assert(ent->tag != Te_EMPTY);
2962
2963       /* Ignore indirections, although check that they are
2964          not forming a cycle. */
2965       if (ent->tag == Te_INDIR) {
2966          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
2967          continue;
2968       }
2969
2970       keyW = valW = 0;
2971       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
2972          /* it's already in the dictionary. */
2973          TyEnt* old = (TyEnt*)keyW;
2974          vg_assert(valW == 0);
2975          vg_assert(old != ent);
2976          vg_assert(old->tag != Te_INDIR);
2977          /* since we are traversing the array in increasing order of
2978             cuOff: */
2979          vg_assert(old->cuOff < ent->cuOff);
2980          /* So anyway, dump this entry and replace it with an
2981             indirection to the one in the dictionary.  Note that the
2982             assertion above guarantees that we cannot create cycles of
2983             indirections, since we are always creating an indirection
2984             to a tyent with a cuOff lower than this one. */
2985          ML_(TyEnt__make_EMPTY)( ent );
2986          ent->tag = Te_INDIR;
2987          ent->Te.INDIR.indR = old->cuOff;
2988          nDeleted++;
2989       } else {
2990          /* not in dictionary; add it and keep going. */
2991          VG_(addToFM)( dict, (UWord)ent, 0 );
2992       }
2993    }
2994
2995    VG_(deleteFM)( dict, NULL, NULL );
2996
2997    return nDeleted;
2998 }
2999
3000
3001 static
3002 void dedup_types ( Bool td3,
3003                    /*MOD*/XArray* /* of TyEnt */ ents,
3004                    TyEntIndexCache* ents_cache )
3005 {
3006    Word m, n, i, nDel, nSubst, nThresh;
3007    if (0) td3 = True;
3008
3009    n = VG_(sizeXA)( ents );
3010
3011    /* If a commoning pass and a substitution pass both make fewer than
3012       this many changes, just stop.  It's pointless to burn up CPU
3013       time trying to compress the last 1% or so out of the array. */
3014    nThresh = n / 200;
3015
3016    /* First we must sort .ents by its .cuOff fields, so we
3017       can index into it. */
3018    VG_(setCmpFnXA)(
3019       ents,
3020       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3021    );
3022    VG_(sortXA)( ents );
3023
3024    /* Now repeatedly do commoning and substitution passes over
3025       the array, until there are no more changes. */
3026    do {
3027       nDel   = dedup_types_commoning_pass ( ents );
3028       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
3029       vg_assert(nDel >= 0 && nSubst >= 0);
3030       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
3031    } while (nDel > nThresh || nSubst > nThresh);
3032
3033    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3034       In fact this should be true at the end of every loop iteration
3035       above (a commoning pass followed by a substitution pass), but
3036       checking it on every iteration is excessively expensive.  Note,
3037       this loop also computes 'm' for the stats printing below it. */
3038    m = 0;
3039    n = VG_(sizeXA)( ents );
3040    for (i = 0; i < n; i++) {
3041       TyEnt *ent, *ind;
3042       ent = VG_(indexXA)( ents, i );
3043       if (ent->tag != Te_INDIR) continue;
3044       m++;
3045       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3046                                          ent->Te.INDIR.indR );
3047       vg_assert(ind);
3048       vg_assert(ind->tag != Te_INDIR);
3049    }
3050
3051    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3052 }
3053
3054
3055 /*------------------------------------------------------------*/
3056 /*---                                                      ---*/
3057 /*--- Resolution of references to type DIEs                ---*/
3058 /*---                                                      ---*/
3059 /*------------------------------------------------------------*/
3060
3061 /* Make a pass through the (temporary) variables array.  Examine the
3062    type of each variable, check is it found, and chase any Te_INDIRs.
3063    Postcondition is: each variable has a typeR field that refers to a
3064    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3065    not to refer to a Te_INDIR.  (This is so that we can throw all the
3066    Te_INDIRs away later). */
3067
3068 __attribute__((noinline))
3069 static void resolve_variable_types (
3070                void (*barf)( HChar* ) __attribute__((noreturn)),
3071                /*R-O*/XArray* /* of TyEnt */ ents,
3072                /*MOD*/TyEntIndexCache* ents_cache,
3073                /*MOD*/XArray* /* of TempVar* */ vars
3074             )
3075 {
3076    Word i, n;
3077    n = VG_(sizeXA)( vars );
3078    for (i = 0; i < n; i++) {
3079       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3080       /* This is the stated type of the variable.  But it might be
3081          an indirection, so be careful. */
3082       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3083                                                 var->typeR );
3084       if (ent && ent->tag == Te_INDIR) {
3085          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3086                                             ent->Te.INDIR.indR );
3087          vg_assert(ent);
3088          vg_assert(ent->tag != Te_INDIR);
3089       }
3090
3091       /* Deal first with "normal" cases */
3092       if (ent && ML_(TyEnt__is_type)(ent)) {
3093          var->typeR = ent->cuOff;
3094          continue;
3095       }
3096
3097       /* If there's no ent, it probably we did not manage to read a
3098          type at the cuOffset which is stated as being this variable's
3099          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
3100       if (ent == NULL) {
3101          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3102          barf("resolve_variable_types: "
3103               "cuOff does not refer to a known type");
3104       }
3105       vg_assert(ent);
3106       /* If ent has any other tag, something bad happened, along the
3107          lines of var->typeR not referring to a type at all. */
3108       vg_assert(ent->tag == Te_UNKNOWN);
3109       /* Just accept it; the type will be useless, but at least keep
3110          going. */
3111       var->typeR = ent->cuOff;
3112    }
3113 }
3114
3115
3116 /*------------------------------------------------------------*/
3117 /*---                                                      ---*/
3118 /*--- Parsing of Compilation Units                         ---*/
3119 /*---                                                      ---*/
3120 /*------------------------------------------------------------*/
3121
3122 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
3123    TempVar* t1 = *(TempVar**)v1;
3124    TempVar* t2 = *(TempVar**)v2;
3125    if (t1->dioff < t2->dioff) return -1;
3126    if (t1->dioff > t2->dioff) return 1;
3127    return 0;
3128 }
3129
3130 static void read_DIE (
3131    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3132    /*MOD*/XArray* /* of TyEnt */ tyents,
3133    /*MOD*/XArray* /* of TempVar* */ tempvars,
3134    /*MOD*/XArray* /* of GExpr* */ gexprs,
3135    /*MOD*/D3TypeParser* typarser,
3136    /*MOD*/D3VarParser* varparser,
3137    Cursor* c, Bool td3, CUConst* cc, Int level
3138 )
3139 {
3140    Cursor abbv;
3141    ULong  atag, abbv_code;
3142    UWord  posn;
3143    UInt   has_children;
3144    UWord  start_die_c_offset, start_abbv_c_offset;
3145    UWord  after_die_c_offset, after_abbv_c_offset;
3146
3147    /* --- Deal with this DIE --- */
3148    posn      = get_position_of_Cursor( c );
3149    abbv_code = get_ULEB128( c );
3150    set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3151    atag      = get_ULEB128( &abbv );
3152    TRACE_D3("\n");
3153    TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3154             level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3155
3156    if (atag == 0)
3157       cc->barf("read_DIE: invalid zero tag on DIE");
3158
3159    has_children = get_UChar( &abbv );
3160    if (has_children != DW_children_no && has_children != DW_children_yes)
3161       cc->barf("read_DIE: invalid has_children value");
3162
3163    /* We're set up to look at the fields of this DIE.  Hand it off to
3164       any parser(s) that want to see it.  Since they will in general
3165       advance both the DIE and abbrev cursors, remember their current
3166       settings so that we can then back up and do one final pass over
3167       the DIE, to print out its contents. */
3168
3169    start_die_c_offset  = get_position_of_Cursor( c );
3170    start_abbv_c_offset = get_position_of_Cursor( &abbv );
3171
3172    while (True) {
3173       ULong cts;
3174       Int   ctsSzB;
3175       UWord ctsMemSzB;
3176       ULong at_name = get_ULEB128( &abbv );
3177       ULong at_form = get_ULEB128( &abbv );
3178       if (at_name == 0 && at_form == 0) break;
3179       TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
3180       /* Get the form contents, but ignore them; the only purpose is
3181          to print them, if td3 is True */
3182       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3183                          cc, c, td3, (DW_FORM)at_form );
3184       TRACE_D3("\t");
3185       TRACE_D3("\n");
3186    }
3187
3188    after_die_c_offset  = get_position_of_Cursor( c );
3189    after_abbv_c_offset = get_position_of_Cursor( &abbv );
3190
3191    set_position_of_Cursor( c,     start_die_c_offset );
3192    set_position_of_Cursor( &abbv, start_abbv_c_offset );
3193
3194    parse_type_DIE( tyents,
3195                    typarser,
3196                    (DW_TAG)atag,
3197                    posn,
3198                    level,
3199                    c,     /* DIE cursor */
3200                    &abbv, /* abbrev cursor */
3201                    cc,
3202                    td3 );
3203
3204    set_position_of_Cursor( c,     start_die_c_offset );
3205    set_position_of_Cursor( &abbv, start_abbv_c_offset );
3206
3207    parse_var_DIE( rangestree,
3208                   tempvars,
3209                   gexprs,
3210                   varparser,
3211                   (DW_TAG)atag,
3212                   posn,
3213                   level,
3214                   c,     /* DIE cursor */
3215                   &abbv, /* abbrev cursor */
3216                   cc,
3217                   td3 );
3218
3219    set_position_of_Cursor( c,     after_die_c_offset );
3220    set_position_of_Cursor( &abbv, after_abbv_c_offset );
3221
3222    /* --- Now recurse into its children, if any --- */
3223    if (has_children == DW_children_yes) {
3224       if (0) TRACE_D3("BEGIN children of level %d\n", level);
3225       while (True) {
3226          atag = peek_ULEB128( c );
3227          if (atag == 0) break;
3228          read_DIE( rangestree, tyents, tempvars, gexprs,
3229                    typarser, varparser,
3230                    c, td3, cc, level+1 );
3231       }
3232       /* Now we need to eat the terminating zero */
3233       atag = get_ULEB128( c );
3234       vg_assert(atag == 0);
3235       if (0) TRACE_D3("END children of level %d\n", level);
3236    }
3237
3238 }
3239
3240
3241 static
3242 void new_dwarf3_reader_wrk (
3243    struct _DebugInfo* di,
3244    __attribute__((noreturn)) void (*barf)( HChar* ),
3245    UChar* debug_info_img,   SizeT debug_info_sz,
3246    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3247    UChar* debug_line_img,   SizeT debug_line_sz,
3248    UChar* debug_str_img,    SizeT debug_str_sz,
3249    UChar* debug_ranges_img, SizeT debug_ranges_sz,
3250    UChar* debug_loc_img,    SizeT debug_loc_sz
3251 )
3252 {
3253    XArray* /* of TyEnt */     tyents;
3254    XArray* /* of TyEnt */     tyents_to_keep;
3255    XArray* /* of GExpr* */    gexprs;
3256    XArray* /* of TempVar* */  tempvars;
3257    WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3258    TyEntIndexCache* tyents_cache = NULL;
3259    TyEntIndexCache* tyents_to_keep_cache = NULL;
3260    TempVar *varp, *varp2;
3261    GExpr* gexpr;
3262    Cursor abbv; /* for showing .debug_abbrev */
3263    Cursor info; /* primary cursor for parsing .debug_info */
3264    Cursor ranges; /* for showing .debug_ranges */
3265    D3TypeParser typarser;
3266    D3VarParser varparser;
3267    Addr  dr_base;
3268    UWord dr_offset;
3269    Word  i, j, n;
3270    Bool td3 = di->trace_symtab;
3271    XArray* /* of TempVar* */ dioff_lookup_tab;
3272 #if 0
3273    /* This doesn't work properly because it assumes all entries are
3274       packed end to end, with no holes.  But that doesn't always
3275       appear to be the case, so it loses sync.  And the D3 spec
3276       doesn't appear to require a no-hole situation either. */
3277    /* Display .debug_loc */
3278    Addr  dl_base;
3279    UWord dl_offset;
3280    Cursor loc; /* for showing .debug_loc */
3281    TRACE_SYMTAB("\n");
3282    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3283    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3284    init_Cursor( &loc, debug_loc_img,
3285                 debug_loc_sz, 0, barf,
3286                 "Overrun whilst reading .debug_loc section(1)" );
3287    dl_base = 0;
3288    dl_offset = 0;
3289    while (True) {
3290       UWord  w1, w2;
3291       UWord  len;
3292       if (is_at_end_Cursor( &loc ))
3293          break;
3294
3295       /* Read a (host-)word pair.  This is something of a hack since
3296          the word size to read is really dictated by the ELF file;
3297          however, we assume we're reading a file with the same
3298          word-sizeness as the host.  Reasonably enough. */
3299       w1 = get_UWord( &loc );
3300       w2 = get_UWord( &loc );
3301
3302       if (w1 == 0 && w2 == 0) {
3303          /* end of list.  reset 'base' */
3304          TRACE_D3("    %08lx <End of list>\n", dl_offset);
3305          dl_base = 0;
3306          dl_offset = get_position_of_Cursor( &loc );
3307          continue;
3308       }
3309
3310       if (w1 == -1UL) {
3311          /* new value for 'base' */
3312          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3313                   dl_offset, w1, w2);
3314          dl_base = w2;
3315          continue;
3316       }
3317
3318       /* else a location expression follows */
3319       TRACE_D3("    %08lx %08lx %08lx ",
3320                dl_offset, w1 + dl_base, w2 + dl_base);
3321       len = (UWord)get_UShort( &loc );
3322       while (len > 0) {
3323          UChar byte = get_UChar( &loc );
3324          TRACE_D3("%02x", (UInt)byte);
3325          len--;
3326       }
3327       TRACE_SYMTAB("\n");
3328    }
3329 #endif
3330
3331    /* Display .debug_ranges */
3332    TRACE_SYMTAB("\n");
3333    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3334    TRACE_SYMTAB("    Offset   Begin    End\n");
3335    init_Cursor( &ranges, debug_ranges_img,
3336                 debug_ranges_sz, 0, barf,
3337                 "Overrun whilst reading .debug_ranges section(1)" );
3338    dr_base = 0;
3339    dr_offset = 0;
3340    while (True) {
3341       UWord  w1, w2;
3342
3343       if (is_at_end_Cursor( &ranges ))
3344          break;
3345
3346       /* Read a (host-)word pair.  This is something of a hack since
3347          the word size to read is really dictated by the ELF file;
3348          however, we assume we're reading a file with the same
3349          word-sizeness as the host.  Reasonably enough. */
3350       w1 = get_UWord( &ranges );
3351       w2 = get_UWord( &ranges );
3352
3353       if (w1 == 0 && w2 == 0) {
3354          /* end of list.  reset 'base' */
3355          TRACE_D3("    %08lx <End of list>\n", dr_offset);
3356          dr_base = 0;
3357          dr_offset = get_position_of_Cursor( &ranges );
3358          continue;
3359       }
3360
3361       if (w1 == -1UL) {
3362          /* new value for 'base' */
3363          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3364                   dr_offset, w1, w2);
3365          dr_base = w2;
3366          continue;
3367       }
3368
3369       /* else a range [w1+base, w2+base) is denoted */
3370       TRACE_D3("    %08lx %08lx %08lx\n",
3371                dr_offset, w1 + dr_base, w2 + dr_base);
3372    }
3373
3374    /* Display .debug_abbrev */
3375    init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
3376                 "Overrun whilst reading .debug_abbrev section" );
3377    TRACE_SYMTAB("\n");
3378    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3379    while (True) {
3380       if (is_at_end_Cursor( &abbv ))
3381          break;
3382       /* Read one abbreviation table */
3383       TRACE_D3("  Number TAG\n");
3384       while (True) {
3385          ULong atag;
3386          UInt  has_children;
3387          ULong acode = get_ULEB128( &abbv );
3388          if (acode == 0) break; /* end of the table */
3389          atag = get_ULEB128( &abbv );
3390          has_children = get_UChar( &abbv );
3391          TRACE_D3("   %llu      %s    [%s]\n",
3392                   acode, ML_(pp_DW_TAG)(atag),
3393                          ML_(pp_DW_children)(has_children));
3394          while (True) {
3395             ULong at_name = get_ULEB128( &abbv );
3396             ULong at_form = get_ULEB128( &abbv );
3397             if (at_name == 0 && at_form == 0) break;
3398             TRACE_D3("    %18s %s\n",
3399                      ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3400          }
3401       }
3402    }
3403    TRACE_SYMTAB("\n");
3404
3405    /* Now loop over the Compilation Units listed in the .debug_info
3406       section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3407       unit contains a Compilation Unit Header followed by precisely
3408       one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3409    init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
3410                 "Overrun whilst reading .debug_info section" );
3411
3412    /* We'll park the harvested type information in here.  Also create
3413       a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3414       have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3415       huge and presumably will not occur in any valid DWARF3 file --
3416       it would need to have a .debug_info section 4GB long for that to
3417       happen.  These type entries end up in the DebugInfo. */
3418    tyents = VG_(newXA)( ML_(dinfo_zalloc),
3419                         "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3420                         ML_(dinfo_free), sizeof(TyEnt) );
3421    { TyEnt tyent;
3422      VG_(memset)(&tyent, 0, sizeof(tyent));
3423      tyent.tag   = Te_TyVoid;
3424      tyent.cuOff = D3_FAKEVOID_CUOFF;
3425      tyent.Te.TyVoid.isFake = True;
3426      VG_(addToXA)( tyents, &tyent );
3427    }
3428    { TyEnt tyent;
3429      VG_(memset)(&tyent, 0, sizeof(tyent));
3430      tyent.tag   = Te_UNKNOWN;
3431      tyent.cuOff = D3_INVALID_CUOFF;
3432      VG_(addToXA)( tyents, &tyent );
3433    }
3434
3435    /* This is a tree used to unique-ify the range lists that are
3436       manufactured by parse_var_DIE.  References to the keys in the
3437       tree wind up in .rngMany fields in TempVars.  We'll need to
3438       delete this tree, and the XArrays attached to it, at the end of
3439       this function. */
3440    rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3441                             "di.readdwarf3.ndrw.2 (rangestree)",
3442                             ML_(dinfo_free),
3443                             (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3444
3445    /* List of variables we're accumulating.  These don't end up in the
3446       DebugInfo; instead their contents are handed to ML_(addVar) and
3447       the list elements are then deleted. */
3448    tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3449                           "di.readdwarf3.ndrw.3 (TempVar*s array)",
3450                           ML_(dinfo_free),
3451                           sizeof(TempVar*) );
3452
3453    /* List of GExprs we're accumulating.  These wind up in the
3454       DebugInfo. */
3455    gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3456                         ML_(dinfo_free), sizeof(GExpr*) );
3457
3458    /* We need a D3TypeParser to keep track of partially constructed
3459       types.  It'll be discarded as soon as we've completed the CU,
3460       since the resulting information is tipped in to 'tyents' as it
3461       is generated. */
3462    VG_(memset)( &typarser, 0, sizeof(typarser) );
3463    typarser.sp = -1;
3464    typarser.language = '?';
3465    for (i = 0; i < N_D3_TYPE_STACK; i++) {
3466       typarser.qparentE[i].tag   = Te_EMPTY;
3467       typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3468    }
3469
3470    VG_(memset)( &varparser, 0, sizeof(varparser) );
3471    varparser.sp = -1;
3472
3473    TRACE_D3("\n------ Parsing .debug_info section ------\n");
3474    while (True) {
3475       UWord   cu_start_offset, cu_offset_now;
3476       CUConst cc;
3477       /* It may be that the stated size of this CU is larger than the
3478          amount of stuff actually in it.  icc9 seems to generate CUs
3479          thusly.  We use these variables to figure out if this is
3480          indeed the case, and if so how many bytes we need to skip to
3481          get to the start of the next CU.  Not skipping those bytes
3482          causes us to misidentify the start of the next CU, and it all
3483          goes badly wrong after that (not surprisingly). */
3484       UWord cu_size_including_IniLen, cu_amount_used;
3485
3486       /* It seems icc9 finishes the DIE info before debug_info_sz
3487          bytes have been used up.  So be flexible, and declare the
3488          sequence complete if there is not enough remaining bytes to
3489          hold even the smallest conceivable CU header.  (11 bytes I
3490          reckon). */
3491       /* JRS 23Jan09: I suspect this is no longer necessary now that
3492          the code below contains a 'while (cu_amount_used <
3493          cu_size_including_IniLen ...'  style loop, which skips over
3494          any leftover bytes at the end of a CU in the case where the
3495          CU's stated size is larger than its actual size (as
3496          determined by reading all its DIEs).  However, for prudence,
3497          I'll leave the following test in place.  I can't see that a
3498          CU header can be smaller than 11 bytes, so I don't think
3499          there's any harm possible through the test -- it just adds
3500          robustness. */
3501       Word avail = get_remaining_length_Cursor( &info );
3502       if (avail < 11) {
3503          if (avail > 0)
3504             TRACE_D3("new_dwarf3_reader_wrk: warning: "
3505                      "%ld unused bytes after end of DIEs\n", avail);
3506          break;
3507       }
3508
3509       /* Check the varparser's stack is in a sane state. */
3510       vg_assert(varparser.sp == -1);
3511       for (i = 0; i < N_D3_VAR_STACK; i++) {
3512          vg_assert(varparser.ranges[i] == NULL);
3513          vg_assert(varparser.level[i] == 0);
3514       }
3515       for (i = 0; i < N_D3_TYPE_STACK; i++) {
3516          vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3517          vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3518          vg_assert(typarser.qlevel[i] == 0);
3519       }
3520
3521       cu_start_offset = get_position_of_Cursor( &info );
3522       TRACE_D3("\n");
3523       TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3524       /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3525          (saC_cache) */
3526       parse_CU_Header( &cc, td3, &info,
3527                        (UChar*)debug_abbv_img, debug_abbv_sz );
3528       cc.debug_str_img    = debug_str_img;
3529       cc.debug_str_sz     = debug_str_sz;
3530       cc.debug_ranges_img = debug_ranges_img;
3531       cc.debug_ranges_sz  = debug_ranges_sz;
3532       cc.debug_loc_img    = debug_loc_img;
3533       cc.debug_loc_sz     = debug_loc_sz;
3534       cc.debug_line_img   = debug_line_img;
3535       cc.debug_line_sz    = debug_line_sz;
3536       cc.debug_info_img   = debug_info_img;
3537       cc.debug_info_sz    = debug_info_sz;
3538       cc.cu_start_offset  = cu_start_offset;
3539       cc.di = di;
3540       /* The CU's svma can be deduced by looking at the AT_low_pc
3541          value in the top level TAG_compile_unit, which is the topmost
3542          DIE.  We'll leave it for the 'varparser' to acquire that info
3543          and fill it in -- since it is the only party to want to know
3544          it. */
3545       cc.cu_svma_known = False;
3546       cc.cu_svma       = 0;
3547
3548       /* Create a fake outermost-level range covering the entire
3549          address range.  So we always have *something* to catch all
3550          variable declarations. */
3551       varstack_push( &cc, &varparser, td3,
3552                      unitary_range_list(0UL, ~0UL),
3553                      -1, False/*isFunc*/, NULL/*fbGX*/ );
3554
3555       /* And set up the file name table.  When we come across the top
3556          level DIE for this CU (which is what the next call to
3557          read_DIE should process) we will copy all the file names out
3558          of the .debug_line img area and use this table to look up the
3559          copies when we later see filename numbers in DW_TAG_variables
3560          etc. */
3561       vg_assert(!varparser.filenameTable );
3562       varparser.filenameTable
3563          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3564                        ML_(dinfo_free),
3565                        sizeof(UChar*) );
3566       vg_assert(varparser.filenameTable);
3567
3568       /* Now read the one-and-only top-level DIE for this CU. */
3569       vg_assert(varparser.sp == 0);
3570       read_DIE( rangestree,
3571                 tyents, tempvars, gexprs,
3572                 &typarser, &varparser,
3573                 &info, td3, &cc, 0 );
3574
3575       cu_offset_now = get_position_of_Cursor( &info );
3576
3577       if (0) VG_(printf)("Travelled: %lu  size %llu\n",
3578                          cu_offset_now - cc.cu_start_offset,
3579                          cc.unit_length + (cc.is_dw64 ? 12 : 4));
3580
3581       /* How big the CU claims it is .. */
3582       cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3583       /* .. vs how big we have found it to be */
3584       cu_amount_used = cu_offset_now - cc.cu_start_offset;
3585
3586       if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3587                       cu_offset_now, debug_info_sz);
3588       if (cu_offset_now > debug_info_sz)
3589          barf("toplevel DIEs beyond end of CU");
3590
3591       /* If the CU is bigger than it claims to be, we've got a serious
3592          problem. */
3593       if (cu_amount_used > cu_size_including_IniLen)
3594          barf("CU's actual size appears to be larger than it claims it is");
3595
3596       /* If the CU is smaller than it claims to be, we need to skip some
3597          bytes.  Loop updates cu_offset_new and cu_amount_used. */
3598       while (cu_amount_used < cu_size_including_IniLen
3599              && get_remaining_length_Cursor( &info ) > 0) {
3600          if (0) VG_(printf)("SKIP\n");
3601          (void)get_UChar( &info );
3602          cu_offset_now = get_position_of_Cursor( &info );
3603          cu_amount_used = cu_offset_now - cc.cu_start_offset;
3604       }
3605
3606       if (cu_offset_now == debug_info_sz)
3607          break;
3608
3609       /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
3610          anywhere else at all.  Our fake the-entire-address-space
3611          range is at level -1, so preening to -2 should completely
3612          empty the stack out. */
3613       TRACE_D3("\n");
3614       varstack_preen( &varparser, td3, -2 );
3615       /* Similarly, empty the type stack out. */
3616       typestack_preen( &typarser, td3, -2 );
3617       /* else keep going */
3618
3619       TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3620                cc.saC_cache_queries, cc.saC_cache_misses);
3621
3622       vg_assert(varparser.filenameTable );
3623       VG_(deleteXA)( varparser.filenameTable );
3624       varparser.filenameTable = NULL;
3625    }
3626
3627    /* From here on we're post-processing the stuff we got
3628       out of the .debug_info section. */
3629    if (td3) {
3630       TRACE_D3("\n");
3631       ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
3632       TRACE_D3("\n");
3633       TRACE_D3("------ Compressing type entries ------\n");
3634    }
3635
3636    tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
3637                                      sizeof(TyEntIndexCache) );
3638    ML_(TyEntIndexCache__invalidate)( tyents_cache );
3639    dedup_types( td3, tyents, tyents_cache );
3640    if (td3) {
3641       TRACE_D3("\n");
3642       ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
3643    }
3644
3645    TRACE_D3("\n");
3646    TRACE_D3("------ Resolving the types of variables ------\n" );
3647    resolve_variable_types( barf, tyents, tyents_cache, tempvars );
3648
3649    /* Copy all the non-INDIR tyents into a new table.  For large
3650       .so's, about 90% of the tyents will by now have been resolved to
3651       INDIRs, and we no longer need them, and so don't need to store
3652       them. */
3653    tyents_to_keep
3654       = VG_(newXA)( ML_(dinfo_zalloc),
3655                     "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
3656                     ML_(dinfo_free), sizeof(TyEnt) );
3657    n = VG_(sizeXA)( tyents );
3658    for (i = 0; i < n; i++) {
3659       TyEnt* ent = VG_(indexXA)( tyents, i );
3660       if (ent->tag != Te_INDIR)
3661          VG_(addToXA)( tyents_to_keep, ent );
3662    }
3663
3664    VG_(deleteXA)( tyents );
3665    tyents = NULL;
3666    ML_(dinfo_free)( tyents_cache );
3667    tyents_cache = NULL;
3668
3669    /* Sort tyents_to_keep so we can lookup in it.  A complete (if
3670       minor) waste of time, since tyents itself is sorted, but
3671       necessary since VG_(lookupXA) refuses to cooperate if we
3672       don't. */
3673    VG_(setCmpFnXA)(
3674       tyents_to_keep,
3675       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3676    );
3677    VG_(sortXA)( tyents_to_keep );
3678
3679    /* Enable cacheing on tyents_to_keep */
3680    tyents_to_keep_cache
3681       = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
3682                            sizeof(TyEntIndexCache) );
3683    ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
3684
3685    /* And record the tyents in the DebugInfo.  We do this before
3686       starting to hand variables to ML_(addVar), since if ML_(addVar)
3687       wants to do debug printing (of the types of said vars) then it
3688       will need the tyents.*/
3689    vg_assert(!di->admin_tyents);
3690    di->admin_tyents = tyents_to_keep;
3691
3692    /* Bias all the location expressions. */
3693    TRACE_D3("\n");
3694    TRACE_D3("------ Biasing the location expressions ------\n" );
3695
3696    n = VG_(sizeXA)( gexprs );
3697    for (i = 0; i < n; i++) {
3698       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
3699       bias_GX( gexpr, di );
3700    }
3701
3702    TRACE_D3("\n");
3703    TRACE_D3("------ Acquired the following variables: ------\n\n");
3704
3705    /* Park (pointers to) all the vars in an XArray, so we can look up
3706       abstract origins quickly.  The array is sorted (hence, looked-up
3707       by) the .dioff fields.  Since the .dioffs should be in strictly
3708       ascending order, there is no need to sort the array after
3709       construction.  The ascendingness is however asserted for. */
3710    dioff_lookup_tab
3711       = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
3712                     ML_(dinfo_free),
3713                     sizeof(TempVar*) );
3714    vg_assert(dioff_lookup_tab);
3715
3716    n = VG_(sizeXA)( tempvars );
3717    for (i = 0; i < n; i++) {
3718       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3719       if (i > 0) {
3720          varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
3721          /* why should this hold?  Only, I think, because we've
3722             constructed the array by reading .debug_info sequentially,
3723             and so the array .dioff fields should reflect that, and be
3724             strictly ascending. */
3725          vg_assert(varp2->dioff < varp->dioff);
3726       }
3727       VG_(addToXA)( dioff_lookup_tab, &varp );
3728    }
3729    VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
3730    VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
3731
3732    /* Now visit each var.  Collect up as much info as possible for
3733       each var and hand it to ML_(addVar). */
3734    n = VG_(sizeXA)( tempvars );
3735    for (j = 0; j < n; j++) {
3736       TyEnt* ent;
3737       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
3738
3739       /* Possibly show .. */
3740       if (td3) {
3741          VG_(printf)("<%lx> addVar: level %d: %s :: ",
3742                      varp->dioff,
3743                      varp->level,
3744                      varp->name ? varp->name : (UChar*)"<anon_var>" );
3745          if (varp->typeR) {
3746             ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
3747          } else {
3748             VG_(printf)("NULL");
3749          }
3750          VG_(printf)("\n  Loc=");
3751          if (varp->gexpr) {
3752             ML_(pp_GX)(varp->gexpr);
3753          } else {
3754             VG_(printf)("NULL");
3755          }
3756          VG_(printf)("\n");
3757          if (varp->fbGX) {
3758             VG_(printf)("  FrB=");
3759             ML_(pp_GX)( varp->fbGX );
3760             VG_(printf)("\n");
3761          } else {
3762             VG_(printf)("  FrB=none\n");
3763          }
3764          VG_(printf)("  declared at: %s:%d\n",
3765                      varp->fName ? varp->fName : (UChar*)"NULL",
3766                      varp->fLine );
3767          if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3768             VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
3769       }
3770
3771       /* Skip variables which have no location.  These must be
3772          abstract instances; they are useless as-is since with no
3773          location they have no specified memory location.  They will
3774          presumably be referred to via the absOri fields of other
3775          variables. */
3776       if (!varp->gexpr) {
3777          TRACE_D3("  SKIP (no location)\n\n");
3778          continue;
3779       }
3780
3781       /* So it has a location, at least.  If it refers to some other
3782          entry through its absOri field, pull in further info through
3783          that. */
3784       if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3785          Bool found;
3786          Word ixFirst, ixLast;
3787          TempVar key;
3788          TempVar* keyp = &key;
3789          TempVar *varAI;
3790          VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3791          key.dioff = varp->absOri; /* this is what we want to find */
3792          found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3793                                 &ixFirst, &ixLast );
3794          if (!found) {
3795             /* barf("DW_AT_abstract_origin can't be resolved"); */
3796             TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
3797             continue;
3798          }
3799          /* If the following fails, there is more than one entry with
3800             the same dioff.  Which can't happen. */
3801          vg_assert(ixFirst == ixLast);
3802          varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3803          /* stay sane */
3804          vg_assert(varAI);
3805          vg_assert(varAI->dioff == varp->absOri);
3806
3807          /* Copy what useful info we can. */
3808          if (varAI->typeR && !varp->typeR)
3809             varp->typeR = varAI->typeR;
3810          if (varAI->name && !varp->name)
3811             varp->name = varAI->name;
3812          if (varAI->fName && !varp->fName)
3813             varp->fName = varAI->fName;
3814          if (varAI->fLine > 0 && varp->fLine == 0)
3815             varp->fLine = varAI->fLine;
3816       }
3817
3818       /* Give it a name if it doesn't have one. */
3819       if (!varp->name)
3820          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3821
3822       /* So now does it have enough info to be useful? */
3823       /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
3824          the type didn't get resolved.  Really, in that case
3825          something's broken earlier on, and should be fixed, rather
3826          than just skipping the variable. */
3827       ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
3828                                          tyents_to_keep_cache,
3829                                          varp->typeR );
3830       /* The next two assertions should be guaranteed by
3831          our previous call to resolve_variable_types. */
3832       vg_assert(ent);
3833       vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
3834
3835       if (ent->tag == Te_UNKNOWN) continue;
3836
3837       vg_assert(varp->gexpr);
3838       vg_assert(varp->name);
3839       vg_assert(varp->typeR);
3840       vg_assert(varp->level >= 0);
3841
3842       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
3843          each address range in which the variable exists. */
3844       TRACE_D3("  ACQUIRE for range(s) ");
3845       { AddrRange  oneRange;
3846         AddrRange* varPcRanges;
3847         Word       nVarPcRanges;
3848         /* Set up to iterate over address ranges, however
3849            represented. */
3850         if (varp->nRanges == 0 || varp->nRanges == 1) {
3851            vg_assert(!varp->rngMany);
3852            if (varp->nRanges == 0) {
3853               vg_assert(varp->rngOneMin == 0);
3854               vg_assert(varp->rngOneMax == 0);
3855            }
3856            nVarPcRanges = varp->nRanges;
3857            oneRange.aMin = varp->rngOneMin;
3858            oneRange.aMax = varp->rngOneMax;
3859            varPcRanges = &oneRange;
3860         } else {
3861            vg_assert(varp->rngMany);
3862            vg_assert(varp->rngOneMin == 0);
3863            vg_assert(varp->rngOneMax == 0);
3864            nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3865            vg_assert(nVarPcRanges >= 2);
3866            vg_assert(nVarPcRanges == (Word)varp->nRanges);
3867            varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3868         }
3869         if (varp->level == 0)
3870            vg_assert( nVarPcRanges == 1 );
3871         /* and iterate */
3872         for (i = 0; i < nVarPcRanges; i++) {
3873            Addr pcMin = varPcRanges[i].aMin;
3874            Addr pcMax = varPcRanges[i].aMax;
3875            vg_assert(pcMin <= pcMax);
3876            /* Level 0 is the global address range.  So at level 0 we
3877               don't want to bias pcMin/pcMax; but at all other levels
3878               we do since those are derived from svmas in the Dwarf
3879               we're reading.  Be paranoid ... */
3880            if (varp->level == 0) {
3881               vg_assert(pcMin == (Addr)0);
3882               vg_assert(pcMax == ~(Addr)0);
3883            } else {
3884               /* vg_assert(pcMin > (Addr)0);
3885                  No .. we can legitimately expect to see ranges like
3886                  0x0-0x11D (pre-biasing, of course). */
3887               vg_assert(pcMax < ~(Addr)0);
3888            }
3889
3890            /* Apply text biasing, for non-global variables. */
3891            if (varp->level > 0) {
3892               pcMin += di->text_debug_bias;
3893               pcMax += di->text_debug_bias;
3894            }
3895
3896            if (i > 0 && (i%2) == 0)
3897               TRACE_D3("\n                       ");
3898            TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
3899
3900            ML_(addVar)(
3901               di, varp->level,
3902                   pcMin, pcMax,
3903                   varp->name,  varp->typeR,
3904                   varp->gexpr, varp->fbGX,
3905                   varp->fName, varp->fLine, td3
3906            );
3907         }
3908       }
3909
3910       TRACE_D3("\n\n");
3911       /* and move on to the next var */
3912    }
3913
3914    /* Now free all the TempVars */
3915    n = VG_(sizeXA)( tempvars );
3916    for (i = 0; i < n; i++) {
3917       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3918       ML_(dinfo_free)(varp);
3919    }
3920    VG_(deleteXA)( tempvars );
3921    tempvars = NULL;
3922
3923    /* and the temp lookup table */
3924    VG_(deleteXA)( dioff_lookup_tab );
3925
3926    /* and the ranges tree.  Note that we need to also free the XArrays
3927       which constitute the keys, hence pass VG_(deleteXA) as a
3928       key-finalizer. */
3929    VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
3930
3931    /* and the tyents_to_keep cache */
3932    ML_(dinfo_free)( tyents_to_keep_cache );
3933    tyents_to_keep_cache = NULL;
3934
3935    /* and the file name table (just the array, not the entries
3936       themselves).  (Apparently, 2008-Oct-23, varparser.filenameTable
3937       can be NULL here, for icc9 generated Dwarf3.  Not sure what that
3938       signifies (a deeper problem with the reader?)) */
3939    if (varparser.filenameTable) {
3940       VG_(deleteXA)( varparser.filenameTable );
3941       varparser.filenameTable = NULL;
3942    }
3943
3944    /* record the GExprs in di so they can be freed later */
3945    vg_assert(!di->admin_gexprs);
3946    di->admin_gexprs = gexprs;
3947 }
3948
3949
3950 /*------------------------------------------------------------*/
3951 /*---                                                      ---*/
3952 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
3953 /*---                                                      ---*/
3954 /*------------------------------------------------------------*/
3955
3956 static Bool               d3rd_jmpbuf_valid  = False; /* True only while ML_(new_dwarf3_reader) has d3rd_jmpbuf armed */
3957 static HChar*             d3rd_jmpbuf_reason = NULL;  /* failure text stored by barf() just before it jumps */
3958 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);               /* set in ML_(new_dwarf3_reader), jumped to by barf() */
3959
3960 static __attribute__((noreturn)) void barf ( HChar* reason ) { /* give up reading; never returns */
3961    vg_assert(d3rd_jmpbuf_valid);    /* only legal while the jump buffer is armed */
3962    d3rd_jmpbuf_reason = reason;     /* stored first: ML_(new_dwarf3_reader) reports it after the jump */
3963    VG_MINIMAL_LONGJMP(d3rd_jmpbuf); /* resume at the VG_MINIMAL_SETJMP in ML_(new_dwarf3_reader) */
3964    /* NOTREACHED -- trap if the jump above ever falls through. */
3965    vg_assert(0);
3966 }
3967
3968
3969 void
3970 ML_(new_dwarf3_reader) (
3971    struct _DebugInfo* di,
3972    UChar* debug_info_img,   SizeT debug_info_sz,
3973    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3974    UChar* debug_line_img,   SizeT debug_line_sz,
3975    UChar* debug_str_img,    SizeT debug_str_sz,
3976    UChar* debug_ranges_img, SizeT debug_ranges_sz,
3977    UChar* debug_loc_img,    SizeT debug_loc_sz
3978 )
3979 {
3980    /* Guarded entry point: hands every section image to
3981       new_dwarf3_reader_wrk and turns a barf() bail-out into an
3982       ML_(symerr) report instead of letting it propagate. */
3983    volatile Bool td3 = di->trace_symtab; /* presumably read by TRACE_D3 */
3984    volatile Int  sjRet;
3985
3986    /* On entry the jump buffer must be idle, with no reason pending. */
3987    vg_assert(d3rd_jmpbuf_valid  == False);
3988    vg_assert(d3rd_jmpbuf_reason == NULL);
3989    /* Arm the buffer.  barf() asserts d3rd_jmpbuf_valid, so the flag
3990       has to be True before the worker gets a chance to call it. */
3991    d3rd_jmpbuf_valid = True;
3992    sjRet = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
3993    if (sjRet != 0) {
3994       /* Arrived here via barf()'s longjmp: the worker gave up. */
3995       d3rd_jmpbuf_valid = False;
3996       /* barf() stores its reason before jumping; it must be set. */
3997       vg_assert(d3rd_jmpbuf_reason != NULL);
3998       TRACE_D3("\n------ .debug_info reading failed ------\n");
3999       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
4000    } else {
4001       /* Direct return from the setjmp: run the worker, which either
4002          returns normally or bails out through barf(). */
4003       new_dwarf3_reader_wrk( di, barf,
4004                              debug_info_img,   debug_info_sz,
4005                              debug_abbv_img,   debug_abbv_sz,
4006                              debug_line_img,   debug_line_sz,
4007                              debug_str_img,    debug_str_sz,
4008                              debug_ranges_img, debug_ranges_sz,
4009                              debug_loc_img,    debug_loc_sz );
4010       d3rd_jmpbuf_valid = False;
4011       TRACE_D3("\n------ .debug_info reading was successful ------\n");
4012    }
4013    /* Leave the shared state idle for the next invocation. */
4014    d3rd_jmpbuf_valid  = False;
4015    d3rd_jmpbuf_reason = NULL;
4016 }
4017
4018
4019
4020 /* --- Unused code fragments which might be useful one day. --- */
4021
4022 #if 0
4023    /* Walk .debug_aranges and trace every table in it.  Nothing is stored. */
4024    TRACE_SYMTAB("\n");
4025    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); /* NOTE(review): spelled ".debug_aranges" in the messages below */
4026    init_Cursor( &aranges, debug_aranges_img,
4027                 debug_aranges_sz, 0, barf,
4028                 "Overrun whilst reading .debug_aranges section" );
4029    while (True) {
4030       ULong  len, d_i_offset;
4031       Bool   is64;
4032       UShort version;
4033       UChar  asize, segsize;
4034       /* One pass per table; leave once the cursor reports the end. */
4035       if (is_at_end_Cursor( &aranges ))
4036          break;
4037       /* Table header: initial length, then version, .debug_info */
4038       /* offset, address size and segment size, in that order.   */
4039       len = get_Initial_Length( &is64, &aranges,
4040                "in .debug_aranges: invalid initial-length field" );
4041       version    = get_UShort( &aranges );
4042       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
4043       asize      = get_UChar( &aranges );
4044       segsize    = get_UChar( &aranges );
4045       TRACE_D3("  Length:                   %llu\n", len);
4046       TRACE_D3("  Version:                  %d\n", (Int)version);
4047       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
4048       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
4049       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
4050       TRACE_D3("\n");
4051       TRACE_D3("    Address            Length\n");
4052       /* Discard bytes until the cursor position is a multiple of 2*asize. */
4053       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
4054          (void)get_UChar( & aranges );
4055       } /* NOTE(review): asize == 0 would make the test above a modulo by zero */
4056       while (True) { /* (address, length) pairs, ended by an all-zero pair */
4057          ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); /* presumably asize==8 selects 8-byte reads -- confirm */
4058          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
4059          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
4060          if (address == 0 && length == 0) break;
4061       }
4062    }
4063    TRACE_SYMTAB("\n");
4064 #endif
4065
4066 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_l4re)
4067
4068 /*--------------------------------------------------------------------*/
4069 /*--- end                                                          ---*/
4070 /*--------------------------------------------------------------------*/