2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from Mach-O files. ---*/
4 /*--- readmacho.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2005-2010 Apple Inc.
12 Greg Parker gparker@apple.com
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #if defined(VGO_darwin)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcfile.h"
40 #include "pub_core_libcproc.h"
41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42 #include "pub_core_machine.h" /* VG_ELF_CLASS */
43 #include "pub_core_options.h"
44 #include "pub_core_oset.h"
45 #include "pub_core_tooliface.h" /* VG_(needs) */
46 #include "pub_core_xarray.h"
47 #include "pub_core_clientstate.h"
48 #include "pub_core_debuginfo.h"
50 #include "priv_d3basics.h"
51 #include "priv_misc.h"
52 #include "priv_tytypes.h"
53 #include "priv_storage.h"
54 #include "priv_readmacho.h"
55 #include "priv_readdwarf.h"
56 #include "priv_readdwarf3.h"
57 #include "priv_readstabs.h"
59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60 #include <mach-o/loader.h>
61 #include <mach-o/nlist.h>
62 #include <mach-o/fat.h>
63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
// Aliases that let the rest of this file name Mach-O constants and struct
// tags without caring about word size.  Two alternative sets follow; the
// conditional that chooses between them is not visible in this excerpt.
// First set: the plain (non-_64) names.
66 # define MAGIC MH_MAGIC
67 # define MACH_HEADER mach_header
68 # define LC_SEGMENT_CMD LC_SEGMENT
69 # define SEGMENT_COMMAND segment_command
70 # define SECTION section
// Second set: the _64 names.
73 # define MAGIC MH_MAGIC_64
74 # define MACH_HEADER mach_header_64
75 # define LC_SEGMENT_CMD LC_SEGMENT_64
76 # define SEGMENT_COMMAND segment_command_64
77 # define SECTION section_64
78 # define NLIST nlist_64
82 /*------------------------------------------------------------*/
84 /*--- Mach-O file mapping/unmapping helpers ---*/
86 /*------------------------------------------------------------*/
90 /* These two describe the entire mapped-in ("primary") image,
91 fat headers, kitchen sink, whatnot: the entire file. The
92 image is mapped into img[0 .. img_szB-1]. */
95 /* These two describe the Mach-O object of interest, which is
96 presumably somewhere inside the primary image.
97 map_image_aboard() below, which generates this info, will
98 carefully check that the macho_ fields denote a section of
99 memory that falls entirely inside img[0 .. img_szB-1]. */
// Quick plausibility check on the first szB bytes of a file, held in buf.
// Two shapes are recognised: a fat header (magic FAT_MAGIC after byte
// swapping) or a thin Mach-O header whose magic equals MAGIC.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably True on either magic match and False otherwise -- confirm.
106 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
108 /* (JRS: the Mach-O headers might not be in this mapped data,
109 because we only mapped a page for this initial check,
110 or at least not very much, and what's at the start of the file
111 is in general a so-called fat header. The Mach-O object we're
112 interested in could be arbitrarily far along the image, and so
113 we can't assume its header will fall within this page.) */
115 /* But we can say that either it's a fat object, in which case it
116 begins with a fat header, or it's unadorned Mach-O, in which
117 case it starts with a normal header. At least do what checks we
118 can to establish whether or not we're looking at something
// Both views alias the same bytes; which one applies depends on the magic.
121 const struct fat_header* fh_be = buf;
122 const struct MACH_HEADER* mh = buf;
// Not even enough bytes for a fat header.
125 if (szB < sizeof(struct fat_header))
// The fat magic is byte-swapped with ntohl before the comparison.
127 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
// Not fat: require room for a thin header, then compare its magic as-is.
130 if (szB < sizeof(struct MACH_HEADER))
132 if (mh->magic == MAGIC)
139 /* Unmap an image mapped in by map_image_aboard. */
// ii describes the mapping to release.  On return every field of the
// descriptor is zero, so later tests of its pointers see no image.
140 static void unmap_image ( /*MOD*/ImageInfo* ii )
// A zero-length mapping is never expected here.
144 vg_assert(ii->img_szB > 0);
// Release the whole primary mapping, not just the thin part.
145 sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
146 /* Do we care if this fails? I suppose so; it would indicate
147 some fairly serious snafu with the mapping of the file. */
148 vg_assert( !sr_isError(sres) );
// Wipe the descriptor so no stale pointer or size survives.
149 VG_(memset)(ii, 0, sizeof(*ii));
153 /* Map a given fat or thin object aboard, find the thin part if
154 necessary, do some checks, and write details of both the fat and
155 thin parts into *ii. Returns False (and leaves the file unmapped)
156 on failure. Guarantees to return pointers to a valid(ish) Mach-O
157 image if it succeeds. */
// di is used only when reporting errors; filename is the path to map.
158 static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
159 /*OUT*/ImageInfo* ii, UChar* filename )
// Start from an all-zero descriptor.
161 VG_(memset)(ii, 0, sizeof(*ii));
163 /* First off, try to map the thing in. */
166 struct vg_stat stat_buf;
// The stat is needed only to learn the file size for the mmap below.
168 fd = VG_(stat)(filename, &stat_buf);
169 if (sr_isError(fd)) {
170 ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
173 size = stat_buf.size;
175 fd = VG_(open)(filename, VKI_O_RDONLY, 0);
176 if (sr_isError(fd)) {
177 ML_(symerr)(di, True, "Can't open image to read symbols?!");
// Map the whole file read-only, starting at file offset 0.
181 sres = VG_(am_mmap_file_float_valgrind)
182 ( size, VKI_PROT_READ, sr_Res(fd), 0 );
183 if (sr_isError(sres)) {
184 ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
// NOTE(review): the descriptor is closed only after the mmap error check.
// If the failure branch above (body not visible here) returns early, the
// descriptor is leaked -- confirm.
188 VG_(close)(sr_Res(fd));
190 ii->img = (UChar*)sr_Res(sres);
194 /* Now it's mapped in and we have .img and .img_szB set. Look for
195 the embedded Mach-O object. If not findable, unmap and fail. */
196 { struct fat_header* fh_be;
197 struct fat_header fh;
198 struct MACH_HEADER* mh;
200 // Assume initially that we have a thin image, and update
201 // these if it turns out to be fat.
202 ii->macho_img = ii->img;
203 ii->macho_img_szB = ii->img_szB;
205 // Check for fat header.
206 if (ii->img_szB < sizeof(struct fat_header)) {
207 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
211 // Fat header is always BIG-ENDIAN
212 fh_be = (struct fat_header *)ii->img;
213 fh.magic = VG_(ntohl)(fh_be->magic);
214 fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
215 if (fh.magic == FAT_MAGIC) {
216 // Look for a good architecture.
217 struct fat_arch *arch_be;
218 struct fat_arch arch;
// The complete fat_arch table must lie inside the mapped file.
// NOTE(review): nfat_arch is read from the file; the multiplication below
// might wrap where the size type is 32 bits wide -- confirm.
220 if (ii->img_szB < sizeof(struct fat_header)
221 + fh.nfat_arch * sizeof(struct fat_arch)) {
222 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
// The table of fat_arch entries starts right after the fat header.
225 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
// Pick the Mach-O CPU type for the architecture this build targets.
229 # if defined(VGA_ppc)
230 cputype = CPU_TYPE_POWERPC;
231 # elif defined(VGA_ppc64)
232 cputype = CPU_TYPE_POWERPC64;
233 # elif defined(VGA_x86)
234 cputype = CPU_TYPE_X86;
235 # elif defined(VGA_amd64)
236 cputype = CPU_TYPE_X86_64;
238 # error "unknown architecture"
// Byte-swap the current table entry into a local copy.
240 arch.cputype = VG_(ntohl)(arch_be->cputype);
241 arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
242 arch.offset = VG_(ntohl)(arch_be->offset);
243 arch.size = VG_(ntohl)(arch_be->size);
244 if (arch.cputype == cputype) {
// NOTE(review): offset and size come from the file; if their sum wraps,
// this test could pass wrongly.  The thin-inside-fat test further down
// would then be the remaining safeguard -- confirm.
245 if (ii->img_szB < arch.offset + arch.size) {
246 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
// Found the slice for this architecture: narrow the thin view to it.
249 ii->macho_img = ii->img + arch.offset;
250 ii->macho_img_szB = arch.size;
// f reaching nfat_arch means no table entry matched cputype.
254 if (f == fh.nfat_arch) {
255 ML_(symerr)(di, True,
256 "No acceptable architecture found in fat file.");
261 /* Sanity check what we found. */
263 /* assured by logic above */
264 vg_assert(ii->img_szB >= sizeof(struct fat_header));
266 if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
267 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
271 if (ii->macho_img_szB > ii->img_szB) {
272 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
276 if (ii->macho_img >= ii->img
277 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
278 /* thin entirely within fat, as expected */
280 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
// The thin image must start with a header carrying the expected magic.
284 mh = (struct MACH_HEADER *)ii->macho_img;
285 if (mh->magic != MAGIC) {
286 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
// Header plus sizeofcmds bytes of load commands must fit in the thin image.
290 if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) {
291 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
// Re-check the invariants promised by the header comment before succeeding.
297 vg_assert(ii->macho_img);
298 vg_assert(ii->img_szB > 0);
299 vg_assert(ii->macho_img_szB > 0);
300 vg_assert(ii->macho_img >= ii->img);
301 vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
302 return True; /* success */
307 return False; /* bah! */
311 /*------------------------------------------------------------*/
313 /*--- Mach-O symbol table reading ---*/
315 /*------------------------------------------------------------*/
317 /* Read a symbol table (nlist). Add the resulting candidate symbols
318 to 'syms'; the caller will post-process them and hand them off to
319 ML_(addSym) itself. */
// Parameters: syms receives the candidates; di supplies the text bias and
// text range and owns the name strings; o_symtab and o_symtab_count give the
// nlist entries to scan; o_strtab and o_strtab_sz bound the string table.
321 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
322 struct _DebugInfo* di,
323 struct NLIST* o_symtab, UInt o_symtab_count,
324 UChar* o_strtab, UInt o_strtab_sz )
// Holds the replacement name used for the symbol start further down.
// Presumably created once and then reused across calls -- the guarding
// test is not visible in this excerpt.
331 static UChar* s_a_t_v = NULL; /* do not make non-static */
333 for (i = 0; i < o_symtab_count; i++) {
334 struct NLIST *nl = o_symtab+i;
// Section-defined symbols get an address: the file value plus the text bias.
335 if ((nl->n_type & N_TYPE) == N_SECT) {
336 sym_addr = di->text_bias + nl->n_value;
337 /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
338 GrP fixme don't ignore absolute symbols?
339 sym_addr = nl->n_value; */
// NOTE(review): this trace print reads the name at n_strx before the
// string-table bounds check further down has run -- confirm acceptable.
344 if (di->trace_symtab)
345 VG_(printf)("nlist raw: avma %010lx %s\n",
346 sym_addr, o_strtab + nl->n_un.n_strx );
348 /* If no part of the symbol falls within the mapped range,
// NOTE(review): the <= also rejects a symbol sitting exactly at text_avma
// -- confirm that is intended.
350 if (sym_addr <= di->text_avma
351 || sym_addr >= di->text_avma+di->text_size) {
355 /* skip names which point outside the string table;
356 following these risks segfaulting Valgrind */
357 name = o_strtab + nl->n_un.n_strx;
358 if (name < o_strtab || name >= o_strtab + o_strtab_sz)
361 /* skip nameless symbols; these appear to be common, but
// Build the candidate.  The size is an overestimate that reaches to the end
// of the text range.
367 risym.addr = sym_addr;
368 risym.size = // let canonicalize fix it
369 di->text_avma+di->text_size - sym_addr;
370 risym.name = ML_(addStr)(di, name, -1);
372 risym.isIFunc = False;
373 // Lots of user function names get prepended with an underscore. Eg. the
374 // function 'f' becomes the symbol '_f'. And the "below main"
375 // function is called "start". So we skip the leading underscore, and
376 // if we see 'start' and --show-below-main=no, we rename it as
377 // "start_according_to_valgrind", which makes it easy to spot later
378 // and display as "(below main)".
379 if (risym.name[0] == '_') {
381 } else if (!VG_(clo_show_below_main) && VG_STREQ(risym.name, "start")) {
383 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
385 risym.name = s_a_t_v;
// The candidate is appended to syms by value.
388 vg_assert(risym.name);
389 VG_(addToXA)( syms, &risym );
394 /* Compare DiSyms by their start address, and for equal addresses, use
395 the name as a secondary sort key. */
396 static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
398 DiSym* s1 = (DiSym*)v1;
399 DiSym* s2 = (DiSym*)v2;
400 if (s1->addr < s2->addr) return -1;
401 if (s1->addr > s2->addr) return 1;
402 return VG_(strcmp)(s1->name, s2->name);
405 /* 'syms' is a bunch of candidate symbols obtained by reading
406 nlist-style symbol table entries. Their ends may overlap, so sort
407 them and truncate them accordingly. The code in this routine is
408 copied almost verbatim from read_symbol_table() in readxcoff.c. */
409 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
412 Word nsyms, i, j, k, m;
414 nsyms = VG_(sizeXA)(syms);
// Install the ordering: start address first, then name.
416 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
419 /* We only know for sure the start addresses (actual VMAs) of
420 symbols, and an overestimation of their end addresses. So sort
421 by start address, then clip each symbol so that its end address
422 does not overlap with the next one along.
424 There is a small refinement: if a group of symbols have the same
425 address, treat them as a group: find the next symbol along that
426 has a higher start address, and clip all of the group
427 accordingly. This clips the group as a whole so as not to
428 overlap following symbols. This leaves prefersym() in
429 storage.c, which is not nlist-specific, to later decide which of
430 the symbols in the group to keep.
432 Another refinement is that we need to get rid of symbols which,
433 after clipping, have identical starts, ends, and names. So the
434 sorting uses the name as a secondary key.
// Pass 1: clip every same-address group against the next higher start.
437 for (i = 0; i < nsyms; i++) {
// Tail of the scan that moves k past every entry sharing the address of i.
440 && ((DiSym*)VG_(indexXA)(syms,i))->addr
441 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
444 /* So now [i .. k-1] is a group all with the same start address.
445 Clip their ending addresses so they don't overlap [k]. In
446 the normal case (no overlaps), k == i+1. */
448 DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
449 for (m = i; m < k; m++) {
450 DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
451 vg_assert(here->addr < next->addr);
// Shrink only when the current extent actually reaches into the next symbol.
452 if (here->addr + here->size > next->addr)
453 here->size = next->addr - here->addr;
457 vg_assert(i <= nsyms);
// Pass 2: drop entries whose address, size and name all equal those of the
// entry at index j-1.  Presumably kept entries are copied down to index j;
// the copying lines are not visible in this excerpt.
463 for (i = 1; i < nsyms; i++) {
464 DiSym *s_j1, *s_j, *s_i;
466 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
467 s_j = (DiSym*)VG_(indexXA)(syms, j);
468 s_i = (DiSym*)VG_(indexXA)(syms, i);
469 if (s_i->addr != s_j1->addr
470 || s_i->size != s_j1->size
471 || 0 != VG_(strcmp)(s_i->name, s_j1->name)) {
476 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
477 s_i->addr, s_i->name );
// Everything from index j onwards is surplus; trim it off the array.
481 vg_assert(j >= 0 && j <= nsyms);
482 VG_(dropTailXA)(syms, nsyms - j);
486 /*------------------------------------------------------------*/
488 /*--- Mach-O top-level processing ---*/
490 /*------------------------------------------------------------*/
// Suffix appended to a path, before the file basename, when building
// candidate dSYM locations.  Defined here only if not already defined.
492 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
493 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
497 static Bool file_exists_p(const Char *path)
500 SysRes res = VG_(stat)(path, &sbuf);
501 return sr_isError(res) ? False : True;
505 /* Search for an existing dSYM file as a possible separate debug file.
// executable_name is the path of the object whose dSYM is wanted.  The
// candidate path is built in a buffer obtained from ML_(dinfo_zalloc).
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the buffer is returned on a hit and NULL otherwise -- confirm.
508 find_separate_debug_file (const Char *executable_name)
515 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
516 will end up with an infinite loop where after we add a dSYM symbol file,
517 it will then enter this function asking if there is a debug file for the
519 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
521 /* Check for the existence of a .dSYM file for a given executable. */
522 basename_str = VG_(basename) (executable_name);
// Sized for the first candidate built below: full path + suffix + basename.
523 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
524 VG_(strlen) (executable_name)
525 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
526 + VG_(strlen) (basename_str)
530 /* First try for the dSYM in the same directory as the original file. */
531 VG_(strcpy) (dsymfile, executable_name);
532 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
533 VG_(strcat) (dsymfile, basename_str);
535 if (file_exists_p (dsymfile))
538 /* Now search for any parent directory that has a '.' in it so we can find
539 Mac OS X applications, bundles, plugins, and any other kinds of files.
540 Mac OS X application bundles will have their program in
541 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
542 ".bundle" or ".plugin" for other types of bundles). So we look for any
543 prior '.' character and try appending the apple dSYM extension and
544 subdirectory and see if we find an existing dSYM file (in the above
545 MyApp example the dSYM would be at either:
546 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
547 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
// Restart from the directory part only, then work backwards one dot at a time.
548 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
549 while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
551 /* Find the directory delimiter that follows the '.' character since
552 we now look for a .dSYM that follows any bundle extension. */
553 slash_ptr = VG_(strchr) (dot_ptr, '/');
556 /* NULL terminate the string at the '/' character and append
557 the path down to the dSYM file. */
// Candidate of the form <path up to the slash>.dSYM/.../<basename>.
559 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
560 VG_(strcat) (slash_ptr, basename_str);
561 if (file_exists_p (dsymfile))
565 /* NULL terminate the string at the '.' character and append
566 the path down to the dSYM file. */
// Candidate of the form <path up to the dot>.dSYM/.../<basename>.
568 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
569 VG_(strcat) (dot_ptr, basename_str);
570 if (file_exists_p (dsymfile))
573 /* NULL terminate the string at the '.' located by the strrchr()
577 /* We found a previous extension '.' character and did not find a
578 dSYM file so now find previous directory delimiter so we don't
579 try multiple times on a file name that may have a version number
580 in it such as "/some/path/MyApp.6.0.4.app". */
581 slash_ptr = VG_(strrchr) (dsymfile, '/');
584 /* NULL terminate the string at the previous directory character
// Find section sectname inside segment segname of the Mach-O image at base
// by walking its load commands.  On a match, returns a pointer to the
// section contents (base plus the section file offset) and, when sect_size
// is non-NULL, stores the section size through it.  When nothing matches,
// zero is stored through sect_size; the final return value is not visible in
// this excerpt (callers test the result for non-NULL).
// NOTE(review): size is not referenced in the visible lines, so offsets read
// from the image do not appear to be bounds-checked here -- confirm.
594 static UChar *getsectdata(UChar* base, SizeT size,
595 Char *segname, Char *sectname,
596 /*OUT*/Word *sect_size)
598 struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
599 struct load_command *cmd;
// Load commands start right after the header; each is cmdsize bytes long.
602 for (c = 0, cmd = (struct load_command *)(mh+1);
604 c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
606 if (cmd->cmd == LC_SEGMENT_CMD) {
607 struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
// Names are compared over the full width of the fixed-size name field.
608 if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) {
// The section headers of a segment directly follow its segment command.
609 struct SECTION *sects = (struct SECTION *)(seg+1);
611 for (s = 0; s < seg->nsects; s++) {
612 if (0 == VG_(strncmp(sects[s].sectname, sectname,
613 sizeof(sects[s].sectname))))
615 if (sect_size) *sect_size = sects[s].size;
616 return (UChar *)(base + sects[s].offset);
// No segment and section pair matched.
623 if (sect_size) *sect_size = 0;
628 /* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
629 static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
632 UChar* img = (UChar*)imgA;
633 UChar first = uuid[0];
636 for (i = 0; i < n_img-16; i++) {
639 if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
646 /* Heuristic kludge: return True if this looks like an installed
647 standard library; hence we shouldn't consider automagically running
// The test is a case-insensitive prefix match of name against the fixed
// list of directory prefixes below; each length argument equals the length
// of the prefix it accompanies.
649 static Bool is_systemish_library_name ( UChar* name )
652 if (0 == VG_(strncasecmp)(name, "/usr/", 5)
653 || 0 == VG_(strncasecmp)(name, "/bin/", 5)
654 || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
655 || 0 == VG_(strncasecmp)(name, "/opt/", 5)
656 || 0 == VG_(strncasecmp)(name, "/sw/", 4)
657 || 0 == VG_(strncasecmp)(name, "/System/", 8)
658 || 0 == VG_(strncasecmp)(name, "/Library/", 9)
659 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
// Top-level Mach-O reader for the object named by di->filename.  In order,
// the visible code: maps the file; walks the load commands to record the
// symtab and dysymtab commands, di->soname, the text and data segment
// placement and the UUID; reads the nlist symbols; then looks for a matching
// dSYM file (optionally regenerating it with dsymutil) and hands its DWARF
// sections to the DWARF readers.
667 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
669 struct symtab_command *symcmd = NULL;
670 struct dysymtab_command *dysymcmd = NULL;
671 HChar* dsymfilename = NULL;
672 Bool have_uuid = False;
674 ImageInfo ii; /* main file */
675 ImageInfo iid; /* auxiliary .dSYM file */
678 /* mmap the object file to look for di->soname and di->text_bias
679 and uuid and nlist and STABS */
681 if (VG_(clo_verbosity) > 1)
682 VG_(message)(Vg_DebugMsg,
683 "%s (%#lx)\n", di->filename, di->rx_map_avma );
685 /* This should be ensured by our caller. */
686 vg_assert(di->have_rx_map);
687 vg_assert(di->have_rw_map);
// Zero both image descriptors and the UUID buffer before use.
689 VG_(memset)(&ii, 0, sizeof(ii));
690 VG_(memset)(&iid, 0, sizeof(iid));
691 VG_(memset)(&uuid, 0, sizeof(uuid));
693 ok = map_image_aboard( di, &ii, di->filename );
696 vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
698 /* Poke around in the Mach-O header, to find some important
700 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
701 // Read di->soname from LC_ID_DYLIB if present,
702 // or from LC_ID_DYLINKER if present,
704 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
705 // Get uuid for later dsym search
709 { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
710 struct load_command *cmd;
// Load commands follow the header; step by the cmdsize of each one.
713 for (c = 0, cmd = (struct load_command *)(mh+1);
715 c++, cmd = (struct load_command *)(cmd->cmdsize
716 + (unsigned long)cmd)) {
717 if (cmd->cmd == LC_SYMTAB) {
718 symcmd = (struct symtab_command *)cmd;
720 else if (cmd->cmd == LC_DYSYMTAB) {
721 dysymcmd = (struct dysymtab_command *)cmd;
723 else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
725 struct dylib_command *dcmd = (struct dylib_command *)cmd;
// NOTE(review): the name offset is read from the file and used without a
// visible bounds check -- confirm.
726 UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
727 UChar *soname = VG_(strrchr)(dylibname, '/');
728 if (!soname) soname = dylibname;
730 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
733 else if (cmd->cmd==LC_ID_DYLINKER && mh->filetype==MH_DYLINKER) {
734 struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
735 UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
736 UChar *soname = VG_(strrchr)(dylinkername, '/');
737 if (!soname) soname = dylinkername;
739 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
743 // A comment from Julian about why varinfo[35] fail:
745 // My impression is, from comparing the output of otool -l for these
746 // executables with the logic in ML_(read_macho_debug_info),
747 // specifically the part that begins "else if (cmd->cmd ==
748 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
749 // to work ok for text symbols. In particular, it appears to assume
750 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
751 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
752 // need. However, otool -l shows, and also the Apple docs state,
753 // that a struct load_command may contain an arbitrary number of
754 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
755 // snarf the first. But I'm not sure about this.
757 // The "Try for __DATA" block below simply adds acquisition of data
758 // svma/bias values using the same assumption. It also needs
759 // (probably) to deal with bss sections, but I don't understand how
760 // this all ties together really, so it requires further study.
762 // If you can get your head around the relationship between MachO
763 // segments, sections and load commands, this might be relatively
764 // easy to fix properly.
766 // Basically we need to come up with plausible numbers for di->
767 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
768 // then trivially derived. Then I think the debuginfo reader should
770 else if (cmd->cmd == LC_SEGMENT_CMD) {
771 struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
// Only the first qualifying __TEXT segment is recorded.
773 if (!di->text_present
774 && 0 == VG_(strcmp)(seg->segname, "__TEXT")
775 /* DDD: is the next line a kludge? -- JRS */
776 && seg->fileoff == 0 && seg->filesize != 0) {
777 di->text_present = True;
778 di->text_svma = (Addr)seg->vmaddr;
// The actual text address is taken to be the start of the rx mapping.
779 di->text_avma = di->rx_map_avma;
780 di->text_size = seg->vmsize;
781 di->text_bias = di->text_avma - di->text_svma;
782 /* Make the _debug_ values be the same as the
783 svma/bias for the primary object, since there is
784 no secondary (debuginfo) object, but nevertheless
785 downstream biasing of Dwarf3 relies on the
787 di->text_debug_svma = di->text_svma;
788 di->text_debug_bias = di->text_bias;
// Likewise only the first qualifying __DATA segment is recorded.
791 if (!di->data_present
792 && 0 == VG_(strcmp)(seg->segname, "__DATA")
793 /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
794 di->data_present = True;
795 di->data_svma = (Addr)seg->vmaddr;
// The actual data address is taken to be the start of the rw mapping.
796 di->data_avma = di->rw_map_avma;
797 di->data_size = seg->vmsize;
798 di->data_bias = di->data_avma - di->data_svma;
799 di->data_debug_svma = di->data_svma;
800 di->data_debug_bias = di->data_bias;
803 else if (cmd->cmd == LC_UUID) {
804 struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
// Keep a copy of the UUID for the dSYM match performed later.
805 VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
// Placeholder soname; presumably applied only when no load command supplied
// one -- the guarding test is not visible in this excerpt.
812 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
815 /* Now we have the base object to hand. Read symbols from it. */
817 if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {
819 /* Read nlist symbol table */
822 XArray* /* DiSym */ candSyms = NULL;
// String table and nlist array must both lie inside the thin image.
// NOTE(review): the sums and the product use values read from the file and
// could wrap -- confirm.
825 if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize
826 || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms
827 * sizeof(struct NLIST)) {
828 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
// The local and externally-defined ranges must index inside the nlist array.
831 if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms
832 || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) {
833 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
837 syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
838 strs = (UChar *)(ii.macho_img + symcmd->stroff);
840 if (VG_(clo_verbosity) > 1)
841 VG_(message)(Vg_DebugMsg,
842 " reading syms from primary file (%d %d)\n",
843 dysymcmd->nextdefsym, dysymcmd->nlocalsym );
845 /* Read candidate symbols into 'candSyms', so we can truncate
846 overlapping ends and generally tidy up, before presenting
847 them to ML_(addSym). */
848 candSyms = VG_(newXA)(
849 ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
850 ML_(dinfo_free), sizeof(DiSym)
// First the externally defined symbols (the iextdefsym range).
855 read_symtab(candSyms,
857 syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
858 strs, symcmd->strsize);
859 // static and private_extern symbols
860 read_symtab(candSyms,
862 syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
863 strs, symcmd->strsize);
865 /* tidy up the cand syms -- trim overlapping ends. May resize
867 tidy_up_cand_syms( candSyms, di->trace_symtab );
869 /* and finally present them to ML_(addSym) */
870 nCandSyms = VG_(sizeXA)( candSyms );
871 for (i = 0; i < nCandSyms; i++) {
872 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
873 if (di->trace_symtab)
874 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
875 cand->addr, cand->addr + cand->size - 1, cand->name );
876 ML_(addSym)( di, cand );
// The temporary candidate array is no longer needed.
878 VG_(deleteXA)( candSyms );
881 /* If there's no UUID in the primary, don't even bother to try and
882 read any DWARF, since we won't be able to verify it matches.
883 Our policy is not to load debug info unless we can verify that
884 it matches the primary. Just declare success at this point.
885 And don't complain to the user, since that would cause us to
886 complain on objects compiled without -g. (Some versions of
887 XCode are observed to omit a UUID entry for object linked(?)
888 without -g. Others don't appear to omit it.) */
892 /* mmap the dSYM file to look for DWARF debug info. If successful,
893 use the .macho_img and .macho_img_szB in iid. */
895 dsymfilename = find_separate_debug_file( di->filename );
897 /* Try to load it. */
901 if (VG_(clo_verbosity) > 1)
902 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
904 ok = map_image_aboard( di, &iid, dsymfilename );
907 /* check it has the right uuid. */
908 vg_assert(have_uuid);
// The dSYM is accepted only if the UUID bytes occur somewhere in its image.
909 valid = iid.macho_img && iid.macho_img_szB > 0
910 && check_uuid_matches( (Addr)iid.macho_img,
911 iid.macho_img_szB, uuid );
915 if (VG_(clo_verbosity) > 1)
916 VG_(message)(Vg_DebugMsg, " dSYM does not have "
917 "correct UUID (out of date?)\n");
920 /* There was no dsym file, or it doesn't match. We'll have to try
921 regenerating it, unless --dsymutil=no, in which case just complain
924 /* If this looks like a lib that we shouldn't run dsymutil on, just
925 give up. (possible reasons: is system lib, or in /usr etc, or
926 the dsym dir would not be writable by the user, or we're running
928 vg_assert(di->filename);
929 if (is_systemish_library_name(di->filename))
// With dsymutil disabled, only report the problem at suitable verbosity.
932 if (!VG_(clo_dsymutil)) {
933 if (VG_(clo_verbosity) == 1) {
934 VG_(message)(Vg_DebugMsg, "%s:\n", di->filename);
936 if (VG_(clo_verbosity) > 0)
937 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
939 VG_(clo_verbosity) > 1 ? " " : "",
940 dsymfilename ? "has wrong UUID" : "is missing");
// Build the dsymutil command line by plain concatenation and run it.
// NOTE(review): the file name is appended unquoted; if VG_(system) hands the
// string to a shell, a path with spaces or shell metacharacters would be
// misparsed -- consider quoting.
947 HChar* dsymutil = "/usr/bin/dsymutil ";
948 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
949 VG_(strlen)(dsymutil)
950 + VG_(strlen)(di->filename)
952 VG_(strcpy)(cmd, dsymutil);
953 if (0) VG_(strcat)(cmd, "--verbose ");
954 VG_(strcat)(cmd, di->filename);
955 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
956 r = VG_(system)( cmd );
958 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
959 ML_(dinfo_free)(cmd);
// Search again now that dsymutil has had a chance to create the dSYM.
960 dsymfilename = find_separate_debug_file(di->filename);
963 /* Try again to load it. */
967 if (VG_(clo_verbosity) > 1)
968 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
970 ok = map_image_aboard( di, &iid, dsymfilename );
973 /* check it has the right uuid. */
974 vg_assert(have_uuid);
975 valid = iid.macho_img && iid.macho_img_szB > 0
976 && check_uuid_matches( (Addr)iid.macho_img,
977 iid.macho_img_szB, uuid );
979 if (VG_(clo_verbosity) > 0) {
980 VG_(message)(Vg_DebugMsg,
981 "WARNING: did not find expected UUID %02X%02X%02X%02X"
982 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
984 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
985 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
986 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
987 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
988 (UInt)uuid[14], (UInt)uuid[15] );
989 VG_(message)(Vg_DebugMsg,
990 "WARNING: for %s\n", di->filename);
993 /* unmap_image zeroes the fields, so the following test makes
999 /* Right. Finally we have our best try at the dwarf image, so go
1000 on to reading stuff out of it. */
1003 if (iid.macho_img && iid.macho_img_szB > 0) {
1004 UChar* debug_info_img = NULL;
1006 UChar* debug_abbv_img;
1008 UChar* debug_line_img;
1010 UChar* debug_str_img;
1012 UChar* debug_ranges_img;
1013 Word debug_ranges_sz;
1014 UChar* debug_loc_img;
1016 UChar* debug_name_img;
// Look up each DWARF section of interest in the __DWARF segment of the dSYM.
1020 getsectdata(iid.macho_img, iid.macho_img_szB,
1021 "__DWARF", "__debug_info", &debug_info_sz);
1023 getsectdata(iid.macho_img, iid.macho_img_szB,
1024 "__DWARF", "__debug_abbrev", &debug_abbv_sz);
1026 getsectdata(iid.macho_img, iid.macho_img_szB,
1027 "__DWARF", "__debug_line", &debug_line_sz);
1029 getsectdata(iid.macho_img, iid.macho_img_szB,
1030 "__DWARF", "__debug_str", &debug_str_sz);
1032 getsectdata(iid.macho_img, iid.macho_img_szB,
1033 "__DWARF", "__debug_ranges", &debug_ranges_sz);
1035 getsectdata(iid.macho_img, iid.macho_img_szB,
1036 "__DWARF", "__debug_loc", &debug_loc_sz);
1038 getsectdata(iid.macho_img, iid.macho_img_szB,
1039 "__DWARF", "__debug_pubnames", &debug_name_sz);
// Nothing is read unless the debug_info section was found.
1041 if (debug_info_img) {
1042 if (VG_(clo_verbosity) > 1) {
1044 VG_(message)(Vg_DebugMsg,
1045 "Reading dwarf3 for %s (%#lx) from %s"
1046 " (%ld %ld %ld %ld %ld %ld)\n",
1047 di->filename, di->text_avma, dsymfilename,
1048 debug_info_sz, debug_abbv_sz, debug_line_sz,
1049 debug_str_sz, debug_ranges_sz, debug_loc_sz
1051 VG_(message)(Vg_DebugMsg,
1052 " reading dwarf3 from dsyms file\n");
1054 /* The old reader: line numbers and unwind info only */
1055 ML_(read_debuginfo_dwarf3) ( di,
1056 debug_info_img, debug_info_sz,
1057 debug_abbv_img, debug_abbv_sz,
1058 debug_line_img, debug_line_sz,
1059 debug_str_img, debug_str_sz );
1061 /* The new reader: read the DIEs in .debug_info to acquire
1062 information on variable types and locations. But only if
1063 the tool asks for it, or the user requests it on the
1065 if (VG_(needs).var_info /* the tool requires it */
1066 || VG_(clo_read_var_info) /* the user asked for it */) {
1067 ML_(new_dwarf3_reader)(
1068 di, debug_info_img, debug_info_sz,
1069 debug_abbv_img, debug_abbv_sz,
1070 debug_line_img, debug_line_sz,
1071 debug_str_img, debug_str_sz,
1072 debug_ranges_img, debug_ranges_sz,
1073 debug_loc_img, debug_loc_sz
// The dSYM path buffer came from find_separate_debug_file; release it.
1079 if (dsymfilename) ML_(dinfo_free)(dsymfilename);
// Message reported on the failure path.
1091 ML_(symerr)(di, True, "Error reading Mach-O object.");
1099 #endif // defined(VGO_darwin)
1101 /*--------------------------------------------------------------------*/
1103 /*--------------------------------------------------------------------*/