2 /*--------------------------------------------------------------------*/
3 /*--- Wrappers for generic (non-AIX5!) Unix system calls ---*/
4 /*--- syswrap-generic.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2000-2010 Julian Seward
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
29 The GNU General Public License is contained in the file COPYING.
32 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_l4re)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h" // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h" // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h" // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_tool_gdbserver.h" // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h" // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
64 #include "priv_types_n_macros.h"
65 #include "priv_syswrap-generic.h"
/* Returns True iff the address range is something the client can
   plausibly mess with: all of it either already belongs to the
   client or is free or a reservation.  On failure, optionally warns
   the user (when syscallname is non-NULL) and dumps a stack trace.
   NOTE(review): this extract has lost the function's braces, the
   declaration of 'ret', the debug-trace guard, and the final
   'return ret;' -- restore against the full source. */
Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
                            const Char *syscallname)
   /* Ask aspacem whether the whole range is client-owned, free, or a
      reservation (PROT_NONE == no permission requirement). */
   ret = VG_(am_is_valid_for_client_or_free_or_resvn)
      (start,size,VKI_PROT_NONE);

   /* debug trace of the test and its outcome */
   VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
               syscallname, start, start+size-1, (Int)ret);

   /* Range is bad and we have a syscall name: warn the user. */
   if (!ret && syscallname != NULL) {
      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
                               "to modify addresses %#lx-%#lx\n",
                               syscallname, start, start+size-1);
      if (VG_(clo_verbosity) > 1) {
         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
102 Bool ML_(client_signal_OK)(Int sigNo)
104 /* signal 0 is OK for kill */
105 Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
107 //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
113 /* Handy small function to help stop wrappers from segfaulting when
114 presented with bogus client addresses. Is not used for generating
115 user-visible errors. */
117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
119 return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
123 /* ---------------------------------------------------------------------
125 ------------------------------------------------------------------ */
127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
128 munmap, mprotect (and mremap??) work at the page level. So addresses
129 and lengths must be adjusted for this. */
131 /* Mash around start and length so that the area exactly covers
132 an integral number of pages. If we don't do that, memcheck's
133 idea of addressible memory diverges from that of the
134 kernel's, which causes the leak detector to crash. */
136 void page_align_addr_and_len( Addr* a, SizeT* len)
140 ra = VG_PGROUNDDN(*a);
141 *len = VG_PGROUNDUP(*a + *len) - ra;
145 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
146 UInt flags, Int fd, Off64T offset)
150 /* 'a' is the return value from a real kernel mmap, hence: */
151 vg_assert(VG_IS_PAGE_ALIGNED(a));
152 /* whereas len is whatever the syscall supplied. So: */
153 len = VG_PGROUNDUP(len);
155 d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
158 VG_(discard_translations)( (Addr64)a, (ULong)len,
159 "notify_core_of_mmap" );
162 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
166 /* 'a' is the return value from a real kernel mmap, hence: */
167 vg_assert(VG_IS_PAGE_ALIGNED(a));
168 /* whereas len is whatever the syscall supplied. So: */
169 len = VG_PGROUNDUP(len);
171 rr = toBool(prot & VKI_PROT_READ);
172 ww = toBool(prot & VKI_PROT_WRITE);
173 xx = toBool(prot & VKI_PROT_EXEC);
175 VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
179 /* When a client mmap has been successfully done, this function must
180 be called. It notifies both aspacem and the tool of the new
183 JRS 2008-Aug-14: But notice this is *very* obscure. The only place
184 it is called from is POST(sys_io_setup). In particular,
185 ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
186 client mmap. But it doesn't call this function; instead it does the
187 relevant notifications itself. Here, we just pass di_handle=0 to
188 notify_tool_of_mmap as we have no better information. But really this
189 function should be done away with; problem is I don't understand what
190 POST(sys_io_setup) does or how it works.
192 [However, this function is used lots for Darwin, because
193 ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
196 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
197 UInt flags, Int fd, Off64T offset )
199 // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
200 // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
202 notify_core_of_mmap(a, len, prot, flags, fd, offset);
203 notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
207 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
211 page_align_addr_and_len(&a, &len);
212 d = VG_(am_notify_munmap)(a, len);
213 VG_TRACK( die_mem_munmap, a, len );
214 VG_(di_notify_munmap)( a, len );
216 VG_(discard_translations)( (Addr64)a, (ULong)len,
217 "ML_(notify_core_and_tool_of_munmap)" );
221 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
223 Bool rr = toBool(prot & VKI_PROT_READ);
224 Bool ww = toBool(prot & VKI_PROT_WRITE);
225 Bool xx = toBool(prot & VKI_PROT_EXEC);
228 page_align_addr_and_len(&a, &len);
229 d = VG_(am_notify_mprotect)(a, len, prot);
230 VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
231 VG_(di_notify_mprotect)( a, len, prot );
233 VG_(discard_translations)( (Addr64)a, (ULong)len,
234 "ML_(notify_core_and_tool_of_mprotect)" );
/* Expand (or shrink) an existing mapping, potentially moving it at
   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
   NOTE(review): this extract has lost many structural lines of the
   original -- the '#if HAVE_MREMAP' matching the '#endif' below, the
   function's braces, the error-exit labels ('eINVAL:', 'eNOMEM:',
   'shrink_in_place:', 'same_in_place:') that the gotos target, the
   'goto eINVAL/eNOMEM' statements after most of the checks below, and
   the declarations of 'advised', 'ok' and 'd'.  Do not rebuild from
   this fragment alone; diff against the full source. */
SysRes do_mremap( Addr old_addr, SizeT old_len,
                  Addr new_addr, SizeT new_len,
                  UWord flags, ThreadId tid )
   /* local min() over the two length operands */
#  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)

   NSegment const* old_seg;

   Bool f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
   Bool f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);

   /* debug trace of the request and the segment picture */
   VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
               old_addr,old_len,new_addr,new_len,
               flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
               flags & VKI_MREMAP_FIXED ? "FIXED" : "");
   VG_(am_show_nsegments)(0, "do_remap: before");

   /* unknown flag bits -> EINVAL */
   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))

   /* old_addr must be page aligned (mremap(2) requirement) */
   if (!VG_IS_PAGE_ALIGNED(old_addr))

   /* lengths work at page granularity */
   old_len = VG_PGROUNDUP(old_len);
   new_len = VG_PGROUNDUP(new_len);

   /* kernel doesn't reject this, but we do. */

   /* reject wraparounds */
   if (old_addr + old_len < old_addr
       || new_addr + new_len < new_len)

   /* kernel rejects all fixed, no-move requests (which are
      meaningless). */
   if (f_fixed == True && f_maymove == False)

   /* Stay away from non-client areas. */
   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))

   /* In all remaining cases, if the old range does not fall within a
      single segment, fail. */
   old_seg = VG_(am_find_nsegment)( old_addr );
   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)

   /* invariants established by the checks above */
   vg_assert(old_len > 0);
   vg_assert(new_len > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));

   /* There are 3 remaining cases:

      * maymove == False

        new space has to be at old address, so:
        - shrink    -> unmap end
        - same size -> do nothing
        - grow      -> if can grow in-place, do so, else fail

      * maymove == True, fixed == False

        new space can be anywhere, so:
        - shrink    -> unmap end
        - same size -> do nothing
        - grow      -> if can grow in-place, do so, else
                       move to anywhere large enough, else fail

      * maymove == True, fixed == True

        new space must be at new address, so:

        - if new address is not page aligned, fail
        - if new address range overlaps old one, fail
        - if new address range cannot be allocated, fail
        - else move to new address range with new size
   */

   if (f_maymove == False) {
      /* new space has to be at old address */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_fail;

   if (f_maymove == True && f_fixed == False) {
      /* new space can be anywhere */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_move_anywhere_or_fail;

   if (f_maymove == True && f_fixed == True) {
      /* new space can only be at the new address */
      if (!VG_IS_PAGE_ALIGNED(new_addr))
      /* the new range must not overlap the old one */
      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
         /* VG_(am_get_advisory_client_simple) interprets zero to mean
            non-fixed, which is not what we want */
         advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
         if (!ok || advised != new_addr)
         ok = VG_(am_relocate_nooverlap_client)
                 ( &d, old_addr, old_len, new_addr, new_len );
         /* tell the tool: contents copied, any growth is new memory,
            and the old range dies */
         VG_TRACK( copy_mem_remap, old_addr, new_addr,
                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
         VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
         return VG_(mk_SysRes_Success)( new_addr );

   /* end of the 3 cases */
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_move_anywhere_or_fail:
   /* try growing it in-place */
   Addr needA = old_addr + old_len;
   SSizeT needL = new_len - old_len;

   vg_assert(needL > 0);
   /* VG_(am_get_advisory_client_simple) interprets zero to mean
      non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
      this-or-nothing) is too lenient, and may allow us to trash
      the next segment along.  So make very sure that the proposed
      new area really is free.  This is perhaps overly
      conservative, but it fixes #129866. */
   NSegment const* segLo = VG_(am_find_nsegment)( needA );
   NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
   if (segLo == NULL || segHi == NULL
       || segLo != segHi || segLo->kind != SkFree)

   if (ok && advised == needA) {
      ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
      VG_TRACK( new_mem_mmap, needA, needL,
                old_seg->hasW, old_seg->hasX,
      VG_(discard_translations)( needA, needL, "do_remap(3)" );
      return VG_(mk_SysRes_Success)( old_addr );

   /* that failed.  Look elsewhere. */
   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
   Bool oldR = old_seg->hasR;
   Bool oldW = old_seg->hasW;
   Bool oldX = old_seg->hasX;
   /* assert new area does not overlap old */
   vg_assert(advised+new_len-1 < old_addr
             || advised > old_addr+old_len-1);
   ok = VG_(am_relocate_nooverlap_client)
           ( &d, old_addr, old_len, advised, new_len );
   VG_TRACK( copy_mem_remap, old_addr, advised,
             MIN_SIZET(old_len,new_len) );
   if (new_len > old_len)
      VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
                oldR, oldW, oldX, 0/*di_handle*/ );
   VG_TRACK(die_mem_munmap, old_addr, old_len);
   VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
   VG_(discard_translations)( advised, new_len, "do_remap(5)" );
   return VG_(mk_SysRes_Success)( advised );

   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_fail:
   Addr  needA = old_addr + old_len;
   SizeT needL = new_len - old_len;

   /* VG_(am_get_advisory_client_simple) interprets zero to mean
      non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
      this-or-nothing) is too lenient, and may allow us to trash
      the next segment along.  So make very sure that the proposed
      new area really is free. */
   NSegment const* segLo = VG_(am_find_nsegment)( needA );
   NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
   if (segLo == NULL || segHi == NULL
       || segLo != segHi || segLo->kind != SkFree)

   if (!ok || advised != needA)
   ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
   VG_TRACK( new_mem_mmap, needA, needL,
             old_seg->hasR, old_seg->hasW, old_seg->hasX,
   VG_(discard_translations)( needA, needL, "do_remap(6)" );
   return VG_(mk_SysRes_Success)( old_addr );

   /*NOTREACHED*/ vg_assert(0);

   /* NOTE(review): the 'shrink_in_place:' label preceding this unmap
      of the tail is missing from the extract. */
   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
   if (sr_isError(sres))
   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
   VG_(discard_translations)( old_addr+new_len, old_len-new_len,
   return VG_(mk_SysRes_Success)( old_addr );

   /*NOTREACHED*/ vg_assert(0);

   /* same-size case: nothing to do */
   return VG_(mk_SysRes_Success)( old_addr );
   /*NOTREACHED*/ vg_assert(0);

   /* error exits (labels lost in extraction) */
   return VG_(mk_SysRes_Error)( VKI_EINVAL );
   return VG_(mk_SysRes_Error)( VKI_ENOMEM );

#endif /* HAVE_MREMAP */
519 /* ---------------------------------------------------------------------
520 File-descriptor tracking
521 ------------------------------------------------------------------ */
/* One of these is allocated for each open file descriptor.
   NOTE(review): the struct's braces and the trailing '} OpenFd;' were
   lost in this extract -- the fields below belong inside
   'typedef struct OpenFd { ... } OpenFd;'. */
typedef struct OpenFd
Int fd;                        /* The file descriptor */
Char *pathname;                /* NULL if not a regular file or unknown */
ExeContext *where;             /* NULL if inherited from parent */
struct OpenFd *next, *prev;    /* doubly-linked list links */

/* List of allocated file descriptors. */
static OpenFd *allocated_fds = NULL;

/* Count of open file descriptors. */
static Int fd_count = 0;
/* Note the fact that a file descriptor was just closed: unlink its
   OpenFd record from 'allocated_fds' and free it.
   NOTE(review): the list-walk loop ('while(i) { if (i->fd == fd) ...')
   and the fd_count decrement are missing from this extract; the
   unlink statements below only make sense inside that loop. */
void record_fd_close(Int fd)
   OpenFd *i = allocated_fds;

   /* fds at or above the hard limit belong to Valgrind itself. */
   if (fd >= VG_(fd_hard_limit))
      return; /* Valgrind internal */

   /* unlink 'i' from the doubly-linked list, then free it */
   i->prev->next = i->next;
   allocated_fds = i->next;
   i->next->prev = i->prev;
   VG_(arena_free) (VG_AR_CORE, i->pathname);
   VG_(arena_free) (VG_AR_CORE, i);
/* Note the fact that a file descriptor was just opened.  If the
   tid is -1, this indicates an inherited fd.  If the pathname is NULL,
   this either indicates a non-standard file (i.e. a pipe or socket or
   some such thing) or that we don't know the filename.  If the fd is
   already open, then we're probably doing a dup2() to an existing fd,
   so just overwrite the existing one.
   NOTE(review): the search loop over allocated_fds, the fd_count
   increment and several braces are missing from this extract. */
void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
   if (fd >= VG_(fd_hard_limit))
      return; /* Valgrind internal */

   /* Check to see if this fd is already open (dup2 case): release the
      old pathname before overwriting below. */
   if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);

   /* Not already one: allocate an OpenFd */
   i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
   /* link the new record at the head of the list */
   i->next = allocated_fds;
   if(allocated_fds) allocated_fds->prev = i;

   /* duplicate the pathname (NULL handling presumably occurs on a
      dropped line -- verify against full source) and record where the
      open happened, unless the fd was inherited (tid == -1). */
   i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
   i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
// Record opening of an fd, and find its name via the kernel's
// fd-to-path resolution.
// NOTE(review): the declaration of 'name' and the branch that leaves
// it NULL when resolution fails are missing from this extract.
void ML_(record_fd_open_named)(ThreadId tid, Int fd)
   static HChar buf[VKI_PATH_MAX];

   /* If the kernel can tell us the path, use it; otherwise the fd is
      recorded nameless. */
   if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
   ML_(record_fd_open_with_given_name)(tid, fd, name);
618 // Record opening of a nameless fd.
619 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
621 ML_(record_fd_open_with_given_name)(tid, fd, NULL);
625 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
627 if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
628 VG_(sprintf)(name, "<unknown>");
630 VG_(sprintf)(name, "%s", sa->sun_path);
637 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
639 if (sa == NULL || len == 0) {
640 VG_(sprintf)(name, "<unknown>");
642 UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
644 VG_(sprintf)(name, "<unbound>");
646 VG_(sprintf)(name, "%u.%u.%u.%u:%u",
647 (addr>>24) & 0xFF, (addr>>16) & 0xFF,
648 (addr>>8) & 0xFF, addr & 0xFF,
649 VG_(ntohs)(sa->sin_port));
/*
 * Try get some details about a socket: query its local (and, for
 * AF_INET, peer) address and print a one-line description.
 * NOTE(review): this extract has lost the 'static void' specifiers,
 * the union wrapping the three sockaddr variants ('laddr'), the
 * 'UInt llen;' declaration, the switch's case labels
 * (AF_INET/AF_UNIX/default) and their break statements, and most
 * closing braces.
 */
getsockdetails(Int fd)
   /* these three appear to be members of a 'union u { ... } laddr;' */
   struct vki_sockaddr a;
   struct vki_sockaddr_in in;
   struct vki_sockaddr_un un;

   llen = sizeof(laddr);
   VG_(memset)(&laddr, 0, llen);

   /* Ask the kernel for the socket's local address. */
   if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
      switch(laddr.a.sa_family) {
         /* AF_INET case: also fetch the peer, if connected */
         static char lname[32];
         static char pname[32];
         struct vki_sockaddr_in paddr;
         UInt plen = sizeof(struct vki_sockaddr_in);

         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
                         inet2name(&(laddr.in), llen, lname),
                         inet2name(&paddr, plen, pname));
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
                         fd, inet2name(&(laddr.in), llen, lname));

         /* AF_UNIX case */
         static char lname[256];
         VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
                      unix2name(&(laddr.un), llen, lname));

         /* default: unknown protocol family */
         VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
                      laddr.a.sa_family, fd);

   /* getsockname failed: print the bare fd */
   VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
/* Dump out a summary, and a more detailed list, of open file descriptors.
   NOTE(review): the 'while (i) { ... i = i->next; }' walk over
   allocated_fds, the 'Int val;' declaration, and several branch/brace
   lines are missing from this extract. */
void VG_(show_open_fds) (void)
   OpenFd *i = allocated_fds;

   VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);

   /* named fd: print its pathname */
   VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
   /* nameless fd: probe whether it is a socket */
   UInt len = sizeof(val);

   if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
      /* not a socket */
      VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
      /* a socket: describe its endpoints */
      getsockdetails(i->fd);

   /* show where the fd was created, or note inheritance */
   VG_(pp_ExeContext)(i->where);
   VG_(message)(Vg_UserMsg, "\n");
   VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
   VG_(message)(Vg_UserMsg, "\n");

   VG_(message)(Vg_UserMsg, "\n");
744 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
745 have /proc support compiled in, or a non-Linux kernel), then we need to
746 find out what file descriptors we inherited from our parent process the
747 hard way - by checking each fd in turn. */
749 void init_preopened_fds_without_proc_self_fd(void)
751 struct vki_rlimit lim;
755 if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
756 /* Hmm. getrlimit() failed. Now we're screwed, so just choose
757 an arbitrarily high number. 1024 happens to be the limit in
758 the 2.4 Linux kernels. */
761 count = lim.rlim_cur;
764 for (i = 0; i < count; i++)
765 if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
766 ML_(record_fd_open_named)(-1, i);
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process.
   NOTE(review): this extract has lost the declarations of 'f'
   (SysRes), 'ret' and the dirent buffer 'd', plus several braces and
   the '#endif' closing the OS conditional.  The directory-walk logic
   should be diffed against the full source. */
void VG_(init_preopened_fds)(void)
   // Nb: AIX5 is handled in syswrap-aix5.c.
   // DDD: should probably use HAVE_PROC here or similar, instead.
#if defined(VGO_linux)
   /* Prefer enumerating /proc/self/fd; fall back to brute force. */
   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
   init_preopened_fds_without_proc_self_fd();

   /* Walk the directory entries one at a time. */
   while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
      /* skip "." and ".." */
      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
         Int fno = VG_(strtoll10)(d.d_name, &s);
         /* don't record the directory fd we are using for the scan */
         if (fno != sr_Res(f))
            if (VG_(clo_track_fds))
               ML_(record_fd_open_named)(-1, fno);
         VG_(message)(Vg_DebugMsg,
            "Warning: invalid file name in /proc/self/fd: %s\n",

      /* step to the next dirent */
      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);

   VG_(close)(sr_Res(f));

#elif defined(VGO_darwin)
   init_preopened_fds_without_proc_self_fd();

#elif defined(VGO_l4re)
   VG_(unimplemented)((char*)__func__);
823 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
825 UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
826 Char *result = VG_(arena_malloc) ( aid, cc, len );
827 VG_(strcpy) ( result, s1 );
828 VG_(strcat) ( result, s2 );
833 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
834 Char *msg, Addr base, SizeT size )
836 Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
837 "socketcall.sendmsg", msg, VG_AR_CORE );
838 PRE_MEM_READ( outmsg, base, size );
839 VG_(arena_free) ( VG_AR_CORE, outmsg );
843 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
844 Char *msg, Addr base, SizeT size )
846 Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
847 "socketcall.recvmsg", msg, VG_AR_CORE );
849 PRE_MEM_READ( outmsg, base, size );
851 PRE_MEM_WRITE( outmsg, base, size );
852 VG_(arena_free) ( VG_AR_CORE, outmsg );
856 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
857 Char *fieldName, Addr base, SizeT size )
860 POST_MEM_WRITE( base, size );
/* Apply foreach_func to each memory region referenced by a msghdr:
   the msghdr's own fields (as reads of the structure), then the name
   buffer, the iovec array and each iovec's payload, and finally the
   control (ancillary) buffer.
   NOTE(review): the extract has lost the 'ThreadId tid' parameter
   line, the NULL-msg early return, the 'UInt i;' loop counter
   declaration and several braces. */
void msghdr_foreachfield (
   struct vki_msghdr *msg,
   void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
   /* the msghdr structure's own fields are read by the kernel */
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
   foreach_func ( tid, False, "(msg)", (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   /* the name buffer, if present */
   foreach_func ( tid, False,
                  (Addr)msg->msg_name, msg->msg_namelen );

   if ( msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;

      /* the iovec array itself is read ... */
      foreach_func ( tid, True,
                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );

      /* ... and each iovec's payload is data */
      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
         foreach_func ( tid, False,
                        (Addr)iov->iov_base, iov->iov_len );

   /* the ancillary-data buffer, if present */
   if ( msg->msg_control )
      foreach_func ( tid, False,
                     (Addr)msg->msg_control, msg->msg_controllen );
/* Scan a received msghdr's control data for SCM_RIGHTS messages
   (file descriptors passed over a unix-domain socket) and record any
   fds found as newly opened.
   NOTE(review): the 'while (cm) {' loop header, the 'Int i;'
   declaration and closing braces were lost in this extract; the
   NXTHDR advance at the bottom belongs to that loop. */
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
   struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);

   if (cm->cmsg_level == VKI_SOL_SOCKET &&
       cm->cmsg_type == VKI_SCM_RIGHTS ) {
      Int *fds = (Int *) VKI_CMSG_DATA(cm);
      /* number of fds = payload bytes / sizeof(int) (divisor lost in
         extraction) */
      Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))

      for (i = 0; i < fdc; i++)
         if(VG_(clo_track_fds))
            // XXX: must we check the range on these fds with
            //      ML_(fd_allowed)()?
            ML_(record_fd_open_named)(tid, fds[i]);

   /* advance to the next control message */
   cm = VKI_CMSG_NXTHDR(msg, cm);
/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest.
   Reports each family-specific field of the sockaddr as read, using
   'description' as a printf template for the field name.
   NOTE(review): the extract has lost the 'Char *description'
   parameter line (it is used below), the 'Char *outmsg;' declaration,
   the switch's case labels (AF_UNIX / AF_INET / AF_INET6 / default)
   and their break statements, and closing braces. */
void pre_mem_read_sockaddr ( ThreadId tid,
                             struct vki_sockaddr *sa, UInt salen )
   /* family-specific views of the same address */
   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;

   /* NULL/zero-length sockaddrs are legal */
   if ( sa == NULL || salen == 0 ) return;

   /* scratch buffer for the formatted field description */
   outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
                                VG_(strlen)( description ) + 30 );

   /* every family has sa_family */
   VG_(sprintf) ( outmsg, description, "sa_family" );
   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));

   switch (sa->sa_family) {

      /* AF_UNIX */
      VG_(sprintf) ( outmsg, description, "sun_path" );
      PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
      // GrP fixme max of sun_len-2? what about nul char?

      /* AF_INET */
      VG_(sprintf) ( outmsg, description, "sin_port" );
      PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
      VG_(sprintf) ( outmsg, description, "sin_addr" );
      PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );

      /* AF_INET6 */
      VG_(sprintf) ( outmsg, description, "sin6_port" );
      PRE_MEM_READ( outmsg,
         (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
      VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
      PRE_MEM_READ( outmsg,
         (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
      VG_(sprintf) ( outmsg, description, "sin6_addr" );
      PRE_MEM_READ( outmsg,
         (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
      VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
      PRE_MEM_READ( outmsg,
         (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );

      /* default: unknown family -- check the whole thing */
      VG_(sprintf) ( outmsg, description, "" );
      PRE_MEM_READ( outmsg, (Addr) sa, salen );

   VG_(arena_free) ( VG_AR_CORE, outmsg );
988 /* Dereference a pointer to a UInt. */
989 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
991 UInt* a_p = (UInt*)a;
992 PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
999 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1000 Char* buf_s, Char* buflen_s )
1002 if (VG_(tdict).track_pre_mem_write) {
1003 UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1004 if (buflen_in > 0) {
1005 VG_(tdict).track_pre_mem_write(
1006 Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1011 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1012 Addr buf_p, Addr buflen_p, Char* s )
1014 if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1015 UInt buflen_out = deref_UInt( tid, buflen_p, s);
1016 if (buflen_out > 0 && buf_p != (Addr)NULL) {
1017 VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1022 /* ---------------------------------------------------------------------
1023 Data seg end, for brk()
1024 ------------------------------------------------------------------ */
1026 /* +--------+------------+
1028 +--------+------------+
1031 | | boundary is page aligned
1032 | VG_(brk_limit) -- no alignment constraint
1033 VG_(brk_base) -- page aligned -- does not move
1035 Both the anon part and the reservation part are always at least
/* Set the new data segment end to NEWBRK.  If this succeeds, return
   NEWBRK, else return the current data segment end.
   NOTE(review): this extract has lost the function's braces, the
   declarations of 'newbrkP', 'delta', 'ok' and 'aseg'/'rseg' NULL
   checks, the 'bad:' error label the failure paths use, and several
   closing braces.  Diff against the full source before relying on the
   control flow shown here. */
static Addr do_brk ( Addr newbrk )
   NSegment const* aseg;
   NSegment const* rseg;

   /* debug trace */
   VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
               VG_(brk_base), VG_(brk_limit), newbrk);
   if (0) show_segments("in_brk");

   if (newbrk < VG_(brk_base))
      /* Clearly impossible. */

   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
      /* shrinking the data segment.  Be lazy and don't munmap the
         freed-up area; just note the new limit. */
      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
      if (seg && seg->hasT)
         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
      /* Since we're being lazy and not unmapping pages, we have to
         zero out the area, so that if the area later comes back into
         circulation, it will be filled with zeroes, as if it really
         had been unmapped and later remapped.  Be a bit paranoid and
         try hard to ensure we're not going to segfault by doing the
         write - check both ends of the range are in the same segment
         and that segment is writable. */

      /* pre: newbrk < VG_(brk_limit)
         => newbrk <= VG_(brk_limit)-1 */
      NSegment const * seg2;
      vg_assert(newbrk < VG_(brk_limit));
      seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
      if (seg2 && seg == seg2 && seg->hasW)
         VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );

      VG_(brk_limit) = newbrk;

   /* otherwise we're expanding the brk segment. */
   if (VG_(brk_limit) > VG_(brk_base))
      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );

   /* These should be assured by setup_client_dataseg in m_main. */
   vg_assert(aseg->kind == SkAnonC);
   vg_assert(rseg->kind == SkResvn);
   vg_assert(aseg->end+1 == rseg->start);

   vg_assert(newbrk >= VG_(brk_base));
   if (newbrk <= rseg->start) {
      /* still fits within the anon segment. */
      VG_(brk_limit) = newbrk;

   if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
      /* request is too large -- the resvn would fall below 1 page,
         which isn't allowed. */

   /* grow the anon segment into the reservation, page by page */
   newbrkP = VG_PGROUNDUP(newbrk);
   vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   delta = newbrkP - rseg->start;
   vg_assert(delta > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(delta));

   ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );

   VG_(brk_limit) = newbrk;

   /* failure: return the unchanged current limit */
   return VG_(brk_limit);
1135 /* ---------------------------------------------------------------------
1136 Vet file descriptors for sanity
1137 ------------------------------------------------------------------ */
1139 > - what does the "Bool soft" parameter mean?
1141 (Tom Hughes, 3 Oct 05):
1143 Whether or not to consider a file descriptor invalid if it is above
1144 the current soft limit.
1146 Basically if we are testing whether a newly created file descriptor is
1147 valid (in a post handler) then we set soft to true, and if we are
1148 testing whether a file descriptor that is about to be used (in a pre
1149 handler) is valid [viz, an already-existing fd] then we set it to false.
1151 The point is that if the (virtual) soft limit is lowered then any
1152 existing descriptors can still be read/written/closed etc (so long as
1153 they are below the valgrind reserved descriptors) but no new
1154 descriptors can be created above the new soft limit.
1156 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
/* Return true if we're allowed to use or create this fd.  See the
   long comment above (isNewFd == True means the soft limit also
   applies, because the fd is being freshly created rather than an
   existing one being used).
   NOTE(review): the extract has lost the function's braces, the
   'allowed = False;' statements after each failing check, the
   '#if 0 ... #endif' around the fd==2 debug-log check, the
   'fd, syscallname);' argument line of the first message, and the
   final 'return allowed;'. */
Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
   Bool allowed = True;

   /* hard limits always apply */
   if (fd < 0 || fd >= VG_(fd_hard_limit))

   /* hijacking the output fds is never allowed */
   if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)

   /* if creating a new fd (rather than using an existing one), the
      soft limit must also be observed */
   if (isNewFd && fd >= VG_(fd_soft_limit))

   /* this looks like it ought to be included, but causes problems: */
   if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   /* The difficulty is as follows: consider a program P which expects
      to be able to mess with (redirect) its own stderr (fd 2).
      Usually to deal with P we would issue command line flags to send
      logging somewhere other than stderr, so as not to disrupt P.
      The problem is that -d unilaterally hijacks stderr with no
      consultation with P.  And so, if this check is enabled, P will
      work OK normally but fail if -d is issued.

      Basically -d is a hack and you take your chances when using it.
      It's very useful for low level debugging -- particularly at
      startup -- and having its presence change the behaviour of the
      client is exactly what we don't want. */

   /* on failure, explain the problem to the user */
   if ((!allowed) && VG_(showing_core_errors)() ) {
      VG_(message)(Vg_UserMsg,
         "Warning: invalid file descriptor %d in syscall %s()\n",
      if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
         VG_(message)(Vg_UserMsg,
            "   Use --log-fd=<number> to select an alternative log fd.\n");
      if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
         VG_(message)(Vg_UserMsg,
            "   Use --xml-fd=<number> to select an alternative XML "
      // DDD: consider always printing this stack trace, it's useful.
      // Also consider also making this a proper core error, ie.
      // suppressible and all that.
      if (VG_(clo_verbosity) > 1) {
         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1219 /* ---------------------------------------------------------------------
1220 Deal with a bunch of socket-related syscalls
1221 ------------------------------------------------------------------ */
/* PRE: socketpair.  arg3 is the client's 'int sv[2]' result array;
   mark it as about-to-be-written so the tool can check addressability. */
1226 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1227 UWord arg0, UWord arg1,
1228 UWord arg2, UWord arg3 )
1230 /* int socketpair(int d, int type, int protocol, int sv[2]); */
1231 PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1232 arg3, 2*sizeof(int) );
/* POST: socketpair.  Caller guarantees the syscall succeeded.  Pull the
   two new fds out of sv[], veto them (returning EMFILE to the client)
   if ML_(fd_allowed) rejects either, otherwise mark sv[] defined and
   record the fds when --track-fds is in effect. */
1236 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1238 UWord arg0, UWord arg1,
1239 UWord arg2, UWord arg3 )
1242 Int fd1 = ((Int*)arg3)[0];
1243 Int fd2 = ((Int*)arg3)[1];
1244 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1245 POST_MEM_WRITE( arg3, 2*sizeof(int) );
1246 if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1247 !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
/* Disallowed: fake an EMFILE failure back to the client. */
1250 r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1252 POST_MEM_WRITE( arg3, 2*sizeof(int) );
1253 if (VG_(clo_track_fds)) {
1254 ML_(record_fd_open_nameless)(tid, fd1);
1255 ML_(record_fd_open_nameless)(tid, fd2);
/* POST: socket.  Caller guarantees success.  If the new fd is not
   allowed, close it and return EMFILE to the client; otherwise record
   it when --track-fds is in effect. */
1264 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1267 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1268 if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1269 VG_(close)(sr_Res(res));
1270 r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1272 if (VG_(clo_track_fds))
1273 ML_(record_fd_open_nameless)(tid, sr_Res(res));
/* PRE: bind.  Check readability of the sockaddr at arg1 (length arg2);
   pre_mem_read_sockaddr knows the per-family layout. */
1281 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1282 UWord arg0, UWord arg1, UWord arg2 )
1284 /* int bind(int sockfd, struct sockaddr *my_addr,
1286 pre_mem_read_sockaddr(
1287 tid, "socketcall.bind(my_addr.%s)",
1288 (struct vki_sockaddr *) arg1, arg2
/* PRE: accept.  addr/addrlen form the usual buf-and-len output pair;
   addr may legitimately be NULL, in which case nothing is checked. */
1295 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1296 UWord arg0, UWord arg1, UWord arg2 )
1298 /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1300 Addr addrlen_p = arg2;
1301 if (addr_p != (Addr)NULL)
1302 ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1303 "socketcall.accept(addr)",
1304 "socketcall.accept(addrlen_in)" );
/* POST: accept.  Caller guarantees success.  Veto (close + EMFILE) a
   disallowed fd; otherwise finish the addr/addrlen post-check and
   record the new fd when --track-fds is in effect. */
1308 ML_(generic_POST_sys_accept) ( ThreadId tid,
1310 UWord arg0, UWord arg1, UWord arg2 )
1313 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1314 if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1315 VG_(close)(sr_Res(res));
1316 r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1319 Addr addrlen_p = arg2;
1320 if (addr_p != (Addr)NULL)
1321 ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1322 "socketcall.accept(addrlen_out)" );
1323 if (VG_(clo_track_fds))
1324 ML_(record_fd_open_nameless)(tid, sr_Res(res));
/* PRE: sendto.  Check readability of the message buffer (arg1) and of
   the destination sockaddr (arg4, length arg5). */
1332 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1333 UWord arg0, UWord arg1, UWord arg2,
1334 UWord arg3, UWord arg4, UWord arg5 )
1336 /* int sendto(int s, const void *msg, int len,
1338 const struct sockaddr *to, int tolen); */
1339 PRE_MEM_READ( "socketcall.sendto(msg)",
1342 pre_mem_read_sockaddr(
1343 tid, "socketcall.sendto(to.%s)",
1344 (struct vki_sockaddr *) arg4, arg5
/* PRE: send.  Only the message buffer needs checking. */
1351 ML_(generic_PRE_sys_send) ( ThreadId tid,
1352 UWord arg0, UWord arg1, UWord arg2 )
1354 /* int send(int s, const void *msg, size_t len, int flags); */
1355 PRE_MEM_READ( "socketcall.send(msg)",
/* PRE: recvfrom.  The receive buffer is write-checked; the optional
   from/fromlen pair gets the standard buf-and-len pre-check. */
1364 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1365 UWord arg0, UWord arg1, UWord arg2,
1366 UWord arg3, UWord arg4, UWord arg5 )
1368 /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1369 struct sockaddr *from, int *fromlen); */
1373 Addr fromlen_p = arg5;
1374 PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1375 if (from_p != (Addr)NULL)
1376 ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1377 "socketcall.recvfrom(from)",
1378 "socketcall.recvfrom(fromlen_in)" );
/* POST: recvfrom.  Caller guarantees success.  Finish the from/fromlen
   post-check (if from was non-NULL) and mark the receive buffer as
   written.  NOTE(review): the full buffer span is marked, which appears
   to be the long-standing behaviour here. */
1382 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1384 UWord arg0, UWord arg1, UWord arg2,
1385 UWord arg3, UWord arg4, UWord arg5 )
1390 Addr fromlen_p = arg5;
1392 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1393 if (from_p != (Addr)NULL)
1394 ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1395 "socketcall.recvfrom(fromlen_out)" );
1396 POST_MEM_WRITE( buf_p, len );
/* PRE: recv.  Semantically recvfrom with a NULL 'from'; only the
   receive buffer needs a write pre-check. */
1402 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1403 UWord arg0, UWord arg1, UWord arg2 )
1405 /* int recv(int s, void *buf, int len, unsigned int flags); */
1407 The recv call is normally used only on a connected socket
1408 (see connect(2)) and is identical to recvfrom with a NULL
1411 PRE_MEM_WRITE( "socketcall.recv(buf)",
/* POST: recv.  Mark the buffer written only on success (res >= 0) and
   when the buffer pointer is non-NULL. */
1417 ML_(generic_POST_sys_recv) ( ThreadId tid,
1419 UWord arg0, UWord arg1, UWord arg2 )
1421 if (res >= 0 && arg1 != 0) {
1422 POST_MEM_WRITE( arg1, /* buf */
/* PRE: connect.  Read-check the server sockaddr (arg1, length arg2). */
1430 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1431 UWord arg0, UWord arg1, UWord arg2 )
1433 /* int connect(int sockfd,
1434 struct sockaddr *serv_addr, int addrlen ); */
1435 pre_mem_read_sockaddr( tid,
1436 "socketcall.connect(serv_addr.%s)",
1437 (struct vki_sockaddr *) arg1, arg2);
/* PRE: setsockopt.  Read-check optval (arg3) for optlen (arg4) bytes. */
1443 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1444 UWord arg0, UWord arg1, UWord arg2,
1445 UWord arg3, UWord arg4 )
1447 /* int setsockopt(int s, int level, int optname,
1448 const void *optval, int optlen); */
1449 PRE_MEM_READ( "socketcall.setsockopt(optval)",
1451 arg4 /* optlen */ );
/* PRE: getsockname.  name/namelen are an output buf-and-len pair;
   unlike accept, name must not be NULL here. */
1457 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1458 UWord arg0, UWord arg1, UWord arg2 )
1460 /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1462 Addr namelen_p = arg2;
1463 /* Nb: name_p cannot be NULL */
1464 ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1465 "socketcall.getsockname(name)",
1466 "socketcall.getsockname(namelen_in)" );
/* POST: getsockname.  Caller guarantees success; finish the
   buf-and-len post-check for name/namelen. */
1470 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1472 UWord arg0, UWord arg1, UWord arg2 )
1475 Addr namelen_p = arg2;
1476 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1477 ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1478 "socketcall.getsockname(namelen_out)" );
/* PRE: getpeername.  Same shape as getsockname: mandatory name buffer
   plus in/out namelen, checked as a pair. */
1484 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1485 UWord arg0, UWord arg1, UWord arg2 )
1487 /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1489 Addr namelen_p = arg2;
1490 /* Nb: name_p cannot be NULL */
1491 ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1492 "socketcall.getpeername(name)",
1493 "socketcall.getpeername(namelen_in)" );
/* POST: getpeername.  Caller guarantees success; finish the
   buf-and-len post-check for name/namelen. */
1497 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1499 UWord arg0, UWord arg1, UWord arg2 )
1502 Addr namelen_p = arg2;
1503 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1504 ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1505 "socketcall.getpeername(namelen_out)" );
/* PRE: sendmsg.  Walk every field of the msghdr (name, iov, control)
   applying the read pre-check via msghdr_foreachfield. */
1511 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid,
1512 UWord arg0, UWord arg1 )
1514 /* int sendmsg(int s, const struct msghdr *msg, int flags); */
1515 struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1516 msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
/* PRE: recvmsg.  Walk every msghdr field applying the write pre-check. */
1522 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid,
1523 UWord arg0, UWord arg1 )
1525 /* int recvmsg(int s, struct msghdr *msg, int flags); */
1526 struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1527 msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
/* POST: recvmsg.  Mark msghdr fields written, then scan SCM_RIGHTS
   control messages for fds passed over the socket (check_cmsg_for_fds). */
1531 ML_(generic_POST_sys_recvmsg) ( ThreadId tid,
1532 UWord arg0, UWord arg1 )
1534 struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1535 msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
1536 check_cmsg_for_fds( tid, msg );
1540 /* ---------------------------------------------------------------------
1541 Deal with a bunch of IPC related syscalls
1542 ------------------------------------------------------------------ */
/* PRE: semop.  Read-check the sops array: arg2 (nsops) sembuf entries. */
1547 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1548 UWord arg0, UWord arg1, UWord arg2 )
1550 /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1551 PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
/* PRE: semtimedop.  Like semop, plus a read-check of the timespec at
   arg3 (which may be NULL in the kernel API; the elided line between
   1563 and 1565 presumably guards that -- TODO confirm). */
1557 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1558 UWord arg0, UWord arg1,
1559 UWord arg2, UWord arg3 )
1561 /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1562 struct timespec *timeout); */
1563 PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1565 PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
/* Helper: query the kernel for the number of semaphores in semaphore
   set 'semid', via semctl(IPC_STAT) -- either the direct __NR_semctl
   syscall or the multiplexed __NR_ipc route, depending on platform.
   Returns 0 on error (per the elided error path -- TODO confirm). */
1571 UInt get_sem_count( Int semid )
1573 #if defined(VGO_l4re)
1574 VG_(unimplemented)((char*)__func__);
1577 struct vki_semid_ds buf;
1578 union vki_semun arg;
1581 /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1582 (experimental) otherwise complains that the use in the return
1583 statement below is uninitialised. */
1589 res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1591 res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1592 VKI_IPC_STAT, (UWord)&arg);
1594 if (sr_isError(res))
1597 return buf.sem_nsems;
/* PRE: semctl.  The fourth argument is a union semun; reinterpret the
   raw arg3 word as such.  Dispatch on cmd: *_INFO and *_STAT commands
   write into arg.buf (size depends on whether IPC_64 is in force);
   IPC_SET reads from arg.buf; GETALL/SETALL access arg.array, whose
   length is the set's semaphore count obtained via get_sem_count(). */
1602 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1603 UWord arg0, UWord arg1,
1604 UWord arg2, UWord arg3 )
1606 /* int semctl(int semid, int semnum, int cmd, ...); */
1607 union vki_semun arg = *(union vki_semun *)&arg3;
1609 switch (arg2 /* cmd */) {
1610 #if defined(VKI_IPC_INFO)
1613 case VKI_IPC_INFO|VKI_IPC_64:
1614 case VKI_SEM_INFO|VKI_IPC_64:
1615 PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1616 (Addr)arg.buf, sizeof(struct vki_seminfo) );
1621 #if defined(VKI_SEM_STAT)
1624 PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1625 (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1628 #if defined(VKI_IPC_64)
1629 case VKI_IPC_STAT|VKI_IPC_64:
1630 #if defined(VKI_SEM_STAT)
1631 case VKI_SEM_STAT|VKI_IPC_64:
/* IPC_64 variants use the larger semid64_ds layout. */
1633 PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1634 (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1639 PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1640 (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1643 #if defined(VKI_IPC_64)
1644 case VKI_IPC_SET|VKI_IPC_64:
1645 PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1646 (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1651 #if defined(VKI_IPC_64)
1652 case VKI_GETALL|VKI_IPC_64:
1654 nsems = get_sem_count( arg0 );
1655 PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1656 (Addr)arg.array, sizeof(unsigned short) * nsems );
1660 #if defined(VKI_IPC_64)
1661 case VKI_SETALL|VKI_IPC_64:
1663 nsems = get_sem_count( arg0 );
1664 PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1665 (Addr)arg.array, sizeof(unsigned short) * nsems );
/* POST: semctl.  Mirror of the PRE handler: mark as written exactly the
   regions the kernel filled in for the output-producing commands
   (*_INFO, *_STAT, GETALL), using the IPC_64 layouts where applicable. */
1671 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1673 UWord arg0, UWord arg1,
1674 UWord arg2, UWord arg3 )
1676 union vki_semun arg = *(union vki_semun *)&arg3;
1678 switch (arg2 /* cmd */) {
1679 #if defined(VKI_IPC_INFO)
1682 case VKI_IPC_INFO|VKI_IPC_64:
1683 case VKI_SEM_INFO|VKI_IPC_64:
1684 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1689 #if defined(VKI_SEM_STAT)
1692 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1695 #if defined(VKI_IPC_64)
1696 case VKI_IPC_STAT|VKI_IPC_64:
1697 case VKI_SEM_STAT|VKI_IPC_64:
1698 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1703 #if defined(VKI_IPC_64)
1704 case VKI_GETALL|VKI_IPC_64:
1706 nsems = get_sem_count( arg0 );
1707 POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
/* Helper: query the kernel for the size in bytes of shared-memory
   segment 'shmid' via shmctl(IPC_STAT).  Platform maze: amd64-linux
   uses plain IPC_STAT (see bug 222545 comment 7), other IPC_64
   platforms use IPC_STAT|IPC_64 with shmid64_ds, pre-IPC_64 platforms
   use shmid_ds, and non-__NR_shmctl platforms go through __NR_ipc.
   Returns buf.shm_segsz on success (0 on error, per the elided
   error path -- TODO confirm). */
1717 UInt get_shm_size ( Int shmid )
1719 #if defined (VGO_l4re)
1720 VG_(unimplemented)((char*)__func__);
1725 struct vki_shmid64_ds buf;
1726 # ifdef VGP_amd64_linux
1727 /* See bug 222545 comment 7 */
1728 SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1729 VKI_IPC_STAT, (UWord)&buf);
1731 SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1732 VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1734 # else /* !def VKI_IPC_64 */
1735 struct vki_shmid_ds buf;
1736 SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1737 # endif /* def VKI_IPC_64 */
1739 struct vki_shmid_ds buf;
1740 SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1741 VKI_IPC_STAT, 0, (UWord)&buf);
1743 if (sr_isError(__res))
1746 return buf.shm_segsz;
/* PRE: shmat.  Look up the segment size, then (when the client passed
   no address) ask aspacem for a placement.  On arm-linux the advisory
   is only page-aligned, but shmat requires SHMLBA alignment, so the
   request is padded by SHMLBA-PAGE_SIZE and the result rounded up to
   the next SHMLBA boundary.  A client-supplied address is validated
   with ML_(valid_client_addr). */
1751 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1752 UWord arg0, UWord arg1, UWord arg2 )
1754 /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1755 UInt segmentSize = get_shm_size ( arg0 );
1759 /* arm-linux only: work around the fact that
1760 VG_(am_get_advisory_client_simple) produces something that is
1761 VKI_PAGE_SIZE aligned, whereas what we want is something
1762 VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE. Hence
1763 increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1764 then round the result up to the next VKI_SHMLBA boundary.
1765 See bug 222545 comment 15. So far, arm-linux is the only
1766 platform where this is known to be necessary. */
1767 vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1768 if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1769 segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1771 tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1773 if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1774 arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1780 else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
/* POST: shmat.  Register the new mapping: tell aspacem about the
   attached segment (read/write unless SHM_RDONLY, in which case
   write permission is dropped), tell the tool via new_mem_mmap, and
   discard any translations cached over the attached range. */
1786 ML_(generic_POST_sys_shmat) ( ThreadId tid,
1788 UWord arg0, UWord arg1, UWord arg2 )
1790 UInt segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
1791 if ( segmentSize > 0 ) {
1792 UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
1795 if (arg2 & VKI_SHM_RDONLY)
1796 prot &= ~VKI_PROT_WRITE;
1797 /* It isn't exactly correct to pass 0 for the fd and offset
1798 here. The kernel seems to think the corresponding section
1799 does have dev/ino numbers:
1801 04e52000-04ec8000 rw-s 00000000 00:06 1966090 /SYSV00000000 (deleted)
1803 However there is no obvious way to find them. In order to
1804 cope with the discrepancy, aspacem's sync checker omits the
1805 dev/ino correspondence check in cases where V does not know
1807 d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
1809 /* we don't distinguish whether it's read-only or
1810 * read-write -- it doesn't matter really. */
1811 VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
/* Stale translations over the new range must go. */
1814 VG_(discard_translations)( (Addr64)res,
1815 (ULong)VG_PGROUNDUP(segmentSize),
1816 "ML_(generic_POST_sys_shmat)" );
/* PRE: shmdt.  Returns True iff the address to detach lies in
   client-accessible space (length 1: only the start matters). */
1823 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1825 /* int shmdt(const void *shmaddr); */
1826 return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
/* POST: shmdt.  Find the segment that was attached at arg0, verify it
   is a client shm segment starting exactly there, then unregister it:
   notify aspacem of the unmap, tell the tool the memory died, and
   discard translations over the old range.  Note s is captured into
   s_start/s_len before VG_(am_notify_munmap) invalidates it. */
1830 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
1832 NSegment const* s = VG_(am_find_nsegment)(arg0);
1835 Addr s_start = s->start;
1836 SizeT s_len = s->end+1 - s->start;
1839 vg_assert(s->kind == SkShmC);
1840 vg_assert(s->start == arg0);
1842 d = VG_(am_notify_munmap)(s_start, s_len);
1843 s = NULL; /* s is now invalid */
1844 VG_TRACK( die_mem_munmap, s_start, s_len );
1846 VG_(discard_translations)( (Addr64)s_start,
1848 "ML_(generic_POST_sys_shmdt)" );
/* PRE: shmctl.  Dispatch on cmd (arg1): IPC_INFO/SHM_INFO and
   IPC_STAT/SHM_STAT write into the buffer at arg2, IPC_SET reads from
   it.  Buffer sizes differ between the classic layouts and the IPC_64
   variants (shminfo64 / shmid64_ds). */
1854 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
1855 UWord arg0, UWord arg1, UWord arg2 )
1857 /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
1858 switch (arg1 /* cmd */) {
1859 #if defined(VKI_IPC_INFO)
1861 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1862 arg2, sizeof(struct vki_shminfo) );
1864 #if defined(VKI_IPC_64)
1865 case VKI_IPC_INFO|VKI_IPC_64:
1866 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1867 arg2, sizeof(struct vki_shminfo64) );
1872 #if defined(VKI_SHM_INFO)
1874 #if defined(VKI_IPC_64)
1875 case VKI_SHM_INFO|VKI_IPC_64:
1877 PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
1878 arg2, sizeof(struct vki_shm_info) );
1883 #if defined(VKI_SHM_STAT)
1886 PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
1887 arg2, sizeof(struct vki_shmid_ds) );
1890 #if defined(VKI_IPC_64)
1891 case VKI_IPC_STAT|VKI_IPC_64:
1892 case VKI_SHM_STAT|VKI_IPC_64:
1893 PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
1894 arg2, sizeof(struct vki_shmid64_ds) );
1899 PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1900 arg2, sizeof(struct vki_shmid_ds) );
1903 #if defined(VKI_IPC_64)
1904 case VKI_IPC_SET|VKI_IPC_64:
1905 PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1906 arg2, sizeof(struct vki_shmid64_ds) );
/* POST: shmctl.  Mirror of the PRE handler: mark as written the buffer
   region the kernel filled for the output-producing commands, using
   the layout matching the (possibly IPC_64) command variant. */
1913 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1915 UWord arg0, UWord arg1, UWord arg2 )
1917 switch (arg1 /* cmd */) {
1918 #if defined(VKI_IPC_INFO)
1920 POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
1922 case VKI_IPC_INFO|VKI_IPC_64:
1923 POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
1927 #if defined(VKI_SHM_INFO)
1929 case VKI_SHM_INFO|VKI_IPC_64:
1930 POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
1935 #if defined(VKI_SHM_STAT)
1938 POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
1941 #if defined(VKI_IPC_64)
1942 case VKI_IPC_STAT|VKI_IPC_64:
1943 case VKI_SHM_STAT|VKI_IPC_64:
1944 POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
1953 /* ---------------------------------------------------------------------
1954 Generic handler for mmap
1955 ------------------------------------------------------------------ */
1958 * Although mmap is specified by POSIX and the argument are generally
1959 * consistent across platforms the precise details of the low level
1960 * argument passing conventions differ. For example:
1962 * - On x86-linux there is mmap (aka old_mmap) which takes the
1963 * arguments in a memory block and the offset in bytes; and
1964 * mmap2 (aka sys_mmap2) which takes the arguments in the normal
1965 * way and the offset in pages.
1967 * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
1968 * arguments in the normal way and the offset in bytes; and
1969 * mmap2 (aka sys_mmap2) which takes the arguments in the normal
1970 * way and the offset in pages.
1972 * - On amd64-linux everything is simple and there is just the one
1973 * call, mmap (aka sys_mmap) which takes the arguments in the
1974 * normal way and the offset in bytes.
1976 * - On s390x-linux there is mmap (aka old_mmap) which takes the
1977 * arguments in a memory block and the offset in bytes. mmap2
1978 * is also available (but not exported via unistd.h) with
1979 * arguments in a memory block and the offset in pages.
1981 * To cope with all this we provide a generic handler function here
1982 * and then each platform implements one or more system call handlers
1983 * which call this generic routine after extracting and normalising
/* Generic mmap PRE handler (see the big comment above for why each
   platform funnels its normalised args here).  args: addr, length,
   prot, flags, fd, byte-offset.  Returns the SysRes to hand back to
   the client.  Strategy: reject degenerate requests (EINVAL), ask
   aspacem where the mapping should go, perform the map at the advised
   address with MAP_FIXED forced on, optionally retry hinted mappings
   anywhere, then on success notify aspacem, debuginfo and the tool. */
1988 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
1989 UWord arg1, UWord arg2, UWord arg3,
1990 UWord arg4, UWord arg5, Off64T arg6 )
1997 #if defined(VGO_darwin)
1998 // Nb: we can't use this on Darwin, it has races:
1999 // * needs to RETRY if advisory succeeds but map fails
2000 // (could have been some other thread in a nonblocking call)
2001 // * needs to not use fixed-position mmap() on Darwin
2002 // (mmap will cheerfully smash whatever's already there, which might
2003 // be a new mapping from some other thread in a nonblocking call)
2004 VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
/* Zero-length and misaligned requests are rejected up front. */
2008 /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
2009 shall be established. */
2010 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2013 if (!VG_IS_PAGE_ALIGNED(arg1)) {
2014 /* zap any misaligned addresses. */
2015 /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
2016 to fail. Here, we catch them all. */
2017 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2020 if (!VG_IS_PAGE_ALIGNED(arg6)) {
2021 /* zap any misaligned offsets. */
2022 /* SuSV3 says: The off argument is constrained to be aligned and
2023 sized according to the value returned by sysconf() when
2024 passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2025 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2028 /* Figure out what kind of allocation constraints there are
2029 (fixed/hint/any), and ask aspacem what we should do. */
2032 if (arg4 & VKI_MAP_FIXED) {
2033 mreq.rkind = MFixed;
2042 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2044 /* Our request was bounced, so we'd better fail. */
2045 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2048 /* Otherwise we're OK (so far). Install aspacem's choice of
2049 address, and let the mmap go through. */
/* MAP_FIXED is forced so the kernel honours aspacem's placement. */
2050 sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2051 arg4 | VKI_MAP_FIXED,
2054 /* A refinement: it may be that the kernel refused aspacem's choice
2055 of address. If we were originally asked for a hinted mapping,
2056 there is still a last chance: try again at any address.
2058 if (mreq.rkind == MHint && sr_isError(sres)) {
2062 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2064 /* Our request was bounced, so we'd better fail. */
2065 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2067 /* and try again with the kernel */
2068 sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2069 arg4 | VKI_MAP_FIXED,
2073 if (!sr_isError(sres)) {
2075 /* Notify aspacem. */
2076 notify_core_of_mmap(
2077 (Addr)sr_Res(sres), /* addr kernel actually assigned */
2080 arg4, /* the original flags value */
/* Let debuginfo see the mapping (it may be an object file). */
2085 di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2086 False/*allow_SkFileV*/ );
2087 /* Notify the tool. */
2088 notify_tool_of_mmap(
2089 (Addr)sr_Res(sres), /* addr kernel actually assigned */
2092 di_handle /* so the tool can refer to the read debuginfo later,
/* Sanity: a successful MAP_FIXED must land exactly where asked. */
2098 if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2099 vg_assert(sr_Res(sres) == arg1);
2105 /* ---------------------------------------------------------------------
2106 The Main Entertainment ... syscall wrappers
2107 ------------------------------------------------------------------ */
2109 /* Note: the PRE() and POST() wrappers are for the actual functions
2110 implementing the system calls in the OS kernel. These mostly have
2111 names like sys_write(); a few have names like old_mmap(). See the
2112 comment for ML_(syscall_table)[] for important info about the __NR_foo
2113 constants and their relationship to the sys_foo() functions.
2115 Some notes about names used for syscalls and args:
2116 - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2119 - For error messages, we generally use a somewhat generic name
2120 for the syscall (eg. "write" rather than "sys_write"). This should be
2121 good enough for the average user to understand what is happening,
2122 without confusing them with names like "sys_write".
2124 - Also, for error messages the arg names are mostly taken from the man
2125 pages (even though many of those man pages are really for glibc
2126 functions of the same name), rather than from the OS kernel source,
2127 for the same reason -- a user presented with a "bogus foo(bar)" arg
2128 will most likely look at the "foo" man page to see which is the "bar"
2131 Note that we use our own vki_* types. The one exception is in
2132 PRE_REG_READn calls, where pointer types haven't been changed, because
2133 they don't need to be -- eg. for "foo*" to be used, the type foo need not
2136 XXX: some of these are arch-specific, and should be factored out.
/* PRE/POST shorthands expand to the generic wrapper templates. */
2139 #define PRE(name) DEFN_PRE_TEMPLATE(generic, name)
2140 #define POST(name) DEFN_POST_TEMPLATE(generic, name)
/* MERGE64 reassembles a 64-bit value passed as two 32-bit syscall args.
   MERGE64_FIRST/_SECOND name the halves in the register order the
   platform passes them, so PRE_REG_READ* lines read the right regs. */
2142 // Macros to support 64-bit syscall args split into two 32 bit values
2143 #if defined(VG_LITTLEENDIAN)
2144 #define MERGE64(lo,hi) ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2145 #define MERGE64_FIRST(name) name##_low
2146 #define MERGE64_SECOND(name) name##_high
2147 #elif defined(VG_BIGENDIAN)
2148 #define MERGE64(hi,lo) ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2149 #define MERGE64_FIRST(name) name##_high
2150 #define MERGE64_SECOND(name) name##_low
2152 #error Unknown endianness
/* PRE(sys_exit) body (the PRE(...) header line is elided in this view).
   Does not pass the syscall to the kernel: marks only this thread as
   exiting and claims success, so thread teardown happens under
   Valgrind's control. */
2158 /* simple; just make this thread exit */
2159 PRINT("exit( %ld )", ARG1);
2160 PRE_REG_READ1(void, "exit", int, status);
2161 tst = VG_(get_ThreadState)(tid);
2162 /* Set the thread's status to be exiting, then claim that the
2163 syscall succeeded. */
2164 tst->exitreason = VgSrc_ExitThread;
2165 tst->os_state.exitcode = ARG1;
2166 SET_STATUS_Success(0);
/* Handler for syscalls the kernel does not implement: log and ENOSYS. */
2171 PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2172 VG_SYSNUM_STRING(SYSNO));
2173 PRE_REG_READ0(long, "ni_syscall");
2174 SET_STATUS_Failure( VKI_ENOSYS );
/* iopl: trace + register-read annotation only; syscall passes through. */
2179 PRINT("sys_iopl ( %ld )", ARG1);
2180 PRE_REG_READ1(long, "iopl", unsigned long, level);
/* fsync: may block; no memory to check, just annotate the fd arg. */
2185 *flags |= SfMayBlock;
2186 PRINT("sys_fsync ( %ld )", ARG1);
2187 PRE_REG_READ1(long, "fsync", unsigned int, fd);
/* fdatasync: may block; no memory to check, just annotate the fd arg. */
2192 *flags |= SfMayBlock;
2193 PRINT("sys_fdatasync ( %ld )", ARG1);
2194 PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
/* msync: may block; the flushed range [start, start+length) must be
   readable client memory. */
2199 *flags |= SfMayBlock;
2200 PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2201 PRE_REG_READ3(long, "msync",
2202 unsigned long, start, vki_size_t, length, int, flags);
2203 PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2206 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2207 // versions of LiS (Linux Streams). They are not part of the kernel.
2208 // Therefore, we have to provide this type ourself, rather than getting it
2209 // from the kernel sources.
/* Local clone of the STREAMS strbuf used by getpmsg/putpmsg. */
2210 struct vki_pmsg_strbuf {
2211 int maxlen; /* no. of bytes in buffer */
2212 int len; /* no. of bytes returned */
2213 vki_caddr_t buf; /* pointer to data */
/* PRE: getpmsg (LiS STREAMS extension).  May block.  The ctrl and data
   strbufs describe receive buffers (write-checked up to maxlen);
   bandp and flagsp are int outputs. */
2217 /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2218 struct vki_pmsg_strbuf *ctrl;
2219 struct vki_pmsg_strbuf *data;
2220 *flags |= SfMayBlock;
2221 PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
2222 PRE_REG_READ5(int, "getpmsg",
2223 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2224 int *, bandp, int *, flagsp);
2225 ctrl = (struct vki_pmsg_strbuf *)ARG2;
2226 data = (struct vki_pmsg_strbuf *)ARG3;
2227 if (ctrl && ctrl->maxlen > 0)
2228 PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2229 if (data && data->maxlen > 0)
2230 PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2232 PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2234 PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
/* POST: getpmsg.  On success (RES == 0), the kernel set ctrl->len /
   data->len to the bytes actually received; mark those spans written. */
2238 struct vki_pmsg_strbuf *ctrl;
2239 struct vki_pmsg_strbuf *data;
2241 ctrl = (struct vki_pmsg_strbuf *)ARG2;
2242 data = (struct vki_pmsg_strbuf *)ARG3;
2243 if (RES == 0 && ctrl && ctrl->len > 0) {
2244 POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2246 if (RES == 0 && data && data->len > 0) {
2247 POST_MEM_WRITE( (Addr)data->buf, data->len);
/* PRE: putpmsg (LiS STREAMS extension).  May block.  ctrl/data strbufs
   describe transmit buffers; read-check 'len' bytes of each. */
2253 /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2254 struct vki_pmsg_strbuf *ctrl;
2255 struct vki_pmsg_strbuf *data;
2256 *flags |= SfMayBlock;
2257 PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
2258 PRE_REG_READ5(int, "putpmsg",
2259 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2260 int, band, int, flags);
2261 ctrl = (struct vki_pmsg_strbuf *)ARG2;
2262 data = (struct vki_pmsg_strbuf *)ARG3;
2263 if (ctrl && ctrl->len > 0)
2264 PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2265 if (data && data->len > 0)
2266 PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
/* PRE: getitimer.  Both timeval members of *value are outputs. */
2271 struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2272 PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
2273 PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2275 PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2276 PRE_timeval_WRITE( "getitimer(&value->it_value)", &(value->it_value));
/* POST: getitimer.  Mark both timevals written, if value was non-NULL. */
2281 if (ARG2 != (Addr)NULL) {
2282 struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2283 POST_timeval_WRITE( &(value->it_interval) );
2284 POST_timeval_WRITE( &(value->it_value) );
/* PRE: setitimer.  *value (if non-NULL) is read; *ovalue (if non-NULL)
   receives the previous timer and is write-checked. */
2290 PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
2291 PRE_REG_READ3(long, "setitimer",
2293 struct itimerval *, value, struct itimerval *, ovalue);
2294 if (ARG2 != (Addr)NULL) {
2295 struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2296 PRE_timeval_READ( "setitimer(&value->it_interval)",
2297 &(value->it_interval));
2298 PRE_timeval_READ( "setitimer(&value->it_value)",
2299 &(value->it_value));
2301 if (ARG3 != (Addr)NULL) {
2302 struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2303 PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2304 &(ovalue->it_interval));
2305 PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2306 &(ovalue->it_value));
/* POST: setitimer.  If ovalue was supplied, its timevals are now set. */
2312 if (ARG3 != (Addr)NULL) {
2313 struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2314 POST_timeval_WRITE( &(ovalue->it_interval) );
2315 POST_timeval_WRITE( &(ovalue->it_value) );
/* chroot: the path must be a readable NUL-terminated string. */
2321 PRINT("sys_chroot ( %#lx )", ARG1);
2322 PRE_REG_READ1(long, "chroot", const char *, path);
2323 PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
/* madvise: may block; advisory only, no client memory checks needed. */
2328 *flags |= SfMayBlock;
2329 PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2330 PRE_REG_READ3(long, "madvise",
2331 unsigned long, start, vki_size_t, length, int, advice);
/* mremap (guarded by HAVE_MREMAP).  The 5th arg (new_address) exists
   only with MREMAP_FIXED, hence two PRE_REG_READ shapes.  The whole
   operation is emulated by do_mremap() rather than passed through, so
   aspacem stays in control of the address space. */
2337 // Nb: this is different to the glibc version described in the man pages,
2338 // which lacks the fifth 'new_address' argument.
2339 if (ARG4 & VKI_MREMAP_FIXED) {
2340 PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
2341 ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
2342 PRE_REG_READ5(unsigned long, "mremap",
2343 unsigned long, old_addr, unsigned long, old_size,
2344 unsigned long, new_size, unsigned long, flags,
2345 unsigned long, new_addr);
2347 PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
2348 ARG1, (ULong)ARG2, ARG3, ARG4);
2349 PRE_REG_READ4(unsigned long, "mremap",
2350 unsigned long, old_addr, unsigned long, old_size,
2351 unsigned long, new_size, unsigned long, flags);
2353 SET_STATUS_from_SysRes(
2354 do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2357 #endif /* HAVE_MREMAP */
/* nice: trace + register annotation only. */
2361 PRINT("sys_nice ( %ld )", ARG1);
2362 PRE_REG_READ1(long, "nice", int, inc);
/* mlock: may block; no memory checks, locking doesn't touch contents. */
2367 *flags |= SfMayBlock;
2368 PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2369 PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
/* munlock: may block; no memory checks needed. */
2374 *flags |= SfMayBlock;
2375 PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2376 PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
/* mlockall: may block; no memory checks needed. */
2381 *flags |= SfMayBlock;
2382 PRINT("sys_mlockall ( %lx )", ARG1);
2383 PRE_REG_READ1(long, "mlockall", int, flags);
/* setpriority: trace + register annotations only. */
2386 PRE(sys_setpriority)
2388 PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
2389 PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
/* getpriority: trace + register annotations only. */
2392 PRE(sys_getpriority)
2394 PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
2395 PRE_REG_READ2(long, "getpriority", int, which, int, who);
/* PRE: pwrite64.  May block.  On 32-bit targets the 64-bit offset is
   split across two registers (MERGE64); on 64-bit targets it is a
   single arg.  The source buffer is read-checked for 'count' bytes. */
2400 *flags |= SfMayBlock;
2401 #if VG_WORDSIZE == 4
2402 PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2403 ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2404 PRE_REG_READ5(ssize_t, "pwrite64",
2405 unsigned int, fd, const char *, buf, vki_size_t, count,
2406 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2407 #elif VG_WORDSIZE == 8
2408 PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2409 ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2410 PRE_REG_READ4(ssize_t, "pwrite64",
2411 unsigned int, fd, const char *, buf, vki_size_t, count,
2414 # error Unexpected word size
2416 PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
/* sync: may block; takes no arguments. */
2421 *flags |= SfMayBlock;
2422 PRINT("sys_sync ( )");
2423 PRE_REG_READ0(long, "sync");
/* PRE: fstatfs.  The statfs buffer at ARG2 is an output. */
2428 PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
2429 PRE_REG_READ2(long, "fstatfs",
2430 unsigned int, fd, struct statfs *, buf);
2431 PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
/* POST: fstatfs -- the buffer has now been filled in. */
2436 POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
/* PRE: fstatfs64.  ARG2 is the caller-declared buffer size; the buffer
   at ARG3 is write-checked for that many bytes. */
2441 PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
2442 PRE_REG_READ3(long, "fstatfs64",
2443 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2444 PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
/* POST: fstatfs64 -- the buffer has now been filled in. */
2448 POST_MEM_WRITE( ARG3, ARG2 );
/* getsid: trace + register annotation only. */
2453 PRINT("sys_getsid ( %ld )", ARG1);
2454 PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2459 *flags |= SfMayBlock;
2460 #if VG_WORDSIZE == 4
2461 PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2462 ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2463 PRE_REG_READ5(ssize_t, "pread64",
2464 unsigned int, fd, char *, buf, vki_size_t, count,
2465 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2466 #elif VG_WORDSIZE == 8
2467 PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2468 ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2469 PRE_REG_READ4(ssize_t, "pread64",
2470 unsigned int, fd, char *, buf, vki_size_t, count,
2473 # error Unexpected word size
2475 PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2481 POST_MEM_WRITE( ARG2, RES );
2487 PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
2488 PRE_REG_READ3(long, "mknod",
2489 const char *, pathname, int, mode, unsigned, dev);
2490 PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2495 *flags |= SfMayBlock;
2496 PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
2497 PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
/* NOTE(review): fragmentary — loop/brace lines between the numbered
   statements are missing from this extract. */

2500 // Pre_read a char** argument.
/* Walk a NULL-terminated vector of pointers at 'a', tagging each
   pointer slot as read (label s1) and each pointed-to string as a
   read ASCIIZ string (label s2).  Used for execve's argv/envp. */
2501 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
2505 Addr* a_p = (Addr*)a;
2506 PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2510 PRE_MEM_RASCIIZ( s2, a_deref );

/* True iff this is the last living thread (count must be >= 1 since
   we ourselves are alive). */
2515 static Bool i_am_the_only_thread ( void )
2517 Int c = VG_(count_living_threads)();
2518 vg_assert(c >= 1); /* stay sane */

2522 /* Wait until all other threads disappear. */
/* Spin, yielding and polling signals, until every other thread has
   exited; asserts the invariant on the way out. */
2523 void VG_(reap_threads)(ThreadId self)
2525 while (!i_am_the_only_thread()) {
2526 /* Let other thread(s) run */
2528 VG_(poll_signals)(self);
2530 vg_assert(i_am_the_only_thread());
/* PRE wrapper for execve(2).  This is the single most involved wrapper
   in the file: it validates the exec, kills all sibling threads,
   rebuilds argv/envp (inserting the Valgrind launcher when the child
   is to be traced), restores signal state, and finally performs the
   real execve — after which there is no return on success.
   NOTE(review): extract is fragmentary; some declarations (tst, res,
   envp, argv, i, j, tot_args, arg2copy, info, cpp) and braces are in
   lines elided from this view. */
2533 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2534 // but it seems to work nonetheless...
2537 #if defined (VGO_l4re)
2538 VG_(unimplemented)((char*)__func__);
2540 Char* path = NULL; /* path to executable */
2544 Char* launcher_basename = NULL;
2548 Bool setuid_allowed, trace_this_child;
2550 PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2551 PRE_REG_READ3(vki_off_t, "execve",
2552 char *, filename, char **, argv, char **, envp);
2553 PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
/* argv/envp are NULL-terminated pointer vectors; tag every slot and
   every pointed-to string as read. */
2555 pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2557 pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2559 vg_assert(VG_(is_valid_tid)(tid));
2560 tst = VG_(get_ThreadState)(tid);
2562 /* Erk. If the exec fails, then the following will have made a
2563 mess of things which makes it hard for us to continue. The
2564 right thing to do is piece everything together again in
2565 POST(execve), but that's close to impossible. Instead, we make
2566 an effort to check that the execve will work before actually
2569 /* Check that the name at least begins in client-accessible storage. */
2570 if (ARG1 == 0 /* obviously bogus */
2571 || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2572 SET_STATUS_Failure( VKI_EFAULT );
2576 // debug-only printing
2578 VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2580 VG_(printf)("ARG2 = ");
2582 HChar** vec = (HChar**)ARG2;
2583 for (q = 0; vec[q]; q++)
2584 VG_(printf)("%p(%s) ", vec[q], vec[q]);
2587 VG_(printf)("ARG2 = null\n");
2591 // Decide whether or not we want to follow along
2592 { // Make 'child_argv' be a pointer to the child's arg vector
2593 // (skipping the exe name)
2594 HChar** child_argv = (HChar**)ARG2;
2595 if (child_argv && child_argv[0] == NULL)
2597 trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2600 // Do the important checks: it is a file, is executable, permissions are
2601 // ok, etc. We allow setuid executables to run only in the case when
2602 // we are not simulating them, that is, they to be run natively.
2603 setuid_allowed = trace_this_child ? False : True;
2604 res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
2605 if (sr_isError(res)) {
2606 SET_STATUS_Failure( sr_Err(res) );
2610 /* If we're tracing the child, and the launcher name looks bogus
2611 (possibly because launcher.c couldn't figure it out, see
2612 comments therein) then we have no option but to fail. */
2613 if (trace_this_child
2614 && (VG_(name_of_launcher) == NULL
2615 || VG_(name_of_launcher)[0] != '/')) {
2616 SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2620 /* After this point, we can't recover if the execve fails. */
2621 VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
2624 // Terminate gdbserver if it is active.
2625 if (VG_(clo_vgdb) != Vg_VgdbNo) {
2626 // If the child will not be traced, we need to terminate gdbserver
2627 // to cleanup the gdbserver resources (e.g. the FIFO files).
2628 // If child will be traced, we also terminate gdbserver: the new
2629 // Valgrind will start a fresh gdbserver after exec.
2633 /* Resistance is futile. Nuke all other threads. POSIX mandates
2634 this. (Really, nuke them all, since the new process will make
2635 its own new thread.) */
2636 VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2637 VG_(reap_threads)(tid);
2639 // Set up the child's exe path.
2641 if (trace_this_child) {
2643 // We want to exec the launcher. Get its pre-remembered path.
2644 path = VG_(name_of_launcher);
2645 // VG_(name_of_launcher) should have been acquired by m_main at
/* Derive the basename for argv[0]; fall back to the whole path if
   there is no '/' or it ends with one. */
2649 launcher_basename = VG_(strrchr)(path, '/');
2650 if (launcher_basename == NULL || launcher_basename[1] == 0) {
2651 launcher_basename = path; // hmm, tres dubious
2653 launcher_basename++;
2660 // Set up the child's environment.
2662 // Remove the valgrind-specific stuff from the environment so the
2663 // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2664 // This is done unconditionally, since if we are tracing the child,
2665 // the child valgrind will set up the appropriate client environment.
2666 // Nb: we make a copy of the environment before trying to mangle it
2667 // as it might be in read-only memory (this was bug #101881).
2669 // Then, if tracing the child, set VALGRIND_LIB for it.
2674 envp = VG_(env_clone)( (Char**)ARG3 );
2675 if (envp == NULL) goto hosed;
2676 VG_(env_remove_valgrind_env_stuff)( envp );
2679 if (trace_this_child) {
2680 // Set VALGRIND_LIB in ARG3 (the environment)
2681 VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2684 // Set up the child's args. If not tracing it, they are
2685 // simply ARG2. Otherwise, they are
2687 // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2689 // except that the first VG_(args_for_valgrind_noexecpass) args
2692 if (!trace_this_child) {
2693 argv = (Char**)ARG2;
2695 vg_assert( VG_(args_for_valgrind) );
2696 vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2697 vg_assert( VG_(args_for_valgrind_noexecpass)
2698 <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2699 /* how many args in total will there be? */
2700 // launcher basename
2703 tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2704 tot_args -= VG_(args_for_valgrind_noexecpass);
2705 // name of client exe
2707 // args for client exe, skipping [0]
2708 arg2copy = (Char**)ARG2;
2709 if (arg2copy && arg2copy[0]) {
2710 for (i = 1; arg2copy[i]; i++)
/* Allocate tot_args pointers plus the terminating NULL. */
2714 argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2715 (tot_args+1) * sizeof(HChar*) );
2716 if (argv == 0) goto hosed;
2719 argv[j++] = launcher_basename;
2720 for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2721 if (i < VG_(args_for_valgrind_noexecpass))
2723 argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2725 argv[j++] = (Char*)ARG1;
2726 if (arg2copy && arg2copy[0])
2727 for (i = 1; arg2copy[i]; i++)
2728 argv[j++] = arg2copy[i];
/* Sanity: we filled exactly the number of slots we counted. */
2731 vg_assert(j == tot_args+1);
2734 /* restore the DATA rlimit for the child */
2735 VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2738 Set the signal state up for exec.
2740 We need to set the real signal state to make sure the exec'd
2741 process gets SIG_IGN properly.
2743 Also set our real sigmask to match the client's sigmask so that
2744 the exec'd child will get the right mask. First we need to
2745 clear out any pending signals so they they don't get delivered,
2746 which would confuse things.
2748 XXX This is a bug - the signals should remain pending, and be
2749 delivered to the new process after exec. There's also a
2750 race-condition, since if someone delivers us a signal between
2751 the sigprocmask and the execve, we'll still get the signal. Oh
2755 vki_sigset_t allsigs;
2758 /* What this loop does: it queries SCSS (the signal state that
2759 the client _thinks_ the kernel is in) by calling
2760 VG_(do_sys_sigaction), and modifies the real kernel signal
2761 state accordingly. */
2762 for (i = 1; i < VG_(max_signal); i++) {
2763 vki_sigaction_fromK_t sa_f;
2764 vki_sigaction_toK_t sa_t;
2765 VG_(do_sys_sigaction)(i, NULL, &sa_f);
2766 VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2767 if (sa_t.ksa_handler == VKI_SIG_IGN)
2768 VG_(sigaction)(i, &sa_t, NULL);
2770 sa_t.ksa_handler = VKI_SIG_DFL;
2771 VG_(sigaction)(i, &sa_t, NULL);
/* Drain any pending signals so they are not delivered post-exec. */
2775 VG_(sigfillset)(&allsigs);
2776 while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2779 VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
2784 VG_(printf)("exec: %s\n", path);
2785 for (cpp = argv; cpp && *cpp; cpp++)
2786 VG_(printf)("argv: %s\n", *cpp);
2788 for (cpp = envp; cpp && *cpp; cpp++)
2789 VG_(printf)("env: %s\n", *cpp);
/* The real exec.  On success this call never returns. */
2792 SET_STATUS_from_SysRes(
2793 VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2796 /* If we got here, then the execve failed. We've already made way
2797 too much of a mess to continue, so we have to abort. */
2800 VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2801 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2802 VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2803 "execve() failing, so I'm dying.\n");
2804 VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2805 "or work out how to recover.\n");
/* PRE(sys_access) fragment: pathname is a readable ASCIIZ string. */
2812 PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2813 PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
2814 PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );

/* PRE(sys_alarm) fragment: single integer argument, nothing to check. */
2819 PRINT("sys_alarm ( %ld )", ARG1);
2820 PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
/* PRE wrapper for brk(2).  Valgrind manages the client's data segment
   itself (do_brk), then tells the tool about the grown/shrunk range.
   NOTE(review): fragmentary — the PRE(sys_brk) header, brk_new
   declaration and some braces are in lines elided from this extract. */
2825 Addr brk_limit = VG_(brk_limit);
2828 /* libc says: int brk(void *end_data_segment);
2829 kernel says: void* brk(void* end_data_segment); (more or less)
2831 libc returns 0 on success, and -1 (and sets errno) on failure.
2832 Nb: if you ask to shrink the dataseg end below what it
2833 currently is, that always succeeds, even if the dataseg end
2834 doesn't actually change (eg. brk(0)). Unless it seg faults.
2836 Kernel returns the new dataseg end. If the brk() failed, this
2837 will be unchanged from the old one. That's why calling (kernel)
2838 brk(0) gives the current dataseg end (libc brk() just returns
2841 Both will seg fault if you shrink it back into a text segment.
2843 PRINT("sys_brk ( %#lx )", ARG1);
2844 PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
/* Always "succeeds" at kernel level: RES is the (possibly unchanged)
   dataseg end, per the comment above. */
2846 brk_new = do_brk(ARG1);
2847 SET_STATUS_Success( brk_new );
2849 if (brk_new == ARG1) {
2850 /* brk() succeeded */
2851 if (brk_new < brk_limit) {
2852 /* successfully shrunk the data segment. */
2853 VG_TRACK( die_mem_brk, (Addr)ARG1,
2856 if (brk_new > brk_limit) {
2857 /* successfully grew the data segment */
2858 VG_TRACK( new_mem_brk, brk_limit,
2859 ARG1-brk_limit, tid );
/* brk failed: the segment end must be exactly where it was. */
2863 vg_assert(brk_limit == brk_new);
/* PRE(sys_chdir): path is a readable ASCIIZ string. */
2869 PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
2870 PRE_REG_READ1(long, "chdir", const char *, path);
2871 PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );

/* PRE(sys_chmod). */
2876 PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2877 PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
2878 PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );

/* PRE(sys_chown). */
2883 PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2884 PRE_REG_READ3(long, "chown",
2885 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2886 PRE_MEM_RASCIIZ( "chown(path)", ARG1 );

/* PRE(sys_lchown): identical checks to chown; operates on the link. */
2891 PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2892 PRE_REG_READ3(long, "lchown",
2893 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2894 PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
/* PRE(sys_close): refuse to let the client close fds Valgrind itself
   depends on (its log fd, and stderr when -d logging is active). */
2899 PRINT("sys_close ( %ld )", ARG1);
2900 PRE_REG_READ1(long, "close", unsigned int, fd);
2902 /* Detect and negate attempts by the client to close Valgrind's log fd */
2903 if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
2904 /* If doing -d style logging (which is to fd=2), don't
2905 allow that to be closed either. */
2906 || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
2907 SET_STATUS_Failure( VKI_EBADF );

/* POST(sys_close) fragment: update fd-tracking bookkeeping. */
2912 if (VG_(clo_track_fds)) record_fd_close(ARG1);

/* PRE(sys_dup). */
2917 PRINT("sys_dup ( %ld )", ARG1);
2918 PRE_REG_READ1(long, "dup", unsigned int, oldfd);

/* POST(sys_dup): if the new fd falls in Valgrind's reserved range,
   undo the dup and fail with EMFILE; otherwise record it. */
2924 if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
2926 SET_STATUS_Failure( VKI_EMFILE );
2928 if (VG_(clo_track_fds))
2929 ML_(record_fd_open_named)(tid, RES);

/* PRE(sys_dup2): the target fd must be acceptable up front. */
2935 PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
2936 PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
2937 if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
2938 SET_STATUS_Failure( VKI_EBADF );

/* POST(sys_dup2) fragment. */
2944 if (VG_(clo_track_fds))
2945 ML_(record_fd_open_named)(tid, RES);

/* PRE(sys_fchdir). */
2950 PRINT("sys_fchdir ( %ld )", ARG1);
2951 PRE_REG_READ1(long, "fchdir", unsigned int, fd);

/* PRE(sys_fchown). */
2956 PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
2957 PRE_REG_READ3(long, "fchown",
2958 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);

/* PRE(sys_fchmod). */
2963 PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
2964 PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);

/* PRE(sys_newfstat): kernel fills a struct stat at ARG2. */
2969 PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
2970 PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
2971 PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );

/* POST(sys_newfstat) fragment. */
2976 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
/* Signal mask saved across fork so both parent and child can restore it. */
2979 static vki_sigset_t fork_saved_mask;

2981 // In Linux, the sys_fork() function varies across architectures, but we
2982 // ignore the various args it gets, and so it looks arch-neutral. Hmm.
/* PRE wrapper for fork(2): blocks all signals around the fork, runs the
   registered atfork pre/child/parent handlers, and optionally silences
   the child's logging output.
   NOTE(review): fragmentary — the PRE(sys_fork) header and the
   declarations of mask/is_child/child_pid are in elided lines. */
2985 #if defined(VGO_l4re)
2986 VG_(unimplemented)((char*)__func__);
2992 PRINT("sys_fork ( )");
2993 PRE_REG_READ0(long, "fork");
2995 /* Block all signals during fork, so that we can fix things up in
2996 the child without being interrupted. */
2997 VG_(sigfillset)(&mask);
2998 VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3000 SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3002 if (!SUCCESS) return;
3004 #if defined(VGO_linux) || defined(VGO_aix5)
3005 // RES is 0 for child, non-0 (the child's PID) for parent.
3006 is_child = ( RES == 0 ? True : False );
3007 child_pid = ( is_child ? -1 : RES );
3008 #elif defined(VGO_darwin)
3009 // RES is the child's pid. RESHI is 1 for child, 0 for parent.
3016 VG_(do_atfork_pre)(tid);
/* Child side: run child handlers, restore mask, maybe mute output. */
3019 VG_(do_atfork_child)(tid);
3021 /* restore signal mask */
3022 VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3024 /* If --child-silent-after-fork=yes was specified, set the
3025 output file descriptors to 'impossible' values. This is
3026 noticed by send_bytes_to_logging_sink in m_libcprint.c, which
3027 duly stops writing any further output. */
3028 if (VG_(clo_child_silent_after_fork)) {
3029 if (!VG_(log_output_sink).is_socket)
3030 VG_(log_output_sink).fd = -1;
3031 if (!VG_(xml_output_sink).is_socket)
3032 VG_(xml_output_sink).fd = -1;
/* Parent side. */
3036 VG_(do_atfork_parent)(tid);
3038 PRINT(" fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3040 /* restore signal mask */
3041 VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
/* PRE(sys_ftruncate) fragment. */
3048 *flags |= SfMayBlock;
3049 PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
3050 PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);

/* PRE(sys_truncate). */
3055 *flags |= SfMayBlock;
3056 PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3057 PRE_REG_READ2(long, "truncate",
3058 const char *, path, unsigned long, length);
3059 PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );

/* PRE(sys_ftruncate64): 64-bit length split across two registers on
   32-bit targets (MERGE64), single register otherwise. */
3062 PRE(sys_ftruncate64)
3064 *flags |= SfMayBlock;
3065 #if VG_WORDSIZE == 4
3066 PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
3067 PRE_REG_READ3(long, "ftruncate64",
3069 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3071 PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
3072 PRE_REG_READ2(long, "ftruncate64",
3073 unsigned int,fd, UWord,length);

/* PRE(sys_truncate64) fragment: same scheme, path instead of fd. */
3079 *flags |= SfMayBlock;
3080 #if VG_WORDSIZE == 4
3081 PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3082 PRE_REG_READ3(long, "truncate64",
3084 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3086 PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3087 PRE_REG_READ2(long, "truncate64",
3088 const char *,path, UWord,length);
3090 PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );

/* PRE(sys_getdents) fragment: kernel fills up to ARG3 bytes at ARG2. */
3095 *flags |= SfMayBlock;
3096 PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
3097 PRE_REG_READ3(long, "getdents",
3098 unsigned int, fd, struct linux_dirent *, dirp,
3099 unsigned int, count);
3100 PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );

/* POST(sys_getdents) fragment: only RES bytes were written. */
3107 POST_MEM_WRITE( ARG2, RES );

/* PRE(sys_getdents64): 64-bit dirent variant, same checks. */
3112 *flags |= SfMayBlock;
3113 PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
3114 PRE_REG_READ3(long, "getdents64",
3115 unsigned int, fd, struct linux_dirent64 *, dirp,
3116 unsigned int, count);
3117 PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );

3120 POST(sys_getdents64)
3124 POST_MEM_WRITE( ARG2, RES );

/* PRE(sys_getgroups): the list is only written when size (ARG1) > 0. */
3129 PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
3130 PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3132 PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );

/* POST(sys_getgroups) fragment: RES = number of gids stored. */
3138 if (ARG1 > 0 && RES > 0)
3139 POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
/* PRE(sys_getcwd) fragment. */
3144 // Comment from linux/fs/dcache.c:
3145 // NOTE! The user-level library version returns a character pointer.
3146 // The kernel system call just returns the length of the buffer filled
3147 // (which includes the ending '\0' character), or a negative error
3149 // Is this Linux-specific? If so it should be moved to syswrap-linux.c.
3150 PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3151 PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3152 PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );

/* POST(sys_getcwd) fragment: RES is the filled length incl. NUL. */
3158 if (RES != (Addr)NULL)
3159 POST_MEM_WRITE( ARG1, RES );

/* Trivial no-argument id getters: nothing to check beyond logging. */
3164 PRINT("sys_geteuid ( )");
3165 PRE_REG_READ0(long, "geteuid");
3170 PRINT("sys_getegid ( )");
3171 PRE_REG_READ0(long, "getegid");
3176 PRINT("sys_getgid ( )");
3177 PRE_REG_READ0(long, "getgid");
3182 PRINT("sys_getpid ()");
3183 PRE_REG_READ0(long, "getpid");
3188 PRINT("sys_getpgid ( %ld )", ARG1);
3189 PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3194 PRINT("sys_getpgrp ()");
3195 PRE_REG_READ0(long, "getpgrp");
3200 PRINT("sys_getppid ()");
3201 PRE_REG_READ0(long, "getppid");
/* Shared POST helper for getrlimit/old_getrlimit: marks the result
   buffer defined, then overrides the kernel's answer for the limits
   Valgrind virtualises (NOFILE, DATA, STACK) with its own values. */
3204 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3206 POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3208 #ifdef _RLIMIT_POSIX_FLAG
3209 // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3210 // Unset it here to make the switch case below work correctly.
3211 a1 &= ~_RLIMIT_POSIX_FLAG;
3215 case VKI_RLIMIT_NOFILE:
3216 ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3217 ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3220 case VKI_RLIMIT_DATA:
3221 *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3224 case VKI_RLIMIT_STACK:
3225 *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);

/* PRE/POST for the legacy old_getrlimit syscall. */
3230 PRE(sys_old_getrlimit)
3232 PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3233 PRE_REG_READ2(long, "old_getrlimit",
3234 unsigned int, resource, struct rlimit *, rlim);
3235 PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3238 POST(sys_old_getrlimit)
3240 common_post_getrlimit(tid, ARG1, ARG2);

/* PRE/POST for getrlimit(2); delegates to the shared helper above. */
3245 PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3246 PRE_REG_READ2(long, "getrlimit",
3247 unsigned int, resource, struct rlimit *, rlim);
3248 PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3253 common_post_getrlimit(tid, ARG1, ARG2);

/* PRE(sys_getrusage). */
3258 PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
3259 PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3260 PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );

/* POST(sys_getrusage) fragment. */
3267 POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );

/* PRE/POST for gettimeofday(2): tv and tz are both optional outputs. */
3270 PRE(sys_gettimeofday)
3272 PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3273 PRE_REG_READ2(long, "gettimeofday",
3274 struct timeval *, tv, struct timezone *, tz);
3275 // GrP fixme does darwin write to *tz anymore?
3277 PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3279 PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3282 POST(sys_gettimeofday)
3287 POST_timeval_WRITE( ARG1 );
3289 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );

/* PRE(sys_settimeofday): inputs this time, so READ not WRITE. */
3293 PRE(sys_settimeofday)
3295 PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3296 PRE_REG_READ2(long, "settimeofday",
3297 struct timeval *, tv, struct timezone *, tz);
3299 PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3301 PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3302 /* maybe should warn if tz->tz_dsttime is non-zero? */

/* PRE(sys_getuid) fragment. */
3308 PRINT("sys_getuid ( )");
3309 PRE_REG_READ0(long, "getuid");
/* Generic fallback for ioctls with no dedicated wrapper: decode the
   direction/size bits encoded in the request number and tag the arg
   buffer as read and/or written accordingly.  With the "lax-ioctls"
   sim-hint, skip the read-check entirely (only trust the size). */
3312 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3314 /* We don't have any specific information on it, so
3315 try to do something reasonable based on direction and
3316 size bits. The encoding scheme is described in
3317 /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3319 According to Simon Hausmann, _IOC_READ means the kernel
3320 writes a value to the ioctl value passed from the user
3321 space and the other way around with _IOC_WRITE. */
3323 UInt dir = _VKI_IOC_DIR(request);
3324 UInt size = _VKI_IOC_SIZE(request);
3325 if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
3327 * Be very lax about ioctl handling; the only
3328 * assumption is that the size is correct. Doesn't
3329 * require the full buffer to be initialized when
3330 * writing. Without this, using some device
3331 * drivers with a large number of strange ioctl
3332 * commands becomes very tiresome.
3334 } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3335 //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3336 //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
/* Rate-limited warning: no hints at all in the request number. */
3337 static Int moans = 3;
3338 if (moans > 0 && !VG_(clo_xml)) {
3340 VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3341 " with no size/direction hints\n", request);
3342 VG_(umsg)(" This could cause spurious value errors to appear.\n");
3343 VG_(umsg)(" See README_MISSING_SYSCALL_OR_IOCTL for "
3344 "guidance on writing a proper wrapper.\n" );
3347 //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3348 //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
/* _IOC_WRITE = userspace -> kernel, so the buffer must be readable;
   _IOC_READ = kernel -> userspace, so it must be addressable. */
3349 if ((dir & _VKI_IOC_WRITE) && size > 0)
3350 PRE_MEM_READ( "ioctl(generic)", arg, size);
3351 if ((dir & _VKI_IOC_READ) && size > 0)
3352 PRE_MEM_WRITE( "ioctl(generic)", arg, size);

/* POST counterpart: if the kernel wrote back (_IOC_READ) and the arg
   pointer is non-NULL, mark the buffer defined. */
3356 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3358 /* We don't have any specific information on it, so
3359 try to do something reasonable based on direction and
3360 size bits. The encoding scheme is described in
3361 /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3363 According to Simon Hausmann, _IOC_READ means the kernel
3364 writes a value to the ioctl value passed from the user
3365 space and the other way around with _IOC_WRITE. */
3367 UInt dir = _VKI_IOC_DIR(request);
3368 UInt size = _VKI_IOC_SIZE(request);
3369 if (size > 0 && (dir & _VKI_IOC_READ)
3371 && arg != (Addr)NULL)
3373 POST_MEM_WRITE(arg, size);
3378 If we're sending a SIGKILL to one of our own threads, then simulate
3379 it rather than really sending the signal, so that the target thread
3380 gets a chance to clean up. Returns True if we did the killing (or
3381 no killing is necessary), and False if the caller should use the
3382 normal kill syscall.
3384 "pid" is any pid argument which can be passed to kill; group kills
3385 (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3386 they'll most likely hit all the threads and we won't need to worry
3387 about cleanup. In truth, we can't fully emulate these multicast
3390 "tgid" is a thread group id. If it is not -1, then the target
3391 thread must be in that thread group.
/* See the block comment above for the full contract. */
3393 Bool ML_(do_sigkill)(Int pid, Int tgid)
3401 tid = VG_(lwpid_to_vgtid)(pid);
3402 if (tid == VG_INVALID_THREADID)
3403 return False; /* none of our threads */
3405 tst = VG_(get_ThreadState)(tid);
3406 if (tst == NULL || tst->status == VgTs_Empty)
3407 return False; /* hm, shouldn't happen */
3409 if (tgid != -1 && tst->os_state.threadgroup != tgid)
3410 return False; /* not the right thread group */
3412 /* Check to see that the target isn't already exiting. */
3413 if (!VG_(is_exiting)(tid)) {
3414 if (VG_(clo_trace_signals))
3415 VG_(message)(Vg_DebugMsg,
3416 "Thread %d being killed with SIGKILL\n",
/* Mark the thread as dying of a fatal signal and yank it out of any
   blocked syscall so it can exit promptly. */
3419 tst->exitreason = VgSrc_FatalSig;
3420 tst->os_state.fatalsig = VKI_SIGKILL;
3422 if (!VG_(is_running_thread)(tid))
3423 VG_(get_thread_out_of_syscall)(tid);

/* PRE wrapper for kill(2): validates the signal, special-cases
   SIGKILL aimed at one of our own threads (see ML_(do_sigkill)),
   otherwise forwards to the kernel. */
3431 PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
3432 PRE_REG_READ2(long, "kill", int, pid, int, sig);
3433 if (!ML_(client_signal_OK)(ARG2)) {
3434 SET_STATUS_Failure( VKI_EINVAL );
3438 /* If we're sending SIGKILL, check to see if the target is one of
3439 our threads and handle it specially. */
3440 if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3441 SET_STATUS_Success(0);
3443 /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3444 affecting how posix-compliant the call is. I guess it is
3445 harmless to pass the 3rd arg on other platforms; hence pass
3447 SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3449 if (VG_(clo_trace_signals))
3450 VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3453 /* This kill might have given us a pending signal. Ask for a check once
3454 the syscall is done. */
3455 *flags |= SfPollAfter;
/* PRE(sys_link) fragment: both paths are readable ASCIIZ strings. */
3460 *flags |= SfMayBlock;
3461 PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3462 PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3463 PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3464 PRE_MEM_RASCIIZ( "link(newpath)", ARG2);

/* PRE(sys_newlstat): path in, struct stat out. */
3469 PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3470 PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3471 PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3472 PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );

/* POST(sys_newlstat) fragment. */
3478 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );

/* PRE(sys_mkdir). */
3483 *flags |= SfMayBlock;
3484 PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3485 PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3486 PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
/* PRE wrapper for mprotect(2).  The tricky part is PROT_GROWSDOWN /
   PROT_GROWSUP: the kernel extends the request to cover the whole
   growable vma, so we widen ARG1/ARG2 here to match what the kernel
   will actually do, by consulting the address-space manager's segment
   records.  NOTE(review): fragmentary — some condition lines of the
   rseg checks are elided from this extract. */
3491 PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
3492 PRE_REG_READ3(long, "mprotect",
3493 unsigned long, addr, vki_size_t, len, unsigned long, prot);
3495 if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3496 SET_STATUS_Failure( VKI_ENOMEM );
3498 #if defined(VKI_PROT_GROWSDOWN)
3500 if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3501 /* Deal with mprotects on growable stack areas.
3503 The critical files to understand all this are mm/mprotect.c
3504 in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3507 The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3508 round the start/end address of mprotect to the start/end of
3509 the underlying vma and glibc uses that as an easy way to
3510 change the protection of the stack by calling mprotect on the
3511 last page of the stack with PROT_GROWSDOWN set.
3513 The sanity check provided by the kernel is that the vma must
3514 have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate. */
3515 UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3516 NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3517 NSegment const *rseg;
3521 if (grows == VKI_PROT_GROWSDOWN) {
/* Growable-down stack: the adjacent reservation must sit just below. */
3522 rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
3524 rseg->kind == SkResvn &&
3525 rseg->smode == SmUpper &&
3526 rseg->end+1 == aseg->start) {
3527 Addr end = ARG1 + ARG2;
/* Widen the request down to the segment start, drop the flag. */
3529 ARG2 = end - aseg->start;
3530 ARG3 &= ~VKI_PROT_GROWSDOWN;
3532 SET_STATUS_Failure( VKI_EINVAL );
3534 } else if (grows == VKI_PROT_GROWSUP) {
/* Growable-up stack: reservation must sit just above. */
3535 rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
3537 rseg->kind == SkResvn &&
3538 rseg->smode == SmLower &&
3539 aseg->end+1 == rseg->start) {
3540 ARG2 = aseg->end - ARG1 + 1;
3541 ARG3 &= ~VKI_PROT_GROWSUP;
3543 SET_STATUS_Failure( VKI_EINVAL );
3546 /* both GROWSUP and GROWSDOWN */
3547 SET_STATUS_Failure( VKI_EINVAL );
3550 #endif // defined(VKI_PROT_GROWSDOWN)

/* POST(sys_mprotect) fragment: tell core and tool about the change. */
3559 ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
/* PRE(sys_munmap): reject ranges outside client-valid address space. */
3564 if (0) VG_(printf)(" munmap( %#lx )\n", ARG1);
3565 PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3566 PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3568 if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3569 SET_STATUS_Failure( VKI_EINVAL );

/* POST(sys_munmap) fragment: propagate the unmap to core and tool. */
3577 ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );

/* PRE(sys_mincore): vec gets one byte per page of the queried range. */
3582 PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3583 PRE_REG_READ3(long, "mincore",
3584 unsigned long, start, vki_size_t, length,
3585 unsigned char *, vec);
3586 PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );

/* POST(sys_mincore) fragment. */
3590 POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );

/* PRE(sys_nanosleep): 'rem' is optional and only written on EINTR,
   hence SfPostOnFail so the POST runs even when the call fails. */
3595 *flags |= SfMayBlock|SfPostOnFail;
3596 PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3597 PRE_REG_READ2(long, "nanosleep",
3598 struct timespec *, req, struct timespec *, rem);
3599 PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3601 PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );

/* POST(sys_nanosleep): rem is defined only on interrupted sleeps. */
3606 vg_assert(SUCCESS || FAILURE);
3607 if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3608 POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
/* PRE wrapper for open(2): the 'mode' argument only exists when
   O_CREAT is set, so the register-read declaration differs.
   On Linux, opens of /proc/<pid>/cmdline (self or own pid) are
   satisfied from a pre-cooked fd instead of the real file, because
   Valgrind has replaced its own cmdline. */
3613 if (ARG2 & VKI_O_CREAT) {
3615 PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
3616 PRE_REG_READ3(long, "open",
3617 const char *, filename, int, flags, int, mode);
3620 PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
3621 PRE_REG_READ2(long, "open",
3622 const char *, filename, int, flags);
3624 PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3626 #if defined(VGO_linux)
3627 /* Handle the case where the open is of /proc/self/cmdline or
3628 /proc/<pid>/cmdline, and just give it a copy of the fd for the
3629 fake file we cooked up at startup (in m_main). Also, seek the
3630 cloned fd back to the start. */
3633 Char* arg1s = (Char*) ARG1;
3636 VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3637 if (ML_(safe_to_deref)( arg1s, 1 ) &&
3638 (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
/* Hand out a dup of the fake cmdline fd, rewound to offset 0. */
3641 sres = VG_(dup)( VG_(cl_cmdline_fd) );
3642 SET_STATUS_from_SysRes( sres );
3643 if (!sr_isError(sres)) {
3644 OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3646 SET_STATUS_Failure( VKI_EMFILE );
3651 #endif // defined(VGO_linux)
3653 /* Otherwise handle normally */
3654 *flags |= SfMayBlock;

/* POST(sys_open): veto fds in Valgrind's reserved range, else record. */
3660 if (!ML_(fd_allowed)(RES, "open", tid, True)) {
3662 SET_STATUS_Failure( VKI_EMFILE );
3664 if (VG_(clo_track_fds))
3665 ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
3671 *flags |= SfMayBlock;
3672 PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3673 PRE_REG_READ3(ssize_t, "read",
3674 unsigned int, fd, char *, buf, vki_size_t, count);
3676 if (!ML_(fd_allowed)(ARG1, "read", tid, False))
3677 SET_STATUS_Failure( VKI_EBADF );
3679 PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
3685 POST_MEM_WRITE( ARG2, RES );
/* PRE fragment for write(2). */
3691 *flags |= SfMayBlock;
3692 PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3693 PRE_REG_READ3(ssize_t, "write",
3694 unsigned int, fd, const char *, buf, vki_size_t, count);
3695 /* check to see if it is allowed. If not, try for an exemption from
3696 --sim-hints=enable-outer (used for self hosting). */
3697 ok = ML_(fd_allowed)(ARG1, "write", tid, False);
/* Writes to stderr are permitted anyway under --sim-hints=enable-outer,
   so an inner Valgrind can report errors through the outer one. */
3698 if (!ok && ARG1 == 2/*stderr*/
3699 && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
3702 SET_STATUS_Failure( VKI_EBADF );
/* The whole source buffer must be readable. */
3704 PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
/* PRE fragment for creat(2): pathname must be a readable string. */
3709 *flags |= SfMayBlock;
3710 PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3711 PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
3712 PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
/* POST fragment: same new-fd vetting and tracking as for open(2). */
3718 if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
3720 SET_STATUS_Failure( VKI_EMFILE );
3722 if (VG_(clo_track_fds))
3723 ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
/* PRE fragment for poll(2).  For each of the nfds pollfd entries the
   kernel reads .fd and .events and writes .revents, so we check exactly
   those members rather than the whole struct. */
3730 int fd; -- file descriptor
3731 short events; -- requested events
3732 short revents; -- returned events
3734 int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3737 struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3738 *flags |= SfMayBlock;
3739 PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3740 PRE_REG_READ3(long, "poll",
3741 struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3743 for (i = 0; i < ARG2; i++) {
3744 PRE_MEM_READ( "poll(ufds.fd)",
3745 (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3746 PRE_MEM_READ( "poll(ufds.events)",
3747 (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
/* Fixed typo in the user-visible description string:
   "poll(ufds.reventss)" -> "poll(ufds.revents)". */
3748 PRE_MEM_WRITE( "poll(ufds.revents)",
3749 (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
/* POST fragment: only the .revents fields are written back. */
3757 struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3758 for (i = 0; i < ARG2; i++)
3759 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
/* PRE handler fragment for readlink(2).  Valgrind must intercept
   /proc/self/exe (or /proc/<pid>/exe): the request is redirected to the
   fd of the client executable (VG_(cl_exec_fd)) so the client sees its
   own path rather than Valgrind's. */
3767 PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
3768 PRE_REG_READ3(long, "readlink",
3769 const char *, path, char *, buf, int, bufsiz);
3770 PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
3771 PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
3774 #if defined(VGO_linux)
3776 * Handle the case where readlink is looking at /proc/self/exe or
3780 Char* arg1s = (Char*) ARG1;
3781 VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
3782 if (ML_(safe_to_deref)(arg1s, 1) &&
3783 (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
/* Redirect: readlink /proc/self/fd/<cl_exec_fd> instead. */
3786 VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
3787 SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
3790 #endif // defined(VGO_linux)
/* Normal case: the syscall is performed right here, in the PRE handler. */
3793 SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
/* readlink does not NUL-terminate; only RES bytes become defined. */
3797 if (SUCCESS && RES > 0)
3798 POST_MEM_WRITE( ARG2, RES );
/* PRE fragment for readv(2): the iovec array itself must be readable,
   then each iov_base buffer must be writable for iov_len bytes. */
3804 struct vki_iovec * vec;
3805 *flags |= SfMayBlock;
3806 PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
3807 PRE_REG_READ3(ssize_t, "readv",
3808 unsigned long, fd, const struct iovec *, vector,
3809 unsigned long, count);
3810 if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
3811 SET_STATUS_Failure( VKI_EBADF );
3813 PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
3816 /* ToDo: don't do any of the following if the vector is invalid */
3817 vec = (struct vki_iovec *)ARG2;
3818 for (i = 0; i < (Int)ARG3; i++)
3819 PRE_MEM_WRITE( "readv(vector[...])",
3820 (Addr)vec[i].iov_base, vec[i].iov_len );
/* POST fragment: walk the iovecs, marking each buffer defined only up
   to the number of bytes the kernel actually filled (RES total). */
3830 struct vki_iovec * vec = (struct vki_iovec *)ARG2;
3833 /* RES holds the number of bytes read. */
3834 for (i = 0; i < (Int)ARG3; i++) {
3835 Int nReadThisBuf = vec[i].iov_len;
3836 if (nReadThisBuf > remains) nReadThisBuf = remains;
3837 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
3838 remains -= nReadThisBuf;
/* Sanity: the kernel can never report more bytes than the iovecs hold. */
3839 if (remains < 0) VG_(core_panic)("readv: remains < 0");
/* PRE fragment for rename(2): both paths must be readable strings. */
3846 PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3847 PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
3848 PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
3849 PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
/* PRE fragment for rmdir(2). */
3854 *flags |= SfMayBlock;
3855 PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3856 PRE_REG_READ1(long, "rmdir", const char *, pathname);
3857 PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
/* PRE fragment for select(2).  Each fd_set check covers n/8 bytes,
   which (as the XXX notes) may understate what the kernel reads. */
3862 *flags |= SfMayBlock;
3863 PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
3864 PRE_REG_READ5(long, "select",
3865 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
3866 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
3867 // XXX: this possibly understates how much memory is read.
3869 PRE_MEM_READ( "select(readfds)",
3870 ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
3872 PRE_MEM_READ( "select(writefds)",
3873 ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
3875 PRE_MEM_READ( "select(exceptfds)",
3876 ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
/* Timeout is a struct timeval; checked field-by-field by the helper. */
3878 PRE_timeval_READ( "select(timeout)", ARG5 );
/* PRE fragments for the simple credential / process-group syscalls:
   only register-argument reads to declare, no memory to check --
   except setgroups, whose gid list is read by the kernel. */
3883 PRINT("sys_setgid ( %ld )", ARG1);
3884 PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
3889 PRINT("sys_setsid ( )");
3890 PRE_REG_READ0(long, "setsid");
3895 PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
3896 PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
/* The kernel reads 'size' gid_t entries from the list. */
3898 PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3903 PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
3904 PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
3909 PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
3910 PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
3915 PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
3916 PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
/* PRE fragment for setrlimit(2).  Valgrind virtualises three resources
   (fd limit, data, stack) and answers from its own bookkeeping via
   SET_STATUS_* instead of letting the kernel see the call. */
3922 PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
3923 PRE_REG_READ2(long, "setrlimit",
3924 unsigned int, resource, struct rlimit *, rlim);
3925 PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3927 #ifdef _RLIMIT_POSIX_FLAG
3928 // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
3929 // Unset it here to make the if statements below work correctly.
3930 arg1 &= ~_RLIMIT_POSIX_FLAG;
/* RLIMIT_NOFILE: the client may not raise the limit past Valgrind's
   own hard fd limit (fds above it are reserved for Valgrind's use). */
3933 if (arg1 == VKI_RLIMIT_NOFILE) {
3934 if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
3935 ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
3936 SET_STATUS_Failure( VKI_EPERM );
3939 VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
3940 SET_STATUS_Success( 0 );
/* RLIMIT_DATA: tracked locally in VG_(client_rlimit_data). */
3943 else if (arg1 == VKI_RLIMIT_DATA) {
3944 if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
3945 ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
3946 SET_STATUS_Failure( VKI_EPERM );
3949 VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
3950 SET_STATUS_Success( 0 );
/* RLIMIT_STACK: only honoured for the main thread (tid 1), where it
   also updates that thread's recorded client stack size. */
3953 else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
3954 if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
3955 ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
3956 SET_STATUS_Failure( VKI_EPERM );
3959 VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
3960 VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
3961 SET_STATUS_Success( 0 );
/* PRE fragment for setuid(2). */
3968 PRINT("sys_setuid ( %ld )", ARG1);
3969 PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
/* PRE/POST fragments for stat(2): path readable, buf written. */
3974 PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3975 PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
3976 PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
3977 PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
3982 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
/* PRE/POST fragments for statfs(2). */
3987 PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
3988 PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
3989 PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
3990 PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
3994 POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
/* PRE/POST fragments for statfs64(2): the caller supplies the buffer
   size explicitly (ARG2), so that is used instead of sizeof. */
3999 PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
4000 PRE_REG_READ3(long, "statfs64",
4001 const char *, path, vki_size_t, size, struct statfs64 *, buf);
4002 PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4003 PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4007 POST_MEM_WRITE( ARG3, ARG2 );
/* PRE fragment for symlink(2). */
4012 *flags |= SfMayBlock;
4013 PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4014 PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4015 PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4016 PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
/* PRE/POST fragments for time(2).  The t pointer may be NULL; the
   guard presumably sits on a line missing from this extract -- TODO
   confirm against the full file. */
4021 /* time_t time(time_t *t); */
4022 PRINT("sys_time ( %#lx )",ARG1);
4023 PRE_REG_READ1(long, "time", int *, t);
4025 PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4032 POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
/* PRE/POST fragments for times(2). */
4038 PRINT("sys_times ( %#lx )", ARG1);
4039 PRE_REG_READ1(long, "times", struct tms *, buf);
4041 PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4048 POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
/* PRE fragment for umask(2): pure register call, nothing to check. */
4054 PRINT("sys_umask ( %ld )", ARG1);
4055 PRE_REG_READ1(long, "umask", int, mask);
/* PRE fragment for unlink(2). */
4060 *flags |= SfMayBlock;
4061 PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4062 PRE_REG_READ1(long, "unlink", const char *, pathname);
4063 PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
/* PRE/POST fragments for uname(2). */
4068 PRINT("sys_newuname ( %#lx )", ARG1);
4069 PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4070 PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4076 POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
/* PRE/POST fragments for waitpid(2): status pointer may be NULL, so
   the memory checks are guarded. */
4082 *flags |= SfMayBlock;
4083 PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
4084 PRE_REG_READ3(long, "waitpid",
4085 vki_pid_t, pid, unsigned int *, status, int, options);
4087 if (ARG2 != (Addr)NULL)
4088 PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4093 if (ARG2 != (Addr)NULL)
4094 POST_MEM_WRITE( ARG2, sizeof(int) );
/* PRE/POST fragments for wait4(2): both the status and rusage
   out-pointers are optional, so every memory check is NULL-guarded. */
4099 *flags |= SfMayBlock;
4100 PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
4102 PRE_REG_READ4(long, "wait4",
4103 vki_pid_t, pid, unsigned int *, status, int, options,
4104 struct rusage *, rusage);
4105 if (ARG2 != (Addr)NULL)
4106 PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4107 if (ARG4 != (Addr)NULL)
4108 PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4113 if (ARG2 != (Addr)NULL)
4114 POST_MEM_WRITE( ARG2, sizeof(int) );
4115 if (ARG4 != (Addr)NULL)
4116 POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
/* PRE fragment for writev(2): mirror of readv, but each iov_base
   buffer must be readable rather than writable. */
4122 struct vki_iovec * vec;
4123 *flags |= SfMayBlock;
4124 PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4125 PRE_REG_READ3(ssize_t, "writev",
4126 unsigned long, fd, const struct iovec *, vector,
4127 unsigned long, count);
4128 if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4129 SET_STATUS_Failure( VKI_EBADF );
4131 PRE_MEM_READ( "writev(vector)",
4132 ARG2, ARG3 * sizeof(struct vki_iovec) );
4134 /* ToDo: don't do any of the following if the vector is invalid */
4135 vec = (struct vki_iovec *)ARG2;
4136 for (i = 0; i < (Int)ARG3; i++)
4137 PRE_MEM_READ( "writev(vector[...])",
4138 (Addr)vec[i].iov_base, vec[i].iov_len );
/* PRE fragment for utimes(2): tvp points at two struct timevals
   (access time, modification time), checked individually. */
4145 PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4146 PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4147 PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4149 PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4150 PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
/* PRE fragment for acct(2). */
4156 PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4157 PRE_REG_READ1(long, "acct", const char *, filename);
4158 PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
/* PRE fragment for pause(2): blocks until a signal is delivered. */
4163 *flags |= SfMayBlock;
4164 PRINT("sys_pause ( )");
4165 PRE_REG_READ0(long, "pause");
/* PRE handler for sigaltstack(2).  (This extract is sampled: the
   braces and the ARG1/ARG2 NULL guards around the checks are on
   missing lines.)  The ss fields are checked individually rather than
   as a whole struct, and the real work is delegated to
   VG_(do_sys_sigaltstack), since Valgrind must virtualise the
   client's alternate signal stack. */
4168 PRE(sys_sigaltstack)
4170 PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4171 PRE_REG_READ2(int, "sigaltstack",
4172 const vki_stack_t *, ss, vki_stack_t *, oss);
4174 const vki_stack_t *ss = (vki_stack_t *)ARG1;
4175 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4176 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4177 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4180 PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4183 SET_STATUS_from_SysRes(
4184 VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4188 POST(sys_sigaltstack)
/* Only mark oss written on success and when a non-NULL oss was given. */
4191 if (RES == 0 && ARG2 != 0)
4192 POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4198 #endif // defined(VGO_linux) || defined(VGO_darwin)
4200 /*--------------------------------------------------------------------*/
4202 /*--------------------------------------------------------------------*/