2 /*--------------------------------------------------------------------*/
3 /*--- Handle system calls. syswrap-main.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2000-2010 Julian Seward
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
31 #include "libvex_guest_offsets.h"
32 #include "libvex_trc_values.h"
33 #include "pub_core_basics.h"
34 #include "pub_core_aspacemgr.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_threadstate.h"
38 #include "pub_core_libcbase.h"
39 #include "pub_core_libcassert.h"
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcproc.h" // For VG_(getpid)()
42 #include "pub_core_libcsignal.h"
43 #include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock),
45 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
46 #include "pub_core_tooliface.h"
47 #include "pub_core_options.h"
48 #include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals)
49 #include "pub_core_syscall.h"
50 #include "pub_core_machine.h"
51 #include "pub_core_syswrap.h"
53 #include "priv_types_n_macros.h"
54 #include "priv_syswrap-main.h"
56 #if defined(VGO_darwin)
57 #include "priv_syswrap-darwin.h"
60 /* Useful info which needs to be recorded somewhere:
61 Use of registers in syscalls is:
63 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
65 x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM)
66 amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM)
67 ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
68 ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
71 ppc32 r2 r3 r4 r5 r6 r7 r8 r9 r10 r3(res),r4(err)
72 ppc64 r2 r3 r4 r5 r6 r7 r8 r9 r10 r3(res),r4(err)
75 x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c
76 amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c
79 x86 sys eax ebx ecx edx esi edi ebp n/a eax
81 For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
82 amd64-darwin. Apparently 0(%esp) is some kind of return address
83 (perhaps for syscalls done with "sysenter"?) I don't think it is
84 relevant for syscalls done with "int $0x80/1/2".
87 /* This is the top level of the system-call handler module. All
88 system calls are channelled through here, doing two things:
90 * notify the tool of the events (mem/reg reads, writes) happening
92 * perform the syscall, usually by passing it along to the kernel
95 A magical piece of assembly code, do_syscall_for_client_WRK, in
96 syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
97 kernel, whilst having the simulator retain control.
100 /* The main function is VG_(client_syscall). The simulation calls it
101 whenever a client thread wants to do a syscall. The following is a
102 sketch of what it does.
104 * Ensures the root thread's stack is suitably mapped. Tedious and
105 arcane. See big big comment in VG_(client_syscall).
107 * First, it rounds up the syscall number and args (which is a
108 platform dependent activity) and puts them in a struct ("args")
109 and also a copy in "orig_args".
111 The pre/post wrappers refer to these structs and so no longer
112 need magic macros to access any specific registers. This struct
113 is stored in thread-specific storage.
116 * The pre-wrapper is called, passing it a pointer to struct
120 * The pre-wrapper examines the args and pokes the tool
121 appropriately. It may modify the args; this is why "orig_args"
124 The pre-wrapper may choose to 'do' the syscall itself, and
125 concludes one of three outcomes:
127 Success(N) -- syscall is already complete, with success;
130 Fail(N) -- syscall is already complete, with failure;
133 HandToKernel -- (the usual case): this needs to be given to
134 the kernel to be done, using the values in
135 the possibly-modified "args" struct.
137 In addition, the pre-wrapper may set some flags:
139 MayBlock -- only applicable when outcome==HandToKernel
141 PostOnFail -- only applicable when outcome==HandToKernel or Fail
144 * If the pre-outcome is HandToKernel, the syscall is duly handed
145 off to the kernel (perhaps involving some thread switchery, but
146 that's not important). This reduces the possible set of outcomes
147 to either Success(N) or Fail(N).
150 * The outcome (Success(N) or Fail(N)) is written back to the guest
151 register(s). This is platform specific:
153 x86: Success(N) ==> eax = N
158 ppc32: Success(N) ==> r3 = N, CR0.SO = 0
159 Fail(N) ==> r3 = N, CR0.SO = 1
162 x86: Success(N) ==> edx:eax = N, cc = 0
163 Fail(N) ==> edx:eax = N, cc = 1
165 * The post wrapper is called if:
168 - outcome==Success or (outcome==Fail and PostOnFail is set)
170 The post wrapper is passed the adulterated syscall args (struct
171 "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
173 There are several other complications, primarily to do with
174 syscalls getting interrupted, explained in comments in the code.
177 /* CAVEATS for writing wrappers. It is important to follow these!
179 The macros defined in priv_types_n_macros.h are designed to help
180 decouple the wrapper logic from the actual representation of
181 syscall args/results, since these wrappers are designed to work on
184 Sometimes a PRE wrapper will complete the syscall itself, without
185 handing it to the kernel. It will use one of SET_STATUS_Success,
186 SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
187 value. It is critical to appreciate that use of the macro does not
188 immediately cause the underlying guest state to be updated -- that
189 is done by the driver logic in this file, when the wrapper returns.
191 As a result, PRE wrappers of the following form will malfunction:
196 SET_STATUS_Somehow(...)
198 // do something that assumes guest state is up to date
201 In particular, direct or indirect calls to VG_(poll_signals) after
202 setting STATUS can cause the guest state to be read (in order to
203 build signal frames). Do not do this. If you want a signal poll
204 after the syscall goes through, do "*flags |= SfPollAfter" and the
205 driver logic will do it for you.
209 Another critical requirement following introduction of new address
210 space manager (JRS, 20050923):
212 In a situation where the mappedness of memory has changed, aspacem
213 should be notified BEFORE the tool. Hence the following is
216 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
217 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
219 VG_(discard_translations)(s->start, s->end+1 - s->start);
221 whilst this is wrong:
223 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
224 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
226 VG_(discard_translations)(s->start, s->end+1 - s->start);
228 The reason is that the tool may itself ask aspacem for more shadow
229 memory as a result of the VG_TRACK call. In such a situation it is
230 critical that aspacem's segment array is up to date -- hence the
231 need to notify aspacem first.
235 Also .. take care to call VG_(discard_translations) whenever
236 memory with execute permissions is unmapped.
240 /* ---------------------------------------------------------------------
241 Do potentially blocking syscall for the client, and mess with
242 signal masks at the same time.
243 ------------------------------------------------------------------ */
245 /* Perform a syscall on behalf of a client thread, using a specific
246 signal mask. On completion, the signal mask is set to restore_mask
247 (which presumably blocks almost everything). If a signal happens
248 during the syscall, the handler should call
249 VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
250 thread's context to do the right thing.
252 The _WRK function is handwritten assembly, implemented per-platform
253 in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic
254 properties. See comments at the top of
255 VG_(fixup_guest_state_after_syscall_interrupted) below for details.
257 This function (these functions) are required to return zero in case
258 of success (even if the syscall itself failed), and nonzero if the
259 sigprocmask-swizzling calls failed. We don't actually care about
260 the failure values from sigprocmask, although most of the assembly
261 implementations do attempt to return that, using the convention
262 0 for success, or 0x8000 | error-code for failure.
264 #if defined(VGO_linux)
266 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
268 const vki_sigset_t *syscall_mask,
269 const vki_sigset_t *restore_mask,
271 #elif defined(VGO_aix5)
273 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
275 const vki_sigset_t *syscall_mask,
276 const vki_sigset_t *restore_mask,
277 Word sigsetSzB, /* unused */
278 Word __nr_sigprocmask );
279 #elif defined(VGO_darwin)
281 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
283 const vki_sigset_t *syscall_mask,
284 const vki_sigset_t *restore_mask,
285 Word sigsetSzB ); /* unused */
287 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
289 const vki_sigset_t *syscall_mask,
290 const vki_sigset_t *restore_mask,
291 Word sigsetSzB ); /* unused */
293 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
295 const vki_sigset_t *syscall_mask,
296 const vki_sigset_t *restore_mask,
297 Word sigsetSzB ); /* unused */
298 #elif defined(VGO_l4re)
300 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
302 const vki_sigset_t *syscall_mask,
303 const vki_sigset_t *restore_mask,
305 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
307 const vki_sigset_t *syscall_mask,
308 const vki_sigset_t *restore_mask,
311 VG_(unimplemented)((char*)__func__);
/* Perform 'syscallno' on behalf of the client while 'syscall_mask' is
   the active signal mask, by dispatching to the per-OS handwritten
   assembly helper ML_(do_syscall_for_client*_WRK) declared above.  On
   Darwin the helper is chosen by the syscall's class (Unix/Mach/mdep)
   extracted from the syscall number.
   NOTE(review): the embedded line numbering jumps here (e.g. 355->368),
   so local declarations ('err', 'saved', 'tst'), 'break;'s and closing
   braces are not visible in this listing -- do not assume they are
   absent from the real file. */
320 void do_syscall_for_client ( Int syscallno,
322 const vki_sigset_t* syscall_mask )
324 #if !defined(VGO_l4re)
327 # if defined(VGO_linux)
328 err = ML_(do_syscall_for_client_WRK)(
329 syscallno, &tst->arch.vex,
330 syscall_mask, &saved, sizeof(vki_sigset_t)
332 # elif defined(VGO_aix5)
333 err = ML_(do_syscall_for_client_WRK)(
334 syscallno, &tst->arch.vex,
335 syscall_mask, &saved, 0/*unused:sigsetSzB*/,
338 # elif defined(VGO_darwin)
    /* Darwin: route on the class bits encoded in the syscall number. */
339 switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
340 case VG_DARWIN_SYSCALL_CLASS_UNIX:
341 err = ML_(do_syscall_for_client_unix_WRK)(
342 VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
343 syscall_mask, &saved, 0/*unused:sigsetSzB*/
346 case VG_DARWIN_SYSCALL_CLASS_MACH:
347 err = ML_(do_syscall_for_client_mach_WRK)(
348 VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
349 syscall_mask, &saved, 0/*unused:sigsetSzB*/
352 case VG_DARWIN_SYSCALL_CLASS_MDEP:
353 err = ML_(do_syscall_for_client_mdep_WRK)(
354 VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
355 syscall_mask, &saved, 0/*unused:sigsetSzB*/
    /* Complaint text used if the _WRK helper's sigprocmask swizzling
       failed (see the 0x8000|errcode convention documented above). */
368 "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
    /* l4re build: not implemented. */
372 VG_(unimplemented)("unimplemented function do_syscall_for_client()");
377 /* ---------------------------------------------------------------------
378 Impedance matchers and misc helpers
379 ------------------------------------------------------------------ */
/* Field-by-field equality on two canonical SyscallArgs records:
   syscall number plus all eight possible argument slots.  Presumably
   used to detect whether a PRE wrapper modified the args (cf. the
   "orig_args" discussion in the header comment) -- TODO confirm at
   call sites not visible in this chunk. */
382 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
384 return a1->sysno == a2->sysno
385 && a1->arg1 == a2->arg1
386 && a1->arg2 == a2->arg2
387 && a1->arg3 == a2->arg3
388 && a1->arg4 == a2->arg4
389 && a1->arg5 == a2->arg5
390 && a1->arg6 == a2->arg6
391 && a1->arg7 == a2->arg7
392 && a1->arg8 == a2->arg8;
/* Equality on two SyscallStatus records: same completion state
   ('what') and equal SysRes payloads (compared via sr_EQ).  On Darwin,
   when the records differ, both SysRes values are printed to help
   debugging.
   NOTE(review): listing gap -- the success 'return' and the final
   failure 'return' (and '#endif'/closing brace) are not visible here. */
396 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
398 /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
399 if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
401 # if defined(VGO_darwin)
402 /* Darwin-specific debugging guff */
403 vg_assert(s1->what == s2->what);
404 VG_(printf)("eq_SyscallStatus:\n");
405 VG_(printf)(" {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
406 VG_(printf)(" {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
412 /* Convert between SysRes and SyscallStatus, to the extent possible. */
/* Wrap a raw SysRes in a SyscallStatus marked SsComplete.
   NOTE(review): listing gap -- the assignment of 'res' into
   'status.sres' and the 'return status;' are not visible here. */
415 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
417 SyscallStatus status;
418 status.what = SsComplete;
424 /* Impedance matchers. These convert syscall arg or result data from
425 the platform-specific in-guest-state format to the canonical
426 formats, and back. */
/* Impedance matcher, inbound direction: read the platform-specific
   guest register state (and, on Darwin, the guest stack) and fill in
   the canonical SyscallArgs record (sysno + arg1..arg8).  The register
   choices follow the per-platform table in the big comment near the
   top of this file.
   NOTE(review): the embedded line numbering jumps in many places
   (missing 'break;'s, '#endif's, closing braces, and the
   'switch (trc)' openers for the Darwin/l4re trc dispatch), so this
   listing is not the complete function. */
429 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical,
430 /*IN*/ VexGuestArchState* gst_vanilla,
433 #if defined(VGP_x86_linux)
434 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
435 canonical->sysno = gst->guest_EAX;
436 canonical->arg1 = gst->guest_EBX;
437 canonical->arg2 = gst->guest_ECX;
438 canonical->arg3 = gst->guest_EDX;
439 canonical->arg4 = gst->guest_ESI;
440 canonical->arg5 = gst->guest_EDI;
441 canonical->arg6 = gst->guest_EBP;
445 #elif defined(VGP_amd64_linux)
446 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
447 canonical->sysno = gst->guest_RAX;
448 canonical->arg1 = gst->guest_RDI;
449 canonical->arg2 = gst->guest_RSI;
450 canonical->arg3 = gst->guest_RDX;
451 canonical->arg4 = gst->guest_R10;
452 canonical->arg5 = gst->guest_R8;
453 canonical->arg6 = gst->guest_R9;
457 #elif defined(VGP_ppc32_linux)
458 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
459 canonical->sysno = gst->guest_GPR0;
460 canonical->arg1 = gst->guest_GPR3;
461 canonical->arg2 = gst->guest_GPR4;
462 canonical->arg3 = gst->guest_GPR5;
463 canonical->arg4 = gst->guest_GPR6;
464 canonical->arg5 = gst->guest_GPR7;
465 canonical->arg6 = gst->guest_GPR8;
469 #elif defined(VGP_ppc64_linux)
470 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
471 canonical->sysno = gst->guest_GPR0;
472 canonical->arg1 = gst->guest_GPR3;
473 canonical->arg2 = gst->guest_GPR4;
474 canonical->arg3 = gst->guest_GPR5;
475 canonical->arg4 = gst->guest_GPR6;
476 canonical->arg5 = gst->guest_GPR7;
477 canonical->arg6 = gst->guest_GPR8;
481 #elif defined(VGP_arm_linux)
482 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
483 canonical->sysno = gst->guest_R7;
484 canonical->arg1 = gst->guest_R0;
485 canonical->arg2 = gst->guest_R1;
486 canonical->arg3 = gst->guest_R2;
487 canonical->arg4 = gst->guest_R3;
488 canonical->arg5 = gst->guest_R4;
489 canonical->arg6 = gst->guest_R5;
493 #elif defined(VGP_ppc32_aix5)
494 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
495 canonical->sysno = gst->guest_GPR2;
496 canonical->arg1 = gst->guest_GPR3;
497 canonical->arg2 = gst->guest_GPR4;
498 canonical->arg3 = gst->guest_GPR5;
499 canonical->arg4 = gst->guest_GPR6;
500 canonical->arg5 = gst->guest_GPR7;
501 canonical->arg6 = gst->guest_GPR8;
502 canonical->arg7 = gst->guest_GPR9;
503 canonical->arg8 = gst->guest_GPR10;
505 #elif defined(VGP_ppc64_aix5)
506 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
507 canonical->sysno = gst->guest_GPR2;
508 canonical->arg1 = gst->guest_GPR3;
509 canonical->arg2 = gst->guest_GPR4;
510 canonical->arg3 = gst->guest_GPR5;
511 canonical->arg4 = gst->guest_GPR6;
512 canonical->arg5 = gst->guest_GPR7;
513 canonical->arg6 = gst->guest_GPR8;
514 canonical->arg7 = gst->guest_GPR9;
515 canonical->arg8 = gst->guest_GPR10;
517 #elif defined(VGP_x86_darwin)
    /* x86-darwin: args live on the guest stack; a syscall made via the
       syscall() wrapper (guest_EAX == 0) has everything shifted by one
       slot, which is special-cased below. */
518 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
519 UWord *stack = (UWord *)gst->guest_ESP;
520 // GrP fixme hope syscalls aren't called with really shallow stacks...
521 canonical->sysno = gst->guest_EAX;
522 if (canonical->sysno != 0) {
523 // stack[0] is return address
524 canonical->arg1 = stack[1];
525 canonical->arg2 = stack[2];
526 canonical->arg3 = stack[3];
527 canonical->arg4 = stack[4];
528 canonical->arg5 = stack[5];
529 canonical->arg6 = stack[6];
530 canonical->arg7 = stack[7];
531 canonical->arg8 = stack[8];
533 // GrP fixme hack handle syscall()
534 // GrP fixme what about __syscall() ?
535 // stack[0] is return address
536 // DDD: the tool can't see that the params have been shifted! Can
537 // lead to incorrect checking, I think, because the PRRAn/PSARn
538 // macros will mention the pre-shifted args.
539 canonical->sysno = stack[1];
540 vg_assert(canonical->sysno != 0);
541 canonical->arg1 = stack[2];
542 canonical->arg2 = stack[3];
543 canonical->arg3 = stack[4];
544 canonical->arg4 = stack[5];
545 canonical->arg5 = stack[6];
546 canonical->arg6 = stack[7];
547 canonical->arg7 = stack[8];
548 canonical->arg8 = stack[9];
550 PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
551 VG_(getpid)(), /*tid,*/
552 VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
555 // Here we determine what kind of syscall it was by looking at the
556 // interrupt kind, and then encode the syscall number using the 64-bit
557 // encoding for Valgrind's internal use.
559 // DDD: Would it be better to stash the JMP kind into the Darwin
560 // thread state rather than passing in the trc?
562 case VEX_TRC_JMP_SYS_INT128:
563 // int $0x80 = Unix, 64-bit result
564 vg_assert(canonical->sysno >= 0);
565 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
567 case VEX_TRC_JMP_SYS_SYSENTER:
568 // syscall = Unix, 32-bit result
569 // OR Mach, 32-bit result
570 if (canonical->sysno >= 0) {
571 // GrP fixme hack: 0xffff == I386_SYSCALL_NUMBER_MASK
572 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
575 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
578 case VEX_TRC_JMP_SYS_INT129:
579 // int $0x81 = Mach, 32-bit result
580 vg_assert(canonical->sysno < 0);
581 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
583 case VEX_TRC_JMP_SYS_INT130:
584 // int $0x82 = mdep, 32-bit result
585 vg_assert(canonical->sysno >= 0);
586 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
593 #elif defined(VGP_amd64_darwin)
    /* amd64-darwin: first six args in registers, arg7/arg8 on the
       stack; the syscall() wrapper case shifts everything by one. */
594 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
595 UWord *stack = (UWord *)gst->guest_RSP;
597 vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
599 // GrP fixme hope syscalls aren't called with really shallow stacks...
600 canonical->sysno = gst->guest_RAX;
601 if (canonical->sysno != __NR_syscall) {
602 // stack[0] is return address
603 canonical->arg1 = gst->guest_RDI;
604 canonical->arg2 = gst->guest_RSI;
605 canonical->arg3 = gst->guest_RDX;
606 canonical->arg4 = gst->guest_R10; // not rcx with syscall insn
607 canonical->arg5 = gst->guest_R8;
608 canonical->arg6 = gst->guest_R9;
609 canonical->arg7 = stack[1];
610 canonical->arg8 = stack[2];
612 // GrP fixme hack handle syscall()
613 // GrP fixme what about __syscall() ?
614 // stack[0] is return address
615 // DDD: the tool can't see that the params have been shifted! Can
616 // lead to incorrect checking, I think, because the PRRAn/PSARn
617 // macros will mention the pre-shifted args.
618 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
619 vg_assert(canonical->sysno != __NR_syscall);
620 canonical->arg1 = gst->guest_RSI;
621 canonical->arg2 = gst->guest_RDX;
622 canonical->arg3 = gst->guest_R10; // not rcx with syscall insn
623 canonical->arg4 = gst->guest_R8;
624 canonical->arg5 = gst->guest_R9;
625 canonical->arg6 = stack[1];
626 canonical->arg7 = stack[2];
627 canonical->arg8 = stack[3];
629 PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
630 VG_(getpid)(), /*tid,*/
631 VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
634 // no canonical->sysno adjustment needed
636 #elif defined(VGP_x86_l4re)
    /* x86-l4re: the syscall kind is derived from the trap code 'trc';
       all args are taken from registers. */
637 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
639 case VEX_TRC_JMP_SYS_SYSENTER: // fallthrough
640 case VEX_TRC_JMP_SYS_INT48:
641 canonical->sysno = SYS_INVOKE;
643 case VEX_TRC_JMP_SYS_INT50:
644 canonical->sysno = SYS_DEBUG;
646 case VEX_TRC_JMP_SIGTRAP:
647 canonical->sysno = SYS_ENTER_KDEBUG;
649 case VEX_TRC_JMP_SYS_INT128:
650 canonical->sysno = SYS_LINUX_INT80;
652 case VEX_TRC_JMP_L4_UD2:
653 canonical->sysno = SYS_UD2;
655 case VEX_TRC_JMP_L4_ARTIFICIAL:
656 canonical->sysno = SYS_ARTIFICIAL;
659 VG_(printf)("Unhandled TRC: %d ??\n", trc);
660 enter_kdebug("invalid syscall");
662 canonical->arg1 = gst->guest_EAX;
663 canonical->arg2 = gst->guest_EBX;
664 canonical->arg3 = gst->guest_ECX;
665 canonical->arg4 = gst->guest_EDX;
666 canonical->arg5 = gst->guest_ESI;
667 canonical->arg6 = gst->guest_EDI;
668 canonical->arg7 = gst->guest_EBP;
672 # error "getSyscallArgsFromGuestState: unknown arch"
/* Impedance matcher, outbound direction: write the (possibly
   wrapper-modified) canonical SyscallArgs back into the
   platform-specific guest register state (and, on Darwin, the guest
   stack), so the kernel sees the values from the "args" struct.
   Mirrors getSyscallArgsFromGuestState above, except amd64-darwin
   writes arg4 to RCX here (kernel-side convention) where the reader
   took it from R10 -- TODO confirm that asymmetry against the real
   file, since lines are missing from this listing.
   NOTE(review): embedded line numbers jump ('#endif's/braces not
   visible); this listing is not the complete function. */
677 void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs* canonical,
678 /*OUT*/VexGuestArchState* gst_vanilla )
680 #if defined(VGP_x86_linux)
681 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
682 gst->guest_EAX = canonical->sysno;
683 gst->guest_EBX = canonical->arg1;
684 gst->guest_ECX = canonical->arg2;
685 gst->guest_EDX = canonical->arg3;
686 gst->guest_ESI = canonical->arg4;
687 gst->guest_EDI = canonical->arg5;
688 gst->guest_EBP = canonical->arg6;
690 #elif defined(VGP_amd64_linux)
691 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
692 gst->guest_RAX = canonical->sysno;
693 gst->guest_RDI = canonical->arg1;
694 gst->guest_RSI = canonical->arg2;
695 gst->guest_RDX = canonical->arg3;
696 gst->guest_R10 = canonical->arg4;
697 gst->guest_R8 = canonical->arg5;
698 gst->guest_R9 = canonical->arg6;
700 #elif defined(VGP_ppc32_linux)
701 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
702 gst->guest_GPR0 = canonical->sysno;
703 gst->guest_GPR3 = canonical->arg1;
704 gst->guest_GPR4 = canonical->arg2;
705 gst->guest_GPR5 = canonical->arg3;
706 gst->guest_GPR6 = canonical->arg4;
707 gst->guest_GPR7 = canonical->arg5;
708 gst->guest_GPR8 = canonical->arg6;
710 #elif defined(VGP_ppc64_linux)
711 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
712 gst->guest_GPR0 = canonical->sysno;
713 gst->guest_GPR3 = canonical->arg1;
714 gst->guest_GPR4 = canonical->arg2;
715 gst->guest_GPR5 = canonical->arg3;
716 gst->guest_GPR6 = canonical->arg4;
717 gst->guest_GPR7 = canonical->arg5;
718 gst->guest_GPR8 = canonical->arg6;
720 #elif defined(VGP_arm_linux)
721 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
722 gst->guest_R7 = canonical->sysno;
723 gst->guest_R0 = canonical->arg1;
724 gst->guest_R1 = canonical->arg2;
725 gst->guest_R2 = canonical->arg3;
726 gst->guest_R3 = canonical->arg4;
727 gst->guest_R4 = canonical->arg5;
728 gst->guest_R5 = canonical->arg6;
730 #elif defined(VGP_ppc32_aix5)
731 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
732 gst->guest_GPR2 = canonical->sysno;
733 gst->guest_GPR3 = canonical->arg1;
734 gst->guest_GPR4 = canonical->arg2;
735 gst->guest_GPR5 = canonical->arg3;
736 gst->guest_GPR6 = canonical->arg4;
737 gst->guest_GPR7 = canonical->arg5;
738 gst->guest_GPR8 = canonical->arg6;
739 gst->guest_GPR9 = canonical->arg7;
740 gst->guest_GPR10 = canonical->arg8;
742 #elif defined(VGP_ppc64_aix5)
743 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
744 gst->guest_GPR2 = canonical->sysno;
745 gst->guest_GPR3 = canonical->arg1;
746 gst->guest_GPR4 = canonical->arg2;
747 gst->guest_GPR5 = canonical->arg3;
748 gst->guest_GPR6 = canonical->arg4;
749 gst->guest_GPR7 = canonical->arg5;
750 gst->guest_GPR8 = canonical->arg6;
751 gst->guest_GPR9 = canonical->arg7;
752 gst->guest_GPR10 = canonical->arg8;
754 #elif defined(VGP_x86_darwin)
    /* x86-darwin: args are written to the guest stack (slot 0 is the
       return address); the syscall number is re-encoded for the kernel. */
755 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
756 UWord *stack = (UWord *)gst->guest_ESP;
758 gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
760 // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
761 // stack[0] is return address
762 stack[1] = canonical->arg1;
763 stack[2] = canonical->arg2;
764 stack[3] = canonical->arg3;
765 stack[4] = canonical->arg4;
766 stack[5] = canonical->arg5;
767 stack[6] = canonical->arg6;
768 stack[7] = canonical->arg7;
769 stack[8] = canonical->arg8;
771 #elif defined(VGP_amd64_darwin)
772 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
773 UWord *stack = (UWord *)gst->guest_RSP;
775 gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
776 // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
778 // stack[0] is return address
779 gst->guest_RDI = canonical->arg1;
780 gst->guest_RSI = canonical->arg2;
781 gst->guest_RDX = canonical->arg3;
782 gst->guest_RCX = canonical->arg4;
783 gst->guest_R8 = canonical->arg5;
784 gst->guest_R9 = canonical->arg6;
785 stack[1] = canonical->arg7;
786 stack[2] = canonical->arg8;
788 #elif defined(VGP_x86_l4re)
    /* x86-l4re: note arg1..arg7 map to EAX..EBP; the sysno itself is
       not written to a register here (it came from the trap kind). */
789 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
790 gst->guest_EAX = canonical->arg1;
791 gst->guest_EBX = canonical->arg2;
792 gst->guest_ECX = canonical->arg3;
793 gst->guest_EDX = canonical->arg4;
794 gst->guest_ESI = canonical->arg5;
795 gst->guest_EDI = canonical->arg6;
796 gst->guest_EBP = canonical->arg7;
799 # error "putSyscallArgsIntoGuestState: unknown arch"
/* Impedance matcher for results: after the kernel has run the
   syscall, read the platform-specific guest state and build the
   canonical SyscallStatus (marked SsComplete) via the appropriate
   VG_(mk_SysRes_*) constructor.  On ppc the error condition comes
   from CR0.SO; on Darwin from the carry flag, with the result width
   depending on the syscall class.
   NOTE(review): embedded line numbers jump here (declarations of
   'wLO'/'wHI'/'err', 'break;'s and some constructor arguments are not
   visible); this listing is not the complete function. */
804 void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus* canonical,
805 /*IN*/ VexGuestArchState* gst_vanilla )
807 # if defined(VGP_x86_linux)
808 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
809 canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
810 canonical->what = SsComplete;
812 # elif defined(VGP_amd64_linux)
813 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
814 canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
815 canonical->what = SsComplete;
817 # elif defined(VGP_ppc32_linux)
818 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
819 UInt cr = LibVEX_GuestPPC32_get_CR( gst );
    /* CR0.SO (bit 28 counted from the LSB of the assembled CR word)
       is the kernel's error indicator. */
820 UInt cr0so = (cr >> 28) & 1;
821 canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
822 canonical->what = SsComplete;
824 # elif defined(VGP_ppc64_linux)
825 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
826 UInt cr = LibVEX_GuestPPC64_get_CR( gst );
827 UInt cr0so = (cr >> 28) & 1;
828 canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
829 canonical->what = SsComplete;
831 # elif defined(VGP_arm_linux)
832 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
833 canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
834 canonical->what = SsComplete;
836 # elif defined(VGP_ppc32_aix5)
837 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
838 canonical->sres = VG_(mk_SysRes_ppc32_aix5)( gst->guest_GPR3,
840 canonical->what = SsComplete;
842 # elif defined(VGP_ppc64_aix5)
843 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
844 canonical->sres = VG_(mk_SysRes_ppc64_aix5)( gst->guest_GPR3,
846 canonical->what = SsComplete;
848 # elif defined(VGP_x86_darwin)
849 /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
850 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    /* eflags bit 0 is the carry flag -- Darwin's syscall error flag. */
851 UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
855 switch (gst->guest_SC_CLASS) {
856 case VG_DARWIN_SYSCALL_CLASS_UNIX:
857 // int $0x80 = Unix, 64-bit result
859 wLO = gst->guest_EAX;
860 wHI = gst->guest_EDX;
862 case VG_DARWIN_SYSCALL_CLASS_MACH:
863 // int $0x81 = Mach, 32-bit result
864 wLO = gst->guest_EAX;
866 case VG_DARWIN_SYSCALL_CLASS_MDEP:
867 // int $0x82 = mdep, 32-bit result
868 wLO = gst->guest_EAX;
874 canonical->sres = VG_(mk_SysRes_x86_darwin)(
875 gst->guest_SC_CLASS, err ? True : False,
878 canonical->what = SsComplete;
880 # elif defined(VGP_amd64_darwin)
881 /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
882 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
883 ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
887 switch (gst->guest_SC_CLASS) {
888 case VG_DARWIN_SYSCALL_CLASS_UNIX:
889 // syscall = Unix, 128-bit result
891 wLO = gst->guest_RAX;
892 wHI = gst->guest_RDX;
894 case VG_DARWIN_SYSCALL_CLASS_MACH:
895 // syscall = Mach, 64-bit result
896 wLO = gst->guest_RAX;
898 case VG_DARWIN_SYSCALL_CLASS_MDEP:
899 // syscall = mdep, 64-bit result
900 wLO = gst->guest_RAX;
906 canonical->sres = VG_(mk_SysRes_amd64_darwin)(
907 gst->guest_SC_CLASS, err ? True : False,
910 canonical->what = SsComplete;
912 # elif defined(VGP_x86_l4re)
913 //VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
914 canonical->sres = VG_(mk_SysRes_x86_l4re)( 0 );
915 canonical->what = SsComplete;
918 # error "getSyscallStatusFromGuestState: unknown arch"
923 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
924 /*IN*/ SyscallStatus* canonical,
925 /*OUT*/VexGuestArchState* gst_vanilla )
927 # if defined(VGP_x86_linux)
928 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
929 vg_assert(canonical->what == SsComplete);
930 if (sr_isError(canonical->sres)) {
931 /* This isn't exactly right, in that really a Failure with res
932 not in the range 1 .. 4095 is unrepresentable in the
933 Linux-x86 scheme. Oh well. */
934 gst->guest_EAX = - (Int)sr_Err(canonical->sres);
936 gst->guest_EAX = sr_Res(canonical->sres);
938 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
939 OFFSET_x86_EAX, sizeof(UWord) );
941 # elif defined(VGP_amd64_linux)
942 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
943 vg_assert(canonical->what == SsComplete);
944 if (sr_isError(canonical->sres)) {
945 /* This isn't exactly right, in that really a Failure with res
946 not in the range 1 .. 4095 is unrepresentable in the
947 Linux-amd64 scheme. Oh well. */
948 gst->guest_RAX = - (Long)sr_Err(canonical->sres);
950 gst->guest_RAX = sr_Res(canonical->sres);
952 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
953 OFFSET_amd64_RAX, sizeof(UWord) );
955 # elif defined(VGP_ppc32_linux)
956 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
957 UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
958 vg_assert(canonical->what == SsComplete);
959 if (sr_isError(canonical->sres)) {
961 LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
962 gst->guest_GPR3 = sr_Err(canonical->sres);
965 LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
966 gst->guest_GPR3 = sr_Res(canonical->sres);
968 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
969 OFFSET_ppc32_GPR3, sizeof(UWord) );
970 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
971 OFFSET_ppc32_CR0_0, sizeof(UChar) );
973 # elif defined(VGP_ppc64_linux)
974 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
975 UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
976 vg_assert(canonical->what == SsComplete);
977 if (sr_isError(canonical->sres)) {
979 LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
980 gst->guest_GPR3 = sr_Err(canonical->sres);
983 LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
984 gst->guest_GPR3 = sr_Res(canonical->sres);
986 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
987 OFFSET_ppc64_GPR3, sizeof(UWord) );
988 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
989 OFFSET_ppc64_CR0_0, sizeof(UChar) );
991 # elif defined(VGP_arm_linux)
992 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
993 vg_assert(canonical->what == SsComplete);
994 if (sr_isError(canonical->sres)) {
995 /* This isn't exactly right, in that really a Failure with res
996 not in the range 1 .. 4095 is unrepresentable in the
997 Linux-arm scheme. Oh well. */
998 gst->guest_R0 = - (Int)sr_Err(canonical->sres);
1000 gst->guest_R0 = sr_Res(canonical->sres);
1002 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1003 OFFSET_arm_R0, sizeof(UWord) );
1005 # elif defined(VGP_ppc32_aix5)
1006 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
1007 vg_assert(canonical->what == SsComplete);
1008 gst->guest_GPR3 = canonical->sres.res;
1009 gst->guest_GPR4 = canonical->sres.err;
1010 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1011 OFFSET_ppc32_GPR3, sizeof(UWord) );
1012 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1013 OFFSET_ppc32_GPR4, sizeof(UWord) );
1015 # elif defined(VGP_ppc64_aix5)
1016 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
1017 vg_assert(canonical->what == SsComplete);
1018 gst->guest_GPR3 = canonical->sres.res;
1019 gst->guest_GPR4 = canonical->sres.err;
1020 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1021 OFFSET_ppc64_GPR3, sizeof(UWord) );
1022 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1023 OFFSET_ppc64_GPR4, sizeof(UWord) );
1025 #elif defined(VGP_x86_darwin)
1026 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1027 SysRes sres = canonical->sres;
1028 vg_assert(canonical->what == SsComplete);
1029 /* Unfortunately here we have to break abstraction and look
1030 directly inside 'res', in order to decide what to do. */
1031 switch (sres._mode) {
1032 case SysRes_MACH: // int $0x81 = Mach, 32-bit result
1033 case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
1034 gst->guest_EAX = sres._wLO;
1035 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1036 OFFSET_x86_EAX, sizeof(UInt) );
1038 case SysRes_UNIX_OK: // int $0x80 = Unix, 64-bit result
1039 case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
1040 gst->guest_EAX = sres._wLO;
1041 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1042 OFFSET_x86_EAX, sizeof(UInt) );
1043 gst->guest_EDX = sres._wHI;
1044 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1045 OFFSET_x86_EDX, sizeof(UInt) );
1046 LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1048 // GrP fixme sets defined for entire eflags, not just bit c
1049 // DDD: this breaks exp-ptrcheck.
1050 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1051 offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
1058 #elif defined(VGP_amd64_darwin)
1059 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1060 SysRes sres = canonical->sres;
1061 vg_assert(canonical->what == SsComplete);
1062 /* Unfortunately here we have to break abstraction and look
1063 directly inside 'res', in order to decide what to do. */
1064 switch (sres._mode) {
1065 case SysRes_MACH: // syscall = Mach, 64-bit result
1066 case SysRes_MDEP: // syscall = mdep, 64-bit result
1067 gst->guest_RAX = sres._wLO;
1068 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1069 OFFSET_amd64_RAX, sizeof(ULong) );
1071 case SysRes_UNIX_OK: // syscall = Unix, 128-bit result
1072 case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
1073 gst->guest_RAX = sres._wLO;
1074 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1075 OFFSET_amd64_RAX, sizeof(ULong) );
1076 gst->guest_RDX = sres._wHI;
1077 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1078 OFFSET_amd64_RDX, sizeof(ULong) );
1079 LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1081 // GrP fixme sets defined for entire rflags, not just bit c
1082 // DDD: this breaks exp-ptrcheck.
1083 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1084 offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
1091 # elif defined(VGP_x86_l4re)
1092 //VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1093 vg_assert(canonical->what == SsComplete);
1096 # error "putSyscallStatusIntoGuestState: unknown arch"
1101 /* Tell me the offsets in the guest state of the syscall params, so
1102 that the scalar argument checkers don't have to have this info
hardwired. */
/* Fill in *layout with, for the platform being built, the guest-state
   offsets (o_*) or stack offsets (s_*) at which the syscall number and
   each syscall argument live.  The scalar argument checkers
   (PRE_REG_READ* in the wrappers) use this to know which bits of the
   guest state to inspect.  uu_arg7/uu_arg8 are set to -1 on platforms
   whose syscalls take at most 6 register arguments. */
1106 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
1108 #if defined(VGP_x86_linux)
1109 layout->o_sysno = OFFSET_x86_EAX;
1110 layout->o_arg1 = OFFSET_x86_EBX;
1111 layout->o_arg2 = OFFSET_x86_ECX;
1112 layout->o_arg3 = OFFSET_x86_EDX;
1113 layout->o_arg4 = OFFSET_x86_ESI;
1114 layout->o_arg5 = OFFSET_x86_EDI;
1115 layout->o_arg6 = OFFSET_x86_EBP;
1116 layout->uu_arg7 = -1; /* impossible value */
1117 layout->uu_arg8 = -1; /* impossible value */
1119 #elif defined(VGP_amd64_linux)
1120 layout->o_sysno = OFFSET_amd64_RAX;
1121 layout->o_arg1 = OFFSET_amd64_RDI;
1122 layout->o_arg2 = OFFSET_amd64_RSI;
1123 layout->o_arg3 = OFFSET_amd64_RDX;
1124 layout->o_arg4 = OFFSET_amd64_R10;
1125 layout->o_arg5 = OFFSET_amd64_R8;
1126 layout->o_arg6 = OFFSET_amd64_R9;
1127 layout->uu_arg7 = -1; /* impossible value */
1128 layout->uu_arg8 = -1; /* impossible value */
1130 #elif defined(VGP_ppc32_linux)
1131 layout->o_sysno = OFFSET_ppc32_GPR0;
1132 layout->o_arg1 = OFFSET_ppc32_GPR3;
1133 layout->o_arg2 = OFFSET_ppc32_GPR4;
1134 layout->o_arg3 = OFFSET_ppc32_GPR5;
1135 layout->o_arg4 = OFFSET_ppc32_GPR6;
1136 layout->o_arg5 = OFFSET_ppc32_GPR7;
1137 layout->o_arg6 = OFFSET_ppc32_GPR8;
1138 layout->uu_arg7 = -1; /* impossible value */
1139 layout->uu_arg8 = -1; /* impossible value */
1141 #elif defined(VGP_ppc64_linux)
1142 layout->o_sysno = OFFSET_ppc64_GPR0;
1143 layout->o_arg1 = OFFSET_ppc64_GPR3;
1144 layout->o_arg2 = OFFSET_ppc64_GPR4;
1145 layout->o_arg3 = OFFSET_ppc64_GPR5;
1146 layout->o_arg4 = OFFSET_ppc64_GPR6;
1147 layout->o_arg5 = OFFSET_ppc64_GPR7;
1148 layout->o_arg6 = OFFSET_ppc64_GPR8;
1149 layout->uu_arg7 = -1; /* impossible value */
1150 layout->uu_arg8 = -1; /* impossible value */
1152 #elif defined(VGP_arm_linux)
1153 layout->o_sysno = OFFSET_arm_R7;
1154 layout->o_arg1 = OFFSET_arm_R0;
1155 layout->o_arg2 = OFFSET_arm_R1;
1156 layout->o_arg3 = OFFSET_arm_R2;
1157 layout->o_arg4 = OFFSET_arm_R3;
1158 layout->o_arg5 = OFFSET_arm_R4;
1159 layout->o_arg6 = OFFSET_arm_R5;
1160 layout->uu_arg7 = -1; /* impossible value */
1161 layout->uu_arg8 = -1; /* impossible value */
1163 #elif defined(VGP_ppc32_aix5)
     /* AIX5 syscalls take up to 8 register arguments (GPR3..GPR10). */
1164 layout->o_sysno = OFFSET_ppc32_GPR2;
1165 layout->o_arg1 = OFFSET_ppc32_GPR3;
1166 layout->o_arg2 = OFFSET_ppc32_GPR4;
1167 layout->o_arg3 = OFFSET_ppc32_GPR5;
1168 layout->o_arg4 = OFFSET_ppc32_GPR6;
1169 layout->o_arg5 = OFFSET_ppc32_GPR7;
1170 layout->o_arg6 = OFFSET_ppc32_GPR8;
1171 layout->o_arg7 = OFFSET_ppc32_GPR9;
1172 layout->o_arg8 = OFFSET_ppc32_GPR10;
1174 #elif defined(VGP_ppc64_aix5)
1175 layout->o_sysno = OFFSET_ppc64_GPR2;
1176 layout->o_arg1 = OFFSET_ppc64_GPR3;
1177 layout->o_arg2 = OFFSET_ppc64_GPR4;
1178 layout->o_arg3 = OFFSET_ppc64_GPR5;
1179 layout->o_arg4 = OFFSET_ppc64_GPR6;
1180 layout->o_arg5 = OFFSET_ppc64_GPR7;
1181 layout->o_arg6 = OFFSET_ppc64_GPR8;
1182 layout->o_arg7 = OFFSET_ppc64_GPR9;
1183 layout->o_arg8 = OFFSET_ppc64_GPR10;
1185 #elif defined(VGP_x86_darwin)
1186 layout->o_sysno = OFFSET_x86_EAX;
1187 // syscall parameters are on stack in C convention
1188 layout->s_arg1 = sizeof(UWord) * 1;
1189 layout->s_arg2 = sizeof(UWord) * 2;
1190 layout->s_arg3 = sizeof(UWord) * 3;
1191 layout->s_arg4 = sizeof(UWord) * 4;
1192 layout->s_arg5 = sizeof(UWord) * 5;
1193 layout->s_arg6 = sizeof(UWord) * 6;
1194 layout->s_arg7 = sizeof(UWord) * 7;
1195 layout->s_arg8 = sizeof(UWord) * 8;
1197 #elif defined(VGP_amd64_darwin)
     /* First 6 args in registers, args 7 and 8 spill to the stack. */
1198 layout->o_sysno = OFFSET_amd64_RAX;
1199 layout->o_arg1 = OFFSET_amd64_RDI;
1200 layout->o_arg2 = OFFSET_amd64_RSI;
1201 layout->o_arg3 = OFFSET_amd64_RDX;
1202 layout->o_arg4 = OFFSET_amd64_RCX;
1203 layout->o_arg5 = OFFSET_amd64_R8;
1204 layout->o_arg6 = OFFSET_amd64_R9;
1205 layout->s_arg7 = sizeof(UWord) * 1;
1206 layout->s_arg8 = sizeof(UWord) * 2;
1208 #elif defined(VGP_x86_l4re)
1209 layout->o_sysno = OFFSET_x86_EAX;
1210 layout->o_arg1 = OFFSET_x86_EBX;
1211 layout->o_arg2 = OFFSET_x86_ECX;
1212 layout->o_arg3 = OFFSET_x86_EDX;
1213 layout->o_arg4 = OFFSET_x86_ESI;
1214 layout->o_arg5 = OFFSET_x86_EDI;
1215 layout->o_arg6 = OFFSET_x86_EBP;
1216 layout->o_arg7 = -1; /* impossible value */
1217 layout->o_arg8 = -1; /* impossible value */
1218 layout->o_retval = OFFSET_x86_EAX;
1221 # error "getSyscallLayout: unknown arch"
1226 /* ---------------------------------------------------------------------
1227 The main driver logic
1228 ------------------------------------------------------------------ */
1230 /* Finding the handlers for a given syscall, or faking up one
1231 when no handler is found. */
/* Default "before" handler, installed (via bad_sys) when no wrapper
   exists for a syscall: warn the user, optionally print a backtrace,
   and fail the syscall with ENOSYS without handing it to the kernel. */
1234 void bad_before ( ThreadId tid,
1235 SyscallArgLayout* layout,
1236 /*MOD*/SyscallArgs* args,
1237 /*OUT*/SyscallStatus* status,
1238 /*OUT*/UWord* flags )
1240 VG_(dmsg)("WARNING: unhandled syscall: %s\n",
1241 VG_SYSNUM_STRING_EXTRA(args->sysno));
     /* Backtrace only at higher verbosity, to keep default output terse. */
1242 if (VG_(clo_verbosity) > 1) {
1243 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1245 VG_(dmsg)("You may be able to write your own handler.\n");
1246 VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
1247 VG_(dmsg)("Nevertheless we consider this a bug. Please report\n");
1248 VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
     /* Complete the syscall immediately with ENOSYS; kernel never sees it. */
1250 SET_STATUS_Failure(VKI_ENOSYS);
/* Fallback table entry for unknown syscalls: bad_before as the
   pre-handler, no post-handler.  Returned by get_syscall_entry below. */
1253 static SyscallTableEntry bad_sys =
1254 { bad_before, NULL };
/* Look up the pre/post handler pair for 'syscallno' in the
   OS/platform-appropriate table.  Never returns NULL: when no handler
   exists, returns &bad_sys, whose pre-handler fails the call with
   ENOSYS. */
1256 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1258 const SyscallTableEntry* sys = NULL;
1260 # if defined(VGO_linux)
1261 sys = ML_(get_linux_syscall_entry)( syscallno );
1263 # elif defined(VGP_ppc32_aix5)
1264 sys = ML_(get_ppc32_aix5_syscall_entry) ( syscallno );
1266 # elif defined(VGP_ppc64_aix5)
1267 sys = ML_(get_ppc64_aix5_syscall_entry) ( syscallno );
1269 # elif defined(VGO_darwin)
     /* Darwin encodes a class (UNIX/Mach/mdep) plus an index in the
        syscall number; each class has its own table.  An entry counts
        only if its 'before' handler is non-NULL. */
1270 Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1272 switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1273 case VG_DARWIN_SYSCALL_CLASS_UNIX:
1274 if (idx >= 0 && idx < ML_(syscall_table_size) &&
1275 ML_(syscall_table)[idx].before != NULL)
1276 sys = &ML_(syscall_table)[idx];
1278 case VG_DARWIN_SYSCALL_CLASS_MACH:
1279 if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1280 ML_(mach_trap_table)[idx].before != NULL)
1281 sys = &ML_(mach_trap_table)[idx];
1283 case VG_DARWIN_SYSCALL_CLASS_MDEP:
1284 if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1285 ML_(mdep_trap_table)[idx].before != NULL)
1286 sys = &ML_(mdep_trap_table)[idx];
1293 # elif defined(VGO_l4re)
1294 if (0) VG_(message)(Vg_DebugMsg, "syscallno = %x = %d\n", syscallno, syscallno);
1296 if (syscallno < ML_(syscall_table_size) &&
1297 ML_(syscall_table)[syscallno].before != NULL)
1298 sys = &ML_(syscall_table)[syscallno];
     /* Unknown syscall -> dummy entry that fails with ENOSYS. */
1304 return sys == NULL ? &bad_sys : sys;
1308 /* Add and remove signals from mask so that we end up telling the
1309 kernel the state we actually want rather than what the client
requested. */
/* Strip from *mask the signals that must never be blocked while the
   client's syscall runs: SIGKILL/SIGSTOP (unblockable anyway) and
   VG_SIGVGKILL, which Valgrind itself uses to interrupt threads. */
1311 static void sanitize_client_sigmask(vki_sigset_t *mask)
1313 VG_(sigdelset)(mask, VKI_SIGKILL);
1314 VG_(sigdelset)(mask, VKI_SIGSTOP);
1315 VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
1320 SyscallArgs orig_args;
1322 SyscallStatus status;
1327 SyscallInfo syscallInfo[VG_N_THREADS];
1330 /* The scheduler needs to be able to zero out these records after a
1331 fork, hence this is exported from m_syswrap. */
/* Reset thread tid's syscallInfo record to all-zeroes and mark it
   SsIdle (i.e. "no syscall in progress").  Exported so the scheduler
   can wipe these records after a fork (see comment above). */
1332 void VG_(clear_syscallInfo) ( Int tid )
1334 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1335 VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
     /* SsIdle may be nonzero, so it must be set after the memset. */
1336 syscallInfo[tid].status.what = SsIdle;
/* One-shot initialisation: clear every thread's syscallInfo record.
   NOTE(review): 'init_done' presumably guards against repeated
   initialisation on lines not visible in this extract — confirm. */
1339 static void ensure_initialised ( void )
1342 static Bool init_done = False;
1346 for (i = 0; i < VG_N_THREADS; i++) {
1347 VG_(clear_syscallInfo)( i );
1351 /* --- This is the main function of this file. --- */
/* Handle one client syscall for thread 'tid'.  'trc' is the
   thread-return code, which (on platforms where several trap flavours
   exist) tells getSyscallArgsFromGuestState how the syscall was
   entered.  Flow: fetch args from the guest state, run the
   pre-handler, hand the call to the kernel if the pre-handler asked
   for that (synchronously, or asynchronously with the BigLock
   dropped, when SfMayBlock is set), write the result back into the
   guest state, then run VG_(post_syscall). */
1353 void VG_(client_syscall) ( ThreadId tid, UInt trc )
1357 const SyscallTableEntry* ent;
1358 SyscallArgLayout layout;
1361 ensure_initialised();
1363 vg_assert(VG_(is_valid_tid)(tid));
1364 vg_assert(tid >= 1 && tid < VG_N_THREADS);
1365 vg_assert(VG_(is_running_thread)(tid));
1367 tst = VG_(get_ThreadState)(tid);
1369 /* BEGIN ensure root thread's stack is suitably mapped */
1370 /* In some rare circumstances, we may do the syscall without the
1371 bottom page of the stack being mapped, because the stack pointer
1372 was moved down just a few instructions before the syscall
1373 instruction, and there have been no memory references since
1374 then, that would cause a call to VG_(extend_stack) to have
1377 In native execution that's OK: the kernel automagically extends
1378 the stack's mapped area down to cover the stack pointer (or sp -
1379 redzone, really). In simulated normal execution that's OK too,
1380 since any signals we get from accessing below the mapped area of
1381 the (guest's) stack lead us to VG_(extend_stack), where we
1382 simulate the kernel's stack extension logic. But that leaves
1383 the problem of entering a syscall with the SP unmapped. Because
1384 the kernel doesn't know that the segment immediately above SP is
1385 supposed to be a grow-down segment, it causes the syscall to
1386 fail, and thereby causes a divergence between native behaviour
1387 (syscall succeeds) and simulated behaviour (syscall fails).
1389 This is quite a rare failure mode. It has only been seen
1390 affecting calls to sys_readlink on amd64-linux, and even then it
1391 requires a certain code sequence around the syscall to trigger
1394 extern int my_readlink ( const char* path );
1397 ".globl my_readlink\n"
1399 "\tsubq $0x1008,%rsp\n"
1400 "\tmovq %rdi,%rdi\n" // path is in rdi
1401 "\tmovq %rsp,%rsi\n" // &buf[0] -> rsi
1402 "\tmovl $0x1000,%edx\n" // sizeof(buf) in rdx
1403 "\tmovl $"__NR_READLINK",%eax\n" // syscall number
1405 "\taddq $0x1008,%rsp\n"
1410 For more details, see bug #156404
1411 (https://bugs.kde.org/show_bug.cgi?id=156404).
1413 The fix is actually very simple. We simply need to call
1414 VG_(extend_stack) for this thread, handing it the lowest
1415 possible valid address for stack (sp - redzone), to ensure the
1416 pages all the way down to that address, are mapped. Because
1417 this is a potentially expensive and frequent operation, we
1420 First, only the main thread (tid=1) has a growdown stack. So
1421 ignore all others. It is conceivable, although highly unlikely,
1422 that the main thread exits, and later another thread is
1423 allocated tid=1, but that's harmless, I believe;
1424 VG_(extend_stack) will do nothing when applied to a non-root
1427 Secondly, first call VG_(am_find_nsegment) directly, to see if
1428 the page holding (sp - redzone) is mapped correctly. If so, do
1429 nothing. This is almost always the case. VG_(extend_stack)
1430 calls VG_(am_find_nsegment) twice, so this optimisation -- and
1431 that's all it is -- more or less halves the number of calls to
1432 VG_(am_find_nsegment) required.
1434 TODO: the test "seg->kind == SkAnonC" is really inadequate,
1435 because although it tests whether the segment is mapped
1436 _somehow_, it doesn't check that it has the right permissions
1437 (r,w, maybe x) ? We could test that here, but it will also be
1438 necessary to fix the corresponding test in VG_(extend_stack).
1440 All this guff is of course Linux-specific. Hence the ifdef. */
1442 # if defined(VGO_linux)
1443 if (tid == 1/*ROOT THREAD*/) {
1444 Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1445 NSegment const* seg = VG_(am_find_nsegment)(stackMin);
1446 if (seg && seg->kind == SkAnonC) {
1447 /* stackMin is already mapped. Nothing to do. */
1449 (void)VG_(extend_stack)( stackMin,
1450 tst->client_stack_szB );
1454 /* END ensure root thread's stack is suitably mapped */
1456 /* First off, get the syscall args and number. This is a
1457 platform-dependent action. */
1459 sci = & syscallInfo[tid];
1460 vg_assert(sci->status.what == SsIdle);
1462 getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1464 /* Copy .orig_args to .args. The pre-handler may modify .args, but
1465 we want to keep the originals too, just in case. */
1466 sci->args = sci->orig_args;
1468 /* Save the syscall number in the thread state in case the syscall
1469 is interrupted by a signal. */
1470 sysno = sci->orig_args.sysno;
1472 # if defined(VGO_darwin)
1473 /* Record syscall class. But why? Because the syscall might be
1474 interrupted by a signal, and in the signal handler (which will
1475 be m_signals.async_signalhandler) we will need to build a SysRes
1476 reflecting the syscall return result. In order to do that we
1477 need to know the syscall class. Hence stash it in the guest
1478 state of this thread. This madness is not needed on Linux or
1479 AIX5, because those OSs only have a single syscall return
1480 convention and so there is no ambiguity involved in converting
1481 the post-signal machine state into a SysRes. */
1482 tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1485 /* The default what-to-do-next thing is hand the syscall to the
1486 kernel, so we pre-set that here. Set .sres to something
1487 harmless looking (is irrelevant because .what is not
SsComplete). */
1489 sci->status.what = SsHandToKernel;
1490 sci->status.sres = VG_(mk_SysRes_Error)(0);
1493 /* Fetch the syscall's handlers. If no handlers exist for this
1494 syscall, we are given dummy handlers which force an immediate
1495 return with ENOSYS. */
1496 ent = get_syscall_entry(sysno);
1498 /* Fetch the layout information, which tells us where in the guest
1499 state the syscall args reside. This is a platform-dependent
1500 action. This info is needed so that the scalar syscall argument
1501 checks (PRE_REG_READ calls) know which bits of the guest state
1502 they need to inspect. */
1503 getSyscallArgLayout( &layout );
1505 #if !defined(VGO_l4re)
1506 /* Make sure the tmp signal mask matches the real signal mask;
1507 sigsuspend may change this. */
1508 vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1511 /* Right, we're finally ready to Party. Call the pre-handler and
1512 see what we get back. At this point:
1514 sci->status.what is Unset (we don't know yet).
1515 sci->orig_args contains the original args.
1516 sci->args is the same as sci->orig_args. */
1520 PRINT("SYSCALL[%d,%d](%s) ",
1521 VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1523 /* Do any pre-syscall actions */
1524 if (VG_(needs).syscall_wrapper) {
1526 tmpv[0] = sci->orig_args.arg1;
1527 tmpv[1] = sci->orig_args.arg2;
1528 tmpv[2] = sci->orig_args.arg3;
1529 tmpv[3] = sci->orig_args.arg4;
1530 tmpv[4] = sci->orig_args.arg5;
1531 tmpv[5] = sci->orig_args.arg6;
1532 tmpv[6] = sci->orig_args.arg7;
1533 tmpv[7] = sci->orig_args.arg8;
1534 VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1535 &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1539 vg_assert(ent->before);
1542 &sci->args, &sci->status, &sci->flags );
1544 /* The pre-handler may have modified:
1548 All else remains unchanged.
1549 Although the args may be modified, pre handlers are not allowed
1550 to change the syscall number. */
1552 /* Now we proceed according to what the pre-handler decided. */
1553 vg_assert(sci->status.what == SsHandToKernel
1554 || sci->status.what == SsComplete);
1555 vg_assert(sci->args.sysno == sci->orig_args.sysno);
1557 if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1558 /* The pre-handler completed the syscall itself, declaring
success. */
1560 if (sci->flags & SfNoWriteResult) {
1561 PRINT(" --> [pre-success] NoWriteResult");
1563 PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
1564 (ULong)sr_ResHI(sci->status.sres),
1565 (ULong)sr_Res(sci->status.sres));
1567 /* In this case the allowable flags are to ask for a signal-poll
1568 and/or a yield after the call. Changing the args isn't
allowed. */
1570 vg_assert(0 == (sci->flags
1571 & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1572 vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1576 if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1577 /* The pre-handler decided to fail syscall itself. */
1578 PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
1579 /* In this case, the pre-handler is also allowed to ask for the
1580 post-handler to be run anyway. Changing the args is not
allowed. */
1582 vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1583 vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1587 if (sci->status.what != SsHandToKernel) {
1592 else /* (sci->status.what == HandToKernel) */ {
1593 /* Ok, this is the usual case -- and the complicated one. There
1594 are two subcases: sync and async. async is the general case
1595 and is to be used when there is any possibility that the
1596 syscall might block [a fact that the pre-handler must tell us
1597 via the sci->flags field.] Because the tidying-away /
1598 context-switch overhead of the async case could be large, if
1599 we are sure that the syscall will not block, we fast-track it
1600 by doing it directly in this thread, which is a lot
simpler. */
1603 /* Check that the given flags are allowable: MayBlock, PollAfter
1604 and PostOnFail are ok. */
1605 vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1607 if (sci->flags & SfMayBlock) {
1609 /* Syscall may block, so run it asynchronously */
1610 #if defined(VGO_l4re)
1616 PRINT(" --> [async] ... \n");
1618 #if !defined(VGO_l4re)
1619 mask = tst->sig_mask;
1620 sanitize_client_sigmask(&mask);
1623 /* Gack. More impedance matching. Copy the possibly
1624 modified syscall args back into the guest state. */
1625 /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1626 then this assertion is senseless:
1627 vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1628 The case that exposed it was sys_posix_spawn on Darwin,
1629 which heavily modifies its arguments but then lets the call
1630 go through anyway, with SfToBlock set, hence we end up here. */
1631 putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1633 /* Drop the bigLock */
1634 VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1635 /* Urr. We're now in a race against other threads trying to
1636 acquire the bigLock. I guess that doesn't matter provided
1637 that do_syscall_for_client only touches thread-local
state. */
1640 #if defined(VGO_l4re)
1641 // TODO should we not call do_syscall_for_client
1642 sres = VG_(do_l4re_syscall)(&sci->args, tid);
1644 sci->status = convert_SysRes_to_SyscallStatus(sres);
1645 /* After a syscall on L4, every register can be modified by the kernel.
1646 Because of this, it is important to flush these changes back to the
guest state. */
1648 putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1649 /* Do the call, which operates directly on the guest state,
1650 not on our abstracted copies of the args/result. */
1652 do_syscall_for_client(sysno, tst, &mask);
1654 /* do_syscall_for_client may not return if the syscall was
1655 interrupted by a signal. In that case, flow of control is
1656 first to m_signals.async_sighandler, which calls
1657 VG_(fixup_guest_state_after_syscall_interrupted), which
1658 fixes up the guest state, and possibly calls
1659 VG_(post_syscall). Once that's done, control drops back
1660 to the scheduler. */
1662 /* Darwin: do_syscall_for_client may not return if the
1663 syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
1664 responded by starting the thread at wqthread_hijack(reuse=1)
1665 (to run another workqueue item). In that case, wqthread_hijack
1666 calls ML_(wqthread_continue), which is similar to
1667 VG_(fixup_guest_state_after_syscall_interrupted). */
1670 /* Reacquire the lock */
1671 VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
1673 /* Even more impedance matching. Extract the syscall status
1674 from the guest state. */
1675 getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
1676 vg_assert(sci->status.what == SsComplete);
1678 /* Be decorative, if required. */
1679 if (VG_(clo_trace_syscalls)) {
1680 Bool failed = sr_isError(sci->status.sres);
1682 PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
1683 VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1684 (ULong)sr_Err(sci->status.sres));
1686 PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
1687 "Success(0x%llx:0x%llx)",
1688 VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1689 (ULong)sr_ResHI(sci->status.sres),
1690 (ULong)sr_Res(sci->status.sres) );
1696 /* run the syscall directly */
1697 #if defined(VGO_l4re)
1698 SysRes sres = VG_(do_l4re_syscall)(&sci->args, tid);
1699 /* After a syscall on L4, every register can be modified by the kernel.
1700 Because of this, it is important to flush these changes back to the
guest state. */
1702 putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1704 /* The pre-handler may have modified the syscall args, but
1705 since we're passing values in ->args directly to the
1706 kernel, there's no point in flushing them back to the
1707 guest state. Indeed doing so could be construed as
incorrect. */
1710 = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
1711 sci->args.arg3, sci->args.arg4,
1712 sci->args.arg5, sci->args.arg6,
1713 sci->args.arg7, sci->args.arg8 );
1715 sci->status = convert_SysRes_to_SyscallStatus(sres);
1717 /* Be decorative, if required. */
1718 if (VG_(clo_trace_syscalls)) {
1719 Bool failed = sr_isError(sci->status.sres);
1721 PRINT("[sync] --> Failure(0x%llx)",
1722 (ULong)sr_Err(sci->status.sres) );
1724 PRINT("[sync] --> Success(0x%llx:0x%llx)",
1725 (ULong)sr_ResHI(sci->status.sres),
1726 (ULong)sr_Res(sci->status.sres) );
1732 vg_assert(sci->status.what == SsComplete);
1734 vg_assert(VG_(is_running_thread)(tid));
1736 /* Dump the syscall result back in the guest state. This is
1737 a platform-specific action. */
1738 if (!(sci->flags & SfNoWriteResult))
1739 putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
/* Ok, so now:
1742 - the guest state is now correctly modified following the syscall
1743 - modified args, original args and syscall status are still
1744 available in the syscallInfo[] entry for this syscall.
1746 Now go on to do the post-syscall actions (read on down ..) */
1749 VG_(post_syscall)(tid);
1754 /* Perform post syscall actions. The expected state on entry is
1755 precisely as at the end of VG_(client_syscall), that is:
1757 - guest state up to date following the syscall
1758 - modified args, original args and syscall status are still
1759 available in the syscallInfo[] entry for this syscall.
1760 - syscall status matches what's in the guest state.
1762 There are two ways to get here: the normal way -- being called by
1763 VG_(client_syscall), and the unusual way, from
1764 VG_(fixup_guest_state_after_syscall_interrupted).
1765 Darwin: there's a third way, ML_(wqthread_continue). */
/* Run post-syscall actions for 'tid': validate the recorded status
   against the guest state, run the syscall's post-handler (on
   success, or on failure when SfPostOnFail is set), write any
   post-handler status change back to the guest state, notify the
   tool, mark the record SsIdle, and honour SfPollAfter /
   SfYieldAfter. */
1767 void VG_(post_syscall) (ThreadId tid)
1770 const SyscallTableEntry* ent;
1771 SyscallStatus test_status;
1776 vg_assert(VG_(is_valid_tid)(tid));
1777 vg_assert(tid >= 1 && tid < VG_N_THREADS);
1778 vg_assert(VG_(is_running_thread)(tid));
1780 tst = VG_(get_ThreadState)(tid);
1781 sci = & syscallInfo[tid];
1783 #if defined(VGO_l4re)
1784 if (0) VG_(printf)("--> %s(tid=%d)\n", __func__, tid);
1787 /* m_signals.sigvgkill_handler might call here even when not in
a syscall. */
1789 if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
1790 sci->status.what = SsIdle;
1794 /* Validate current syscallInfo entry. In particular we require
1795 that the current .status matches what's actually in the guest
1796 state. At least in the normal case where we have actually
1797 previously written the result into the guest state. */
1798 vg_assert(sci->status.what == SsComplete);
1800 getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
1801 if (!(sci->flags & SfNoWriteResult))
1802 vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
1803 /* Failure of the above assertion on Darwin can indicate a problem
1804 in the syscall wrappers that pre-fail or pre-succeed the
1805 syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
1806 when they really should call SET_STATUS_from_SysRes. The former
1807 create a UNIX-class syscall result on Darwin, which may not be
1808 correct for the syscall; if that's the case then this assertion
1809 fires. See PRE(thread_fast_set_cthread_self) for an example. On
1810 non-Darwin platforms this assertion should never fail, and this
1811 comment is completely irrelevant. */
1812 /* Ok, looks sane */
1814 /* Get the system call number. Because the pre-handler isn't
1815 allowed to mess with it, it should be the same for both the
1816 original and potentially-modified args. */
1817 #if !defined(VGO_l4re)
1818 vg_assert(sci->args.sysno == sci->orig_args.sysno);
1821 sysno = sci->args.sysno;
1823 ent = get_syscall_entry(sysno);
1825 /* pre: status == Complete (asserted above) */
1826 /* Consider either success or failure. Now run the post handler if:
1828 - Success or (Failure and PostOnFail is set) */
1831 && ((!sr_isError(sci->status.sres))
1832 || (sr_isError(sci->status.sres)
1833 && (sci->flags & SfPostOnFail) ))) {
1835 (ent->after)( tid, &sci->args, &sci->status );
1838 /* Because the post handler might have changed the status (eg, the
1839 post-handler for sys_open can change the result from success to
1840 failure if the kernel supplied a fd that it doesn't like), once
1841 again dump the syscall result back in the guest state.*/
1842 if (!(sci->flags & SfNoWriteResult))
1843 putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1845 /* Do any post-syscall actions required by the tool. */
1846 if (VG_(needs).syscall_wrapper) {
1848 tmpv[0] = sci->orig_args.arg1;
1849 tmpv[1] = sci->orig_args.arg2;
1850 tmpv[2] = sci->orig_args.arg3;
1851 tmpv[3] = sci->orig_args.arg4;
1852 tmpv[4] = sci->orig_args.arg5;
1853 tmpv[5] = sci->orig_args.arg6;
1854 tmpv[6] = sci->orig_args.arg7;
1855 tmpv[7] = sci->orig_args.arg8;
1856 VG_TDICT_CALL(tool_post_syscall, tid,
1858 &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
1862 /* The syscall is done. */
1863 vg_assert(sci->status.what == SsComplete);
1864 sci->status.what = SsIdle;
1866 #if !defined(VGO_l4re)
1867 /* The pre/post wrappers may have concluded that pending signals
1868 might have been created, and will have set SfPollAfter to
1869 request a poll for them once the syscall is done. */
1870 if (sci->flags & SfPollAfter)
1871 VG_(poll_signals)(tid);
1874 /* Similarly, the wrappers might have asked for a yield
afterwards. */
1876 if (sci->flags & SfYieldAfter)
1881 /* ---------------------------------------------------------------------
1882 Dealing with syscalls which get interrupted by a signal:
1883 VG_(fixup_guest_state_after_syscall_interrupted)
1884 ------------------------------------------------------------------ */
1886 /* Syscalls done on behalf of the client are finally handed off to the
1887 kernel in VG_(client_syscall) above, either by calling
1888 do_syscall_for_client (the async case), or by calling
1889 VG_(do_syscall6) (the sync case).
1891 If the syscall is not interrupted by a signal (it may block and
1892 later unblock, but that's irrelevant here) then those functions
1893 eventually return and so control is passed to VG_(post_syscall).
1894 NB: not sure if the sync case can actually get interrupted, as it
1895 operates with all signals masked.
1897 However, the syscall may get interrupted by an async-signal. In
1898 that case do_syscall_for_client/VG_(do_syscall6) do not
1899 return. Instead we wind up in m_signals.async_sighandler. We need
1900 to fix up the guest state to make it look like the syscall was
1901 interrupted for guest. So async_sighandler calls here, and this
1902 does the fixup. Note that from here we wind up calling
1903 VG_(post_syscall) too. */
1907 /* These are addresses within ML_(do_syscall_for_client_WRK). See
1908 syscall-$PLAT.S for details. */
1910 #if defined(VGO_linux) || defined(VGO_aix5)
1911 extern const Addr ML_(blksys_setup);
1912 extern const Addr ML_(blksys_restart);
1913 extern const Addr ML_(blksys_complete);
1914 extern const Addr ML_(blksys_committed);
1915 extern const Addr ML_(blksys_finished);
1916 #elif defined(VGO_darwin)
1917 /* Darwin requires extra uglyness */
1918 extern const Addr ML_(blksys_setup_MACH);
1919 extern const Addr ML_(blksys_restart_MACH);
1920 extern const Addr ML_(blksys_complete_MACH);
1921 extern const Addr ML_(blksys_committed_MACH);
1922 extern const Addr ML_(blksys_finished_MACH);
1923 extern const Addr ML_(blksys_setup_MDEP);
1924 extern const Addr ML_(blksys_restart_MDEP);
1925 extern const Addr ML_(blksys_complete_MDEP);
1926 extern const Addr ML_(blksys_committed_MDEP);
1927 extern const Addr ML_(blksys_finished_MDEP);
1928 extern const Addr ML_(blksys_setup_UNIX);
1929 extern const Addr ML_(blksys_restart_UNIX);
1930 extern const Addr ML_(blksys_complete_UNIX);
1931 extern const Addr ML_(blksys_committed_UNIX);
1932 extern const Addr ML_(blksys_finished_UNIX);
1933 #elif defined(VGO_l4re)
1936 # error "Unknown OS"
/* Back up guest state to restart a system call: move the guest
   program counter back onto the syscall instruction (or, on platforms
   that stash the at-syscall IP in a pseudo-register, restore it from
   there), then sanity-check that the PC really does point at a
   syscall insn. */
void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.  int $0x80 == CD 80. */
   UChar *p = (UChar *)arch->vex.guest_EIP;

   if (p[0] != 0xcd || p[1] != 0x80)
      VG_(message)(Vg_DebugMsg,
                   "?! restarting over syscall at %#x %02x %02x\n",
                   arch->vex.guest_EIP, p[0], p[1]);

   vg_assert(p[0] == 0xcd && p[1] == 0x80);

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.  syscall == 0F 05. */
   UChar *p = (UChar *)arch->vex.guest_RIP;

   if (p[0] != 0x0F || p[1] != 0x05)
      VG_(message)(Vg_DebugMsg,
                   "?! restarting over syscall at %#llx %02x %02x\n",
                   arch->vex.guest_RIP, p[0], p[1]);

   vg_assert(p[0] == 0x0F && p[1] == 0x05);

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.  sc == 44 00 00 02. */
   UChar *p = (UChar *)arch->vex.guest_CIA;

   if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
      VG_(message)(Vg_DebugMsg,
                   "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                   arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

   vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);

#elif defined(VGP_arm_linux)
   arch->vex.guest_R15 -= 4;             // sizeof(arm instr)

   /* Only the top nibble of the last byte is checked (0xF); the rest
      of the svc encoding carries a variable immediate. */
   UChar *p = (UChar*)arch->vex.guest_R15;
   if ((p[3] & 0xF) != 0xF)
      VG_(message)(Vg_DebugMsg,
                   "?! restarting over syscall that is not syscall at %#llx %02x %02x %02x %02x\n",
                   arch->vex.guest_R15 + 0ULL, p[0], p[1], p[2], p[3]);

   vg_assert((p[3] & 0xF) == 0xF);

#elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   /* Hmm.  This is problematic, because on AIX the kernel resumes
      after a syscall at LR, not at the insn following SC.  Hence
      there is no obvious way to figure out where the SC is.  Current
      solution is to have a pseudo-register in the guest state,
      CIA_AT_SC, which holds the address of the most recent SC
      executed.  Backing up to that syscall then simply involves
      copying that value back into CIA (the program counter). */
   arch->vex.guest_CIA = arch->vex.guest_CIA_AT_SC;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.  sc == 44 00 00 02. */
   UChar *p = (UChar *)arch->vex.guest_CIA;

   if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
      VG_(message)(Vg_DebugMsg,
                   "?! restarting over syscall at %#lx %02x %02x %02x %02x\n",
                   (UWord)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);

   vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);

#elif defined(VGP_x86_darwin)
   /* Darwin keeps the at-syscall IP in a pseudo-register; restore it. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.  Darwin has several entry encodings:
      int $0x80 / $0x81 / $0x82, and sysenter (0F 34). */
   UChar *p = (UChar *)arch->vex.guest_EIP;
   Bool ok = (p[0] == 0xCD && p[1] == 0x80)
          || (p[0] == 0xCD && p[1] == 0x81)
          || (p[0] == 0xCD && p[1] == 0x82)
          || (p[0] == 0x0F && p[1] == 0x34);
      VG_(message)(Vg_DebugMsg,
                   "?! restarting over syscall at %#x %02x %02x\n",
                   arch->vex.guest_EIP, p[0], p[1]);

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
#elif defined(VGO_l4re)
   VG_(unimplemented)("unimplemented function ML_(fixup_guest_state_to_restart_syscall)()");
# error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#if !defined(VGO_l4re)
/*
   Fix up the guest state when a syscall is interrupted by a signal
   and so has been forced to return 'sysret'.

   To do this, we determine the precise state of the syscall by
   looking at the (real) IP at the time the signal happened.  The
   syscall sequence looks like:

   3. save result to guest state (EAX, RAX, R3+CR0.SO)

   happens at       Then       Why?
   [1-2)            restart    nothing has happened (restart syscall)
   [2]              restart    syscall hasn't started, or kernel wants to restart
   [2-3)            save       syscall complete, but results not saved
   [3-4)                       syscall complete, results saved

   Sometimes we never want to restart an interrupted syscall (because
   sigaction says not to), so we only restart if "restart" is True.

   This will also call VG_(post_syscall) if the syscall has actually
   completed (either because it was interrupted, or because it
   actually finished).  It will not call VG_(post_syscall) if the
   syscall is set up for restart, which means that the pre-wrapper may
   get called multiple times.
*/
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid, 
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   SyscallStatus canonical;
   ThreadArchState* th_regs;

   /* Compute some Booleans indicating which range we're in.
      ('outside_range' is also computed as part of this group.) */
      in_setup_to_restart,      // [1,2) in the .S files
      at_restart,               // [2]   in the .S files
      in_complete_to_committed, // [3,4) in the .S files
      in_committed_to_finished; // [4,5) in the .S files

# if defined(VGO_linux) || defined(VGO_aix5)
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
# elif defined(VGO_darwin)
   /* Same classification, but each Darwin syscall flavour (MACH,
      MDEP, UNIX) has its own marker set, hence the 3-way tests. */
      = (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
      = (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
      = (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      = (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      = (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
# error "Unknown OS"

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       " not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall). */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, " not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);

      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         fail with EINTR. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, " at syscall instr: restarting\n");
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);

      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, " at syscall instr: returning EINTR\n");
      canonical = convert_SysRes_to_SyscallStatus(
                     VG_(mk_SysRes_Error)( VKI_EINTR )
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);

   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       " completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);

   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       " completed and committed: nothing to do\n");
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);

   VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
#if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid); // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   // SfNoWriteResult stops post_syscall machinery from writing a result back
   // into the guest registers.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   // Mark the syscall machinery idle again before leaving.
   sci->status.what = SsIdle;

   // Hand control back to the scheduler loop; this call does not return.
   vg_assert(tst->sched_jmpbuf_valid);
   __builtin_longjmp(tst->sched_jmpbuf, True);
/* ---------------------------------------------------------------------
   A place to store the where-to-call-when-really-done pointer
   ------------------------------------------------------------------ */

// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
// (Function-pointer variable: takes the exiting ThreadId and its
// scheduler return code; does not return.)
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
     (ThreadId,VgSchedReturnCode)
2333 /*--------------------------------------------------------------------*/
2335 /*--------------------------------------------------------------------*/