2 /*--------------------------------------------------------------------*/
3 /*--- Platform-specific syscalls stuff. syswrap-x86-linux.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2000-2010 Nicholas Nethercote
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
31 #if defined(VGP_x86_linux)
33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
34 in interrupted_syscall() need to be reviewed. They don't seem
35 to assign the shadow state.
38 #include "pub_core_basics.h"
39 #include "pub_core_vki.h"
40 #include "pub_core_vkiscnums.h"
41 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
42 #include "pub_core_threadstate.h"
43 #include "pub_core_aspacemgr.h"
44 #include "pub_core_debuglog.h"
45 #include "pub_core_libcbase.h"
46 #include "pub_core_libcassert.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_libcsignal.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_scheduler.h"
53 #include "pub_core_sigframe.h" // For VG_(sigframe_destroy)()
54 #include "pub_core_signals.h"
55 #include "pub_core_syscall.h"
56 #include "pub_core_syswrap.h"
57 #include "pub_core_tooliface.h"
58 #include "pub_core_stacks.h" // VG_(register_stack)
60 #include "priv_types_n_macros.h"
61 #include "priv_syswrap-generic.h" /* for decls of generic wrappers */
62 #include "priv_syswrap-linux.h" /* for decls of linux-ish wrappers */
63 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
64 #include "priv_syswrap-main.h"
67 /* ---------------------------------------------------------------------
69 ------------------------------------------------------------------ */
71 /* Call f(arg1), but first switch stacks, using 'stack' as the new
72 stack, and use 'retaddr' as f's return-to address. Also, clear all
73 the integer registers before entering f.*/
/* NOTE(review): this view elides several lines of the declaration and
   the asm body.  The visible fragment: save old %esp in %esi, load the
   new stack pointer from the first argument, push arg1 / retaddr / f
   onto the new stack, zero the GP registers, and trap (ud2) if control
   ever falls through — the function is declared noreturn. */
74 __attribute__((noreturn))
75 void ML_(call_on_new_stack_0_1) ( Addr stack,
85 ".globl vgModuleLocal_call_on_new_stack_0_1\n"
86 "vgModuleLocal_call_on_new_stack_0_1:\n"
87 " movl %esp, %esi\n" // remember old stack pointer
88 " movl 4(%esi), %esp\n" // set stack
89 " pushl 16(%esi)\n" // arg1 to stack
90 " pushl 8(%esi)\n" // retaddr to stack
91 " pushl 12(%esi)\n" // f to stack
92 " movl $0, %eax\n" // zero all GP regs
100 " ud2\n" // should never get here
106 Perform a clone system call. clone is strange because it has
107 fork()-like return-twice semantics, so it needs special
112 int (fn)(void*) in 0+FSZ(%esp)
113 void* child_stack in 4+FSZ(%esp)
114 int flags in 8+FSZ(%esp)
115 void* arg in 12+FSZ(%esp)
116 pid_t* child_tid in 16+FSZ(%esp)
117 pid_t* parent_tid in 20+FSZ(%esp)
118 void* tls_ptr in 24+FSZ(%esp)
120 System call requires:
122 int $__NR_clone in %eax
124 void* child_stack in %ecx
125 pid_t* parent_tid in %edx
126 pid_t* child_tid in %edi
127 void* tls_ptr in %esi
129 Returns an Int encoded in the linux-x86 way, not a SysRes.

   NOTE(review): on x86-32 the kernel's clone argument order puts the
   TLS descriptor in %esi (arg4) and child_tid in %edi (arg5), which is
   what the register table above and the asm below both use.
131 #define FSZ "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
132 #define __NR_CLONE VG_STRINGIFY(__NR_clone)
133 #define __NR_EXIT VG_STRINGIFY(__NR_exit)
136 Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
142 vki_modify_ldt_t * );
145 "do_syscall_clone_x86_linux:\n"
/* Stash fn and its argument on the child's stack before the syscall, so
   the child can pop them after clone returns 0 in its context. */
150 /* set up child stack with function and arg */
151 " movl 4+"FSZ"(%esp), %ecx\n" /* syscall arg2: child stack */
152 " movl 12+"FSZ"(%esp), %ebx\n" /* fn arg */
153 " movl 0+"FSZ"(%esp), %eax\n" /* fn */
154 " lea -8(%ecx), %ecx\n" /* make space on stack */
155 " movl %ebx, 4(%ecx)\n" /* fn arg */
156 " movl %eax, 0(%ecx)\n" /* fn */
158 /* get other args to clone */
159 " movl 8+"FSZ"(%esp), %ebx\n" /* syscall arg1: flags */
160 " movl 20+"FSZ"(%esp), %edx\n" /* syscall arg3: parent tid * */
161 " movl 16+"FSZ"(%esp), %edi\n" /* syscall arg5: child tid * */
162 " movl 24+"FSZ"(%esp), %esi\n" /* syscall arg4: tls_ptr * */
163 " movl $"__NR_CLONE", %eax\n"
164 " int $0x80\n" /* clone() */
165 " testl %eax, %eax\n" /* child if retval == 0 */
168 /* CHILD - call thread function */
170 " call *%eax\n" /* call fn */
172 /* exit with result */
173 " movl %eax, %ebx\n" /* arg1: return value from fn */
174 " movl $"__NR_EXIT", %eax\n"
177 /* Hm, exit returned */
180 "1:\n" /* PARENT or ERROR */
193 // forward declarations
194 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
195 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
198 When a client clones, we need to keep track of the new thread. This means:
199 1. allocate a ThreadId+ThreadState+stack for the the thread
201 2. initialize the thread's new VCPU state
203 3. create the thread using the same args as the client requested,
204 but using the scheduler entrypoint for EIP, and a separate stack
207 static SysRes do_clone ( ThreadId ptid,
208 UInt flags, Addr esp,
211 vki_modify_ldt_t *tlsinfo)
213 static const Bool debug = False;
/* NOTE(review): this view elides some lines (e.g. declarations of res,
   eax, stack, seg and the function's opening brace); only comments are
   added here. */
215 ThreadId ctid = VG_(alloc_ThreadState)();
216 ThreadState* ptst = VG_(get_ThreadState)(ptid);
217 ThreadState* ctst = VG_(get_ThreadState)(ctid);
222 vki_sigset_t blockall, savedmask;
224 VG_(sigfillset)(&blockall);
226 vg_assert(VG_(is_running_thread)(ptid));
227 vg_assert(VG_(is_valid_tid)(ctid));
/* Allocate a Valgrind-side stack for the new thread; ENOMEM is the
   pre-set failure result if that (or anything later) goes wrong. */
229 stack = (UWord*)ML_(allocstack)(ctid);
231 res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
235 /* Copy register state
237 Both parent and child return to the same place, and the code
238 following the clone syscall works out which is which, so we
239 don't need to worry about it.
241 The parent gets the child's new tid returned from clone, but the
244 If the clone call specifies a NULL esp for the new thread, then
245 it actually gets a copy of the parent's esp.
247 /* Note: the clone call done by the Quadrics Elan3 driver specifies
248 clone flags of 0xF00, and it seems to rely on the assumption
249 that the child inherits a copy of the parent's GDT.
250 setup_child takes care of setting that up. */
251 setup_child( &ctst->arch, &ptst->arch, True );
253 /* Make sys_clone appear to have returned Success(0) in the
255 ctst->arch.vex.guest_EAX = 0;
258 ctst->arch.vex.guest_ESP = esp;
260 ctst->os_state.parent = ptid;
262 /* inherit signal mask */
263 ctst->sig_mask = ptst->sig_mask;
264 ctst->tmp_sig_mask = ptst->sig_mask;
266 /* Start the child with its threadgroup being the same as the
267 parent's. This is so that any exit_group calls that happen
268 after the child is created but before it sets its
269 os_state.threadgroup field for real (in thread_wrapper in
270 syswrap-linux.c), really kill the new thread. a.k.a this avoids
271 a race condition in which the thread is unkillable (via
272 exit_group) because its threadgroup is not set. The race window
273 is probably only a few hundred or a few thousand cycles long.
275 ctst->os_state.threadgroup = ptst->os_state.threadgroup;
277 /* We don't really know where the client stack is, because its
278 allocated by the client. The best we can do is look at the
279 memory mappings and try to derive some useful information. We
280 assume that esp starts near its highest possible value, and can
281 only go down to the start of the mmaped segment. */
282 seg = VG_(am_find_nsegment)((Addr)esp);
283 if (seg && seg->kind != SkResvn) {
284 ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(esp);
285 ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;
287 VG_(register_stack)(seg->start, ctst->client_stack_highest_word);
290 VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
291 ctid, seg->start, VG_PGROUNDUP(esp));
293 VG_(message)(Vg_UserMsg,
294 "!? New thread %d starts with ESP(%#lx) unmapped\n",
296 ctst->client_stack_szB = 0;
299 /* Assume the clone will succeed, and tell any tool that wants to
300 know that this thread has come into existence. We cannot defer
301 it beyond this point because sys_set_thread_area, just below,
302 causes tCheck to assert by making references to the new ThreadId
303 if we don't state the new thread exists prior to that point.
304 If the clone fails, we'll send out a ll_exit notification for it
305 at the out: label below, to clean up. */
306 VG_TRACK ( pre_thread_ll_create, ptid, ctid );
/* CLONE_SETTLS is handled entirely here (simulated GDT), so strip the
   flag before the real clone call below. */
308 if (flags & VKI_CLONE_SETTLS) {
310 VG_(printf)("clone child has SETTLS: tls info at %p: idx=%d "
311 "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
312 tlsinfo, tlsinfo->entry_number,
313 tlsinfo->base_addr, tlsinfo->limit,
314 ptst->arch.vex.guest_ESP,
315 ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
316 res = sys_set_thread_area(ctid, tlsinfo);
321 flags &= ~VKI_CLONE_SETTLS;
323 /* start the thread with everything blocked */
324 VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
326 /* Create the new thread */
327 eax = do_syscall_clone_x86_linux(
328 ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
329 child_tidptr, parent_tidptr, NULL
331 res = VG_(mk_SysRes_x86_linux)( eax );
333 VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
/* On failure, roll back: free the child's LDT/GDT, mark its slot empty
   and tell the tool the thread is gone again. */
336 if (sr_isError(res)) {
338 VG_(cleanup_thread)(&ctst->arch);
339 ctst->status = VgTs_Empty;
340 /* oops. Better tell the tool the thread exited in a hurry :-) */
341 VG_TRACK( pre_thread_ll_exit, ctid );
348 /* ---------------------------------------------------------------------
350 ------------------------------------------------------------------ */
352 /* Details of the LDT simulation
353 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
355 When a program runs natively, the linux kernel allows each *thread*
356 in it to have its own LDT. Almost all programs never do this --
357 it's wildly unportable, after all -- and so the kernel never
358 allocates the structure, which is just as well as an LDT occupies
359 64k of memory (8192 entries of size 8 bytes).
361 A thread may choose to modify its LDT entries, by doing the
362 __NR_modify_ldt syscall. In such a situation the kernel will then
363 allocate an LDT structure for it. Each LDT entry is basically a
364 (base, limit) pair. A virtual address in a specific segment is
365 translated to a linear address by adding the segment's base value.
366 In addition, the virtual address must not exceed the limit value.
368 To use an LDT entry, a thread loads one of the segment registers
369 (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
370 .. 8191) it wants to use. In fact, the required value is (index <<
371 3) + 7, but that's not important right now. Any normal instruction
372 which includes an addressing mode can then be made relative to that
373 LDT entry by prefixing the insn with a so-called segment-override
374 prefix, a byte which indicates which of the 6 segment registers
377 Now, a key constraint is that valgrind's address checks operate in
378 terms of linear addresses. So we have to explicitly translate
379 virtual addrs into linear addrs, and that means doing a complete
382 Calls to modify_ldt are intercepted. For each thread, we maintain
383 an LDT (with the same normally-never-allocated optimisation that
384 the kernel does). This is updated as expected via calls to
387 When a thread does an amode calculation involving a segment
388 override prefix, the relevant LDT entry for the thread is
389 consulted. It all works.
391 There is a conceptual problem, which appears when switching back to
392 native execution, either temporarily to pass syscalls to the
393 kernel, or permanently, when debugging V. Problem at such points
394 is that it's pretty pointless to copy the simulated machine's
395 segment registers to the real machine, because we'd also need to
396 copy the simulated LDT into the real one, and that's prohibitively
399 Fortunately it looks like no syscalls rely on the segment regs or
400 LDT being correct, so we can get away with it. Apart from that the
401 simulation is pretty straightforward. All 6 segment registers are
402 tracked, although only %ds, %es, %fs and %gs are allowed as
403 prefixes. Perhaps it could be restricted even more than that -- I
404 am not sure what is and isn't allowed in user-mode.
407 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
408 the Linux kernel's logic (cut-n-paste of code in
409 linux/kernel/ldt.c). */
412 void translate_to_hw_format ( /* IN */ vki_modify_ldt_t* inn,
413 /* OUT */ VexGuestX86SegDescr* out,
416 UInt entry_1, entry_2;
417 vg_assert(8 == sizeof(VexGuestX86SegDescr));
420 VG_(printf)("translate_to_hw_format: base %#lx, limit %d\n",
421 inn->base_addr, inn->limit );
423 /* Allow LDTs to be cleared by the user. */
/* A base/limit of 0 plus this exact field pattern means "clear this
   entry"; the (elided) branch writes a null descriptor. */
424 if (inn->base_addr == 0 && inn->limit == 0) {
426 (inn->contents == 0 &&
427 inn->read_exec_only == 1 &&
428 inn->seg_32bit == 0 &&
429 inn->limit_in_pages == 0 &&
430 inn->seg_not_present == 1 &&
431 inn->useable == 0 )) {
/* Pack the fields into the two 32-bit halves of an x86 descriptor,
   mirroring the kernel's LDT_entry_a/LDT_entry_b encoding. */
438 entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
439 (inn->limit & 0x0ffff);
440 entry_2 = (inn->base_addr & 0xff000000) |
441 ((inn->base_addr & 0x00ff0000) >> 16) |
442 (inn->limit & 0xf0000) |
443 ((inn->read_exec_only ^ 1) << 9) |
444 (inn->contents << 10) |
445 ((inn->seg_not_present ^ 1) << 15) |
446 (inn->seg_32bit << 22) |
447 (inn->limit_in_pages << 23) |
/* NOTE(review): elided lines sit just above; in the original the
   'useable' bit is only OR'd in for the non-oldmode case — confirm. */
450 entry_2 |= (inn->useable << 20);
452 /* Install the new entry ... */
454 out->LdtEnt.Words.word1 = entry_1;
455 out->LdtEnt.Words.word2 = entry_2;
458 /* Create a zeroed-out GDT. */
459 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
461 Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
/* arena_calloc zero-fills, so every descriptor starts out null/empty. */
462 return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxG.1", nbytes, 1);
465 /* Create a zeroed-out LDT. */
466 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
468 Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
/* arena_calloc zero-fills, so every descriptor starts out null/empty. */
469 return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxL.1", nbytes, 1);
472 /* Free up an LDT or GDT allocated by the above fns. */
473 static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
/* Frees back into the same VG_AR_CORE arena the alloc fns used. */
476 VG_(arena_free)(VG_AR_CORE, (void*)dt);
479 /* Copy contents between two existing LDTs. */
480 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
481 VexGuestX86SegDescr* dst )
/* Entry-by-entry copy over all LDT slots (loop body elided in this view). */
486 for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
490 /* Copy contents between two existing GDTs. */
491 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
492 VexGuestX86SegDescr* dst )
/* Entry-by-entry copy over all GDT slots (loop body elided in this view). */
497 for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
501 /* Free this thread's DTs, if it has any. */
502 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
504 vg_assert(sizeof(HWord) == sizeof(void*));
507 VG_(printf)("deallocate_LGDTs_for_thread: "
508 "ldt = 0x%lx, gdt = 0x%lx\n",
509 vex->guest_LDT, vex->guest_GDT );
/* Free and null the LDT/GDT pointers so a double deallocation is a
   harmless no-op. */
511 if (vex->guest_LDT != (HWord)NULL) {
512 free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
513 vex->guest_LDT = (HWord)NULL;
516 if (vex->guest_GDT != (HWord)NULL) {
517 free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
518 vex->guest_GDT = (HWord)NULL;
526 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
527 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
531 * read_ldt() is not really atomic - this is not a problem since
532 * synchronization of reads and writes done to the LDT has to be
533 * assured by user-space anyway. Writes are atomic, to protect
534 * the security checks done on new descriptors.
537 SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
544 VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
545 tid, ptr, bytecount );
547 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
548 vg_assert(8 == sizeof(VexGuestX86SegDescr));
550 ldt = (Char*)(VG_(threads)[tid].arch.vex.guest_LDT);
551 res = VG_(mk_SysRes_Success)( 0 );
553 /* LDT not allocated, meaning all entries are null */
/* Copy out at most bytecount bytes of the simulated LDT; the success
   value is the number of bytes copied.  (Clamp and copy-loop bodies are
   elided in this view.) */
556 size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
557 if (size > bytecount)
560 res = VG_(mk_SysRes_Success)( size );
561 for (i = 0; i < size; i++)
570 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
573 VexGuestX86SegDescr* ldt;
574 vki_modify_ldt_t* ldt_info;
577 VG_(printf)("write_ldt: tid = %d, ptr = %p, "
578 "bytecount = %d, oldmode = %d\n",
579 tid, ptr, bytecount, oldmode );
581 vg_assert(8 == sizeof(VexGuestX86SegDescr));
582 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
584 ldt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
585 ldt_info = (vki_modify_ldt_t*)ptr;
/* Validate the request: exact struct size and an in-range entry
   number, else EINVAL. */
587 res = VG_(mk_SysRes_Error)( VKI_EINVAL );
588 if (bytecount != sizeof(vki_modify_ldt_t))
591 res = VG_(mk_SysRes_Error)( VKI_EINVAL );
592 if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
594 if (ldt_info->contents == 3) {
597 if (ldt_info->seg_not_present == 0)
601 /* If this thread doesn't have an LDT, we'd better allocate it
/* Lazily allocate the simulated LDT on first write. */
604 ldt = alloc_zeroed_x86_LDT();
605 VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
608 /* Install the new entry ... */
609 translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
610 res = VG_(mk_SysRes_Success)( 0 );
/* Dispatch the simulated modify_ldt: func 0 = read, 1 = write (old
   mode), 2 = unimplemented read_default_ldt, and (per the elided case
   label) the newer write variant with oldmode=0.  Defaults to ENOSYS. */
617 static SysRes sys_modify_ldt ( ThreadId tid,
618 Int func, void* ptr, UInt bytecount )
620 SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
624 ret = read_ldt(tid, ptr, bytecount);
627 ret = write_ldt(tid, ptr, bytecount, 1);
630 VG_(unimplemented)("sys_modify_ldt: func == 2");
631 /* god knows what this is about */
632 /* ret = read_default_ldt(ptr, bytecount); */
636 ret = write_ldt(tid, ptr, bytecount, 0);
/* Simulated set_thread_area(2): install 'info' into this thread's
   simulated GDT, allocating the GDT lazily and picking a free slot when
   entry_number requests auto-allocation. */
643 static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
646 VexGuestX86SegDescr* gdt;
648 vg_assert(8 == sizeof(VexGuestX86SegDescr));
649 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
652 return VG_(mk_SysRes_Error)( VKI_EFAULT );
654 gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
656 /* If the thread doesn't have a GDT, allocate it now. */
658 gdt = alloc_zeroed_x86_GDT();
659 VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
662 idx = info->entry_number;
665 /* Find and use the first free entry. Don't allocate entry
666 zero, because the hardware will never do that, and apparently
667 doing so confuses some code (perhaps stuff running on
669 for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
670 if (gdt[idx].LdtEnt.Words.word1 == 0
671 && gdt[idx].LdtEnt.Words.word2 == 0)
675 if (idx == VEX_GUEST_X86_GDT_NENT)
676 return VG_(mk_SysRes_Error)( VKI_ESRCH )
677 } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
678 /* Similarly, reject attempts to use GDT[0]. */
679 return VG_(mk_SysRes_Error)( VKI_EINVAL );
682 translate_to_hw_format(info, &gdt[idx], 0);
/* Write the allocated slot back into the client's struct, with tool
   notifications bracketing the client-memory write. */
684 VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
685 "set_thread_area(info->entry)",
686 (Addr) & info->entry_number, sizeof(unsigned int) );
687 info->entry_number = idx;
688 VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
689 (Addr) & info->entry_number, sizeof(unsigned int) );
691 return VG_(mk_SysRes_Success)( 0 );
/* Simulated get_thread_area(2): decode the descriptor at
   info->entry_number from this thread's simulated GDT back into the
   user-visible vki_modify_ldt_t fields. */
695 static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
698 VexGuestX86SegDescr* gdt;
700 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
701 vg_assert(8 == sizeof(VexGuestX86SegDescr));
704 return VG_(mk_SysRes_Error)( VKI_EFAULT );
706 idx = info->entry_number;
708 if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
709 return VG_(mk_SysRes_Error)( VKI_EINVAL );
711 gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
713 /* If the thread doesn't have a GDT, allocate it now. */
715 gdt = alloc_zeroed_x86_GDT();
716 VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
/* Reassemble base/limit from their split bitfields and invert the
   hardware polarity of the present and read/exec bits. */
719 info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
720 ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
721 gdt[idx].LdtEnt.Bits.BaseLow;
722 info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
723 gdt[idx].LdtEnt.Bits.LimitLow;
724 info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
725 info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
726 info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
727 info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
728 info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
729 info->useable = gdt[idx].LdtEnt.Bits.Sys;
732 return VG_(mk_SysRes_Success)( 0 );
735 /* ---------------------------------------------------------------------
737 ------------------------------------------------------------------ */
/* Arch-specific thread teardown hook, called when a thread exits or a
   clone fails; on x86 the only per-thread arch resources are the
   simulated LDT/GDT. */
739 void VG_(cleanup_thread) ( ThreadArchState* arch )
741 /* Release arch-specific resources held by this thread. */
742 /* On x86, we have to dump the LDT and GDT. */
743 deallocate_LGDTs_for_thread( &arch->vex );
747 static void setup_child ( /*OUT*/ ThreadArchState *child,
748 /*IN*/ ThreadArchState *parent,
749 Bool inherit_parents_GDT )
751 /* We inherit our parent's guest state. */
752 child->vex = parent->vex;
753 child->vex_shadow1 = parent->vex_shadow1;
754 child->vex_shadow2 = parent->vex_shadow2;
756 /* We inherit our parent's LDT. */
/* The struct copy above also copied the parent's LDT/GDT *pointers*;
   the code below replaces them with NULL or deep copies so parent and
   child never share descriptor tables. */
757 if (parent->vex.guest_LDT == (HWord)NULL) {
758 /* We hope this is the common case. */
759 child->vex.guest_LDT = (HWord)NULL;
761 /* No luck .. we have to take a copy of the parent's. */
762 child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
763 copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
764 (VexGuestX86SegDescr*)child->vex.guest_LDT );
767 /* Either we start with an empty GDT (the usual case) or inherit a
768 copy of our parents' one (Quadrics Elan3 driver -style clone
770 child->vex.guest_GDT = (HWord)NULL;
772 if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
773 child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
774 copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
775 (VexGuestX86SegDescr*)child->vex.guest_GDT );
780 /* ---------------------------------------------------------------------
781 PRE/POST wrappers for x86/Linux-specific syscalls
782 ------------------------------------------------------------------ */
784 #define PRE(name) DEFN_PRE_TEMPLATE(x86_linux, name)
785 #define POST(name) DEFN_POST_TEMPLATE(x86_linux, name)
787 /* Add prototypes for the wrappers declared here, so that gcc doesn't
788 harass us for not having prototypes. Really this is a kludge --
789 the right thing to do is to make these wrappers 'static' since they
790 aren't visible outside this file, but that requires even more macro
792 DECL_TEMPLATE(x86_linux, sys_socketcall);
793 DECL_TEMPLATE(x86_linux, sys_stat64);
794 DECL_TEMPLATE(x86_linux, sys_fstatat64);
795 DECL_TEMPLATE(x86_linux, sys_fstat64);
796 DECL_TEMPLATE(x86_linux, sys_lstat64);
797 DECL_TEMPLATE(x86_linux, sys_clone);
798 DECL_TEMPLATE(x86_linux, old_mmap);
799 DECL_TEMPLATE(x86_linux, sys_mmap2);
800 DECL_TEMPLATE(x86_linux, sys_sigreturn);
801 DECL_TEMPLATE(x86_linux, sys_ipc);
802 DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
803 DECL_TEMPLATE(x86_linux, sys_modify_ldt);
804 DECL_TEMPLATE(x86_linux, sys_set_thread_area);
805 DECL_TEMPLATE(x86_linux, sys_get_thread_area);
806 DECL_TEMPLATE(x86_linux, sys_ptrace);
807 DECL_TEMPLATE(x86_linux, sys_sigaction);
808 DECL_TEMPLATE(x86_linux, sys_sigsuspend);
809 DECL_TEMPLATE(x86_linux, old_select);
810 DECL_TEMPLATE(x86_linux, sys_vm86old);
811 DECL_TEMPLATE(x86_linux, sys_vm86);
/* 223 is an unused x86 syscall slot; a stub wrapper is declared for it. */
812 DECL_TEMPLATE(x86_linux, sys_syscall223);
816 /* struct sel_arg_struct {
818 fd_set *inp, *outp, *exp;
822 PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
823 PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
824 *flags |= SfMayBlock;
/* Unpack the 5-word argument block (nfds, readfds, writefds, exceptfds,
   timeout); assignments a1..a5 are elided in this view. */
826 UInt* arg_struct = (UInt*)ARG1;
827 UInt a1, a2, a3, a4, a5;
835 PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", a1,a2,a3,a4,a5);
/* Only the first nfds bits of each fd_set are meaningful, hence a1/8. */
836 if (a2 != (Addr)NULL)
837 PRE_MEM_READ( "old_select(readfds)", a2, a1/8 /* __FD_SETSIZE/8 */ );
838 if (a3 != (Addr)NULL)
839 PRE_MEM_READ( "old_select(writefds)", a3, a1/8 /* __FD_SETSIZE/8 */ );
840 if (a4 != (Addr)NULL)
841 PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
842 if (a5 != (Addr)NULL)
843 PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
852 PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
853 PRE_REG_READ2(int, "clone",
854 unsigned long, flags,
855 void *, child_stack);
/* ARG3..ARG5 are only reads/writes when the corresponding flag is set,
   so each is checked conditionally rather than via PRE_REG_READ5. */
857 if (ARG1 & VKI_CLONE_PARENT_SETTID) {
858 if (VG_(tdict).track_pre_reg_read) {
859 PRA3("clone", int *, parent_tidptr);
861 PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
862 if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
867 if (ARG1 & VKI_CLONE_SETTLS) {
868 if (VG_(tdict).track_pre_reg_read) {
869 PRA4("clone", vki_modify_ldt_t *, tlsinfo);
871 PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
872 if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
877 if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
878 if (VG_(tdict).track_pre_reg_read) {
879 PRA5("clone", int *, child_tidptr);
881 PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
882 if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
889 SET_STATUS_Failure( VKI_EFAULT );
/* The termination signal encoded in the low flag bits must be one the
   client is allowed to use under Valgrind. */
895 if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
896 SET_STATUS_Failure( VKI_EINVAL );
900 /* Be ultra-paranoid and filter out any clone-variants we don't understand:
901 - ??? specifies clone flags of 0x100011
902 - ??? specifies clone flags of 0x1200011.
903 - NPTL specifies clone flags of 0x7D0F00.
904 - The Quadrics Elan3 driver specifies clone flags of 0xF00.
905 - Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
906 Everything else is rejected.
910 /* 11 Nov 05: for the time being, disable this ultra-paranoia.
911 The switch below probably does a good enough job. */
912 (cloneflags == 0x100011 || cloneflags == 0x1200011
913 || cloneflags == 0x7D0F00
914 || cloneflags == 0x790F00
915 || cloneflags == 0x3D0F00
916 || cloneflags == 0x410F00
917 || cloneflags == 0xF00
918 || cloneflags == 0xF21)) {
922 /* Nah. We don't like it. Go away. */
926 /* Only look at the flags we really care about */
927 switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
928 | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
929 case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
930 /* thread creation */
931 SET_STATUS_from_SysRes(
934 (Addr)ARG2, /* child ESP */
935 (Int *)ARG3, /* parent_tidptr */
936 (Int *)ARG5, /* child_tidptr */
937 (vki_modify_ldt_t *)ARG4)); /* set_tls */
940 case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
941 /* FALLTHROUGH - assume vfork == fork */
942 cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
944 case 0: /* plain fork */
945 SET_STATUS_from_SysRes(
946 ML_(do_fork_clone)(tid,
947 cloneflags, /* flags */
948 (Int *)ARG3, /* parent_tidptr */
949 (Int *)ARG5)); /* child_tidptr */
954 /* should we just ENOSYS? */
955 VG_(message)(Vg_UserMsg, "\n");
956 VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
957 VG_(message)(Vg_UserMsg, "\n");
958 VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
959 VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
960 VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
961 VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
963 ("Valgrind does not support general clone().");
/* On success, mark the tid words written and ask the scheduler to let
   the child run soon.  (The POST/SUCCESS wrapper lines around these are
   partly elided in this view.) */
967 if (ARG1 & VKI_CLONE_PARENT_SETTID)
968 POST_MEM_WRITE(ARG3, sizeof(Int));
969 if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
970 POST_MEM_WRITE(ARG5, sizeof(Int));
972 /* Thread creation was successful; let the child have the chance
974 *flags |= SfYieldAfter;
980 /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
981 an explanation of what follows. */
984 PRINT("sys_sigreturn ( )");
986 vg_assert(VG_(is_valid_tid)(tid));
987 vg_assert(tid >= 1 && tid < VG_N_THREADS);
988 vg_assert(VG_(is_running_thread)(tid));
990 /* Adjust esp to point to start of frame; skip back up over
991 sigreturn sequence's "popl %eax" and handler ret addr */
992 tst = VG_(get_ThreadState)(tid);
993 tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
994 /* XXX why does ESP change differ from rt_sigreturn case below? */
996 /* This is only so that the EIP is (might be) useful to report if
997 something goes wrong in the sigreturn */
998 ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
/* False => destroy a classic (non-RT) signal frame. */
1000 /* Restore register state from frame and remove it */
1001 VG_(sigframe_destroy)(tid, False);
1003 /* Tell the driver not to update the guest state with the "result",
1004 and set a bogus result to keep it happy. */
1005 *flags |= SfNoWriteResult;
1006 SET_STATUS_Success(0);
1008 /* Check to see if any signals arose as a result of this. */
1009 *flags |= SfPollAfter;
1012 PRE(sys_rt_sigreturn)
1014 /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
1015 an explanation of what follows. */
1018 PRINT("sys_rt_sigreturn ( )");
1020 vg_assert(VG_(is_valid_tid)(tid));
1021 vg_assert(tid >= 1 && tid < VG_N_THREADS);
1022 vg_assert(VG_(is_running_thread)(tid));
1024 /* Adjust esp to point to start of frame; skip back up over handler
1026 tst = VG_(get_ThreadState)(tid);
1027 tst->arch.vex.guest_ESP -= sizeof(Addr);
1028 /* XXX why does ESP change differ from sigreturn case above? */
1030 /* This is only so that the EIP is (might be) useful to report if
1031 something goes wrong in the sigreturn */
1032 ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
/* True => destroy an RT signal frame (vs False in sys_sigreturn). */
1034 /* Restore register state from frame and remove it */
1035 VG_(sigframe_destroy)(tid, True);
1037 /* Tell the driver not to update the guest state with the "result",
1038 and set a bogus result to keep it happy. */
1039 *flags |= SfNoWriteResult;
1040 SET_STATUS_Success(0);
1042 /* Check to see if any signals arose as a result of this. */
1043 *flags |= SfPollAfter;
1048 PRINT("sys_modify_ldt ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
1049 PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
1050 unsigned long, bytecount);
1053 /* read the LDT into ptr */
1054 PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
/* func 1 / 0x11 both write one LDT entry; 0x11 is the newer variant. */
1056 if (ARG1 == 1 || ARG1 == 0x11) {
1057 /* write the LDT with the entry pointed at by ptr */
1058 PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
1060 /* "do" the syscall ourselves; the kernel never sees it */
1061 SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
/* On a successful read (func 0), RES bytes were copied out to ptr. */
1063 if (ARG1 == 0 && SUCCESS && RES > 0) {
1064 POST_MEM_WRITE( ARG2, RES );
1068 PRE(sys_set_thread_area)
1070 PRINT("sys_set_thread_area ( %#lx )", ARG1);
1071 PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
1072 PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1074 /* "do" the syscall ourselves; the kernel never sees it */
1075 SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
1078 PRE(sys_get_thread_area)
1080 PRINT("sys_get_thread_area ( %#lx )", ARG1);
1081 PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
1082 PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1084 /* "do" the syscall ourselves; the kernel never sees it */
1085 SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
/* (POST wrapper) on success the whole u_info struct was filled in. */
1088 POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
1092 // Parts of this are x86-specific, but the *PEEK* cases are generic.
1094 // ARG3 is only used for pointers into the traced process's address
1095 // space and for offsets into the traced process's struct
1096 // user_regs_struct. It is never a pointer into this process's memory
1097 // space, and we should therefore not check anything it points to.
// Per-request checking: GET* requests write through ARG4 (data), SET*
// requests read from it.
1100 PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
1101 PRE_REG_READ4(int, "ptrace",
1102 long, request, long, pid, long, addr, long, data);
1104 case VKI_PTRACE_PEEKTEXT:
1105 case VKI_PTRACE_PEEKDATA:
1106 case VKI_PTRACE_PEEKUSR:
1107 PRE_MEM_WRITE( "ptrace(peek)", ARG4,
1110 case VKI_PTRACE_GETREGS:
1111 PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
1112 sizeof (struct vki_user_regs_struct));
1114 case VKI_PTRACE_GETFPREGS:
1115 PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
1116 sizeof (struct vki_user_i387_struct));
1118 case VKI_PTRACE_GETFPXREGS:
1119 PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
1120 sizeof(struct vki_user_fxsr_struct) );
1122 case VKI_PTRACE_SETREGS:
1123 PRE_MEM_READ( "ptrace(setregs)", ARG4,
1124 sizeof (struct vki_user_regs_struct));
1126 case VKI_PTRACE_SETFPREGS:
1127 PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
1128 sizeof (struct vki_user_i387_struct));
1130 case VKI_PTRACE_SETFPXREGS:
1131 PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
1132 sizeof(struct vki_user_fxsr_struct) );
1134 case VKI_PTRACE_GETEVENTMSG:
1135 PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
1137 case VKI_PTRACE_GETSIGINFO:
1138 PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
1140 case VKI_PTRACE_SETSIGINFO:
1141 PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
// POST wrapper for ptrace(2): mark the buffers filled by successful
// PEEK*/GET* requests as defined.
1151 case VKI_PTRACE_PEEKTEXT:
1152 case VKI_PTRACE_PEEKDATA:
1153 case VKI_PTRACE_PEEKUSR:
1154 POST_MEM_WRITE( ARG4, sizeof (long));
1156 case VKI_PTRACE_GETREGS:
1157 POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
1159 case VKI_PTRACE_GETFPREGS:
1160 POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
1162 case VKI_PTRACE_GETFPXREGS:
1163 POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
1165 case VKI_PTRACE_GETEVENTMSG:
1166 POST_MEM_WRITE( ARG4, sizeof(unsigned long));
1168 case VKI_PTRACE_GETSIGINFO:
1169 /* XXX: This is a simplification. Different parts of the
1170 * siginfo_t are valid depending on the type of signal.
1172 POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
// Helper: fetch an Addr-sized word from guest address 'a', first
// telling the tool (via PRE_MEM_READ, tagged with description 's')
// that the word is being read.
1179 static Addr deref_Addr ( ThreadId tid, Addr a, Char* s )
1181 Addr* a_p = (Addr*)a;
1182 PRE_MEM_READ( s, (Addr)a_p, sizeof(Addr) );
// PRE wrapper for the multiplexed ipc(2) syscall: dispatch on ARG1
// (the sub-call) to the generic/linux sem*/msg*/shm* PRE helpers.
1188 PRINT("sys_ipc ( %ld, %ld, %ld, %ld, %#lx, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5,ARG6);
1189 // XXX: this is simplistic -- some args are not used in all circumstances.
1190 PRE_REG_READ6(int, "ipc",
1191 vki_uint, call, int, first, int, second, int, third,
1192 void *, ptr, long, fifth)
1194 switch (ARG1 /* call */) {
1196 ML_(generic_PRE_sys_semop)( tid, ARG2, ARG5, ARG3 );
1197 *flags |= SfMayBlock;
// semctl's union argument is passed indirectly through ARG5.
1203 UWord arg = deref_Addr( tid, ARG5, "semctl(arg)" );
1204 ML_(generic_PRE_sys_semctl)( tid, ARG2, ARG3, ARG4, arg );
1207 case VKI_SEMTIMEDOP:
1208 ML_(generic_PRE_sys_semtimedop)( tid, ARG2, ARG5, ARG3, ARG6 );
1209 *flags |= SfMayBlock;
1212 ML_(linux_PRE_sys_msgsnd)( tid, ARG2, ARG5, ARG3, ARG4 );
// msgsnd/msgrcv may block unless IPC_NOWAIT was requested.
1213 if ((ARG4 & VKI_IPC_NOWAIT) == 0)
1214 *flags |= SfMayBlock;
// MSGRCV packs msgp and msgtyp into a struct ipc_kludge at ARG5.
1221 msgp = deref_Addr( tid,
1222 (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
1224 msgtyp = deref_Addr( tid,
1225 (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgtyp),
1228 ML_(linux_PRE_sys_msgrcv)( tid, ARG2, msgp, ARG3, msgtyp, ARG4 );
1230 if ((ARG4 & VKI_IPC_NOWAIT) == 0)
1231 *flags |= SfMayBlock;
1237 ML_(linux_PRE_sys_msgctl)( tid, ARG2, ARG3, ARG5 );
// SHMAT: ARG4 receives the attach address; if the helper cannot find
// a placement it returns 0 and the call fails with EINVAL here.
1242 PRE_MEM_WRITE( "shmat(raddr)", ARG4, sizeof(Addr) );
1243 w = ML_(generic_PRE_sys_shmat)( tid, ARG2, ARG5, ARG3 );
1245 SET_STATUS_Failure( VKI_EINVAL );
1251 if (!ML_(generic_PRE_sys_shmdt)(tid, ARG5))
1252 SET_STATUS_Failure( VKI_EINVAL );
1256 case VKI_SHMCTL: /* IPCOP_shmctl */
1257 ML_(generic_PRE_sys_shmctl)( tid, ARG2, ARG3, ARG5 );
// Unknown sub-call: abort hard rather than guess at semantics.
1260 VG_(message)(Vg_DebugMsg, "FATAL: unhandled syscall(ipc) %ld\n", ARG1 );
1261 VG_(core_panic)("... bye!\n");
1262 break; /*NOTREACHED*/
// POST wrapper for ipc(2): after a successful syscall, forward to the
// per-operation POST helpers so output buffers are marked defined.
1269 switch (ARG1 /* call */) {
// SEMCTL: re-fetch the indirect union argument, then run the *POST*
// helper.  (Bug fix: this previously called the PRE helper again,
// without RES, so semctl output such as IPC_STAT results was never
// marked defined.)
1275 UWord arg = deref_Addr( tid, ARG5, "semctl(arg)" );
1276 ML_(generic_POST_sys_semctl)( tid, RES, ARG2, ARG3, ARG4, arg );
1279 case VKI_SEMTIMEDOP:
// MSGRCV: re-fetch msgp/msgtyp from the ipc_kludge block at ARG5.
1287 msgp = deref_Addr( tid,
1288 (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
1290 msgtyp = deref_Addr( tid,
1291 (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgtyp),
1294 ML_(linux_POST_sys_msgrcv)( tid, RES, ARG2, msgp, ARG3, msgtyp, ARG4 );
1300 ML_(linux_POST_sys_msgctl)( tid, RES, ARG2, ARG3, ARG5 );
1306 /* force readability. before the syscall it is
1307 * indeed uninitialized, as can be seen in
1308 * glibc/sysdeps/unix/sysv/linux/shmat.c */
1309 POST_MEM_WRITE( ARG4, sizeof( Addr ) );
// The attach address the kernel chose was stored at ARG4; read it
// back so the new segment can be registered.
1311 addr = deref_Addr ( tid, ARG4, "shmat(addr)" );
1312 ML_(generic_POST_sys_shmat)( tid, addr, ARG2, ARG5, ARG3 );
1316 ML_(generic_POST_sys_shmdt)( tid, RES, ARG5 );
1321 ML_(generic_POST_sys_shmctl)( tid, RES, ARG2, ARG3, ARG5 );
1324 VG_(message)(Vg_DebugMsg,
1325 "FATAL: unhandled syscall(ipc) %ld\n",
1327 VG_(core_panic)("... bye!\n");
1328 break; /*NOTREACHED*/
// old_mmap wrapper: all six mmap arguments arrive packed in a single
// memory block (struct mmap_arg_struct) pointed to by ARG1.
1334 /* struct mmap_arg_struct {
1338 unsigned long flags;
1340 unsigned long offset;
1342 UWord a1, a2, a3, a4, a5, a6;
1345 UWord* args = (UWord*)ARG1;
1346 PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
1347 PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
1356 PRINT("old_mmap ( %#lx, %llu, %ld, %ld, %ld, %ld )",
1357 a1, (ULong)a2, a3, a4, a5, a6 );
// The offset is in bytes here; contrast mmap2 where it is in 4K units.
1359 r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
1360 SET_STATUS_from_SysRes(r);
// mmap2 wrapper: regs-based mmap with a page-unit offset.
1367 // Exactly like old_mmap() except:
1368 // - all 6 args are passed in regs, rather than in a memory-block.
1369 // - the file offset is specified in pagesize units rather than bytes,
1370 // so that it can be used for files bigger than 2^32 bytes.
1371 // pagesize or 4K-size units in offset? For ppc32/64-linux, this is
1372 // 4K-sized. Assert that the page size is 4K here for safety.
1373 vg_assert(VKI_PAGE_SIZE == 4096);
1374 PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
1375 ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6 );
1376 PRE_REG_READ6(long, "mmap2",
1377 unsigned long, start, unsigned long, length,
1378 unsigned long, prot, unsigned long, flags,
1379 unsigned long, fd, unsigned long, offset);
// Scale the 4K-unit offset to bytes in 64 bits so >4GB files work.
1381 r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
1382 4096 * (Off64T)ARG6 );
1383 SET_STATUS_from_SysRes(r);
// PRE/POST wrappers for the 32-bit stat64 family: check the name
// string (where present) and the struct stat64 output buffer.
1386 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
1387 // applicable to every architecture -- I think only to 32-bit archs.
1388 // We're going to need something like linux/core_os32.h for such
1389 // things, eventually, I think. --njn
1392 PRINT("sys_lstat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
1393 PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
1394 PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
1395 PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
1402 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1408 PRINT("sys_stat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
1409 PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
1410 PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
1411 PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
1416 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
// fstatat64: dfd in ARG1, name in ARG2, buf in ARG3.
1421 PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx )",ARG1,ARG2,(char*)ARG2,ARG3);
1422 PRE_REG_READ3(long, "fstatat64",
1423 int, dfd, char *, file_name, struct stat64 *, buf);
1424 PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
1425 PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
1430 POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
// fstat64 takes an fd, so no name string to check.
1435 PRINT("sys_fstat64 ( %ld, %#lx )",ARG1,ARG2);
1436 PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
1437 PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
1442 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
// PRE wrapper for the multiplexed socketcall(2): the real arguments
// live in a guest array at ARG2; ARG2_n below index into it.  Each
// case first checks the portion of the array that sub-call uses, then
// forwards to the matching generic/linux PRE helper.
1447 # define ARG2_0 (((UWord*)ARG2)[0])
1448 # define ARG2_1 (((UWord*)ARG2)[1])
1449 # define ARG2_2 (((UWord*)ARG2)[2])
1450 # define ARG2_3 (((UWord*)ARG2)[3])
1451 # define ARG2_4 (((UWord*)ARG2)[4])
1452 # define ARG2_5 (((UWord*)ARG2)[5])
// Any socket operation may block.
1454 *flags |= SfMayBlock;
1455 PRINT("sys_socketcall ( %ld, %#lx )",ARG1,ARG2);
1456 PRE_REG_READ2(long, "socketcall", int, call, unsigned long *, args);
1458 switch (ARG1 /* request */) {
1460 case VKI_SYS_SOCKETPAIR:
1461 /* int socketpair(int d, int type, int protocol, int sv[2]); */
1462 PRE_MEM_READ( "socketcall.socketpair(args)", ARG2, 4*sizeof(Addr) );
1463 ML_(generic_PRE_sys_socketpair)( tid, ARG2_0, ARG2_1, ARG2_2, ARG2_3 );
1466 case VKI_SYS_SOCKET:
1467 /* int socket(int domain, int type, int protocol); */
1468 PRE_MEM_READ( "socketcall.socket(args)", ARG2, 3*sizeof(Addr) );
1472 /* int bind(int sockfd, struct sockaddr *my_addr,
1474 PRE_MEM_READ( "socketcall.bind(args)", ARG2, 3*sizeof(Addr) );
1475 ML_(generic_PRE_sys_bind)( tid, ARG2_0, ARG2_1, ARG2_2 );
1478 case VKI_SYS_LISTEN:
1479 /* int listen(int s, int backlog); */
1480 PRE_MEM_READ( "socketcall.listen(args)", ARG2, 2*sizeof(Addr) );
1483 case VKI_SYS_ACCEPT: {
1484 /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1485 PRE_MEM_READ( "socketcall.accept(args)", ARG2, 3*sizeof(Addr) );
1486 ML_(generic_PRE_sys_accept)( tid, ARG2_0, ARG2_1, ARG2_2 );
1490 case VKI_SYS_ACCEPT4: {
1491 /*int accept(int s, struct sockaddr *add, int *addrlen, int flags)*/
1492 PRE_MEM_READ( "socketcall.accept4(args)", ARG2, 4*sizeof(Addr) );
// accept4's extra flags arg needs no address-space checks, so the
// plain accept helper suffices.
1493 ML_(generic_PRE_sys_accept)( tid, ARG2_0, ARG2_1, ARG2_2 );
1497 case VKI_SYS_SENDTO:
1498 /* int sendto(int s, const void *msg, int len,
1500 const struct sockaddr *to, int tolen); */
1501 PRE_MEM_READ( "socketcall.sendto(args)", ARG2, 6*sizeof(Addr) );
1502 ML_(generic_PRE_sys_sendto)( tid, ARG2_0, ARG2_1, ARG2_2,
1503 ARG2_3, ARG2_4, ARG2_5 );
1507 /* int send(int s, const void *msg, size_t len, int flags); */
1508 PRE_MEM_READ( "socketcall.send(args)", ARG2, 4*sizeof(Addr) );
1509 ML_(generic_PRE_sys_send)( tid, ARG2_0, ARG2_1, ARG2_2 );
1512 case VKI_SYS_RECVFROM:
1513 /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1514 struct sockaddr *from, int *fromlen); */
1515 PRE_MEM_READ( "socketcall.recvfrom(args)", ARG2, 6*sizeof(Addr) );
1516 ML_(generic_PRE_sys_recvfrom)( tid, ARG2_0, ARG2_1, ARG2_2,
1517 ARG2_3, ARG2_4, ARG2_5 );
1521 /* int recv(int s, void *buf, int len, unsigned int flags); */
1523 The recv call is normally used only on a connected socket
1524 (see connect(2)) and is identical to recvfrom with a NULL
1527 PRE_MEM_READ( "socketcall.recv(args)", ARG2, 4*sizeof(Addr) );
1528 ML_(generic_PRE_sys_recv)( tid, ARG2_0, ARG2_1, ARG2_2 );
1531 case VKI_SYS_CONNECT:
1532 /* int connect(int sockfd,
1533 struct sockaddr *serv_addr, int addrlen ); */
1534 PRE_MEM_READ( "socketcall.connect(args)", ARG2, 3*sizeof(Addr) );
1535 ML_(generic_PRE_sys_connect)( tid, ARG2_0, ARG2_1, ARG2_2 );
1538 case VKI_SYS_SETSOCKOPT:
1539 /* int setsockopt(int s, int level, int optname,
1540 const void *optval, int optlen); */
1541 PRE_MEM_READ( "socketcall.setsockopt(args)", ARG2, 5*sizeof(Addr) );
1542 ML_(generic_PRE_sys_setsockopt)( tid, ARG2_0, ARG2_1, ARG2_2,
1546 case VKI_SYS_GETSOCKOPT:
1547 /* int getsockopt(int s, int level, int optname,
1548 void *optval, socklen_t *optlen); */
1549 PRE_MEM_READ( "socketcall.getsockopt(args)", ARG2, 5*sizeof(Addr) );
1550 ML_(linux_PRE_sys_getsockopt)( tid, ARG2_0, ARG2_1, ARG2_2,
1554 case VKI_SYS_GETSOCKNAME:
1555 /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1556 PRE_MEM_READ( "socketcall.getsockname(args)", ARG2, 3*sizeof(Addr) );
1557 ML_(generic_PRE_sys_getsockname)( tid, ARG2_0, ARG2_1, ARG2_2 );
1560 case VKI_SYS_GETPEERNAME:
1561 /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1562 PRE_MEM_READ( "socketcall.getpeername(args)", ARG2, 3*sizeof(Addr) );
1563 ML_(generic_PRE_sys_getpeername)( tid, ARG2_0, ARG2_1, ARG2_2 );
1566 case VKI_SYS_SHUTDOWN:
1567 /* int shutdown(int s, int how); */
1568 PRE_MEM_READ( "socketcall.shutdown(args)", ARG2, 2*sizeof(Addr) );
1571 case VKI_SYS_SENDMSG: {
1572 /* int sendmsg(int s, const struct msghdr *msg, int flags); */
1574 /* this causes warnings, and I don't get why. glibc bug?
1575 * (after all it's glibc providing the arguments array)
1576 PRE_MEM_READ( "socketcall.sendmsg(args)", ARG2, 3*sizeof(Addr) );
1578 ML_(generic_PRE_sys_sendmsg)( tid, ARG2_0, ARG2_1 );
1582 case VKI_SYS_RECVMSG: {
1583 /* int recvmsg(int s, struct msghdr *msg, int flags); */
1585 /* this causes warnings, and I don't get why. glibc bug?
1586 * (after all it's glibc providing the arguments array)
1587 PRE_MEM_READ("socketcall.recvmsg(args)", ARG2, 3*sizeof(Addr) );
1589 ML_(generic_PRE_sys_recvmsg)( tid, ARG2_0, ARG2_1 );
// Unknown sub-call: fail with EINVAL rather than panicking (the
// kernel would reject it anyway).
1594 VG_(message)(Vg_DebugMsg,"Warning: unhandled socketcall 0x%lx\n",ARG1);
1595 SET_STATUS_Failure( VKI_EINVAL );
1606 POST(sys_socketcall)
// POST wrapper for socketcall(2): on success, forward to the matching
// POST helper so output buffers (addresses, received data, fds) are
// marked defined.  Some helpers can also rewrite the status (e.g.
// accept/socketpair register new fds and may veto them).
1608 # define ARG2_0 (((UWord*)ARG2)[0])
1609 # define ARG2_1 (((UWord*)ARG2)[1])
1610 # define ARG2_2 (((UWord*)ARG2)[2])
1611 # define ARG2_3 (((UWord*)ARG2)[3])
1612 # define ARG2_4 (((UWord*)ARG2)[4])
1613 # define ARG2_5 (((UWord*)ARG2)[5])
1617 switch (ARG1 /* request */) {
1619 case VKI_SYS_SOCKETPAIR:
1620 r = ML_(generic_POST_sys_socketpair)(
1621 tid, VG_(mk_SysRes_Success)(RES),
1622 ARG2_0, ARG2_1, ARG2_2, ARG2_3
1624 SET_STATUS_from_SysRes(r);
1627 case VKI_SYS_SOCKET:
1628 r = ML_(generic_POST_sys_socket)( tid, VG_(mk_SysRes_Success)(RES) );
1629 SET_STATUS_from_SysRes(r);
// bind/listen/sendto/etc. produce no user-visible output buffers,
// so they need no post-processing.
1633 /* int bind(int sockfd, struct sockaddr *my_addr,
1637 case VKI_SYS_LISTEN:
1638 /* int listen(int s, int backlog); */
1641 case VKI_SYS_ACCEPT:
1642 case VKI_SYS_ACCEPT4:
1643 /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1644 /* int accept4(int s, struct sockaddr *addr, int *addrlen, int flags); */
1645 r = ML_(generic_POST_sys_accept)( tid, VG_(mk_SysRes_Success)(RES),
1646 ARG2_0, ARG2_1, ARG2_2 );
1647 SET_STATUS_from_SysRes(r);
1650 case VKI_SYS_SENDTO:
1656 case VKI_SYS_RECVFROM:
1657 ML_(generic_POST_sys_recvfrom)( tid, VG_(mk_SysRes_Success)(RES),
1658 ARG2_0, ARG2_1, ARG2_2,
1659 ARG2_3, ARG2_4, ARG2_5 );
1663 ML_(generic_POST_sys_recv)( tid, RES, ARG2_0, ARG2_1, ARG2_2 );
1666 case VKI_SYS_CONNECT:
1669 case VKI_SYS_SETSOCKOPT:
1672 case VKI_SYS_GETSOCKOPT:
1673 ML_(linux_POST_sys_getsockopt)( tid, VG_(mk_SysRes_Success)(RES),
1675 ARG2_2, ARG2_3, ARG2_4 );
1678 case VKI_SYS_GETSOCKNAME:
1679 ML_(generic_POST_sys_getsockname)( tid, VG_(mk_SysRes_Success)(RES),
1680 ARG2_0, ARG2_1, ARG2_2 );
1683 case VKI_SYS_GETPEERNAME:
1684 ML_(generic_POST_sys_getpeername)( tid, VG_(mk_SysRes_Success)(RES),
1685 ARG2_0, ARG2_1, ARG2_2 );
1688 case VKI_SYS_SHUTDOWN:
1691 case VKI_SYS_SENDMSG:
1694 case VKI_SYS_RECVMSG:
1695 ML_(generic_POST_sys_recvmsg)( tid, ARG2_0, ARG2_1 );
// An unknown sub-call here means the PRE handler let something
// through that it shouldn't have: fatal.
1699 VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%lx\n",ARG1);
1700 VG_(core_panic)("... bye!\n");
1701 break; /*NOTREACHED*/
// Widen an old-style (single-word, 32-signal) sigset into the RT
// vki_sigset_t: clear the whole set, then install the word as sig[0].
1711 /* Convert from non-RT to RT sigset_t's */
1713 void convert_sigset_to_rt(const vki_old_sigset_t *oldset, vki_sigset_t *set)
1715 VG_(sigemptyset)(set);
1716 set->sig[0] = *oldset;
// PRE wrapper for old-style sigaction(2): translate the guest's
// old_sigaction structs to/from the kernel-format ones and hand the
// whole operation to VG_(do_sys_sigaction) (Valgrind owns signal
// dispatch, so the kernel never sees this call directly).
1720 vki_sigaction_toK_t new, *newp;
1721 vki_sigaction_fromK_t old, *oldp;
1723 PRINT("sys_sigaction ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
1724 PRE_REG_READ3(int, "sigaction",
1725 int, signum, const struct old_sigaction *, act,
1726 struct old_sigaction *, oldact);
// Check each field of *act individually (sa_restorer only matters
// when SA_RESTORER is set).
1731 struct vki_old_sigaction *sa = (struct vki_old_sigaction *)ARG2;
1732 PRE_MEM_READ( "sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler));
1733 PRE_MEM_READ( "sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask));
1734 PRE_MEM_READ( "sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags));
// NOTE(review): sizeof(sa) is the size of the *pointer*, not the
// struct -- looks like it should be sizeof(*sa); confirm upstream.
1735 if (ML_(safe_to_deref)(sa,sizeof(sa))
1736 && (sa->sa_flags & VKI_SA_RESTORER))
1737 PRE_MEM_READ( "sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer));
1741 PRE_MEM_WRITE( "sigaction(oldact)", ARG3, sizeof(struct vki_old_sigaction));
1745 //jrs 20050207: what?! how can this make any sense?
1746 //if (VG_(is_kerror)(SYSRES))
// Build the kernel-format 'new' action from the guest's old-format
// struct, widening the one-word mask to an RT sigset.
1750 struct vki_old_sigaction *oldnew = (struct vki_old_sigaction *)ARG2;
1751 new.ksa_handler = oldnew->ksa_handler;
1752 new.sa_flags = oldnew->sa_flags;
1753 new.sa_restorer = oldnew->sa_restorer;
1754 convert_sigset_to_rt(&oldnew->sa_mask, &new.sa_mask);
1758 SET_STATUS_from_SysRes( VG_(do_sys_sigaction)(ARG1, newp, oldp) );
// On success, copy the previous action back out in old format
// (narrowing the RT mask to its first word).
1760 if (ARG3 != 0 && SUCCESS && RES == 0) {
1761 struct vki_old_sigaction *oldold = (struct vki_old_sigaction *)ARG3;
1762 oldold->ksa_handler = oldp->ksa_handler;
1763 oldold->sa_flags = oldp->sa_flags;
1764 oldold->sa_restorer = oldp->sa_restorer;
1765 oldold->sa_mask = oldp->sa_mask.sig[0];
// POST: the old action was written to *oldact by the PRE handler;
// mark it defined on success.
1772 if (RES == 0 && ARG3 != 0)
1773 POST_MEM_WRITE( ARG3, sizeof(struct vki_old_sigaction));
// PRE wrapper for old-style sigsuspend(2); the mask is passed by
// value in ARG3, so there is no guest memory to check.
1778 /* The C library interface to sigsuspend just takes a pointer to
1779 a signal mask but this system call has three arguments - the first
1780 two don't appear to be used by the kernel and are always passed as
1781 zero by glibc and the third is the first word of the signal mask
1782 so only 32 signals are supported.
1784 In fact glibc normally uses rt_sigsuspend if it is available as
1785 that takes a pointer to the signal mask so supports more signals.
// sigsuspend blocks until a signal arrives.
1787 *flags |= SfMayBlock;
1788 PRINT("sys_sigsuspend ( %ld, %ld, %ld )", ARG1,ARG2,ARG3 );
1789 PRE_REG_READ3(int, "sigsuspend",
1790 int, history0, int, history1,
1791 vki_old_sigset_t, mask);
// vm86old(2): info is both read and written by the kernel, so it is
// checked writable before and marked defined after.
1796 PRINT("sys_vm86old ( %#lx )", ARG1);
1797 PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
1798 PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
1803 POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
// vm86(2): v86 is only touched for the ENTER sub-functions.
1808 PRINT("sys_vm86 ( %ld, %#lx )", ARG1,ARG2);
1809 PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
1810 if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
1811 PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
1816 if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
1817 POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
1821 /* ---------------------------------------------------------------
1822 PRE/POST wrappers for x86/Linux-variant specific syscalls
1823 ------------------------------------------------------------ */
// Syscall number 223 is unused in mainline kernels but used by the
// bproc kernel variant; only honour it when --kernel-variant=bproc.
1829 /* 223 is used by sys_bproc. If we're not on a declared bproc
1830 variant, fail in the usual way. */
1832 if (!VG_(strstr)(VG_(clo_kernel_variant), "bproc")) {
1833 PRINT("non-existent syscall! (syscall 223)");
1834 PRE_REG_READ0(long, "ni_syscall(223)");
1835 SET_STATUS_Failure( VKI_ENOSYS );
// A non-null error from the variant PRE handler fails the call.
1839 err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
1842 SET_STATUS_Failure( err );
1845 /* Let it go through. */
1846 *flags |= SfMayBlock; /* who knows? play safe. */
1849 POST(sys_syscall223)
1851 ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
1859 /* ---------------------------------------------------------------------
1860 The x86/Linux syscall table
1861 ------------------------------------------------------------------ */
1863 /* Add an x86-linux specific wrapper to a syscall table. */
1864 #define PLAX_(sysno, name) WRAPPER_ENTRY_X_(x86_linux, sysno, name)
1865 #define PLAXY(sysno, name) WRAPPER_ENTRY_XY(x86_linux, sysno, name)
1868 // This table maps from __NR_xxx syscall numbers (from
1869 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
1870 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
1872 // For those syscalls not handled by Valgrind, the annotation indicate its
1873 // arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
1876 static SyscallTableEntry syscall_table[] = {
1877 //zz // (restart_syscall) // 0
1878 GENX_(__NR_exit, sys_exit), // 1
1879 GENX_(__NR_fork, sys_fork), // 2
1880 GENXY(__NR_read, sys_read), // 3
1881 GENX_(__NR_write, sys_write), // 4
1883 GENXY(__NR_open, sys_open), // 5
1884 GENXY(__NR_close, sys_close), // 6
1885 GENXY(__NR_waitpid, sys_waitpid), // 7
1886 GENXY(__NR_creat, sys_creat), // 8
1887 GENX_(__NR_link, sys_link), // 9
1889 GENX_(__NR_unlink, sys_unlink), // 10
1890 GENX_(__NR_execve, sys_execve), // 11
1891 GENX_(__NR_chdir, sys_chdir), // 12
1892 GENXY(__NR_time, sys_time), // 13
1893 GENX_(__NR_mknod, sys_mknod), // 14
1895 GENX_(__NR_chmod, sys_chmod), // 15
1896 //zz LINX_(__NR_lchown, sys_lchown16), // 16
1897 GENX_(__NR_break, sys_ni_syscall), // 17
1898 //zz // (__NR_oldstat, sys_stat), // 18 (obsolete)
1899 LINX_(__NR_lseek, sys_lseek), // 19
1901 GENX_(__NR_getpid, sys_getpid), // 20
1902 LINX_(__NR_mount, sys_mount), // 21
1903 LINX_(__NR_umount, sys_oldumount), // 22
1904 LINX_(__NR_setuid, sys_setuid16), // 23 ## P
1905 LINX_(__NR_getuid, sys_getuid16), // 24 ## P
1907 LINX_(__NR_stime, sys_stime), // 25 * (SVr4,SVID,X/OPEN)
1908 PLAXY(__NR_ptrace, sys_ptrace), // 26
1909 GENX_(__NR_alarm, sys_alarm), // 27
1910 //zz // (__NR_oldfstat, sys_fstat), // 28 * L -- obsolete
1911 GENX_(__NR_pause, sys_pause), // 29
1913 LINX_(__NR_utime, sys_utime), // 30
1914 GENX_(__NR_stty, sys_ni_syscall), // 31
1915 GENX_(__NR_gtty, sys_ni_syscall), // 32
1916 GENX_(__NR_access, sys_access), // 33
1917 GENX_(__NR_nice, sys_nice), // 34
1919 GENX_(__NR_ftime, sys_ni_syscall), // 35
1920 GENX_(__NR_sync, sys_sync), // 36
1921 GENX_(__NR_kill, sys_kill), // 37
1922 GENX_(__NR_rename, sys_rename), // 38
1923 GENX_(__NR_mkdir, sys_mkdir), // 39
1925 GENX_(__NR_rmdir, sys_rmdir), // 40
1926 GENXY(__NR_dup, sys_dup), // 41
1927 LINXY(__NR_pipe, sys_pipe), // 42
1928 GENXY(__NR_times, sys_times), // 43
1929 GENX_(__NR_prof, sys_ni_syscall), // 44
1931 GENX_(__NR_brk, sys_brk), // 45
1932 LINX_(__NR_setgid, sys_setgid16), // 46
1933 LINX_(__NR_getgid, sys_getgid16), // 47
1934 //zz // (__NR_signal, sys_signal), // 48 */* (ANSI C)
1935 LINX_(__NR_geteuid, sys_geteuid16), // 49
1937 LINX_(__NR_getegid, sys_getegid16), // 50
1938 GENX_(__NR_acct, sys_acct), // 51
1939 LINX_(__NR_umount2, sys_umount), // 52
1940 GENX_(__NR_lock, sys_ni_syscall), // 53
1941 LINXY(__NR_ioctl, sys_ioctl), // 54
1943 LINXY(__NR_fcntl, sys_fcntl), // 55
1944 GENX_(__NR_mpx, sys_ni_syscall), // 56
1945 GENX_(__NR_setpgid, sys_setpgid), // 57
1946 GENX_(__NR_ulimit, sys_ni_syscall), // 58
1947 //zz // (__NR_oldolduname, sys_olduname), // 59 Linux -- obsolete
1949 GENX_(__NR_umask, sys_umask), // 60
1950 GENX_(__NR_chroot, sys_chroot), // 61
1951 //zz // (__NR_ustat, sys_ustat) // 62 SVr4 -- deprecated
1952 GENXY(__NR_dup2, sys_dup2), // 63
1953 GENX_(__NR_getppid, sys_getppid), // 64
1955 GENX_(__NR_getpgrp, sys_getpgrp), // 65
1956 GENX_(__NR_setsid, sys_setsid), // 66
1957 PLAXY(__NR_sigaction, sys_sigaction), // 67
1958 //zz // (__NR_sgetmask, sys_sgetmask), // 68 */* (ANSI C)
1959 //zz // (__NR_ssetmask, sys_ssetmask), // 69 */* (ANSI C)
1961 LINX_(__NR_setreuid, sys_setreuid16), // 70
1962 LINX_(__NR_setregid, sys_setregid16), // 71
1963 PLAX_(__NR_sigsuspend, sys_sigsuspend), // 72
1964 LINXY(__NR_sigpending, sys_sigpending), // 73
1965 //zz // (__NR_sethostname, sys_sethostname), // 74 */*
1967 GENX_(__NR_setrlimit, sys_setrlimit), // 75
1968 GENXY(__NR_getrlimit, sys_old_getrlimit), // 76
1969 GENXY(__NR_getrusage, sys_getrusage), // 77
1970 GENXY(__NR_gettimeofday, sys_gettimeofday), // 78
1971 GENX_(__NR_settimeofday, sys_settimeofday), // 79
1973 LINXY(__NR_getgroups, sys_getgroups16), // 80
1974 LINX_(__NR_setgroups, sys_setgroups16), // 81
1975 PLAX_(__NR_select, old_select), // 82
1976 GENX_(__NR_symlink, sys_symlink), // 83
1977 //zz // (__NR_oldlstat, sys_lstat), // 84 -- obsolete
1979 GENX_(__NR_readlink, sys_readlink), // 85
1980 //zz // (__NR_uselib, sys_uselib), // 86 */Linux
1981 //zz // (__NR_swapon, sys_swapon), // 87 */Linux
1982 //zz // (__NR_reboot, sys_reboot), // 88 */Linux
1983 //zz // (__NR_readdir, old_readdir), // 89 -- superseded
1985 PLAX_(__NR_mmap, old_mmap), // 90
1986 GENXY(__NR_munmap, sys_munmap), // 91
1987 GENX_(__NR_truncate, sys_truncate), // 92
1988 GENX_(__NR_ftruncate, sys_ftruncate), // 93
1989 GENX_(__NR_fchmod, sys_fchmod), // 94
1991 LINX_(__NR_fchown, sys_fchown16), // 95
1992 GENX_(__NR_getpriority, sys_getpriority), // 96
1993 GENX_(__NR_setpriority, sys_setpriority), // 97
1994 GENX_(__NR_profil, sys_ni_syscall), // 98
1995 GENXY(__NR_statfs, sys_statfs), // 99
1997 GENXY(__NR_fstatfs, sys_fstatfs), // 100
1998 LINX_(__NR_ioperm, sys_ioperm), // 101
1999 PLAXY(__NR_socketcall, sys_socketcall), // 102 x86/Linux-only
2000 LINXY(__NR_syslog, sys_syslog), // 103
2001 GENXY(__NR_setitimer, sys_setitimer), // 104
2003 GENXY(__NR_getitimer, sys_getitimer), // 105
2004 GENXY(__NR_stat, sys_newstat), // 106
2005 GENXY(__NR_lstat, sys_newlstat), // 107
2006 GENXY(__NR_fstat, sys_newfstat), // 108
2007 //zz // (__NR_olduname, sys_uname), // 109 -- obsolete
2009 GENX_(__NR_iopl, sys_iopl), // 110
2010 LINX_(__NR_vhangup, sys_vhangup), // 111
2011 GENX_(__NR_idle, sys_ni_syscall), // 112
2012 PLAXY(__NR_vm86old, sys_vm86old), // 113 x86/Linux-only
2013 GENXY(__NR_wait4, sys_wait4), // 114
2015 //zz // (__NR_swapoff, sys_swapoff), // 115 */Linux
2016 LINXY(__NR_sysinfo, sys_sysinfo), // 116
2017 PLAXY(__NR_ipc, sys_ipc), // 117
2018 GENX_(__NR_fsync, sys_fsync), // 118
2019 PLAX_(__NR_sigreturn, sys_sigreturn), // 119 ?/Linux
2021 PLAX_(__NR_clone, sys_clone), // 120
2022 //zz // (__NR_setdomainname, sys_setdomainname), // 121 */*(?)
2023 GENXY(__NR_uname, sys_newuname), // 122
2024 PLAX_(__NR_modify_ldt, sys_modify_ldt), // 123
2025 LINXY(__NR_adjtimex, sys_adjtimex), // 124
2027 GENXY(__NR_mprotect, sys_mprotect), // 125
2028 LINXY(__NR_sigprocmask, sys_sigprocmask), // 126
2029 //zz // Nb: create_module() was removed 2.4-->2.6
2030 GENX_(__NR_create_module, sys_ni_syscall), // 127
2031 LINX_(__NR_init_module, sys_init_module), // 128
2032 LINX_(__NR_delete_module, sys_delete_module), // 129
2034 //zz // Nb: get_kernel_syms() was removed 2.4-->2.6
2035 GENX_(__NR_get_kernel_syms, sys_ni_syscall), // 130
2036 LINX_(__NR_quotactl, sys_quotactl), // 131
2037 GENX_(__NR_getpgid, sys_getpgid), // 132
2038 GENX_(__NR_fchdir, sys_fchdir), // 133
2039 //zz // (__NR_bdflush, sys_bdflush), // 134 */Linux
2041 //zz // (__NR_sysfs, sys_sysfs), // 135 SVr4
2042 LINX_(__NR_personality, sys_personality), // 136
2043 GENX_(__NR_afs_syscall, sys_ni_syscall), // 137
2044 LINX_(__NR_setfsuid, sys_setfsuid16), // 138
2045 LINX_(__NR_setfsgid, sys_setfsgid16), // 139
2047 LINXY(__NR__llseek, sys_llseek), // 140
2048 GENXY(__NR_getdents, sys_getdents), // 141
2049 GENX_(__NR__newselect, sys_select), // 142
2050 GENX_(__NR_flock, sys_flock), // 143
2051 GENX_(__NR_msync, sys_msync), // 144
2053 GENXY(__NR_readv, sys_readv), // 145
2054 GENX_(__NR_writev, sys_writev), // 146
2055 GENX_(__NR_getsid, sys_getsid), // 147
2056 GENX_(__NR_fdatasync, sys_fdatasync), // 148
2057 LINXY(__NR__sysctl, sys_sysctl), // 149
2059 GENX_(__NR_mlock, sys_mlock), // 150
2060 GENX_(__NR_munlock, sys_munlock), // 151
2061 GENX_(__NR_mlockall, sys_mlockall), // 152
2062 LINX_(__NR_munlockall, sys_munlockall), // 153
2063 LINXY(__NR_sched_setparam, sys_sched_setparam), // 154
2065 LINXY(__NR_sched_getparam, sys_sched_getparam), // 155
2066 LINX_(__NR_sched_setscheduler, sys_sched_setscheduler), // 156
2067 LINX_(__NR_sched_getscheduler, sys_sched_getscheduler), // 157
2068 LINX_(__NR_sched_yield, sys_sched_yield), // 158
2069 LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
2071 LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
2072 LINXY(__NR_sched_rr_get_interval, sys_sched_rr_get_interval), // 161
2073 GENXY(__NR_nanosleep, sys_nanosleep), // 162
2074 GENX_(__NR_mremap, sys_mremap), // 163
2075 LINX_(__NR_setresuid, sys_setresuid16), // 164
2077 LINXY(__NR_getresuid, sys_getresuid16), // 165
2078 PLAXY(__NR_vm86, sys_vm86), // 166 x86/Linux-only
2079 GENX_(__NR_query_module, sys_ni_syscall), // 167
2080 GENXY(__NR_poll, sys_poll), // 168
2081 //zz // (__NR_nfsservctl, sys_nfsservctl), // 169 */Linux
2083 LINX_(__NR_setresgid, sys_setresgid16), // 170
2084 LINXY(__NR_getresgid, sys_getresgid16), // 171
2085 LINXY(__NR_prctl, sys_prctl), // 172
2086 PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn), // 173 x86/Linux only?
2087 LINXY(__NR_rt_sigaction, sys_rt_sigaction), // 174
2089 LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask), // 175
2090 LINXY(__NR_rt_sigpending, sys_rt_sigpending), // 176
2091 LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait),// 177
2092 LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo),// 178
2093 LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 179
2095 GENXY(__NR_pread64, sys_pread64), // 180
2096 GENX_(__NR_pwrite64, sys_pwrite64), // 181
2097 LINX_(__NR_chown, sys_chown16), // 182
2098 GENXY(__NR_getcwd, sys_getcwd), // 183
2099 LINXY(__NR_capget, sys_capget), // 184
2101 LINX_(__NR_capset, sys_capset), // 185
2102 GENXY(__NR_sigaltstack, sys_sigaltstack), // 186
2103 LINXY(__NR_sendfile, sys_sendfile), // 187
2104 GENXY(__NR_getpmsg, sys_getpmsg), // 188
2105 GENX_(__NR_putpmsg, sys_putpmsg), // 189
2107 // Nb: we treat vfork as fork
2108 GENX_(__NR_vfork, sys_fork), // 190
2109 GENXY(__NR_ugetrlimit, sys_getrlimit), // 191
2110 PLAX_(__NR_mmap2, sys_mmap2), // 192
2111 GENX_(__NR_truncate64, sys_truncate64), // 193
2112 GENX_(__NR_ftruncate64, sys_ftruncate64), // 194
2114 PLAXY(__NR_stat64, sys_stat64), // 195
2115 PLAXY(__NR_lstat64, sys_lstat64), // 196
2116 PLAXY(__NR_fstat64, sys_fstat64), // 197
2117 GENX_(__NR_lchown32, sys_lchown), // 198
2118 GENX_(__NR_getuid32, sys_getuid), // 199
2120 GENX_(__NR_getgid32, sys_getgid), // 200
2121 GENX_(__NR_geteuid32, sys_geteuid), // 201
2122 GENX_(__NR_getegid32, sys_getegid), // 202
2123 GENX_(__NR_setreuid32, sys_setreuid), // 203
2124 GENX_(__NR_setregid32, sys_setregid), // 204
2126 GENXY(__NR_getgroups32, sys_getgroups), // 205
2127 GENX_(__NR_setgroups32, sys_setgroups), // 206
2128 GENX_(__NR_fchown32, sys_fchown), // 207
2129 LINX_(__NR_setresuid32, sys_setresuid), // 208
2130 LINXY(__NR_getresuid32, sys_getresuid), // 209
2132 LINX_(__NR_setresgid32, sys_setresgid), // 210
2133 LINXY(__NR_getresgid32, sys_getresgid), // 211
2134 GENX_(__NR_chown32, sys_chown), // 212
2135 GENX_(__NR_setuid32, sys_setuid), // 213
2136 GENX_(__NR_setgid32, sys_setgid), // 214
2138 LINX_(__NR_setfsuid32, sys_setfsuid), // 215
2139 LINX_(__NR_setfsgid32, sys_setfsgid), // 216
2140 //zz // (__NR_pivot_root, sys_pivot_root), // 217 */Linux
2141 GENXY(__NR_mincore, sys_mincore), // 218
2142 GENX_(__NR_madvise, sys_madvise), // 219
2144 GENXY(__NR_getdents64, sys_getdents64), // 220
2145 LINXY(__NR_fcntl64, sys_fcntl64), // 221
2146 GENX_(222, sys_ni_syscall), // 222
2147 PLAXY(223, sys_syscall223), // 223 // sys_bproc?
2148 LINX_(__NR_gettid, sys_gettid), // 224
2150 LINX_(__NR_readahead, sys_readahead), // 225 */Linux
2151 LINX_(__NR_setxattr, sys_setxattr), // 226
2152 LINX_(__NR_lsetxattr, sys_lsetxattr), // 227
2153 LINX_(__NR_fsetxattr, sys_fsetxattr), // 228
2154 LINXY(__NR_getxattr, sys_getxattr), // 229
2156 LINXY(__NR_lgetxattr, sys_lgetxattr), // 230
2157 LINXY(__NR_fgetxattr, sys_fgetxattr), // 231
2158 LINXY(__NR_listxattr, sys_listxattr), // 232
2159 LINXY(__NR_llistxattr, sys_llistxattr), // 233
2160 LINXY(__NR_flistxattr, sys_flistxattr), // 234
2162 LINX_(__NR_removexattr, sys_removexattr), // 235
2163 LINX_(__NR_lremovexattr, sys_lremovexattr), // 236
2164 LINX_(__NR_fremovexattr, sys_fremovexattr), // 237
2165 LINXY(__NR_tkill, sys_tkill), // 238 */Linux
2166 LINXY(__NR_sendfile64, sys_sendfile64), // 239
2168 LINXY(__NR_futex, sys_futex), // 240
2169 LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
2170 LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
2171 PLAX_(__NR_set_thread_area, sys_set_thread_area), // 243
2172 PLAX_(__NR_get_thread_area, sys_get_thread_area), // 244
2174 LINXY(__NR_io_setup, sys_io_setup), // 245
2175 LINX_(__NR_io_destroy, sys_io_destroy), // 246
2176 LINXY(__NR_io_getevents, sys_io_getevents), // 247
2177 LINX_(__NR_io_submit, sys_io_submit), // 248
2178 LINXY(__NR_io_cancel, sys_io_cancel), // 249
2180 LINX_(__NR_fadvise64, sys_fadvise64), // 250 */(Linux?)
2181 GENX_(251, sys_ni_syscall), // 251
2182 LINX_(__NR_exit_group, sys_exit_group), // 252
2183 LINXY(__NR_lookup_dcookie, sys_lookup_dcookie), // 253
2184 LINXY(__NR_epoll_create, sys_epoll_create), // 254
2186 LINX_(__NR_epoll_ctl, sys_epoll_ctl), // 255
2187 LINXY(__NR_epoll_wait, sys_epoll_wait), // 256
2188 //zz // (__NR_remap_file_pages, sys_remap_file_pages), // 257 */Linux
2189 LINX_(__NR_set_tid_address, sys_set_tid_address), // 258
2190 LINXY(__NR_timer_create, sys_timer_create), // 259
2192 LINXY(__NR_timer_settime, sys_timer_settime), // (timer_create+1)
2193 LINXY(__NR_timer_gettime, sys_timer_gettime), // (timer_create+2)
2194 LINX_(__NR_timer_getoverrun, sys_timer_getoverrun),//(timer_create+3)
2195 LINX_(__NR_timer_delete, sys_timer_delete), // (timer_create+4)
2196 LINX_(__NR_clock_settime, sys_clock_settime), // (timer_create+5)
2198 LINXY(__NR_clock_gettime, sys_clock_gettime), // (timer_create+6)
2199 LINXY(__NR_clock_getres, sys_clock_getres), // (timer_create+7)
2200 LINXY(__NR_clock_nanosleep, sys_clock_nanosleep),// (timer_create+8) */*
2201 GENXY(__NR_statfs64, sys_statfs64), // 268
2202 GENXY(__NR_fstatfs64, sys_fstatfs64), // 269
2204 LINX_(__NR_tgkill, sys_tgkill), // 270 */Linux
2205 GENX_(__NR_utimes, sys_utimes), // 271
2206 LINX_(__NR_fadvise64_64, sys_fadvise64_64), // 272 */(Linux?)
2207 GENX_(__NR_vserver, sys_ni_syscall), // 273
2208 LINX_(__NR_mbind, sys_mbind), // 274 ?/?
2210 LINXY(__NR_get_mempolicy, sys_get_mempolicy), // 275 ?/?
2211 LINX_(__NR_set_mempolicy, sys_set_mempolicy), // 276 ?/?
2212 LINXY(__NR_mq_open, sys_mq_open), // 277
2213 LINX_(__NR_mq_unlink, sys_mq_unlink), // (mq_open+1)
2214 LINX_(__NR_mq_timedsend, sys_mq_timedsend), // (mq_open+2)
2216 LINXY(__NR_mq_timedreceive, sys_mq_timedreceive),// (mq_open+3)
2217 LINX_(__NR_mq_notify, sys_mq_notify), // (mq_open+4)
2218 LINXY(__NR_mq_getsetattr, sys_mq_getsetattr), // (mq_open+5)
2219 GENX_(__NR_sys_kexec_load, sys_ni_syscall), // 283
2220 LINXY(__NR_waitid, sys_waitid), // 284
2222 GENX_(285, sys_ni_syscall), // 285
2223 LINX_(__NR_add_key, sys_add_key), // 286
2224 LINX_(__NR_request_key, sys_request_key), // 287
2225 LINXY(__NR_keyctl, sys_keyctl), // 288
2226 LINX_(__NR_ioprio_set, sys_ioprio_set), // 289
2228 LINX_(__NR_ioprio_get, sys_ioprio_get), // 290
2229 LINX_(__NR_inotify_init, sys_inotify_init), // 291
2230 LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
2231 LINX_(__NR_inotify_rm_watch, sys_inotify_rm_watch), // 293
2232 // LINX_(__NR_migrate_pages, sys_migrate_pages), // 294
2234 LINXY(__NR_openat, sys_openat), // 295
2235 LINX_(__NR_mkdirat, sys_mkdirat), // 296
2236 LINX_(__NR_mknodat, sys_mknodat), // 297
2237 LINX_(__NR_fchownat, sys_fchownat), // 298
2238 LINX_(__NR_futimesat, sys_futimesat), // 299
2240 PLAXY(__NR_fstatat64, sys_fstatat64), // 300
2241 LINX_(__NR_unlinkat, sys_unlinkat), // 301
2242 LINX_(__NR_renameat, sys_renameat), // 302
2243 LINX_(__NR_linkat, sys_linkat), // 303
2244 LINX_(__NR_symlinkat, sys_symlinkat), // 304
2246 LINX_(__NR_readlinkat, sys_readlinkat), // 305
2247 LINX_(__NR_fchmodat, sys_fchmodat), // 306
2248 LINX_(__NR_faccessat, sys_faccessat), // 307
2249 LINX_(__NR_pselect6, sys_pselect6), // 308
2250 LINXY(__NR_ppoll, sys_ppoll), // 309
2252 // LINX_(__NR_unshare, sys_unshare), // 310
2253 LINX_(__NR_set_robust_list, sys_set_robust_list), // 311
2254 LINXY(__NR_get_robust_list, sys_get_robust_list), // 312
2255 LINX_(__NR_splice, sys_splice), // 313
2256 LINX_(__NR_sync_file_range, sys_sync_file_range), // 314
2258 // LINX_(__NR_tee, sys_ni_syscall), // 315
2259 // LINX_(__NR_vmsplice, sys_ni_syscall), // 316
2260 // LINX_(__NR_move_pages, sys_ni_syscall), // 317
2261 LINXY(__NR_getcpu, sys_getcpu), // 318
2262 LINXY(__NR_epoll_pwait, sys_epoll_pwait), // 319
2264 LINX_(__NR_utimensat, sys_utimensat), // 320
2265 LINXY(__NR_signalfd, sys_signalfd), // 321
2266 LINXY(__NR_timerfd_create, sys_timerfd_create), // 322
2267 LINX_(__NR_eventfd, sys_eventfd), // 323
2268 LINX_(__NR_fallocate, sys_fallocate), // 324
2270 LINXY(__NR_timerfd_settime, sys_timerfd_settime), // 325
2271 LINXY(__NR_timerfd_gettime, sys_timerfd_gettime), // 326
2272 LINXY(__NR_signalfd4, sys_signalfd4), // 327
2273 LINX_(__NR_eventfd2, sys_eventfd2), // 328
2274 LINXY(__NR_epoll_create1, sys_epoll_create1), // 329
2276 LINXY(__NR_dup3, sys_dup3), // 330
2277 LINXY(__NR_pipe2, sys_pipe2), // 331
2278 LINXY(__NR_inotify_init1, sys_inotify_init1), // 332
2279 LINXY(__NR_preadv, sys_preadv), // 333
2280 LINX_(__NR_pwritev, sys_pwritev), // 334
2282 LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
2283 LINXY(__NR_perf_counter_open, sys_perf_counter_open) // 336
2286 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
2288 const UInt syscall_table_size
2289 = sizeof(syscall_table) / sizeof(syscall_table[0]);
2291 /* Is it in the contiguous initial section of the table? */
2292 if (sysno < syscall_table_size) {
2293 SyscallTableEntry* sys = &syscall_table[sysno];
2294 if (sys->before == NULL)
2295 return NULL; /* no entry */
2300 /* Can't find a wrapper */
2304 #endif // defined(VGP_x86_linux)
2306 /*--------------------------------------------------------------------*/
2308 /*--------------------------------------------------------------------*/