1 /*--------------------------------------------------------------------*/
2 /*--- Platform-specific syscalls stuff. syswrap-x86-l4re.c ---*/
3 /*--------------------------------------------------------------------*/
6 This file is part of Valgrind, a dynamic binary instrumentation
9 Copyright (C) 2000-2008 Nicholas Nethercote
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 The GNU General Public License is contained in the file COPYING.
30 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
31 in interrupted_syscall() need to be reviewed. They don't seem
32 to assign the shadow state.
35 #include "pub_core_basics.h"
36 #include "pub_core_vki.h"
37 #include "pub_core_vkiscnums.h"
38 #include "pub_core_libcsetjmp.h"
39 #include "pub_core_threadstate.h"
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_debuglog.h"
42 #include "pub_core_libcbase.h"
43 #include "pub_core_libcassert.h"
44 #include "pub_core_libcprint.h"
45 #include "pub_core_libcproc.h"
46 #include "pub_core_libcsignal.h"
47 #include "pub_core_mallocfree.h"
48 #include "pub_core_options.h"
49 #include "pub_core_scheduler.h"
50 #include "pub_core_sigframe.h" // For VG_(sigframe_destroy)()
51 #include "pub_core_signals.h"
52 #include "pub_core_syscall.h"
53 #include "pub_core_syswrap.h"
54 #include "pub_core_tooliface.h"
55 #include "pub_core_stacks.h" // VG_(register_stack)
57 #include "priv_types_n_macros.h"
58 #include "priv_syswrap-generic.h" /* for decls of generic wrappers */
59 #include "priv_syswrap-linux.h" /* for decls of linux-ish wrappers */
60 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
61 #include "priv_syswrap-main.h"
64 #include <l4/sys/types.h>
65 #include <l4/sys/thread.h>
66 #include <l4/sys/utcb.h>
67 #include <l4/sys/ipc.h>
68 #include <l4/util/util.h>
69 #include <l4/sys/consts.h>
70 #include <l4/sys/debugger.h>
73 /* ---------------------------------------------------------------------
75 ------------------------------------------------------------------ */
77 /* Call f(arg1), but first switch stacks, using 'stack' as the new
78 stack, and use 'retaddr' as f's return-to address. Also, clear all
79 the integer registers before entering f.*/
80 __attribute__((noreturn))
81 void ML_(call_on_new_stack_0_1) ( Addr stack,
91 ".globl vgModuleLocal_call_on_new_stack_0_1\n"
92 "vgModuleLocal_call_on_new_stack_0_1:\n"
93 " movl %esp, %esi\n" // remember old stack pointer
94 " movl 4(%esi), %esp\n" // set stack
95 " pushl 16(%esi)\n" // arg1 to stack
96 " pushl 8(%esi)\n" // retaddr to stack
97 " pushl 12(%esi)\n" // f to stack
98 " movl $0, %eax\n" // zero all GP regs
105 " ret\n" // jump to f
106 " ud2\n" // should never get here
110 // forward declarations
111 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool, Word );
112 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
115 When a client creates a new thread, we need to keep track of the new thread. This means:
116 1. allocate a ThreadId+ThreadState+stack for the thread
118 2. initialize the thread's new VCPU state
119 setting the initial instruction pointer to the right one
121 3. create the thread using the same args as the client requested,
122 but using the scheduler entrypoint for EIP, and a separate stack
125 static SysRes do_create_new_thread ( ThreadId ptid,
130 static const Bool debug = False;
132 ThreadId ctid = VG_(alloc_ThreadState)();
133 ThreadState* ptst = VG_(get_ThreadState)(ptid);
134 ThreadState* ctst = VG_(get_ThreadState)(ctid);
141 char *thread_name = VG_(malloc)("", 15); /* VG::threadXYZ */
142 vg_assert(VG_(is_running_thread)(ptid));
143 vg_assert(VG_(is_valid_tid)(ctid));
145 stack = (UWord*)ML_(allocstack)(ctid);
147 res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
151 /* Copy register state
153 Both parent and child return to the same place, and the code
154 following the clone syscall works out which is which, so we
155 don't need to worry about it.
157 The parent gets the child's new tid returned from clone, but the
160 If the clone call specifies a NULL esp for the new thread, then
161 it actually gets a copy of the parent's esp.
163 /* Note: the clone call done by the Quadrics Elan3 driver specifies
164 clone flags of 0xF00, and it seems to rely on the assumption
165 that the child inherits a copy of the parent's GDT.
166 setup_child takes care of setting that up. */
167 setup_child( &ctst->arch, &ptst->arch, True, client_ip );
169 /* Make sys_clone appear to have returned Success(0) in the
172 // TODO ctst->arch.vex.guest_EAX = 0;
175 ctst->arch.vex.guest_ESP = esp;
177 ctst->os_state.parent = ptid;
179 /* We don't really know where the client stack is, because it's
180 allocated by the client. The best we can do is look at the
181 memory mappings and try to derive some useful information. We
182 assume that esp starts near its highest possible value, and can
183 only go down to the start of the mmaped segment. */
184 seg = VG_(am_find_nsegment)((Addr)esp);
185 if (seg && seg->kind != SkResvn) {
186 ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(esp);
187 ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;
189 VG_(register_stack)(seg->start, ctst->client_stack_highest_word);
192 VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
193 ctid, seg->start, VG_PGROUNDUP(esp));
195 VG_(message)(Vg_UserMsg, "!? New thread %d starts with ESP(%#lx) unmapped\n",
197 ctst->client_stack_szB = 0;
200 /* Assume the clone will succeed, and tell any tool that wants to
201 know that this thread has come into existence. We cannot defer
202 it beyond this point because sys_set_thread_area, just below,
203 causes tCheck to assert by making references to the new ThreadId
204 if we don't state the new thread exists prior to that point.
205 If the clone fails, we'll send out a ll_exit notification for it
206 at the out: label below, to clean up. */
207 VG_TRACK ( pre_thread_ll_create, ptid, ctid );
209 /* Create the new thread */
210 /* eax = do_syscall_clone_x86_linux(
211 ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
212 child_tidptr, parent_tidptr, NULL
214 /* l4_thread_ex_regs_u( ptst->arch.vex.guest_EDX, // cap
215 ML_(start_thread_NORETURN), // ip
220 /* So that Valgrind doesn't lose control over the client thread,
221 * a special function is called in Valgrind's context. */
222 vg_eip = (l4_umword_t) &ML_(start_thread_NORETURN);
224 *stack = (UWord) &VG_(threads)[ctid];
227 vg_stack = (l4_umword_t) stack;
229 VG_(debugLog)(0, "syswrap", "creating new thread\n"
230 "\t\t\t (vg) ip = 0x%08lx\n"
231 "\t\t\t (vg) stack = 0x%08x\n"
232 "\t\t\t (cl) ip = 0x%08x\n"
233 "\t\t\t (cl) stack = 0x%08x\n"
234 "\t\t\t flags = 0x%08x\n",
237 ctst->arch.vex.guest_EIP,
238 ctst->arch.vex.guest_ESP,
241 l4_thread_ex_regs_ret_u( ptst->arch.vex.guest_EDX, /* capability */
242 &vg_eip, /* instruction pointer (vg context) */
243 &vg_stack, /* stack pointer (vg context) */
244 (l4_umword_t *) &flags, /* flags */
247 /* Every thread in Valgrind gets a name - a nice feature
248 * of the fiasco micro kernel, good for debugging */
250 if (VG_(snprintf)(thread_name, 15, "VG::thread%d", ctid) > 0)
251 l4_debugger_set_object_name(ptst->arch.vex.guest_EDX, thread_name);
254 res = VG_(mk_SysRes_x86_l4re)( eax );
256 if (sr_isError(res)) {
258 VG_(cleanup_thread)(&ctst->arch);
259 ctst->status = VgTs_Empty;
260 /* oops. Better tell the tool the thread exited in a hurry :-) */
261 VG_TRACK( pre_thread_ll_exit, ctid );
267 /* ---------------------------------------------------------------------
269 ------------------------------------------------------------------ */
271 void VG_(cleanup_thread) ( ThreadArchState* arch )
273 /* Release arch-specific resources held by this thread. */
277 static void setup_child ( /*OUT*/ ThreadArchState *child,
278 /*IN*/ ThreadArchState *parent,
279 Bool inherit_parents_GDT,
282 /* We inherit our parent's guest state. */
283 child->vex = parent->vex;
284 child->vex_shadow1 = parent->vex_shadow1;
285 child->vex_shadow2 = parent->vex_shadow2;
286 /* In L4Re creating a new thread means not cloning like in linux,
287 * instead a new instruction pointer is required */
288 child->vex.guest_EIP = client_ip;
292 /* ---------------------------------------------------------------------
293 PRE/POST wrappers for x86/L4Re-specific syscalls
294 ------------------------------------------------------------------ */
296 #define PRE(name) DEFN_PRE_TEMPLATE(x86_l4re, name)
297 #define POST(name) DEFN_POST_TEMPLATE(x86_l4re, name)
299 #define FOO(name) case L4_PROTO_##name : VG_(printf)(#name); break;
302 L4_PROTO_DATASPACE = 0x4000,
311 * Print some information about the current syscall
312 * see also l4sys/include/err.h
313 * l4sys/include/types.h
316 void print_infos_to_syscall(l4_msgtag_t *tag, ThreadId tid) {
317 VG_(printf)("msgtag_label = %lx\n", l4_msgtag_label(*tag));
319 VG_(printf)(" Protocol: ");
320 switch(l4_msgtag_label(*tag)) {
322 case L4_PROTO_ALLOW_SYSCALL: // =L4_PROTO_PF_EXCEPTION
323 VG_(printf)("ALLOW_SYSCALL/PF_EXCEPTION");
343 default: VG_(printf)("unknown");
353 * Determine if system call arguments indicate task termination.
355 Bool syscall_is_exit(SyscallArgs* args)
357 return (args->arg4 /* EDX */ == (L4_INVALID_CAP | L4_SYSF_RECV))
358 && (args->arg3 /* ECX */ == L4_IPC_NEVER.raw );
362 * Determine if syscall arguments are for thread_ex_regs
364 Bool syscall_is_threadexregs(SyscallArgs *args, l4_msg_regs_t *mr, l4_msgtag_t tag)
366 return (l4_msgtag_label(tag) == L4_PROTO_THREAD)
367 && (mr->mr[0] & L4_THREAD_EX_REGS_OP);
372 * Determine if system call would perform a GDT set.
374 Bool syscall_is_setgdt(l4_msg_regs_t *mr, l4_msgtag_t tag)
376 return (l4_msgtag_label(tag) == L4_PROTO_THREAD)
377 && (mr->mr[0] == L4_THREAD_GDT_X86_OP);
382 * Determine if syscall is a PARENT signal IPC.
384 Bool syscall_is_parentsignal(l4_msg_regs_t *mr, l4_msgtag_t tag)
386 return (l4_msgtag_label(tag) == L4_PROTO_PARENT)
392 * Handle GDT set system call.
394 * This code basically does what Fiasco does, but uses the guest_GDT
395 * that was allocated upon thread creation instead of performing a
398 int handle_gdt_set(l4_msg_regs_t *mr, l4_msgtag_t tag, ThreadState *ts)
400 enum { GDT_USER_ENTRY1 = 0x48 };
401 unsigned idx = mr->mr[1];
402 unsigned words = l4_msgtag_words(tag);
403 Addr gdt_user_start = (Addr)(ts->arch.vex.guest_GDT + GDT_USER_ENTRY1);
408 if (words == 1) { // case 1: read entry
409 mr->mr[0] = *(unsigned int *)gdt_user_start;
411 words -= 2; // -2 words are opcode and index
413 VG_(memcpy)(gdt_user_start + 2 * idx,
414 &mr->mr[2], words * sizeof(unsigned));
417 * Return value is a msgtag with the label set to the proper GDT entry.
418 * This needs to be correct as it will later on be used to index the GDT.
420 ts->arch.vex.guest_EAX = ((idx << 3) + GDT_USER_ENTRY1 + 3) << 16;
429 #define DEBUG_MYSELF 0
434 if (0) VG_(printf)("PRE_generic: sysno = %08lx arg0 = %8lx arg1 = %8lx arg2 = %8lx\n"
435 " arg3 = %8lx arg4 = %8lx arg5 = %8lx arg6 = %8lx\n",
436 arrghs->sysno, arrghs->arg1, arrghs->arg2, arrghs->arg3, arrghs->arg4,
437 arrghs->arg5, arrghs->arg6, arrghs->arg7);
438 /* get access to virtual utcb of client */
439 u = ts_utcb(&VG_(threads)[tid]);
442 tag = (l4_msgtag_t *) &(arrghs->arg1);
443 tst = VG_(get_ThreadState)(tid);
446 print_infos_to_syscall(tag, tid);
447 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
449 if (syscall_is_exit(arrghs)) {
450 /* l4_sleep_forever */
451 tst->exitreason = VgSrc_ExitThread;
452 tst->os_state.exitcode = 1;
453 SET_STATUS_Success(0);
454 //enter_kdebug("l4_sleep_forever");
456 } else if (syscall_is_threadexregs(arrghs, v, *tag)) {
457 // TODO is this really a thread-create??
458 // TODO store thread cap
459 // catch cap mappings
461 /* the guest wants to create a new thread */
463 VG_(debugLog)(1, "syswrap", "The client wants to create a new thread\n"
464 "\t\t\tcollected infos (from virt utcb):\n"
465 "\t\t\t (cl) ip = 0x%x\n"
466 "\t\t\t (cl) sp = 0x%x\n"
467 "\t\t\t flags = 0x%x\n",
468 (unsigned int) v->mr[1],
469 (unsigned int) v->mr[2],
470 (unsigned int) v->mr[0]);
472 if (0) enter_kdebug("before thread create");
474 do_create_new_thread ( tid,
475 v->mr[2], /*stack pointer*/
477 v->mr[1] /*instruction pointer*/
480 if (0) enter_kdebug("after thread create");
481 SET_STATUS_Success(0);
482 *flags |= SfYieldAfter;
484 } else if (syscall_is_setgdt(v, *tag)) {
485 SET_STATUS_Success(handle_gdt_set(v, *tag, tst));
487 } else if (syscall_is_parentsignal(v, *tag)) {
488 /* the guest signals his parent that he would exit now */
490 VG_(debugLog)(0, "syswrap", "The client would like to exit\n");
491 VG_(debugLog)(0, "syswrap", "exit code = 0x%x\n", (unsigned int) v->mr[2]);
493 tst->exitreason = VgSrc_ExitThread;
494 tst->os_state.exitcode = v->mr[2];
496 /* For a correct exit we must "kill" all threads, but i don't
497 * know if this is the right solution.
502 for (_tid = 1; _tid < VG_N_THREADS; _tid++) {
503 if ( VG_(threads)[_tid].status != VgTs_Empty &&
504 VG_(threads)[_tid].status != VgTs_Zombie &&
506 _tst = VG_(get_ThreadState)(_tid);
507 VG_(threads)[_tid].status = VgTs_Zombie;
508 tst->exitreason = VgSrc_ExitThread;
512 SET_STATUS_Success(0);
516 * In all other cases this might be a "normal" IPC and thus might block.
518 *flags |= SfMayBlock;
525 if (0) VG_(printf)("\
526 POST_generic: sysno %8lx arg0 = %8lx arg1 = %8lx arg2 = %8lx\n"
527 " arg3 = %8lx arg4 = %8lx arg5 = %8lx arg6 = %8lx\n",
528 arrghs->sysno, arrghs->arg1, arrghs->arg2,
529 arrghs->arg3, arrghs->arg4, arrghs->arg5, arrghs->arg6, arrghs->arg7);
540 /* ---------------------------------------------------------------------
541 The x86/l4re syscall table
542 ------------------------------------------------------------------ */
544 /* Add an x86-l4re specific wrapper to a syscall table. */
545 #define PLAX_(sysno, name) WRAPPER_ENTRY_X_(x86_l4re, sysno, name)
546 #define PLAXY(sysno, name) WRAPPER_ENTRY_XY(x86_l4re, sysno, name)
549 // This table maps from __NR_xxx syscall numbers (from
550 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
551 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
553 // For those syscalls not handled by Valgrind, the annotation indicates its
554 // arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
557 const SyscallTableEntry ML_(syscall_table)[] = {
558 PLAXY(SYS_INVOKE, generic),
559 PLAXY(SYS_DEBUG, dummy),
560 PLAXY(SYS_ENTER_KDEBUG, dummy),
561 PLAXY(SYS_LINUX_INT80, dummy),
562 PLAXY(SYS_UD2, dummy),
563 PLAXY(SYS_ARTIFICIAL, dummy),
566 const UInt ML_(syscall_table_size) =
567 sizeof(ML_(syscall_table)) / sizeof(ML_(syscall_table)[0]);
569 /*--------------------------------------------------------------------*/
571 /*--------------------------------------------------------------------*/