l4/pkg/valgrind/src/valgrind-3.6.0-svn/coregrind/m_scheduler/scheduler.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- Thread scheduling.                               scheduler.c ---*/
   4 /*--------------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2000-2010 Julian Seward
  11       jseward@acm.org
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, write to the Free Software
  25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  26    02111-1307, USA.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29 */
  30
  31 /*
  32    Overview
  33
  34    Valgrind tries to emulate the kernel's threading as closely as
  35    possible.  The client does all threading via the normal syscalls
  36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
  37    the same process structure as would be created without Valgrind.
  38    There are no extra threads.
  39
  40    The main difference is that Valgrind only allows one client thread
  41    to run at once.  This is controlled with the CPU Big Lock,
  42    "the_BigLock".  Any time a thread wants to run client code or
  43    manipulate any shared state (which is anything other than its own
  44    ThreadState entry), it must hold the_BigLock.
  45
  46    When a thread is about to block in a blocking syscall, it releases
  47    the_BigLock, and re-takes it when it becomes runnable again (either
  48    because the syscall finished, or we took a signal).
  49
  50    VG_(scheduler) therefore runs in each thread.  It returns only when
  51    the thread is exiting, either because it exited itself, or it was
  52    told to exit by another thread.
  53
  54    This file is almost entirely OS-independent.  The details of how
  55    the OS handles threading and signalling are abstracted away and
  56    implemented elsewhere.  [Some of the functions have worked their
  57    way back for the moment, until we do an OS port in earnest...]
  58  */
  59
  60 #include "pub_core_basics.h"
  61 #include "pub_core_debuglog.h"
  62 #include "pub_core_vki.h"
  63 #include "pub_core_vkiscnums.h"    // __NR_sched_yield
  64 #include "pub_core_threadstate.h"
  65 #include "pub_core_aspacemgr.h"
  66 #include "pub_core_clreq.h"         // for VG_USERREQ__*
  67 #include "pub_core_dispatch.h"
  68 #include "pub_core_errormgr.h"      // For VG_(get_n_errs_found)()
  69 #include "pub_core_libcbase.h"
  70 #include "pub_core_libcassert.h"
  71 #include "pub_core_libcprint.h"
  72 #include "pub_core_libcproc.h"
  73 #include "pub_core_libcsignal.h"
  74 #if defined(VGO_darwin)
  75 #include "pub_core_mach.h"
  76 #endif
  77 #include "pub_core_machine.h"
  78 #include "pub_core_mallocfree.h"
  79 #include "pub_core_options.h"
  80 #include "pub_core_replacemalloc.h"
  81 #include "pub_core_signals.h"
  82 #include "pub_core_stacks.h"
  83 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
  84 #include "pub_core_syscall.h"
  85 #include "pub_core_syswrap.h"
  86 #include "pub_core_tooliface.h"
  87 #include "pub_core_translate.h"     // For VG_(translate)()
  88 #include "pub_core_transtab.h"
  89 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
  90 #include "priv_sema.h"
  91 #include "pub_core_scheduler.h"     // self
  92 #include "pub_core_redir.h"
  93
  94 /* ---------------------------------------------------------------------
  95    Types and globals for the scheduler.
  96    ------------------------------------------------------------------ */
  97
  98 /* ThreadId and ThreadState are defined elsewhere*/
  99
  100 /* Defines the thread-scheduling timeslice, in terms of the number of
  101    basic blocks we attempt to run each thread for.  Smaller values
  102    give finer interleaving but much increased scheduling overheads. */
  103 #define SCHEDULING_QUANTUM   100000
  104
  105 /* If False, a fault is Valgrind-internal (ie, a bug).  Set to True
         immediately before, and back to False immediately after, the
         dispatcher call in run_thread_for_a_while(). */
  106 Bool VG_(in_generated_code) = False;
  107
  108 /* Counts downwards in VG_(run_innerloop).  run_thread_for_a_while()
         samples it before and after a run to work out how many basic
         blocks were executed. */
  109 UInt VG_(dispatch_ctr);
  110
  111 /* 64-bit counter for the number of basic blocks done.  Updated by
         run_thread_for_a_while() and reported by
         VG_(print_scheduler_stats). */
  112 static ULong bbs_done = 0;
  113
  114 /* Forward declarations of helpers defined later in this file. */
  115 static void do_client_request ( ThreadId tid );
  116 static void scheduler_sanity ( ThreadId tid );
  117 static void mostly_clear_thread_record ( ThreadId tid );
  118
  119 /* Stats, reported by VG_(print_scheduler_stats). */
  120 static ULong n_scheduling_events_MINOR = 0;
  121 static ULong n_scheduling_events_MAJOR = 0;
  122
  123 /* Sanity checking counts, reported by VG_(print_scheduler_stats). */
  124 static UInt sanity_fast_count = 0;
  125 static UInt sanity_slow_count = 0;
 126
 127 void VG_(print_scheduler_stats)(void)
 128 {
 129    VG_(message)(Vg_DebugMsg,
 130       "scheduler: %'llu jumps (bb entries).\n", bbs_done );
 131    VG_(message)(Vg_DebugMsg,
 132       "scheduler: %'llu/%'llu major/minor sched events.\n",
 133       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
 134    VG_(message)(Vg_DebugMsg,
 135                 "   sanity: %d cheap, %d expensive checks.\n",
 136                 sanity_fast_count, sanity_slow_count );
 137 }
 138
  139 /* CPU semaphore, so that threads can run exclusively.  This is the
         "the_BigLock" described in the overview at the top of the file: a
         thread must hold it to run client code or to touch shared state.
         It has internal (static) linkage everywhere except on L4Re,
         where the definition is left external because it is also used
         outside this file. */
  140 #if !defined(VGO_l4re) // L4Re uses it outside as well
  141 static
  142 #endif
  143 vg_sema_t the_BigLock;
 144
 145
 146 /* ---------------------------------------------------------------------
 147    Helper functions for the scheduler.
 148    ------------------------------------------------------------------ */
 149
  150 static
  151 void print_sched_event ( ThreadId tid, Char* what )
  152 {
         /* Emit one scheduler-trace line as a debug message, in the form
            "  SCHED[<tid>]: <what>".
            tid:  thread the event refers to.
            what: NUL-terminated description of the event. */
  153    VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
  154 }
 155
 156 static
 157 HChar* name_of_sched_event ( UInt event )
 158 {
 159    switch (event) {
 160       case VEX_TRC_JMP_SYS_SYSCALL:   return "SYSCALL";
 161       case VEX_TRC_JMP_SYS_INT32:     return "INT32";
 162       case VEX_TRC_JMP_SYS_INT128:    return "INT128";
 163       case VEX_TRC_JMP_SYS_INT129:    return "INT129";
 164       case VEX_TRC_JMP_SYS_INT130:    return "INT130";
 165       case VEX_TRC_JMP_SYS_SYSENTER:  return "SYSENTER";
 166       case VEX_TRC_JMP_CLIENTREQ:     return "CLIENTREQ";
 167       case VEX_TRC_JMP_YIELD:         return "YIELD";
 168       case VEX_TRC_JMP_NODECODE:      return "NODECODE";
 169       case VEX_TRC_JMP_MAPFAIL:       return "MAPFAIL";
 170       case VEX_TRC_JMP_NOREDIR:       return "NOREDIR";
 171       case VEX_TRC_JMP_EMWARN:        return "EMWARN";
 172       case VEX_TRC_JMP_TINVAL:        return "TINVAL";
 173       case VG_TRC_INVARIANT_FAILED:   return "INVFAILED";
 174       case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
 175       case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
 176       case VG_TRC_FAULT_SIGNAL:       return "FAULTSIGNAL";
 177 #ifdef VGO_l4re
 178       case VEX_TRC_JMP_L4_UTCB_EAX:   return "L4UTCB EAX";
 179       case VEX_TRC_JMP_L4_UTCB_EBX:   return "L4UTCB EAX";
 180       case VEX_TRC_JMP_L4_UTCB_ECX:   return "L4UTCB EAX";
 181       case VEX_TRC_JMP_L4_UTCB_EDX:   return "L4UTCB EAX";
 182       case VEX_TRC_JMP_SYS_INT48:     return "INT 0x30";
 183       case VEX_TRC_JMP_SYS_INT50:     return "INT 0x32";
 184       case VEX_TRC_JMP_SIGTRAP:       return "SIGTRAP (INT3)";
 185           case VEX_TRC_JMP_L4_UD2:        return "L4: UD2";
 186           case VEX_TRC_JMP_L4_ARTIFICIAL: return "L4: Artificial";
 187 #endif
 188       default:                        return "??UNKNOWN??";
 189   }
 190 }
 191
 192 /* Allocate a completely empty ThreadState record. */
 193 ThreadId VG_(alloc_ThreadState) ( void )
 194 {
 195    Int i;
 196    for (i = 1; i < VG_N_THREADS; i++) {
 197       if (VG_(threads)[i].status == VgTs_Empty) {
 198          VG_(threads)[i].status = VgTs_Init;
 199          VG_(threads)[i].exitreason = VgSrc_None;
 200 #if defined(VGO_l4re)
 201          VG_(debugLog)(1, "sched", "Initializing ThreadState %d\n", i);
 202
 203          VG_(memset)(ts_utcb_copy(&VG_(threads)[i]), 0, L4RE_UTCB_SIZE);
 204
 205          VG_(threads)[i].os_state.utcb = (l4_utcb_t *)ts_utcb_copy(&VG_(threads)[i]);
 206
 207          // copy current utcb as initial utcb into thread state
 208          l4_utcb_t *utcb = l4_utcb_wrap();
 209          VG_(memcpy)(ts_utcb(&VG_(threads)[i]), utcb, L4RE_UTCB_SIZE);
 210 #endif
 211          return i;
 212       }
 213    }
 214    VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
 215    VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
 216    VG_(core_panic)("VG_N_THREADS is too low");
 217    /*NOTREACHED*/
 218 }
 219
  220 /*
  221    Mark a thread as Runnable.  This will block until the_BigLock is
  222    available, so that we get exclusive access to all the shared
  223    structures and the CPU.  Up until we get the_BigLock, we must not
  224    touch any shared state.
  225
  226    When this returns, we'll actually be running.

         tid: the thread that is to become the running thread.  It must
              not already be VgTs_Runnable, and VG_(running_tid) must be
              VG_INVALID_THREADID once the lock has been obtained; both
              conditions are asserted.
         who: short label naming the caller.  It is used only in the
              message printed when VG_(clo_trace_sched) is set, and is
              asserted to be at most 100 characters long so that it fits
              the local buffer.
  227  */
  228 void VG_(acquire_BigLock)(ThreadId tid, HChar* who)
  229 {
  230    ThreadState *tst;
  231
         /* Disabled: tracing the wait would touch shared state before the
            lock is held (see the comment on the sema_down call below). */
  232 #if 0
  233    if (VG_(clo_trace_sched)) {
  234       HChar buf[100];
  235       vg_assert(VG_(strlen)(who) <= 100-50);
  236       VG_(sprintf)(buf, "waiting for lock (%s)", who);
  237       print_sched_event(tid, buf);
  238    }
  239 #endif
  240
  241    /* First, acquire the_BigLock.  We can't do anything else safely
  242       prior to this point.  Even doing debug printing prior to this
  243       point is, technically, wrong. */
  244    ML_(sema_down)(&the_BigLock, False/*not LL*/);
  245
  246    tst = VG_(get_ThreadState)(tid);
  247
  248    vg_assert(tst->status != VgTs_Runnable);
  249
  250    tst->status = VgTs_Runnable;
  251
         /* Nobody else may be recorded as running while we hold the lock.
            Print the offending pair first so that the assertion failure
            that follows is easier to diagnose. */
  252    if (VG_(running_tid) != VG_INVALID_THREADID)
  253       VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
  254    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
  255    VG_(running_tid) = tid;
  256
         /* Hand this thread's current stack pointer to
            VG_(unknown_SP_update) as both of its SP arguments.
            NOTE(review): presumably this lets stack tracking notice the
            switch to this thread's stack -- confirm against m_stacks. */
  257    { Addr gsp = VG_(get_SP)(tid);
  258      VG_(unknown_SP_update)(gsp, gsp, 0/*unknown origin*/);
  259    }
  260
  261    if (VG_(clo_trace_sched)) {
  262       HChar buf[150];
  263       vg_assert(VG_(strlen)(who) <= 150-50);
  264       VG_(sprintf)(buf, " acquired lock (%s)", who);
  265       print_sched_event(tid, buf);
  266    }
  267 }
 268
  269 /*
  270    Set a thread into a sleeping state, and give up exclusive access to
  271    the CPU.  On return, the thread must be prepared to block until it
  272    is ready to run again (generally this means blocking in a syscall,
  273    but it may mean that we remain in a Runnable state and we're just
  274    yielding the CPU to another thread).

         tid:        the currently running thread; it must be
                     VgTs_Runnable and equal to VG_(running_tid) (both
                     asserted).
         sleepstate: the status to leave the thread in; only VgTs_WaitSys
                     and VgTs_Yielding are accepted (asserted).
         who:        short label naming the caller, used only in the
                     message printed when VG_(clo_trace_sched) is set; it
                     is asserted to be at most 100 characters long.
  275  */
  276 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate, HChar* who)
  277 {
  278    ThreadState *tst = VG_(get_ThreadState)(tid);
  279
  280    vg_assert(tst->status == VgTs_Runnable);
  281
  282    vg_assert(sleepstate == VgTs_WaitSys ||
  283              sleepstate == VgTs_Yielding);
  284
  285    tst->status = sleepstate;
  286
  287    vg_assert(VG_(running_tid) == tid);
  288    VG_(running_tid) = VG_INVALID_THREADID;
  289
         /* All shared-state updates and the trace output happen before
            the semaphore is released below. */
  290    if (VG_(clo_trace_sched)) {
  291       Char buf[200];
  292       vg_assert(VG_(strlen)(who) <= 200-100);
  293       VG_(sprintf)(buf, "releasing lock (%s) -> %s",
  294                         who, VG_(name_of_ThreadStatus)(sleepstate));
  295       print_sched_event(tid, buf);
  296    }
  297
  298    /* Release the_BigLock; this will reschedule any runnable
  299       thread. */
  300    ML_(sema_up)(&the_BigLock, False/*not LL*/);
  301 }
 302
  303 /* See pub_core_scheduler.h for description.  Takes the_BigLock via
         ML_(sema_down) with the "LL" flag set, without reading or writing
         any ThreadState or VG_(running_tid).  'who' is accepted but not
         used. */
  304 void VG_(acquire_BigLock_LL) ( HChar* who )
  305 {
  306   ML_(sema_down)(&the_BigLock, True/*LL*/);
  307 }
 308
  309 /* See pub_core_scheduler.h for description.  Releases the_BigLock via
         ML_(sema_up) with the "LL" flag set, without reading or writing
         any ThreadState or VG_(running_tid).  'who' is accepted but not
         used. */
  310 void VG_(release_BigLock_LL) ( HChar* who )
  311 {
  312    ML_(sema_up)(&the_BigLock, True/*LL*/);
  313 }
 314
 315
  316 /* Clear out the ThreadState and release the semaphore. Leaves the
  317    ThreadState in VgTs_Zombie state, so that it doesn't get
  318    reallocated until the caller is really ready.

         tid must be a valid thread id, must be the currently running
         thread, and must already be marked as exiting; all three are
         asserted. */
  319 void VG_(exit_thread)(ThreadId tid)
  320 {
  321    vg_assert(VG_(is_valid_tid)(tid));
  322    vg_assert(VG_(is_running_thread)(tid));
  323    vg_assert(VG_(is_exiting)(tid));
  324
  325    mostly_clear_thread_record(tid);
  326    VG_(running_tid) = VG_INVALID_THREADID;
  327
  328    /* There should still be a valid exitreason for this thread */
  329    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
  330
  331    if (VG_(clo_trace_sched))
  332       print_sched_event(tid, "release lock in VG_(exit_thread)");
  333
         /* Give up the_BigLock; the thread status stays VgTs_Zombie. */
  334    ML_(sema_up)(&the_BigLock, False/*not LL*/);
  335 }
 336
  337 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
  338    out of the syscall and onto doing the next thing, whatever that is.
  339    If it isn't blocked in a syscall, has no effect on the thread.

         'tid' must be a valid thread id and must not be the currently
         running thread (both asserted).  On Darwin thread_abort() is
         called on the target instead of sending a signal. */
  340 void VG_(get_thread_out_of_syscall)(ThreadId tid)
  341 {
  342    vg_assert(VG_(is_valid_tid)(tid));
  343    vg_assert(!VG_(is_running_thread)(tid));
  344
  345    if (VG_(threads)[tid].status == VgTs_WaitSys) {
  346       if (VG_(clo_trace_signals)) {
  347          VG_(message)(Vg_DebugMsg,
  348                       "get_thread_out_of_syscall zaps tid %d lwp %d\n",
  349                       tid, VG_(threads)[tid].os_state.lwpid);
  350       }
  351 #     if defined(VGO_darwin)
  352       {
  353          // GrP fixme use mach primitives on darwin?
  354          // GrP fixme thread_abort_safely?
  355          // GrP fixme race for thread with WaitSys set but not in syscall yet?
  356          extern kern_return_t thread_abort(mach_port_t);
  357          thread_abort(VG_(threads)[tid].os_state.lwpid);
  358       }
  359 #     else
  360       {
               /* The result of tkill is deliberately ignored (hence the
                  'unused' attribute); see the discussion below. */
  361          __attribute__((unused))
  362          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
  363          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
  364             I'm really not sure.  Here's a race scenario which argues
  365             that we shouldn't; but equally I'm not sure the scenario is
  366             even possible, because of constraints caused by the question
  367             of who holds the BigLock when.
  368
  369             Target thread tid does sys_read on a socket and blocks.  This
  370             function gets called, and we observe correctly that tid's
  371             status is WaitSys but then for whatever reason this function
  372             goes very slowly for a while.  Then data arrives from
  373             wherever, tid's sys_read returns, tid exits.  Then we do
  374             tkill on tid, but tid no longer exists; tkill returns an
  375             error code and the assert fails. */
  376          /* vg_assert(r == 0); */
  377       }
  378 #     endif
  379    }
  380 }
 381
  382 /*
  383    Yield the CPU for a short time to let some other thread run.

         The calling thread must be the one recorded in VG_(running_tid),
         and its recorded lwpid must match VG_(gettid)() (both asserted).
         The thread drops the_BigLock in state VgTs_Yielding, asks the OS
         to schedule something else, and then re-acquires the lock before
         returning.
  384  */
  385 void VG_(vg_yield)(void)
  386 {
  387    ThreadId tid = VG_(running_tid);
  388
  389    vg_assert(tid != VG_INVALID_THREADID);
  390    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
  391
  392    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
  393
  394    /*
  395       Tell the kernel we're yielding.
            On L4Re this is l4_thread_switch(L4_INVALID_CAP); elsewhere it
            is the sched_yield syscall.
  396     */
  397 #if defined(VGO_l4re)
  398 //   l4_thread_yield();
  399    l4_thread_switch(L4_INVALID_CAP);
  400 #else
  401    VG_(do_syscall0)(__NR_sched_yield);
  402 #endif
  403
  404    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
  405 }
 406
 407
 408 /* Set the standard set of blocked signals, used whenever we're not
 409    running a client syscall. */
 410 static void block_signals(void)
 411 {
 412 #if defined(VGO_l4re)
 413 //   VG_(unimplemented)("unimplemented function block_signals()");
 414    /* Do nothing */
 415 #else
 416    vki_sigset_t mask;
 417
 418    VG_(sigfillset)(&mask);
 419
 420    /* Don't block these because they're synchronous */
 421    VG_(sigdelset)(&mask, VKI_SIGSEGV);
 422    VG_(sigdelset)(&mask, VKI_SIGBUS);
 423    VG_(sigdelset)(&mask, VKI_SIGFPE);
 424    VG_(sigdelset)(&mask, VKI_SIGILL);
 425    VG_(sigdelset)(&mask, VKI_SIGTRAP);
 426
 427    /* Can't block these anyway */
 428    VG_(sigdelset)(&mask, VKI_SIGSTOP);
 429    VG_(sigdelset)(&mask, VKI_SIGKILL);
 430
 431    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
 432 #endif
 433 }
 434
  435 static void os_state_clear(ThreadState *tst)
  436 {
         /* Reset the OS-specific part of a thread record (tst->os_state)
            to its idle values.  lwpid and threadgroup are cleared on every
            OS; the remaining fields depend on which VGO_* symbol is
            defined, and the build fails for an OS not listed here.  The
            valgrind_stack_* fields are not touched here; os_state_init()
            resets those. */
  437    tst->os_state.lwpid       = 0;
  438    tst->os_state.threadgroup = 0;
  439 #  if defined(VGO_linux)
  440    /* no other fields to clear */
  441 #  elif defined(VGO_aix5)
  442    tst->os_state.cancel_async    = False;
  443    tst->os_state.cancel_disabled = False;
  444    tst->os_state.cancel_progress = Canc_NoRequest;
  445 #  elif defined(VGO_darwin)
  446    tst->os_state.post_mach_trap_fn = NULL;
  447    tst->os_state.pthread           = 0;
  448    tst->os_state.func_arg          = 0;
  449    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
  450    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
  451    tst->os_state.wq_jmpbuf_valid   = False;
  452    tst->os_state.remote_port       = 0;
  453    tst->os_state.msgh_id           = 0;
  454    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
  455 #  elif defined(VGO_l4re)
         /* Drop the UTCB pointer; VG_(alloc_ThreadState) sets it again
            when the slot is next allocated. */
  456    tst->os_state.utcb = 0;
  457 #  else
  458 #    error "Unknown OS"
  459 #  endif
  460 }
 461
  462 static void os_state_init(ThreadState *tst)
  463 {
         /* First-time initialisation of tst->os_state: zero the two
            Valgrind-stack fields, which os_state_clear() leaves alone,
            then clear everything else via os_state_clear(). */
  464    tst->os_state.valgrind_stack_base    = 0;
  465    tst->os_state.valgrind_stack_init_SP = 0;
  466    os_state_clear(tst);
  467 }
 468
  469 static
  470 void mostly_clear_thread_record ( ThreadId tid )
  471 {
         /* Reset most of VG_(threads)[tid] after the thread has finished
            (or when initialising the table), leaving the slot in
            VgTs_Zombie rather than VgTs_Empty.  Callers that want the slot
            to be reusable set the status to VgTs_Empty themselves
            afterwards.  Fields not assigned below (for example exitreason
            and the client stack bounds) are left as they were.

            tid: index into VG_(threads); asserted to be below
                 VG_N_THREADS.  Slot 0 is accepted. */
  472 #if !defined(VGO_l4re)
         /* Receives the mask written by VG_(clear_out_queued_signals);
            the value is not used afterwards. */
  473    vki_sigset_t savedmask;
  474 #endif
  475
  476    vg_assert(tid >= 0 && tid < VG_N_THREADS);
  477    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
  478    VG_(threads)[tid].tid = tid;
  479
  480    /* Leave the thread in Zombie, so that it doesn't get reallocated
  481       until the caller is finally done with the thread stack. */
  482    VG_(threads)[tid].status               = VgTs_Zombie;
  483
         /* Signal masks are not maintained on L4Re. */
  484 #if !defined(VGO_l4re)
  485    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
  486    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
  487 #endif
  488
  489    os_state_clear(&VG_(threads)[tid]);
  490
  491    /* start with no altstack */
         /* 0xdeadbeef is a recognisable placeholder; the stack is marked
            disabled via VKI_SS_DISABLE. */
  492    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
  493    VG_(threads)[tid].altstack.ss_size = 0;
  494    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
  495
  496 #if !defined(VGO_l4re)
  497    VG_(clear_out_queued_signals)(tid, &savedmask);
  498 #endif
  499
         /* Any saved scheduler jump buffer is no longer usable. */
  500    VG_(threads)[tid].sched_jmpbuf_valid = False;
  501 }
 502
 503 /*
 504    Called in the child after fork.  If the parent has multiple
 505    threads, then we've inherited a VG_(threads) array describing them,
 506    but only the thread which called fork() is actually alive in the
 507    child.  This functions needs to clean up all those other thread
 508    structures.
 509
 510    Whichever tid in the parent which called fork() becomes the
 511    master_tid in the child.  That's because the only living slot in
 512    VG_(threads) in the child after fork is VG_(threads)[tid], and it
 513    would be too hard to try to re-number the thread and relocate the
 514    thread state down to VG_(threads)[1].
 515
 516    This function also needs to reinitialize the_BigLock, since
 517    otherwise we may end up sharing its state with the parent, which
 518    would be deeply confusing.
 519 */
 520 static void sched_fork_cleanup(ThreadId me)
 521 {
 522    ThreadId tid;
 523    vg_assert(VG_(running_tid) == me);
 524
 525 #  if defined(VGO_darwin)
 526    // GrP fixme hack reset Mach ports
 527    VG_(mach_init)();
 528 #  endif
 529
 530    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
 531    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
 532
 533    /* clear out all the unused thread slots */
 534    for (tid = 1; tid < VG_N_THREADS; tid++) {
 535       if (tid != me) {
 536          mostly_clear_thread_record(tid);
 537          VG_(threads)[tid].status = VgTs_Empty;
 538          VG_(clear_syscallInfo)(tid);
 539       }
 540    }
 541
 542    /* re-init and take the sema */
 543    ML_(sema_deinit)(&the_BigLock);
 544    ML_(sema_init)(&the_BigLock);
 545    ML_(sema_down)(&the_BigLock, False/*not LL*/);
 546 }
 547
 548
 549 /* First phase of initialisation of the scheduler.  Initialise the
 550    bigLock, zeroise the VG_(threads) structure and decide on the
 551    ThreadId of the root thread.
 552 */
 553 ThreadId VG_(scheduler_init_phase1) ( void )
 554 {
 555    Int i;
 556    ThreadId tid_main;
 557
 558    VG_(debugLog)(1,"sched","sched_init_phase1\n");
 559
 560    ML_(sema_init)(&the_BigLock);
 561
 562    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
 563       /* Paranoia .. completely zero it out. */
 564       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
 565
 566       VG_(threads)[i].sig_queue = NULL;
 567
 568       os_state_init(&VG_(threads)[i]);
 569       mostly_clear_thread_record(i);
 570
 571       VG_(threads)[i].status                    = VgTs_Empty;
 572       VG_(threads)[i].client_stack_szB          = 0;
 573       VG_(threads)[i].client_stack_highest_word = (Addr)NULL;
 574    }
 575
 576    tid_main = VG_(alloc_ThreadState)();
 577
 578    /* Bleh.  Unfortunately there are various places in the system that
 579       assume that the main thread has a ThreadId of 1.
 580       - Helgrind (possibly)
 581       - stack overflow message in default_action() in m_signals.c
 582       - definitely a lot more places
 583    */
 584    vg_assert(tid_main == 1);
 585
 586    return tid_main;
 587 }
 588
 589
  590 /* Second phase of initialisation of the scheduler.  Given the root
  591    ThreadId computed by first phase of initialisation, fill in the
  592    client stack details for that thread and register
  593    sched_fork_cleanup as an atfork handler.  This is called at
  594    startup.  The caller subsequently initialises the guest state
         components of this main thread.

         NOTE(review): earlier wording of this comment said that this
         function acquires bigLock; the body below does not take
         the_BigLock -- confirm where the main thread first acquires it.

         tid_main:     ThreadId returned by VG_(scheduler_init_phase1).
         clstack_end:  address of the last byte of the client stack;
                       clstack_end+1 is asserted to be page aligned.
         clstack_size: size of the client stack in bytes; asserted to be
                       page aligned.
  595 */
  596 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
  597                                   Addr     clstack_end,
  598                                   SizeT    clstack_size )
  599 {
  600    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
  601                    "cls_end=0x%lx, cls_sz=%ld\n",
  602                    tid_main, clstack_end, clstack_size);
  603
  604    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
  605    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
  606
         /* The highest word is the last UWord-sized slot that ends at
            clstack_end. */
  607    VG_(threads)[tid_main].client_stack_highest_word
  608       = clstack_end + 1 - sizeof(UWord);
  609    VG_(threads)[tid_main].client_stack_szB
  610       = clstack_size;
  611
         /* Only the third handler is supplied; the header comment of
            sched_fork_cleanup says it runs in the child after fork. */
  612    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
  613 }
 614
 615
 616 /* ---------------------------------------------------------------------
 617    Helpers for running translations.
 618    ------------------------------------------------------------------ */
 619
  620 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
  621    mask state, but does need to pass "val" through.

         SCHEDSETJMP(tid, jumped, stmt) saves a jump target in thread
         tid's sched_jmpbuf and then runs 'stmt' with sched_jmpbuf_valid
         set to True.
           - 'jumped' receives the result of __builtin_setjmp: 0 on the
             direct path, non-zero when control came back here through
             the saved jump buffer, in which case 'stmt' is not (re)run.
           - On the direct path sched_jmpbuf_valid is asserted to be False
             beforehand, so uses of the macro cannot be nested for the
             same thread.
           - On both paths sched_jmpbuf_valid is asserted to be True on
             the way out and is then reset to False.
           - When VG_(clo_trace_sched) is set, the jumped path prints the
             source line, tid and jump value.
         The thread-state pointer is a volatile-qualified local.
         NOTE(review): presumably so that it is still usable after
         control re-enters through the jump buffer -- confirm. */
  622 #define SCHEDSETJMP(tid, jumped, stmt)                                        \
  623    do {                                                                        \
  624       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);        \
  625                                                                         \
  626       (jumped) = __builtin_setjmp(_qq_tst->sched_jmpbuf);               \
  627       if ((jumped) == 0) {                                                \
  628          vg_assert(!_qq_tst->sched_jmpbuf_valid);                        \
  629          _qq_tst->sched_jmpbuf_valid = True;                                \
  630          stmt;                                                                \
  631       }        else if (VG_(clo_trace_sched))                                        \
  632          VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%d\n",        \
  633                      __LINE__, tid, jumped);                            \
  634       vg_assert(_qq_tst->sched_jmpbuf_valid);                                \
  635       _qq_tst->sched_jmpbuf_valid = False;                                \
  636    } while(0)
 637
 638
  639 /* Do various guest state alignment checks prior to running a thread.
  640    Specifically, check that what we have matches Vex's guest state
  641    layout requirements.  See libvex.h for details, but in short the
  642    requirements are: There must be no holes in between the primary
  643    guest state, its two copies, and the spill area.  In short, all 4
  644    areas must have a 16-aligned size and be 16-aligned, and placed
  645    back-to-back.

         tst: the thread whose tst->arch layout is checked.  Nothing is
              modified; any violation fails a vg_assert. */
  646 static void do_pre_run_checks ( ThreadState* tst )
  647 {
         /* Addresses and sizes of the four areas: the primary guest
            state, its two shadows, and the spill area. */
  648    Addr a_vex     = (Addr) & tst->arch.vex;
  649    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
  650    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
  651    Addr a_spill   = (Addr) & tst->arch.vex_spill;
  652    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
  653    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
  654    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
  655    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
  656
         /* Debug dump of the layout; compiled in but never executed. */
  657    if (0)
  658    VG_(printf)("gst %p %d, sh1 %p %d, "
  659                "sh2 %p %d, spill %p %d\n",
  660                (void*)a_vex, sz_vex,
  661                (void*)a_vexsh1, sz_vexsh1,
  662                (void*)a_vexsh2, sz_vexsh2,
  663                (void*)a_spill, sz_spill );
  664
         /* Every area has a 16-aligned size ... */
  665    vg_assert(VG_IS_16_ALIGNED(sz_vex));
  666    vg_assert(VG_IS_16_ALIGNED(sz_vexsh1));
  667    vg_assert(VG_IS_16_ALIGNED(sz_vexsh2));
  668    vg_assert(VG_IS_16_ALIGNED(sz_spill));
  669
         /* ... and starts at a 16-aligned address. */
  670    vg_assert(VG_IS_16_ALIGNED(a_vex));
  671    vg_assert(VG_IS_16_ALIGNED(a_vexsh1));
  672    vg_assert(VG_IS_16_ALIGNED(a_vexsh2));
  673    vg_assert(VG_IS_16_ALIGNED(a_spill));
  674
  675    /* Check that the guest state and its two shadows have the same
  676       size, and that there are no holes in between.  The latter is
  677       important because Memcheck assumes that it can reliably access
  678       the shadows by indexing off a pointer to the start of the
  679       primary guest state area. */
  680    vg_assert(sz_vex == sz_vexsh1);
  681    vg_assert(sz_vex == sz_vexsh2);
  682    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
  683    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
  684    /* Also check there's no hole between the second shadow area and
  685       the spill area. */
  686    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
  687    vg_assert(a_vex + 3 * sz_vex == a_spill);
  688
  689 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
  690    /* ppc guest_state vector regs must be 16 byte aligned for
  691       loads/stores.  This is important! */
  692    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VR0));
  693    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VR0));
  694    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VR0));
  695    /* be extra paranoid .. */
  696    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VR1));
  697    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VR1));
  698    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VR1));
  699 #  endif
  700
  701 #  if defined(VGA_arm)
  702    /* arm guest_state VFP regs must be 8 byte aligned for
  703       loads/stores. */
  704    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D0));
  705    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
  706    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
  707    /* be extra paranoid .. */
  708    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
  709    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
  710    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
  711 #  endif
  712 }
 713
 714
  715 /* Run the thread tid for a while, and return a VG_TRC_* value
  716    indicating why VG_(run_innerloop) stopped.

         tid must be valid, must be the running thread and must not be
         exiting (all asserted).  If control comes back through the
         thread's saved jump buffer instead of by a normal return, the
         result is VG_TRC_FAULT_SIGNAL.  As side effects, bbs_done is
         advanced by the number of blocks run (derived from
         VG_(dispatch_ctr)) and the tool's start_client_code and
         stop_client_code callbacks are invoked around the run. */
  717 static UInt run_thread_for_a_while ( ThreadId tid )
  718 {
         /* NOTE(review): the locals are volatile, presumably because
            they must keep their values if control re-enters through
            SCHEDSETJMP's jump buffer -- confirm. */
  719    volatile Int          jumped;
  720    volatile ThreadState* tst = NULL; /* stop gcc complaining */
  721    volatile UInt         trc;
  722    volatile Int          dispatch_ctr_SAVED;
  723    volatile Int          done_this_time;
  724
  725    /* Paranoia */
  726    vg_assert(VG_(is_valid_tid)(tid));
  727    vg_assert(VG_(is_running_thread)(tid));
  728    vg_assert(!VG_(is_exiting)(tid));
  729
  730    tst = VG_(get_ThreadState)(tid);
  731    do_pre_run_checks( (ThreadState*)tst );
  732    /* end Paranoia */
  733
         /* trc stays 0 unless VG_(run_innerloop) returns normally; the
            jumped path below relies on that.  The counter snapshot is
            used afterwards to compute how many blocks were run. */
  734    trc = 0;
  735    dispatch_ctr_SAVED = VG_(dispatch_ctr);
  736
  737 #  if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
  738    /* On AIX, we need to get a plausible value for SPRG3 for this
  739       thread, since it's used I think as a thread-state pointer.  It
  740       is presumably set by the kernel for each dispatched thread and
  741       cannot be changed by user space.  It therefore seems safe enough
  742       to copy the host's value of it into the guest state at the point
  743       the thread is dispatched.
  744       (Later): Hmm, looks like SPRG3 is only used in 32-bit mode.
  745       Oh well. */
  746    { UWord host_sprg3;
  747      __asm__ __volatile__( "mfspr %0,259\n" : "=b"(host_sprg3) );
  748     VG_(threads)[tid].arch.vex.guest_SPRG3_RO = host_sprg3;
  749     vg_assert(sizeof(VG_(threads)[tid].arch.vex.guest_SPRG3_RO) == sizeof(void*));
  750    }
  751 #  endif
  752
  753    /* there should be no undealt-with signals */
  754    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
  755
         /* Debug aid, never executed: lists the signals that are unblocked
            on entry to client code. */
  756    if (0) {
  757       vki_sigset_t m;
  758       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
  759       vg_assert(err == 0);
  760       VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
  761       for (i = 1; i <= _VKI_NSIG; i++)
  762          if (!VG_(sigismember)(&m, i))
  763             VG_(printf)("%d ", i);
  764       VG_(printf)("\n");
  765    }
  766
  767    // Tell the tool this thread is about to run client code
  768    VG_TRACK( start_client_code, tid, bbs_done );
  769
         /* From here until the flag is cleared again below, a fault is
            attributed to generated (client) code rather than to Valgrind
            itself. */
  770    vg_assert(VG_(in_generated_code) == False);
  771    VG_(in_generated_code) = True;
  772 #if defined(VGO_l4re)
  773 #if defined(L4RE_DEBUG_EXECUTION)
         /* NOTE(review): the last argument is the address of the
            guest_EIP field, not its contents -- confirm that this is what
            the "guest ip" label is meant to show. */
  774    //VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
  775    VG_(debugLog)(0, "sched", "bbs_done=%lld ip = %p guest ip = %p\n",
  776                                  bbs_done, (void *)VG_(get_IP)(tid),
  777                                  (void*)&tst->arch.vex.guest_EIP);
  778 #endif
  779 #endif
         /* Run the dispatcher under a saved jump target.  The second
            argument is 1 when VG_(clo_profile_flags) is positive and 0
            otherwise. */
  780    SCHEDSETJMP(
  781       tid,
  782       jumped,
  783       trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch.vex,
  784                                       VG_(clo_profile_flags) > 0 ? 1 : 0 )
  785    );
  786
  787    vg_assert(VG_(in_generated_code) == True);
  788    VG_(in_generated_code) = False;
  789
  790    if (jumped) {
  791       /* We get here if the client took a fault that caused our signal
  792          handler to longjmp. */
  793       vg_assert(trc == 0);
  794       trc = VG_TRC_FAULT_SIGNAL;
            /* Put the standard blocked-signal mask back (not needed on
               L4Re, where block_signals() does nothing). */
  795 #if !defined(VGO_l4re)
  796       block_signals();
  797 #endif
  798    }
  799
         /* VG_(dispatch_ctr) counts downwards, so the difference from
            the snapshot is the number of blocks run this time. */
  800    done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 0;
  801
  802    vg_assert(done_this_time >= 0);
  803    bbs_done += (ULong)done_this_time;
  804
  805    // Tell the tool this thread has stopped running client code
  806    VG_TRACK( stop_client_code, tid, bbs_done );
  807
  808    return trc;
  809 }
 810
 811
 812 /* Run a no-redir translation just once, and return the resulting
 813    VG_TRC_* value. */
 814 static UInt run_noredir_translation ( Addr hcode, ThreadId tid )
 815 {
 816    volatile Int          jumped;
 817    volatile ThreadState* tst;
 818    volatile UWord        argblock[4];
 819    volatile UInt         retval;
 820
 821    /* Paranoia */
 822    vg_assert(VG_(is_valid_tid)(tid));
 823    vg_assert(VG_(is_running_thread)(tid));
 824    vg_assert(!VG_(is_exiting)(tid));
 825
 826    tst = VG_(get_ThreadState)(tid);
 827    do_pre_run_checks( (ThreadState*)tst );
 828    /* end Paranoia */
 829
 830 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
 831    /* I don't think we need to clear this thread's guest_RESVN here,
 832       because we can only get here if run_thread_for_a_while() has
 833       been used immediately before, on this same thread. */
 834 #  endif
 835
 836    /* There can be 3 outcomes from VG_(run_a_noredir_translation):
 837
 838       - a signal occurred and the sighandler longjmp'd.  Then both [2]
 839         and [3] are unchanged - hence zero.
 840
 841       - translation ran normally, set [2] (next guest IP) and set [3]
 842         to whatever [1] was beforehand, indicating a normal (boring)
 843         jump to the next block.
 844
 845       - translation ran normally, set [2] (next guest IP) and set [3]
 846         to something different from [1] beforehand, which indicates a
 847         TRC_ value.
 848    */
 849    argblock[0] = (UWord)hcode;
 850    argblock[1] = (UWord)&VG_(threads)[tid].arch.vex;
 851    argblock[2] = 0; /* next guest IP is written here */
 852    argblock[3] = 0; /* guest state ptr afterwards is written here */
 853
 854    // Tell the tool this thread is about to run client code
 855    VG_TRACK( start_client_code, tid, bbs_done );
 856
 857    vg_assert(VG_(in_generated_code) == False);
 858    VG_(in_generated_code) = True;
 859
 860    SCHEDSETJMP(
 861       tid,
 862       jumped,
 863       VG_(run_a_noredir_translation)( &argblock[0] )
 864    );
 865
 866    VG_(in_generated_code) = False;
 867
 868    if (jumped) {
 869       /* We get here if the client took a fault that caused our signal
 870          handler to longjmp. */
 871       vg_assert(argblock[2] == 0); /* next guest IP was not written */
 872       vg_assert(argblock[3] == 0); /* trc was not written */
 873 #if !defined(VGO_l4re)
 874       block_signals();
 875 #endif
 876       retval = VG_TRC_FAULT_SIGNAL;
 877    } else {
 878       /* store away the guest program counter */
 879       VG_(set_IP)( tid, argblock[2] );
 880       if (argblock[3] == argblock[1])
 881          /* the guest state pointer afterwards was unchanged */
 882          retval = VG_TRC_BORING;
 883       else
 884          retval = (UInt)argblock[3];
 885    }
 886
 887    bbs_done++;
 888
 889    // Tell the tool this thread has stopped running client code
 890    VG_TRACK( stop_client_code, tid, bbs_done );
 891
 892    return retval;
 893 }
 894
 895
 896 /* ---------------------------------------------------------------------
 897    The scheduler proper.
 898    ------------------------------------------------------------------ */
 899
 900 static void handle_tt_miss ( ThreadId tid )
 901 {
 902    Bool found;
 903    Addr ip = VG_(get_IP)(tid);
 904
 905    /* Trivial event.  Miss in the fast-cache.  Do a full
 906       lookup for it. */
 907    found = VG_(search_transtab)( NULL, ip, True/*upd_fast_cache*/ );
 908    if (UNLIKELY(!found)) {
 909 #if defined(VGO_l4re)
 910 #if defined(L4RE_DEBUG_EXECUTION)
 911        //VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
 912        VG_(debugLog)(0, "sched","tid=%d, ip=%p, bbs_done=%lld\n", tid, (void *)ip, bbs_done);
 913 //           (VG_(translate)( tid, ip, /*debug*/True, 1 /*0xffffffff*/ /*0*//* verbose*/,
 914 //                        bbs_done, True/*allow redirection*/ ));
 915 #endif
 916 #endif
 917       /* Not found; we need to request a translation. */
 918       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
 919                           bbs_done, True/*allow redirection*/ )) {
 920          found = VG_(search_transtab)( NULL, ip, True );
 921          vg_assert2(found, "VG_TRC_INNER_FASTMISS: missing tt_fast entry");
 922
 923       } else {
 924          // If VG_(translate)() fails, it's because it had to throw a
 925          // signal because the client jumped to a bad address.  That
 926          // means that either a signal has been set up for delivery,
 927          // or the thread has been marked for termination.  Either
 928          // way, we just need to go back into the scheduler loop.
 929       }
 930    }
 931 }
 932
 933 static void handle_syscall(ThreadId tid, UInt trc)
 934 {
 935    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
 936    Bool jumped;
 937
 938    /* Syscall may or may not block; either way, it will be
 939       complete by the time this call returns, and we'll be
 940       runnable again.  We could take a signal while the
 941       syscall runs. */
 942
 943    if (VG_(clo_sanity_level >= 3))
 944       VG_(am_do_sync_check)("(BEFORE SYSCALL)",__FILE__,__LINE__);
 945
 946    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
 947
 948    if (VG_(clo_sanity_level >= 3))
 949       VG_(am_do_sync_check)("(AFTER SYSCALL)",__FILE__,__LINE__);
 950
 951    if (!VG_(is_running_thread)(tid))
 952       VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
 953                   tid, VG_(running_tid), tid, tst->status);
 954    vg_assert(VG_(is_running_thread)(tid));
 955
 956 #if !defined(VGO_l4re)
 957    if (jumped) {
 958       block_signals();
 959       VG_(poll_signals)(tid);
 960    }
 961 #endif
 962 }
 963
 964 /* tid just requested a jump to the noredir version of its current
 965    program counter.  So make up that translation if needed, run it,
 966    and return the resulting thread return code. */
 967 static UInt/*trc*/ handle_noredir_jump ( ThreadId tid )
 968 {
 969    AddrH hcode = 0;
 970    Addr  ip    = VG_(get_IP)(tid);
 971
 972    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
 973    if (!found) {
 974       /* Not found; we need to request a translation. */
 975       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
 976                           False/*NO REDIRECTION*/ )) {
 977
 978          found = VG_(search_unredir_transtab)( &hcode, ip );
 979          vg_assert2(found, "unredir translation missing after creation?!");
 980
 981       } else {
 982          // If VG_(translate)() fails, it's because it had to throw a
 983          // signal because the client jumped to a bad address.  That
 984          // means that either a signal has been set up for delivery,
 985          // or the thread has been marked for termination.  Either
 986          // way, we just need to go back into the scheduler loop.
 987          return VG_TRC_BORING;
 988       }
 989
 990    }
 991
 992    vg_assert(found);
 993    vg_assert(hcode != 0);
 994
 995    /* Otherwise run it and return the resulting VG_TRC_* value. */
 996    return run_noredir_translation( hcode, tid );
 997 }
 998
 999
1000 /*
1001    Run a thread until it wants to exit.
1002
1003    We assume that the caller has already called VG_(acquire_BigLock) for
1004    us, so we own the VCPU.  Also, all signals are blocked.
1005  */
1006 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1007 {
1008    UInt     trc;
1009    ThreadState *tst = VG_(get_ThreadState)(tid);
1010
1011    if (VG_(clo_trace_sched))
1012       print_sched_event(tid, "entering VG_(scheduler)");
1013
1014 #if !defined(VGO_l4re)
1015    /* set the proper running signal mask */
1016    block_signals();
1017 #endif
1018
1019    vg_assert(VG_(is_running_thread)(tid));
1020
1021    VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
1022
1023    while (!VG_(is_exiting)(tid)) {
1024
1025       if (VG_(dispatch_ctr) == 1) {
1026
1027 #        if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
1028          /* Note: count runnable threads before dropping The Lock. */
1029          Int rt = VG_(count_runnable_threads)();
1030 #        endif
1031
1032          /* Our slice is done, so yield the CPU to another thread.  On
1033             Linux, this doesn't sleep between sleeping and running,
1034             since that would take too much time.  On AIX, we have to
1035             prod the scheduler to get it consider other threads; not
1036             doing so appears to cause very long delays before other
1037             runnable threads get rescheduled. */
1038
1039          /* 4 July 06: it seems that a zero-length nsleep is needed to
1040             cause async thread cancellation (canceller.c) to terminate
1041             in finite time; else it is in some kind of race/starvation
1042             situation and completion is arbitrarily delayed (although
1043             this is not a deadlock).
1044
1045             Unfortunately these sleeps cause MPI jobs not to terminate
1046             sometimes (some kind of livelock).  So sleeping once
1047             every N opportunities appears to work. */
1048
1049          /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1050             sys_yield also helps the problem, whilst not crashing apps. */
1051
1052          VG_(release_BigLock)(tid, VgTs_Yielding,
1053                                    "VG_(scheduler):timeslice");
1054          /* ------------ now we don't have The Lock ------------ */
1055
1056 #        if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
1057          { static Int ctr=0;
1058            vg_assert(__NR_AIX5__nsleep != __NR_AIX5_UNKNOWN);
1059            vg_assert(__NR_AIX5_yield   != __NR_AIX5_UNKNOWN);
1060            if (1 && rt > 0 && ((++ctr % 3) == 0)) {
1061               //struct vki_timespec ts;
1062               //ts.tv_sec = 0;
1063               //ts.tv_nsec = 0*1000*1000;
1064               //VG_(do_syscall2)(__NR_AIX5__nsleep, (UWord)&ts, (UWord)NULL);
1065               VG_(do_syscall0)(__NR_AIX5_yield);
1066            }
1067          }
1068 #        endif
1069
1070          VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1071          /* ------------ now we do have The Lock ------------ */
1072
1073          /* OK, do some relatively expensive housekeeping stuff */
1074          scheduler_sanity(tid);
1075          VG_(sanity_check_general)(False);
1076
1077          /* Look for any pending signals for this thread, and set them up
1078             for delivery */
1079 #if !defined(VGO_l4re)
1080          VG_(poll_signals)(tid);
1081 #endif
1082
1083          if (VG_(is_exiting)(tid))
1084             break;                /* poll_signals picked up a fatal signal */
1085
1086          /* For stats purposes only. */
1087          n_scheduling_events_MAJOR++;
1088
1089          /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
1090             that it decrements the counter before testing it for zero, so
1091             that if tst->dispatch_ctr is set to N you get at most N-1
1092             iterations.  Also this means that tst->dispatch_ctr must
1093             exceed zero before entering the innerloop.  Also also, the
1094             decrement is done before the bb is actually run, so you
1095             always get at least one decrement even if nothing happens. */
1096          VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
1097
1098          /* paranoia ... */
1099          vg_assert(tst->tid == tid);
1100          vg_assert(tst->os_state.lwpid == VG_(gettid)());
1101       }
1102
1103       /* For stats purposes only. */
1104       n_scheduling_events_MINOR++;
1105
1106       if (0)
1107          VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
1108                                    tid, VG_(dispatch_ctr) - 1 );
1109
1110       trc = run_thread_for_a_while ( tid );
1111
1112       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1113          Char buf[50];
1114          VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc));
1115          print_sched_event(tid, buf);
1116       }
1117
1118       if (trc == VEX_TRC_JMP_NOREDIR) {
1119          /* If we got a request to run a no-redir version of
1120             something, do so now -- handle_noredir_jump just (creates
1121             and) runs that one translation.  The flip side is that the
1122             noredir translation can't itself return another noredir
1123             request -- that would be nonsensical.  It can, however,
1124             return VG_TRC_BORING, which just means keep going as
1125             normal. */
1126          trc = handle_noredir_jump(tid);
1127          vg_assert(trc != VEX_TRC_JMP_NOREDIR);
1128       }
1129
1130       switch (trc) {
1131       case VG_TRC_BORING:
1132          /* no special event, just keep going. */
1133          break;
1134
1135       case VG_TRC_INNER_FASTMISS:
1136          vg_assert(VG_(dispatch_ctr) > 1);
1137          handle_tt_miss(tid);
1138          break;
1139
1140       case VEX_TRC_JMP_CLIENTREQ:
1141          do_client_request(tid);
1142          break;
1143
1144 #if defined(VGO_l4re)
1145 #if 1
1146 #define DEBUG_UTCB
1147 #else
1148 #define DEBUG_UTCB \
1149          if (1) \
1150             VG_(debugLog)(0, "sched", "utcb access via client " \
1151                                       "virtual utcb of thread %d @ %p -> eax\n", \
1152                                       tid, \
1153                                       (Addr)ts_utcb(&VG_(threads)[tid]));
1154 #endif
1155 #define TRACK_UTCB \
1156          VG_TRACK( new_mem_startup, (Addr)ts_utcb(&VG_(threads)[tid]), L4RE_UTCB_SIZE, 1, 1, 1, 0 );
1157
1158       case VEX_TRC_JMP_L4_UTCB_EAX:
1159          VG_(threads)[tid].arch.vex.guest_EAX = (Addr)ts_utcb(&VG_(threads)[tid]);
1160          DEBUG_UTCB
1161          TRACK_UTCB
1162          break;
1163       case VEX_TRC_JMP_L4_UTCB_EBX:
1164          VG_(threads)[tid].arch.vex.guest_EBX = (Addr)ts_utcb(&VG_(threads)[tid]);
1165          DEBUG_UTCB
1166          TRACK_UTCB
1167          break;
1168       case VEX_TRC_JMP_L4_UTCB_ECX:
1169          VG_(threads)[tid].arch.vex.guest_ECX = (Addr)ts_utcb(&VG_(threads)[tid]);
1170          DEBUG_UTCB
1171          TRACK_UTCB
1172          break;
1173       case VEX_TRC_JMP_L4_UTCB_EDX:
1174          VG_(threads)[tid].arch.vex.guest_EDX = (Addr)ts_utcb(&VG_(threads)[tid]);
1175          DEBUG_UTCB
1176          TRACK_UTCB
1177          break;
1178       case VEX_TRC_JMP_L4_UTCB_EDI:
1179          VG_(threads)[tid].arch.vex.guest_EDI = (Addr)ts_utcb(&VG_(threads)[tid]);
1180          DEBUG_UTCB
1181          TRACK_UTCB
1182          break;
1183       case VEX_TRC_JMP_L4_UTCB_ESI:
1184          VG_(threads)[tid].arch.vex.guest_ESI = (Addr)ts_utcb(&VG_(threads)[tid]);
1185          DEBUG_UTCB
1186          TRACK_UTCB
1187          break;
1188           case VEX_TRC_JMP_SYS_INT48:     /* L4Re: Invoke */
1189           case VEX_TRC_JMP_SYS_INT50:     /* L4Re: Debug */
1190           case VEX_TRC_JMP_SYS_INT128:    /* L4Re/UX: INT80 */
1191           case VEX_TRC_JMP_L4_UD2:        /* L4Re: UD2 */
1192           case VEX_TRC_JMP_L4_ARTIFICIAL: /* L4Re: artificial trap */
1193           handle_syscall(tid, trc);
1194           if (VG_(clo_sanity_level) > 2)
1195               VG_(sanity_check_general)(True); /* sanity-check every syscall */
1196          break;
1197 #else
1198       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
1199       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
1200       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
1201       case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
1202          handle_syscall(tid, trc);
1203          if (VG_(clo_sanity_level) > 2)
1204             VG_(sanity_check_general)(True); /* sanity-check every syscall */
1205          break;
1206 #endif
1207
1208       case VEX_TRC_JMP_YIELD:
1209          /* Explicit yield, because this thread is in a spin-lock
1210             or something.  Only let the thread run for a short while
1211             longer.  Because swapping to another thread is expensive,
1212             we're prepared to let this thread eat a little more CPU
1213             before swapping to another.  That means that short term
1214             spins waiting for hardware to poke memory won't cause a
1215             thread swap. */
1216          if (VG_(dispatch_ctr) > 2000)
1217             VG_(dispatch_ctr) = 2000;
1218          break;
1219
1220       case VG_TRC_INNER_COUNTERZERO:
1221          /* Timeslice is out.  Let a new thread be scheduled. */
1222          vg_assert(VG_(dispatch_ctr) == 1);
1223          break;
1224
1225       case VG_TRC_FAULT_SIGNAL:
1226          /* Everything should be set up (either we're exiting, or
1227             about to start in a signal handler). */
1228          break;
1229
1230       case VEX_TRC_JMP_MAPFAIL:
1231          /* Failure of arch-specific address translation (x86/amd64
1232             segment override use) */
1233          /* jrs 2005 03 11: is this correct? */
1234          VG_(synth_fault)(tid);
1235          break;
1236
1237       case VEX_TRC_JMP_EMWARN: {
1238          static Int  counts[EmWarn_NUMBER];
1239          static Bool counts_initted = False;
1240          VexEmWarn ew;
1241          HChar*    what;
1242          Bool      show;
1243          Int       q;
1244          if (!counts_initted) {
1245             counts_initted = True;
1246             for (q = 0; q < EmWarn_NUMBER; q++)
1247                counts[q] = 0;
1248          }
1249          ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
1250          what = (ew < 0 || ew >= EmWarn_NUMBER)
1251                    ? "unknown (?!)"
1252                    : LibVEX_EmWarn_string(ew);
1253          show = (ew < 0 || ew >= EmWarn_NUMBER)
1254                    ? True
1255                    : counts[ew]++ < 3;
1256          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1257             VG_(message)( Vg_UserMsg,
1258                           "Emulation warning: unsupported action:\n");
1259             VG_(message)( Vg_UserMsg, "  %s\n", what);
1260             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1261          }
1262          break;
1263       }
1264
1265       case VEX_TRC_JMP_EMFAIL: {
1266          VexEmWarn ew;
1267          HChar*    what;
1268          ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
1269          what = (ew < 0 || ew >= EmWarn_NUMBER)
1270                    ? "unknown (?!)"
1271                    : LibVEX_EmWarn_string(ew);
1272          VG_(message)( Vg_UserMsg,
1273                        "Emulation fatal error -- Valgrind cannot continue:\n");
1274          VG_(message)( Vg_UserMsg, "  %s\n", what);
1275          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1276          VG_(message)(Vg_UserMsg, "\n");
1277          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
1278          VG_(message)(Vg_UserMsg, "\n");
1279          VG_(exit)(1);
1280          break;
1281       }
1282
1283       case VEX_TRC_JMP_SIGTRAP:
1284 #if defined(VGO_l4re)
1285          /* In case of l4re int 0x3 means enter_kdebug().*/
1286          handle_syscall(tid, trc);
1287
1288          if (VG_(clo_sanity_level) > 2)
1289             VG_(sanity_check_general)(True); /* sanity-check every syscall */
1290 #else
1291          VG_(synth_sigtrap)(tid);
1292 #endif
1293          break;
1294
1295       case VEX_TRC_JMP_SIGSEGV:
1296          VG_(synth_fault)(tid);
1297          break;
1298
1299       case VEX_TRC_JMP_SIGBUS:
1300          VG_(synth_sigbus)(tid);
1301          break;
1302
1303       case VEX_TRC_JMP_NODECODE:
1304          VG_(umsg)(
1305             "valgrind: Unrecognised instruction at address %#lx.\n",
1306             VG_(get_IP)(tid));
1307 #define M(a) VG_(umsg)(a "\n");
1308    M("Your program just tried to execute an instruction that Valgrind" );
1309    M("did not recognise.  There are two possible reasons for this."    );
1310    M("1. Your program has a bug and erroneously jumped to a non-code"  );
1311    M("   location.  If you are running Memcheck and you just saw a"    );
1312    M("   warning about a bad jump, it's probably your program's fault.");
1313    M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1314    M("   i.e. it's Valgrind's fault.  If you think this is the case or");
1315    M("   you are not sure, please let us know and we'll try to fix it.");
1316    M("Either way, Valgrind will now raise a SIGILL signal which will"  );
1317    M("probably kill your program."                                     );
1318 #undef M
1319          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
1320          break;
1321
1322       case VEX_TRC_JMP_TINVAL:
1323          VG_(discard_translations)(
1324             (Addr64)VG_(threads)[tid].arch.vex.guest_TISTART,
1325             VG_(threads)[tid].arch.vex.guest_TILEN,
1326             "scheduler(VEX_TRC_JMP_TINVAL)"
1327          );
1328          if (0)
1329             VG_(printf)("dump translations done.\n");
1330          break;
1331
1332       case VG_TRC_INVARIANT_FAILED:
1333          /* This typically happens if, after running generated code,
1334             it is detected that host CPU settings (eg, FPU/Vector
1335             control words) are not as they should be.  Vex's code
1336             generation specifies the state such control words should
1337             be in on entry to Vex-generated code, and they should be
1338             unchanged on exit from it.  Failure of this assertion
1339             usually means a bug in Vex's code generation. */
1340          //{ UInt xx;
1341          //  __asm__ __volatile__ (
1342          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
1343          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
1344          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1345          //}
1346          vg_assert2(0, "VG_(scheduler), phase 3: "
1347                        "run_innerloop detected host "
1348                        "state invariant failure", trc);
1349
1350       case VEX_TRC_JMP_SYS_SYSENTER:
1351          /* Do whatever simulation is appropriate for an x86 sysenter
1352             instruction.  Note that it is critical to set this thread's
1353             guest_EIP to point at the code to execute after the
1354             sysenter, since Vex-generated code will not have set it --
1355             vex does not know what it should be.  Vex sets the next
1356             address to zero, so if you don't set guest_EIP, the thread
1357             will jump to zero afterwards and probably die as a result. */
1358 #        if defined(VGP_x86_linux)
1359          vg_assert2(0, "VG_(scheduler), phase 3: "
1360                        "sysenter_x86 on x86-linux is not supported");
1361 #        elif defined(VGP_x86_darwin)
1362          /* return address in client edx */
1363          VG_(threads)[tid].arch.vex.guest_EIP
1364             = VG_(threads)[tid].arch.vex.guest_EDX;
1365          handle_syscall(tid, trc);
1366 #        elif defined(VGP_x86_l4re)
1367          /* nearly the same. L4Re stores return EIP in %ebx */
1368          VG_(threads)[tid].arch.vex.guest_EIP
1369             = VG_(threads)[tid].arch.vex.guest_EBX;
1370          handle_syscall(tid, trc);
1371 #        else
1372          vg_assert2(0, "VG_(scheduler), phase 3: "
1373                        "sysenter_x86 on non-x86 platform?!?!");
1374 #        endif
1375          break;
1376
1377       default:
1378          vg_assert2(0, "VG_(scheduler), phase 3: "
1379                        "unexpected thread return code (%u)", trc);
1380          /* NOTREACHED */
1381          break;
1382
1383       } /* switch (trc) */
1384    }
1385
1386    if (VG_(clo_trace_sched))
1387       print_sched_event(tid, "exiting VG_(scheduler)");
1388
1389    vg_assert(VG_(is_exiting)(tid));
1390
1391    return tst->exitreason;
1392 }
1393
1394
1395 /*
1396    This causes all threads to forceably exit.  They aren't actually
1397    dead by the time this returns; you need to call
1398    VG_(reap_threads)() to wait for them.
1399  */
1400 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1401 {
1402    ThreadId tid;
1403
1404    vg_assert(VG_(is_running_thread)(me));
1405
1406    for (tid = 1; tid < VG_N_THREADS; tid++) {
1407       if (tid == me
1408           || VG_(threads)[tid].status == VgTs_Empty)
1409          continue;
1410       if (0)
1411          VG_(printf)(
1412             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
1413
1414       VG_(threads)[tid].exitreason = src;
1415       if (src == VgSrc_FatalSig)
1416          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1417       VG_(get_thread_out_of_syscall)(tid);
1418    }
1419 }
1420
1421
1422 /* ---------------------------------------------------------------------
1423    Specifying shadow register values
1424    ------------------------------------------------------------------ */
1425
1426 #if defined(VGA_x86)
1427 #  define VG_CLREQ_ARGS       guest_EAX
1428 #  define VG_CLREQ_RET        guest_EDX
1429 #elif defined(VGA_amd64)
1430 #  define VG_CLREQ_ARGS       guest_RAX
1431 #  define VG_CLREQ_RET        guest_RDX
1432 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
1433 #  define VG_CLREQ_ARGS       guest_GPR4
1434 #  define VG_CLREQ_RET        guest_GPR3
1435 #elif defined(VGA_arm)
1436 #  define VG_CLREQ_ARGS       guest_R4
1437 #  define VG_CLREQ_RET        guest_R3
1438 #else
1439 #  error Unknown arch
1440 #endif
1441
1442 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
1443 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
1444 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
1445
1446 // These macros write a value to a client's thread register, and tell the
1447 // tool that it's happened (if necessary).
1448
1449 #define SET_CLREQ_RETVAL(zztid, zzval) \
1450    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1451         VG_TRACK( post_reg_write, \
1452                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1453    } while (0)
1454
1455 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1456    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1457         VG_TRACK( post_reg_write_clientcall_return, \
1458                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
1459    } while (0)
1460
1461
1462 /* ---------------------------------------------------------------------
1463    Handle client requests.
1464    ------------------------------------------------------------------ */
1465
1466 // OS-specific(?) client requests
1467 static Bool os_client_request(ThreadId tid, UWord *args)
1468 {
1469    Bool handled = True;
1470
1471    vg_assert(VG_(is_running_thread)(tid));
1472
1473    switch(args[0]) {
1474    case VG_USERREQ__LIBC_FREERES_DONE:
1475       /* This is equivalent to an exit() syscall, but we don't set the
1476          exitcode (since it might already be set) */
1477       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1478          VG_(message)(Vg_DebugMsg,
1479                       "__libc_freeres() done; really quitting!\n");
1480       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1481       break;
1482
1483    default:
1484       handled = False;
1485       break;
1486    }
1487
1488    return handled;
1489 }
1490
1491
1492 /* Do a client request for the thread tid.  After the request, tid may
1493    or may not still be runnable; if not, the scheduler will have to
1494    choose a new thread to run.
1495 */
1496 static
1497 void do_client_request ( ThreadId tid )
1498 {
1499    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1500    UWord req_no = arg[0];
1501
1502    if (0)
1503       VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
1504    switch (req_no) {
1505
1506       case VG_USERREQ__CLIENT_CALL0: {
1507          UWord (*f)(ThreadId) = (void*)arg[1];
1508          if (f == NULL)
1509             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1510          else
1511             SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1512          break;
1513       }
1514       case VG_USERREQ__CLIENT_CALL1: {
1515          UWord (*f)(ThreadId, UWord) = (void*)arg[1];
1516          if (f == NULL)
1517             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1518          else
1519             SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1520          break;
1521       }
1522       case VG_USERREQ__CLIENT_CALL2: {
1523          UWord (*f)(ThreadId, UWord, UWord) = (void*)arg[1];
1524          if (f == NULL)
1525             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1526          else
1527             SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1528          break;
1529       }
1530       case VG_USERREQ__CLIENT_CALL3: {
1531          UWord (*f)(ThreadId, UWord, UWord, UWord) = (void*)arg[1];
1532          if (f == NULL)
1533             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1534          else
1535             SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1536          break;
1537       }
1538
1539       // Nb: this looks like a circular definition, because it kind of is.
1540       // See comment in valgrind.h to understand what's going on.
1541       case VG_USERREQ__RUNNING_ON_VALGRIND:
1542          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1543          break;
1544
1545       case VG_USERREQ__PRINTF: {
1546          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1547             _VALIST_BY_REF version instead */
1548          if (sizeof(va_list) != sizeof(UWord))
1549             goto va_list_casting_error_NORETURN;
1550          union {
1551             va_list vargs;
1552             unsigned long uw;
1553          } u;
1554          u.uw = (unsigned long)arg[2];
1555          Int count =
1556             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
1557          VG_(message_flush)();
1558          SET_CLREQ_RETVAL( tid, count );
1559          break;
1560       }
1561
1562       case VG_USERREQ__PRINTF_BACKTRACE: {
1563          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1564             _VALIST_BY_REF version instead */
1565          if (sizeof(va_list) != sizeof(UWord))
1566             goto va_list_casting_error_NORETURN;
1567          union {
1568             va_list vargs;
1569             unsigned long uw;
1570          } u;
1571          u.uw = (unsigned long)arg[2];
1572          Int count =
1573             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
1574          VG_(message_flush)();
1575          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1576          SET_CLREQ_RETVAL( tid, count );
1577          break;
1578       }
1579
1580       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1581          va_list* vargsp = (va_list*)arg[2];
1582          Int count =
1583             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
1584          VG_(message_flush)();
1585          SET_CLREQ_RETVAL( tid, count );
1586          break;
1587       }
1588
1589       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1590          va_list* vargsp = (va_list*)arg[2];
1591          Int count =
1592             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
1593          VG_(message_flush)();
1594          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1595          SET_CLREQ_RETVAL( tid, count );
1596          break;
1597       }
1598
1599       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1600          va_list* vargsp = (va_list*)arg[2];
1601          Int count =
1602             VG_(vmessage)( Vg_DebugMsg, (char *)arg[1], *vargsp );
1603          VG_(message_flush)();
1604          SET_CLREQ_RETVAL( tid, count );
1605          break;
1606       }
1607
1608       case VG_USERREQ__ADD_IFUNC_TARGET: {
1609          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1610          SET_CLREQ_RETVAL( tid, 0);
1611          break; }
1612
1613       case VG_USERREQ__STACK_REGISTER: {
1614          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1615          SET_CLREQ_RETVAL( tid, sid );
1616          break; }
1617
1618       case VG_USERREQ__STACK_DEREGISTER: {
1619          VG_(deregister_stack)(arg[1]);
1620          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1621          break; }
1622
1623       case VG_USERREQ__STACK_CHANGE: {
1624          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1625          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1626          break; }
1627
1628       case VG_USERREQ__GET_MALLOCFUNCS: {
1629          struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1630
1631          info->tl_malloc               = VG_(tdict).tool_malloc;
1632          info->tl_calloc               = VG_(tdict).tool_calloc;
1633          info->tl_realloc              = VG_(tdict).tool_realloc;
1634          info->tl_memalign             = VG_(tdict).tool_memalign;
1635          info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
1636          info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
1637          info->tl_free                 = VG_(tdict).tool_free;
1638          info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
1639          info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1640          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
1641
1642          info->mallinfo                = VG_(mallinfo);
1643          info->clo_trace_malloc        = VG_(clo_trace_malloc);
1644
1645          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1646
1647          break;
1648       }
1649
1650       /* Requests from the client program */
1651
1652       case VG_USERREQ__DISCARD_TRANSLATIONS:
1653          if (VG_(clo_verbosity) > 2)
1654             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
1655                          " addr %p,  len %lu\n",
1656                          (void*)arg[1], arg[2] );
1657
1658          VG_(discard_translations)(
1659             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
1660          );
1661
1662          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1663          break;
1664
1665       case VG_USERREQ__COUNT_ERRORS:
1666          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
1667          break;
1668
1669       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
1670          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
1671          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1672          break;
1673
1674       case VG_USERREQ__MALLOCLIKE_BLOCK:
1675       case VG_USERREQ__FREELIKE_BLOCK:
1676          // Ignore them if the addr is NULL;  otherwise pass onto the tool.
1677          if (!arg[1]) {
1678             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1679             break;
1680          } else {
1681             goto my_default;
1682          }
1683
1684       default:
1685        my_default:
1686          if (os_client_request(tid, arg)) {
1687             // do nothing, os_client_request() handled it
1688          } else if (VG_(needs).client_requests) {
1689             UWord ret;
1690
1691             if (VG_(clo_verbosity) > 2)
1692                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
1693                            arg[0], (void*)arg[1], arg[2] );
1694
1695             if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
1696                SET_CLREQ_RETVAL(tid, ret);
1697          } else {
1698             static Bool whined = False;
1699
1700             if (!whined && VG_(clo_verbosity) > 2) {
1701                // Allow for requests in core, but defined by tools, which
1702                // have 0 and 0 in their two high bytes.
1703                Char c1 = (arg[0] >> 24) & 0xff;
1704                Char c2 = (arg[0] >> 16) & 0xff;
1705                if (c1 == 0) c1 = '_';
1706                if (c2 == 0) c2 = '_';
1707                VG_(message)(Vg_UserMsg, "Warning:\n"
1708                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
1709                    "  VG_(needs).client_requests should be set?\n",
1710                             arg[0], c1, c2, arg[0] & 0xffff);
1711                whined = True;
1712             }
1713          }
1714          break;
1715    }
1716    return;
1717
1718    /*NOTREACHED*/
1719   va_list_casting_error_NORETURN:
1720    VG_(umsg)(
1721       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
1722       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
1723       "on a platform where they cannot be supported.  Please use the\n"
1724       "equivalent _VALIST_BY_REF versions instead.\n"
1725       "\n"
1726       "This is a binary-incompatible change in Valgrind's client request\n"
1727       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
1728       "are expected to almost never see this message.  The only case in\n"
1729       "which you might see this message is if your code uses the macros\n"
1730       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
1731       "to recompile such code, using the header files from this version of\n"
1732       "Valgrind, and not any previous version.\n"
1733       "\n"
1734       "If you see this mesage in any other circumstances, it is probably\n"
1735       "a bug in Valgrind.  In this case, please file a bug report at\n"
1736       "\n"
1737       "   http://www.valgrind.org/support/bug_reports.html\n"
1738       "\n"
1739       "Will now abort.\n"
1740    );
1741    vg_assert(0);
1742 }
1743
1744
1745 /* ---------------------------------------------------------------------
1746    Sanity checking (permanently engaged)
1747    ------------------------------------------------------------------ */
1748
1749 /* Internal consistency checks on the sched structures. */
1750 static
1751 void scheduler_sanity ( ThreadId tid )
1752 {
1753    Bool bad = False;
1754    static UInt lasttime = 0;
1755    UInt now;
1756    Int lwpid = VG_(gettid)();
1757
1758    if (!VG_(is_running_thread)(tid)) {
1759       VG_(message)(Vg_DebugMsg,
1760                    "Thread %d is supposed to be running, "
1761                    "but doesn't own the_BigLock (owned by %d)\n",
1762                    tid, VG_(running_tid));
1763       bad = True;
1764    }
1765
1766    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
1767       VG_(message)(Vg_DebugMsg,
1768                    "Thread %d supposed to be in LWP %d, but we're actually %d\n",
1769                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
1770       bad = True;
1771    }
1772
1773 #if !defined(VGO_darwin)
1774    // GrP fixme
1775    if (lwpid != the_BigLock.owner_lwpid) {
1776       VG_(message)(Vg_DebugMsg,
1777                    "Thread (LWPID) %d doesn't own the_BigLock\n",
1778                    tid);
1779       bad = True;
1780    }
1781 #endif
1782
1783    /* Periodically show the state of all threads, for debugging
1784       purposes. */
1785    now = VG_(read_millisecond_timer)();
1786    if (0 && (!bad) && (lasttime + 4000/*ms*/ <= now)) {
1787       lasttime = now;
1788       VG_(printf)("\n------------ Sched State at %d ms ------------\n",
1789                   (Int)now);
1790       VG_(show_sched_status)();
1791    }
1792
1793    /* core_panic also shows the sched status, which is why we don't
1794       show it above if bad==True. */
1795    if (bad)
1796       VG_(core_panic)("scheduler_sanity: failed");
1797 }
1798
1799 void VG_(sanity_check_general) ( Bool force_expensive )
1800 {
1801    ThreadId tid;
1802
1803    static UInt next_slow_check_at = 1;
1804    static UInt slow_check_interval = 25;
1805
1806    if (VG_(clo_sanity_level) < 1) return;
1807
1808    /* --- First do all the tests that we can do quickly. ---*/
1809
1810    sanity_fast_count++;
1811
1812    /* Check stuff pertaining to the memory check system. */
1813
1814    /* Check that nobody has spuriously claimed that the first or
1815       last 16 pages of memory have become accessible [...] */
1816    if (VG_(needs).sanity_checks) {
1817       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
1818    }
1819
1820    /* --- Now some more expensive checks. ---*/
1821
1822    /* Once every now and again, check some more expensive stuff.
1823       Gradually increase the interval between such checks so as not to
1824       burden long-running programs too much. */
1825    if ( force_expensive
1826         || VG_(clo_sanity_level) > 1
1827         || (VG_(clo_sanity_level) == 1
1828             && sanity_fast_count == next_slow_check_at)) {
1829
1830       if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
1831
1832       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
1833       slow_check_interval++;
1834       sanity_slow_count++;
1835
1836       if (VG_(needs).sanity_checks) {
1837           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
1838       }
1839
1840       /* Look for stack overruns.  Visit all threads. */
1841       for (tid = 1; tid < VG_N_THREADS; tid++) {
1842          SizeT    remains;
1843          VgStack* stack;
1844
1845          if (VG_(threads)[tid].status == VgTs_Empty ||
1846              VG_(threads)[tid].status == VgTs_Zombie)
1847             continue;
1848
1849          stack
1850             = (VgStack*)
1851               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
1852          remains
1853             = VG_(am_get_VgStack_unused_szB)(stack);
1854          if (remains < VKI_PAGE_SIZE)
1855             VG_(message)(Vg_DebugMsg,
1856                          "WARNING: Thread %d is within %ld bytes "
1857                          "of running out of stack!\n",
1858                          tid, remains);
1859       }
1860    }
1861
1862    if (VG_(clo_sanity_level) > 1) {
1863       /* Check sanity of the low-level memory manager.  Note that bugs
1864          in the client's code can cause this to fail, so we don't do
1865          this check unless specially asked for.  And because it's
1866          potentially very expensive. */
1867       VG_(sanity_check_malloc_all)();
1868    }
1869 }
1870
1871 /*--------------------------------------------------------------------*/
1872 /*--- end                                                          ---*/
1873 /*--------------------------------------------------------------------*/