6 * Definitions of applications, instances
8 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
9 * economic rights: Technische Universität Dresden (Germany)
10 * This file is part of TUD:OS and distributed under the terms of the
11 * GNU General Public License 2.
12 * Please see the COPYING-GPL-2 file for details.
22 #include <semaphore.h>
23 #include <pthread-l4.h>
26 #include <l4/sys/types.h>
27 #include <l4/sys/utcb.h>
28 #include <l4/sys/factory>
29 #include <l4/sys/thread>
30 #include <l4/sys/task>
31 #include <l4/sys/scheduler>
32 #include <l4/sys/segment.h>
33 #include <l4/sys/debugger.h>
35 #include <l4/vcpu/vcpu>
36 #include <l4/plr/measurements.h>
37 #include <l4/util/util.h>
38 #include <l4/util/bitops.h>
40 #include <l4/re/error_helper>
41 #include <l4/re/util/cap_alloc>
42 #include <l4/re/util/kumem_alloc>
46 #include "constants.h"
// vCPU exception-handler entry point with an unmangled (extern "C") symbol
// name. NOTE(review): defined outside this file — presumably installed as
// the handler IP for newly created vCPUs; confirm against the setup code.
52 extern "C" void my_handler(void);
59 * Instance of an application
61 * Every instance of the app is run within a dedicated vCPU address space.
62 * Instances are created depending on the amount of redundancy/checking
67 // XXX: For multithreading, we might consider having a vCPU task for
68 // every thread of the app -> see papers on deterministic multithreading
// Kernel task object (address space) in which this replica executes.
69 L4::Cap<L4::Task> _vcpu_task;
76 * Map of addr -> addr mappings.
78 * This is a dirty trick keeping track of all pages in the master AS
79 * that are mapped to the replica AS. We need it, because the usual sequence
80 * for using a dataspace is:
88 * And in the last unmap(), we cannot consult the region map for
89 * this mapping anymore.
91 * XXX: Real fix would be to slightly adapt the region map for our
92 * purposes, because otherwise we are storing *a lot* of
93 * page-to-page mappings here.
// Key: page address in the replica AS -> value: backing page in master AS.
95 std::map<l4_addr_t, l4_addr_t> _mappings;
// Buffer size (incl. NUL) for the per-replica debugger name, see ctor.
97 enum { debug_name_size = 16 };
// Create a replica instance: allocate a task capability, create the kernel
// task object for the replica's address space, and register a readable
// name ("V<id> <name>") with the kernel debugger.
//
// name        debug name of the application
// instanceID  numeric ID of this replica
100 explicit App_instance(char const *name = "", l4_umword_t const instanceID = 0)
104 * Create instance vCPU
// chkcap/chksys turn allocation / system-call failures into errors.
106 _vcpu_task = chkcap(L4Re::Util::cap_alloc.alloc<L4::Task>(),
108 chksys(L4Re::Env::env()->factory()->create_task(_vcpu_task,
114 * Every replica gets a name set as the debug ID
116 char namebuf[debug_name_size];
// NOTE(review): _id is an l4_umword_t (unsigned); "%ld" expects a signed
// long — "%lu" would match the type. Harmless on ia32, but worth fixing.
117 snprintf(namebuf, debug_name_size, "V%ld %s", _id, name);
118 l4_debugger_set_object_name(_vcpu_task.cap(), namebuf);
121 L4::Cap<L4::Task> vcpu_task() const { return _vcpu_task; }
122 l4_umword_t id() const { return _id; }
125 * Map a flexpage in an aligned way.
127 * Current impl.: simply map the page as we indirectly assume that
128 * we are always called for a single page.
130 * Future: this should align the local and remote targets and use the
131 * largest possible mapping so that we can avoid a couple
132 * of page faults if possible. XXX
// local   address of the page(s) in the master address space
// remote  target address in the replica address space
// shift   log2 size of the mapping (L4_PAGESHIFT for a single page)
// flags   fpage rights bits handed to l4_fpage()
134 void map_aligned(l4_addr_t local, l4_addr_t remote, l4_umword_t shift, l4_umword_t flags)
136 //DEBUG() << "map_aligned(" << std::hex << local << ", " << remote
137 // << ", " << shift << ", " << flags << ")";
138 l4_fpage_t fp = l4_fpage(local, shift, flags);
139 //DEBUG() << "fp: " << fp.raw;
// Map from the master task (L4Re::This_task) into the replica's task.
140 l4_msgtag_t tag = vcpu_task()->map(L4Re::This_task, fp, remote);
141 _check(l4_msgtag_has_error(tag), "error mapping page");
142 //DEBUG() << "mapped " << std::hex << fp.raw << " : " << std::hex << tag.raw;
// Record every page-sized remote->local pair so unmap() can later find
// the master address backing a replica page (see _mappings above).
// NOTE(review): assumes shift >= L4_PAGESHIFT — a smaller value would
// underflow the loop bound; confirm callers never pass less.
143 for (l4_umword_t offs = 0; offs < (L4_PAGESIZE << (shift - L4_PAGESHIFT));
144 offs += L4_PAGESIZE) {
145 _mappings[remote + offs] = local + offs;
151 * Unmap a flexpage from replica
// fpraw  raw flexpage value describing the replica page to unmap
153 void unmap(l4_umword_t fpraw)
158 remote = l4_fpage_page(fp) << L4_PAGESHIFT;
// Look up the master-side address that was mapped to this replica page.
160 l4_addr_t a = _mappings[remote];
161 DEBUG() << "unmap @ " << std::hex << remote << " -> " << "0x" << a;
// NOTE(review): l4_fpage()'s second parameter is a log2 size; passing
// L4_PAGESIZE (4096) instead of L4_PAGESHIFT (12) looks wrong — confirm
// against the l4_fpage() API documentation.
162 vcpu_task()->unmap(l4_fpage(a, L4_PAGESIZE, L4_FPAGE_RO), L4_FP_ALL_SPACES);
// The entry is zeroed rather than erased, so the map never shrinks
// (see the XXX note at _mappings).
163 _mappings[remote] = 0;
164 //enter_kdebug("unmapped");
169 * Representation of an application-level thread
171 * In fact, a vCPU is used for every such thread. This class also includes
172 * the stacks needed for setting up the thread and later on running the
173 * VCPU exception handlers.
178 l4_addr_t _handler_fn; // pointer to exception handler code
179 l4_addr_t _thread_fn; // pointer to initial startup code
181 /* Handler stack layout:
183 * +-------------------------------+ _handler_stack + sizeof(_handler_stack)
184 * | Instance Mgr pointer |
185 * | App_instance pointer |
186 * | App_thread pointer |
187 * | Thread group pointer |
188 * | App_model pointer |
189 * +-------------------------------+ _initial stack ptr
190 * | handler entry ebp |
192 * +-------------------------------+ _handler_stack
// Base of the dedicated exception-handler stack (page-aligned, see ctor).
194 char *_handler_stack;
// Current stack pointers: handler stack and the thread's initial stack.
196 l4_addr_t _handler_sp;
197 l4_addr_t _thread_sp;
200 L4::Cap<L4::Thread> _vcpu_cap; // cap for vcpu object
201 L4vcpu::Vcpu *_vcpu; // vcpu state area
202 l4_utcb_t *_vcpu_utcb; // vcpu UTCB
203 pthread_t _pthread; // pthread backing this VCPU
204 l4_addr_t _remote_utcb; // address of remote UTCB
207 * Master segment registers. Restored whenever we enter the
208 * master through a VCPU fault.
210 l4_umword_t _master_ds;
211 l4_umword_t _master_fs;
212 l4_umword_t _master_gs;
214 l4_umword_t _pending_trap; // for injecting HW traps
215 l4_umword_t _events; // keeping track of handle events
217 /* Set if we detected a page fault that could not be handled.
218 * Thereby, the PF handler can then bail out if this fault is
// Bit flag stored in _events (see set_unhandled_pf()/unhandled_pf()).
220 Unhandled_Page_Fault = 1,
// Raw x86 GDT segment descriptor (standard hardware layout; packed so the
// field offsets match what the CPU expects).
223 struct gdt_entry_struct
225 l4_uint16_t limit_low; // The lower 16 bits of the limit.
226 l4_uint16_t base_low; // The lower 16 bits of the base.
227 l4_uint8_t base_middle; // The next 8 bits of the base.
228 l4_uint8_t access; // Access flags, determine what ring this segment can be used in.
229 l4_uint8_t granularity; // Flag bits plus the upper 4 bits of the limit.
230 l4_uint8_t base_high; // The last 8 bits of the base.
231 } __attribute__((packed))
233 bool _gdt_modified; // track if GDT was modified
237 * Watchdog: set on creation and defined in config file
240 int _watchdog_timeout;
243 * Watchdog: interrupt object set on vcpu startup
245 L4::Cap<L4::Irq> _watchdog_irq;
248 * Watchdog: use single-stepping for synchronization
// Counter managed via *_watchdog_ss_count() accessors below.
251 unsigned _watchdog_ss_count;
254 * Watchdog: use breakpoints for synchronization
256 bool _watchdog_breakpointing;
257 Breakpoint *_watchdog_breakpoint; // breakpoint used while synchronizing
260 * Watchdog: am I the replica that passed the watchdog interrupt
263 bool _watchdog_passed;
// Further watchdog bookkeeping, manipulated through the accessors below:
266 bool _got_other_trap; // a non-watchdog trap was observed
267 bool _watchdog_suspended; // watchdog handling temporarily suspended
268 bool _watchdog_met_leader; // set via i_have_met_the_leader()
272 * Benchmarking: counters used to determine number and cycles spent in
273 * different parts of the master if BENCHMARKING is set to 1 in
274 * server/src/constants.h
276 * t_* -> accumulate cycles spent
277 * c_* -> count the number of times certain paths were entered
279 unsigned long long t_lock, c_lock; // lock observer
280 unsigned long long t_pfh, c_pfh; // page fault handling
281 unsigned long long t_syscalls, c_syscalls; // syscall observer
282 unsigned long long t_kiptime, c_kiptime; // KIP time observer
283 unsigned long long t_traps, c_traps; // trap handling
285 unsigned long long t_handling; // total handling time
286 unsigned long long t_observer, c_observer; // time in observers
287 unsigned long long t_keepup, c_keepup; // passive replicas: time to keep up with leader
288 unsigned long long t_user; // time in user mode
289 unsigned long long last_user_resume; // timestamp of last ret to user
// Fine-grained breakdown of the replica synchronization path:
291 unsigned long long t_sync_enter_all;
292 unsigned long long t_sync_wait_for_active, c_sync_wait_for_active;
293 unsigned long long t_sync_wait, t_sync_waitforarrival;
294 unsigned long long t_sync_getdata, c_sync_getdata;
295 unsigned long long t_sync_active_validate, c_sync_active_validate;
297 //unsigned long long t_sync;
// Timestamps taken at the edges of the handling path (see the ts_*()
// helpers below):
298 unsigned long long t_sync_enter; // TS before DMR::enter()
299 unsigned long long t_sync_entered; // TS before sleeping / validating replicas
300 unsigned long long t_sync_leave;
302 unsigned long long t_resume_active, c_resume_active;
303 unsigned long long t_resume_passive, c_resume_passive;
304 unsigned long long t_resume_enter;
307 * Tracks if we are the currently active
308 * trap handling replica
313 * Get topmost address of exception handler/thread stacks
315 l4_addr_t top_of_handler_stack() { return (l4_addr_t)(_handler_stack + HANDLER_STACK_SIZE); }
318 * Initialize handler and init thread stacks.
320 * This ensures that the handler stack is paged in correctly before we
321 * do anything. Otherwise the handler might raise a page fault upon
328 * Create the vCPU kernel object
// Defined out of line.
330 void alloc_vcpu_cap();
334 * Alloc and setup vCPU UTCB
336 * The setup code stores a pointer to this App_thread object on
337 * the handler's stack so that it can be found upon an exception.
// Defined out of line.
339 void alloc_vcpu_mem();
// Copy construction is not meaningful for a vCPU thread; this copy ctor
// (presumably declared in a private section) blocks external copying.
// NOTE(review): the empty body copies no members — with C++11 this would
// be `= delete`; confirm it is never actually invoked.
342 App_thread(const App_thread&) { }
// Construct an application thread / vCPU.
//
// eip         initial instruction pointer of the replicated thread
// handler_fn  entry point of the vCPU exception handler
// use_watchdog / watchdog_timeout  watchdog configuration
346 App_thread(l4_addr_t eip,
348 l4_addr_t handler_fn,
350 bool use_watchdog = false,
351 l4_umword_t watchdog_timeout = 0)
353 _handler_fn(handler_fn),
354 _thread_fn(thread_fn),
// 0xFFFFFFFF marks "remote UTCB address not yet known".
358 _remote_utcb(0xFFFFFFFF),
364 _use_watchdog(use_watchdog),
365 _watchdog_timeout(watchdog_timeout),
367 _watchdog_ss_count(0),
368 _watchdog_passed(false),
369 _watchdog_breakpointing(false),
370 _watchdog_suspended(false),
371 _got_watchdog(false),
372 _got_other_trap(false),
373 _watchdog_met_leader(false)
// All benchmark accumulators start at zero.
375 , t_lock(0ULL), c_lock(0ULL), t_pfh(0ULL), c_pfh(0ULL),
376 t_syscalls(0ULL), c_syscalls(0ULL), t_kiptime(0ULL),
377 c_kiptime(0ULL), t_traps(0ULL), c_traps(0ULL), t_handling(0ULL),
378 t_observer(0ULL), c_observer(0ULL),
379 t_keepup(0ULL), c_keepup(0ULL),
380 t_user(0ULL), last_user_resume(0ULL),
381 t_sync_enter_all(0ULL), t_sync_wait_for_active(0ULL), c_sync_wait_for_active(0ULL),
382 t_sync_wait(0ULL), t_sync_waitforarrival(0ULL),
383 t_sync_getdata(0ULL), c_sync_getdata(0ULL),
384 t_sync_active_validate(0ULL), c_sync_active_validate(0ULL),
385 t_resume_active(0ULL), c_resume_active(0ULL),
386 t_resume_passive(0ULL), c_resume_passive(0ULL),
// Page-aligned, dedicated stack for the exception handler.
397 _handler_stack = (char*)memalign(L4_PAGESIZE, HANDLER_STACK_SIZE);
398 _handler_sp = top_of_handler_stack();
399 DEBUG() << "HANDLER STACK: " << (void*)_handler_stack;
// NOTE(review): the NULL check runs only after _handler_sp was already
// derived from the (possibly NULL) allocation above — consider checking
// the memalign() result first.
400 _check(!_handler_stack, "could not allocate handler stack");
// Client GDT starts out all-zero until setup_utcb_segdesc() fills it.
406 memset(gdt(), 0, gdt_size());
408 DEBUG() << "vCPU cap: " << std::hex << vcpu_cap();
410 DEBUG() << "STACK: " << std::hex << (void*)esp;
// Seed the vCPU user state with the thread's entry point and stack.
411 vcpu()->r()->ip = eip;
412 vcpu()->r()->sp = esp;
413 DEBUG() << "EIP " << (void*)eip << " ESP " << (void*)esp;
417 void use_watchdog(bool u) { _use_watchdog = u; }
418 bool use_watchdog() { return _use_watchdog; }
419 void watchdog_timeout(l4_umword_t p) { _watchdog_timeout = p; }
420 l4_umword_t watchdog_timeout() { return _watchdog_timeout; }
422 void watchdog_ss(bool ss) { _watchdog_ss = ss; }
423 bool watchdog_ss() { return _watchdog_ss; }
424 unsigned watchdog_ss_count() { return _watchdog_ss_count; }
425 void increment_watchdog_ss_count() { ++_watchdog_ss_count; }
426 void reset_watchdog_ss_count() { _watchdog_ss_count = 0; }
428 void its_me_who_passed_the_watchdog(bool p) { _watchdog_passed = p; }
429 bool its_me_who_passed_the_watchdog() { return _watchdog_passed; }
431 void watchdog_irq(L4::Cap<L4::Irq> irq) { _watchdog_irq = irq; }
432 L4::Cap<L4::Irq> watchdog_irq() { return _watchdog_irq; }
434 void watchdog_breakpoint(Breakpoint *b) { _watchdog_breakpoint = b; }
435 Breakpoint *watchdog_breakpoint() { return _watchdog_breakpoint; }
437 void watchdog_breakpointing(bool b) { _watchdog_breakpointing = b; }
438 bool watchdog_breakpointing() { return _watchdog_breakpointing; }
440 void got_watchdog(bool w) { _got_watchdog = w; }
441 bool got_watchdog() { return _got_watchdog; }
443 void got_other_trap(bool t) { _got_other_trap = t; }
444 bool got_other_trap() { return _got_other_trap; }
446 void watchdog_suspended(bool s) { _watchdog_suspended = s; }
447 bool watchdog_suspended() { return _watchdog_suspended; }
449 void i_have_met_the_leader(bool m) { _watchdog_met_leader = m; }
450 bool i_have_met_the_leader() { return _watchdog_met_leader; }
453 bool is_active() { return active_handler; }
454 void activate() { active_handler = true; }
455 void deactivate() { active_handler = false; }
// count_*(): accumulate cycles (t_*) and event counts (c_*) for the
// individual master code paths; results are printed by the stats dump.
457 void count_lock(unsigned long long increment)
460 t_lock += increment; c_lock++;
464 void count_pfh(unsigned long long increment)
467 t_pfh += increment; c_pfh++;
470 void count_syscalls(unsigned long long increment)
473 t_syscalls += increment; c_syscalls++;
477 void count_kiptime(unsigned long long increment)
480 t_kiptime += increment; c_kiptime++;
484 void count_traps(unsigned long long increment)
487 t_traps += increment; c_traps++;
// Total handling time has no event counter of its own.
491 void count_handling(unsigned long long increment)
494 t_handling += increment;
// ts_*(): take rdtsc timestamps at the edges of the fault-handling path
// and accumulate the intervals into the benchmark counters above.
498 void ts_from_user() {
500 t_sync_enter = l4_rdtsc();
// Time since the last resume was spent in user mode.
501 t_user += (t_sync_enter - last_user_resume);
505 void ts_sync_entered() {
507 t_sync_entered = l4_rdtsc();
508 t_sync_enter_all += (t_sync_entered - t_sync_enter);
512 void ts_sync_leave() {
514 t_sync_leave = l4_rdtsc();
// NOTE(review): both the active (validate) and passive (wait) counters
// are fed from the same interval here — presumably an is_active() branch
// selects one of the two paths in the full source; confirm.
516 t_sync_active_validate += (t_sync_leave - t_sync_enter);
517 ++c_sync_active_validate;
519 t_sync_wait_for_active += (t_sync_leave - t_sync_enter);
520 ++c_sync_wait_for_active;
525 void ts_resume_start() {
527 t_resume_enter = l4_rdtsc();
529 t_observer += (t_resume_enter - t_sync_leave);
532 t_keepup += (t_resume_enter - t_sync_leave);
538 void ts_user_resume(bool first = false) {
540 last_user_resume = l4_rdtsc();
542 // the first call is only to set the resume TS, don't count
548 t_resume_active += (last_user_resume - t_resume_enter);
551 t_resume_passive += (last_user_resume - t_resume_enter);
554 deactivate(); // simply do this any time we resume
// inc_*(): accumulate externally measured wait intervals.
558 void inc_wait(unsigned long long increment)
561 t_sync_wait += increment;
565 void inc_waitleader(unsigned long long inc)
568 t_sync_waitforarrival += inc;
573 void inc_getdata(unsigned long long increment)
577 t_sync_getdata += increment;
// Print one benchmark result line: left-aligned label, right-aligned cycle
// count, and — when withCount is set — the event count in brackets.
582 void print_helper(char const *msg, unsigned long long time,
583 unsigned long long count = 0, bool withCount = false)
586 INFO() << std::left << std::setw(32) << msg
587 << " : " << std::right << std::setw(16) << time
588 << " [ " << std::setw(10) << count << " ]";
590 INFO() << std::left << std::setw(32) << msg
591 << " : " << std::right << std::setw(16) << time;
// Dump all accumulated benchmark counters as a human-readable table
// (indentation reflects the nesting of the measured code paths).
598 print_helper(GREEN "Clocks spent in user " NOCOLOR, t_user);
599 print_helper(GREEN "Clocks spent in master " NOCOLOR, t_handling);
600 print_helper(YELLOW " synchronization " NOCOLOR, t_sync_enter_all + t_sync_active_validate + t_sync_wait_for_active);
601 print_helper( " enter sync ", t_sync_enter_all);
602 print_helper( " active: check ", t_sync_active_validate, c_sync_active_validate, true);
603 print_helper( " passive: wait ", t_sync_wait_for_active, c_sync_wait_for_active, true);
604 print_helper( " (early wait) ", t_sync_waitforarrival);
605 print_helper( " (total wait) ", t_sync_wait);
606 print_helper( " (get data) ", t_sync_getdata, c_sync_getdata, true);
607 print_helper(YELLOW " observers " NOCOLOR, t_observer, c_observer, true);
608 print_helper( " PFH ", t_pfh, c_pfh, true);
609 print_helper( " Locking ", t_lock, c_lock, true);
610 print_helper( " Syscalls ", t_syscalls, c_syscalls, true);
611 print_helper( " gettime() ", t_kiptime, c_kiptime, true);
612 print_helper( " CPU Traps ", t_traps, c_traps, true);
613 print_helper(YELLOW " keepup with leader " NOCOLOR, t_keepup, c_keepup, true);
614 print_helper(YELLOW " resume " NOCOLOR, t_resume_active + t_resume_passive);
615 print_helper( " active ", t_resume_active, c_resume_active, true);
616 print_helper( " passive ", t_resume_passive, c_resume_passive, true);
617 INFO() << " ------------------------------------------------------";
622 * Manage fast lookup for the replica's UTCB address
624 void remote_utcb(l4_addr_t a) { _remote_utcb = a; }
625 l4_addr_t remote_utcb() const { return _remote_utcb; }
628 * Start the vCPU thread
// Handler/thread stack pointers and the thread's entry point.
633 l4_addr_t handler_sp() const { return _handler_sp; }
634 void handler_sp(l4_addr_t sp) { _handler_sp = sp; }
636 l4_addr_t thread_sp() const { return _thread_sp; }
637 void thread_sp(l4_addr_t sp) { _thread_sp = sp; }
639 l4_addr_t thread_entry() const { return _thread_fn; }
// CPU placement and vCPU kernel-object accessors.
641 l4_umword_t cpu() const { return _cpu; }
642 void cpu(l4_umword_t c) { _cpu = c; }
643 L4::Cap<L4::Thread> vcpu_cap() const { return _vcpu_cap; }
644 void vcpu_cap(L4::Cap<L4::Thread> c) { _vcpu_cap = c; }
645 L4vcpu::Vcpu *vcpu() const { return _vcpu; }
646 l4_utcb_t *vcpu_utcb() const { return _vcpu_utcb; }
// Master segment register values restored when entering the master.
648 l4_umword_t ds() const { return _master_ds; }
649 l4_umword_t fs() const { return _master_fs; }
650 l4_umword_t gs() const { return _master_gs; }
651 // void gs(l4_addr_t a) { _master_gs = a; }
// Raw pointer to / byte size of the client GDT entries.
655 return (void*)&_client_gdt[0];
657 l4_umword_t gdt_size() const { return sizeof(_client_gdt); }
659 /***********************************************************************
660 * GDT Handling Explained
662 * Fiasco uses the FS register to store the current UTCB address,
663 * libpthread uses GS for providing thread-local storage. Both require
664 * a valid entry in the GDT, which user space can access through the
665 * fiasco_gdt_set() system call. Furthermore, Fiasco provides a range
666 * of user-defined segment entries at offsets 0x48, 0x50, and 0x58.
668 * By default, the GDT entry for the UTCB address is 0x40. As Romain
669 * uses pthreads, the first user-defined segment is used for Romain's
672 * Replicas use user-defined entries 2 and 3:
673 * - Entry 2 (0x50) contains the replica's UTCB address.
674 * - Entry 3 (0x58) can later be set up for thread-local storage.
676 * This means there are no free user-defined GDT entries anymore! If we
677 * wanted to fix this, we'd have to manually swap GDT entries every
678 * time we switch between replicas and master. This would require two
679 * additional system calls for modifying the GDT.
680 ***********************************************************************/
683 * Set up the initial GDT segment (e.g., UTCB address)
// base/limit describe the segment the replica's UTCB register points at.
686 void setup_utcb_segdesc(l4_addr_t base, l4_addr_t limit)
688 DEBUG() << "Base " << std::hex << base
689 << " Limit " << limit;
690 memset(_client_gdt, 0, sizeof(_client_gdt));
// Standard x86 descriptor encoding: split base and limit into the
// descriptor's bit fields.
692 _client_gdt[0].limit_low = limit & 0xFFFF;
693 _client_gdt[0].base_low = base & 0xFFFF;
694 _client_gdt[0].base_middle = (base >> 16) & 0xFF;
695 _client_gdt[0].base_high = (base >> 24) & 0xFF;
// 0xF2 = present, DPL 3, writable data segment.
696 _client_gdt[0].access = 0xF2;
// 0x40 = 32-bit segment, byte granularity, limit bits 16-19 zero.
697 _client_gdt[0].granularity = 0x40;
// Mark dirty so the entry gets committed to the kernel before resuming.
699 _gdt_modified = true;
// True if a client GDT entry was modified since the last commit.
703 bool gdt_changed() { return _gdt_modified; }
707 * Write the second entry, actually.
// src/bytes: raw descriptor data copied verbatim into entry 1.
710 void write_gdt_entry(l4_umword_t *src, l4_umword_t bytes)
712 memcpy(&_client_gdt[1], src, bytes);
713 _gdt_modified = true;
718 * Write the user GDT entries
// Defined out of line.
720 void commit_client_gdt();
723 * Schedule a "virtual" trap
725 * The whole thing is used to mark pending events for future
726 * invocations of some fault observers. These events currently
729 * - unhandled page fault
// Mark trap number `no` pending by setting its bit in _pending_trap.
731 void set_pending_trap(l4_umword_t no) { _pending_trap |= (1 << no); }
// Record an unhandled page fault: set the event flag and schedule a
// virtual #PF (trap 0xE) for a later handler invocation.
733 void set_unhandled_pf()
735 _events |= Unhandled_Page_Fault;
736 set_pending_trap(0xE);
739 void unset_unhandled_pf() { _events &= ~Unhandled_Page_Fault; }
740 bool unhandled_pf() { return _events & Unhandled_Page_Fault; }
742 l4_umword_t events_pending() { return _events; }
745 * Get the next pending trap (and remove it from pending list)
747 l4_umword_t get_pending_trap()
// NOTE(review): check what unit l4util_find_first_set_bit() expects for
// its size argument — the bounds test below compares against
// sizeof * 8 (bits) while the call passes sizeof (bytes); confirm
// against the l4util bitops documentation.
749 l4_umword_t ret = l4util_find_first_set_bit(&_pending_trap, sizeof(_pending_trap));
750 if (ret >= sizeof(_pending_trap) * 8) {
// Clear the trap bit we are about to deliver.
753 _pending_trap &= ~(1 << ret);
// Print this vCPU's register state, prefixed with "[VCPU <ptr>]".
760 void print_vcpu_state()
763 snprintf(pref, 32, "[VCPU %p] ", vcpu());
764 vcpu()->print_state(pref);
// Checksum over the vCPU state; defined out of line.
767 l4_umword_t csum_state();
// Halt path: stop the vCPU thread via the scheduler unless we are
// already running on that very thread.
// NOTE(review): run_thread() with l4_sched_param(0) presumably parks the
// thread at the lowest priority — confirm the scheduler semantics.
772 INFO() << " Halting VCPU " << std::hex << vcpu();
773 l4_sched_param_t sp = l4_sched_param(0);
774 if (pthread_l4_cap(pthread_self()) != vcpu_cap().cap()) {
775 chksys(L4Re::Env::env()->scheduler()->run_thread(vcpu_cap(), sp));
// Emulate a function return in the replica: pop the return-address slot
// off the stack and continue execution at `ret`.
780 void return_to(l4_addr_t ret)
782 vcpu()->r()->sp += sizeof(l4_umword_t); // RET: inc. ESP
783 vcpu()->r()->ip = ret; // RET: return addr
791 * Common prolog to be executed upon entry to exception handler function. It
792 * restores this VCPU's ES, DS, FS, and GS registers before continuing
793 * execution in the handler address space.
// NOTE(review): macro body partially elided in this view; the inline-asm
// input operands below feed the thread's saved ds/fs/gs values.
795 #define handler_prolog(app_thread) \
803 "r"((app_thread)->ds()), "r"((app_thread)->fs()), \
804 "r"((app_thread)->gs())); \