// vi: ft=cpp

/*
 * app --
 *
 *    Definitions of application instances and threads
 *
 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
 *     economic rights: Technische Universität Dresden (Germany)
 * This file is part of TUD:OS and distributed under the terms of the
 * GNU General Public License 2.
 * Please see the COPYING-GPL-2 file for details.
 */

#pragma once

#include <cstdio>
#include <cstring>
#include <stdlib.h>
#include <malloc.h>
#include <map>
#include <pthread.h>
#include <semaphore.h>

#include <l4/sys/types.h>
#include <l4/sys/utcb.h>
#include <l4/sys/factory>
#include <l4/sys/thread>
#include <l4/sys/task>
#include <l4/sys/scheduler>
#include <l4/sys/segment.h>
#include <l4/sys/debugger.h>

#include <l4/vcpu/vcpu>
#include <l4/util/util.h>
#include <l4/util/bitops.h>

#include <l4/re/env>
#include <l4/re/error_helper>
#include <l4/re/util/cap_alloc>
#include <l4/re/util/kumem_alloc>

#include "log"
#include "exceptions"
#include "constants.h"
#include "memory"

using L4Re::chksys;
using L4Re::chkcap;

extern "C" void my_handler(void);

namespace Romain {

/*
 * Instance of an application
 *
 * Every instance of the app is run within a dedicated vCPU address space.
 * Instances are created depending on the amount of redundancy/checking
 * needed.
 */
class App_instance
{
        // XXX: For multithreading, we might consider having a vCPU task for
        //      every thread of the app -> see papers on deterministic multithreading
        L4::Cap<L4::Task>    _vcpu_task;
        /*
         * Instance ID
         */
        unsigned             _id;

        /*
         * Map of addr -> addr mappings.
         *
         * This is a dirty trick to keep track of all pages in the master AS
         * that are mapped to the replica AS. We need it because the usual
         * sequence for using a dataspace is:
         *
         *   ds.alloc()
         *   ds.attach()
         *   <raise page faults>
         *   ds.detach()
         *   unmap()
         *
         * and in the last unmap(), we cannot consult the region map for
         * this mapping anymore.
         *
         * XXX: The real fix would be to slightly adapt the region map for our
         *      purposes, because otherwise we are storing *a lot* of
         *      page-to-page mappings here.
         */
        std::map<l4_addr_t, l4_addr_t> _mappings;

        enum { debug_name_size = 16 };

        public:
                explicit App_instance(char const *name = "", unsigned const instanceID = 0)
                        : _id(instanceID)
                {
                        /*
                         * Create the replica's vCPU task
                         */
                        _vcpu_task = chkcap(L4Re::Util::cap_alloc.alloc<L4::Task>(),
                                            "vCPU task alloc");
                        chksys(L4Re::Env::env()->factory()->create_task(_vcpu_task,
                                                                        l4_fpage_invalid()),
                               "create vCPU task");


                        /*
                         * Every replica gets a name set as the debug ID
                         */
                        char namebuf[debug_name_size];
                        snprintf(namebuf, debug_name_size, "V%u %s", _id, name);
                        l4_debugger_set_object_name(_vcpu_task.cap(), namebuf);
                }
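
                /*
                 * Usage sketch (illustrative, not part of this header): the
                 * master creates one App_instance per replica, e.g. for
                 * triple-modular redundancy:
                 *
                 *   for (unsigned i = 0; i < 3; ++i)
                 *       instances[i] = new Romain::App_instance("pong", i);
                 *
                 * where 'instances' and the binary name are placeholders; each
                 * instance then owns its own vCPU task capability.
                 */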

                L4::Cap<L4::Task> vcpu_task()   const { return _vcpu_task; }
                unsigned                 id()   const { return _id; }

                /*
                 * Map a flexpage in an aligned way.
                 *
                 * Current implementation: simply map the page, as we implicitly
                 *                         assume that we are always called for a
                 *                         single page.
                 *
                 * Future: this should align the local and remote targets and use the
                 *         largest possible mapping so that we can avoid a couple
                 *         of page faults if possible. XXX
                 */
                void map_aligned(l4_addr_t local, l4_addr_t remote, unsigned shift, unsigned flags)
                {
                        //DEBUG() << "map_aligned(" << std::hex << local << ", " << remote
                        //        << ", " << shift << ", " << flags << ")";
                        l4_fpage_t fp = l4_fpage(local, shift, flags);
                        //DEBUG() << "fp: " << fp.raw;
                        l4_msgtag_t tag = vcpu_task()->map(L4Re::This_task, fp, remote);
                        _check(l4_msgtag_has_error(tag), "error mapping page");
                        //DEBUG() << "mapped " << std::hex << fp.raw << " : " << std::hex << tag.raw;
                        for (unsigned offs = 0; offs < (L4_PAGESIZE << (shift - L4_PAGESHIFT));
                                  offs += L4_PAGESIZE) {
                                _mappings[remote + offs] = local + offs;
                        }
                }


                /*
                 * Map a local region to a remote region using the smallest
                 * possible number of map operations (XXX: not implemented yet,
                 * see the sketch below).
                 */
                void map(l4_addr_t local_start, l4_addr_t remote_start,
                                 unsigned pageflags, l4_size_t size = L4_PAGESIZE)
                {
                        //DEBUG() << "map " << std::hex << local_start << " -> " << remote_start
                        //        << " size " << size;

                        while (size > 0) {
                                unsigned frame_l = local_start  >> L4_PAGESHIFT;
                                unsigned frame_r = remote_start >> L4_PAGESHIFT;
                                (void)frame_l; (void)frame_r;
                                unsigned shift   = 0;

/* Check whether both frames are aligned to x pages and at least x pages
 * remain to be mapped; if so, increase the mapping order by one. Meant to
 * be invoked with doubling arguments (2, 4, 8, ...); currently disabled. */
#define FOO(x) do { \
        if (!(frame_l & ((x)-1)) && !(frame_r & ((x)-1)) && (size >= ((x)*L4_PAGESIZE))) { \
                shift += 1; \
        } \
} while (0)
                                //FOO(2); FOO(4); FOO(8); FOO(16); FOO(32); FOO(64);
                                map_aligned(local_start, remote_start, L4_PAGESHIFT + shift, pageflags);
                                local_start  += (L4_PAGESIZE << shift);
                                remote_start += (L4_PAGESIZE << shift);
                                size -= (L4_PAGESIZE << shift);
                        }
                        //enter_kdebug("mapped");
                }
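
                /*
                 * Sketch only, not enabled in this version: the largest
                 * flexpage order usable for one map_aligned() call could be
                 * derived from the alignment of both addresses and the
                 * remaining size, roughly like this (assuming page-aligned
                 * start addresses):
                 *
                 *   unsigned order = L4_PAGESHIFT;
                 *   while (!(local_start  & ((1UL << (order + 1)) - 1)) &&
                 *          !(remote_start & ((1UL << (order + 1)) - 1)) &&
                 *          size >= (1UL << (order + 1)))
                 *       ++order;
                 *
                 * map_aligned(local_start, remote_start, order, pageflags)
                 * would then map up to 2^order bytes in a single operation.
                 */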


                /*
                 * Unmap a flexpage from the replica
                 */
                void unmap(l4_umword_t fpraw)
                {
                        l4_fpage_t fp;
                        l4_addr_t remote;
                        fp.raw = fpraw;
                        remote = l4_fpage_page(fp) << L4_PAGESHIFT;

                        l4_addr_t a = _mappings[remote];
                        DEBUG() << "unmap @ " << std::hex << remote << " -> " << "0x" << a;
                        vcpu_task()->unmap(l4_fpage(a, L4_PAGESHIFT, L4_FPAGE_RO), L4_FP_ALL_SPACES);
                        _mappings[remote] = 0;
                        //enter_kdebug("unmapped");
                }
};


/*
 * Representation of an application-level thread
 *
 * In fact, a vCPU is used for every such thread. This class also includes
 * the stacks needed for setting up the thread and later on running the
 * vCPU exception handlers.
 */
class App_thread
{
        private:
                l4_addr_t _handler_fn; // pointer to exception handler code
                l4_addr_t _thread_fn;  // pointer to initial startup code

                /* Handler stack layout:
                 *
                 * +-------------------------------+ _handler_stack + sizeof(_handler_stack)
                 * | Instance Mgr pointer          |
                 * | App_instance pointer          |
                 * | App_thread pointer            |
                 * | Thread group pointer          |
                 * | App_model pointer             |
                 * +-------------------------------+ initial stack ptr
                 * |   handler entry ebp           |
                 * |   ...                         |
                 * +-------------------------------+ _handler_stack
                 */
                char *_handler_stack;

                l4_addr_t _handler_sp;
                l4_addr_t _thread_sp;

                l4_umword_t         _cpu;
                L4::Cap<L4::Thread> _vcpu_cap;     // cap for vCPU object
                L4vcpu::Vcpu       *_vcpu;         // vCPU state area
                l4_utcb_t          *_vcpu_utcb;    // vCPU UTCB
                pthread_t           _pthread;      // pthread backing this vCPU
                l4_addr_t           _remote_utcb;  // address of remote UTCB

                /*
                 * Master segment registers. Restored whenever we enter the
                 * master through a vCPU fault.
                 */
                unsigned long       _master_ds;
                unsigned long       _master_fs;
                unsigned long       _master_gs;

                l4_umword_t         _pending_trap; // for injecting HW traps
                l4_umword_t         _events;       // keeps track of pending events
                enum eventpending {
                        /* Set if we detected a page fault that could not be handled.
                         * This way, the PF handler can bail out if the fault is
                         * raised again. */
                        Unhandled_Page_Fault = 1,
                };

                struct gdt_entry_struct
                {
                        unsigned short limit_low;      // The lower 16 bits of the limit.
                        unsigned short base_low;       // The lower 16 bits of the base.
                        unsigned char base_middle;     // The next 8 bits of the base.
                        unsigned char access;          // Access flags; determine which ring this segment can be used from.
                        unsigned char granularity;     // Granularity flags and the upper 4 bits of the limit.
                        unsigned char base_high;       // The last 8 bits of the base.
                } __attribute__((packed))
                _client_gdt[2];

                /*
                 * Get the topmost address of the exception handler stack
                 */
                l4_addr_t top_of_handler_stack() { return (l4_addr_t)(_handler_stack + HANDLER_STACK_SIZE); }

                /*
                 * Initialize handler and init thread stacks.
                 *
                 * This ensures that the handler stack is paged in correctly before we
                 * do anything. Otherwise the handler might raise a page fault upon
                 * first entry.
                 */
                void touch_stacks();


                /*
                 * Create the vCPU kernel object
                 */
                void alloc_vcpu_cap();


                /*
                 * Allocate and set up the vCPU UTCB
                 *
                 * The setup code stores a pointer to this App_thread object on
                 * the handler's stack so that it can be found upon an exception.
                 */
                void alloc_vcpu_mem();

                App_thread() { }
                App_thread(const App_thread&) { }

        public:

                App_thread(l4_addr_t eip,
                           l4_addr_t esp,
                           l4_addr_t handler_fn,
                           l4_addr_t thread_fn)
                        :
                          _handler_fn(handler_fn),
                          _thread_fn(thread_fn),
                          _cpu(1),
                          _vcpu(0),
                          _vcpu_utcb(0),
                          _remote_utcb(0xFFFFFFFF),
                          _pending_trap(0),
                          _events(0)
                {
                        asm volatile (
                            "mov %%fs, %0\n\t"
                            "mov %%gs, %1\n\t"
                            "mov %%ds, %2\n\t"
                            : "=r" (_master_fs),
                              "=r" (_master_gs),
                              "=r" (_master_ds));

                        _handler_stack = (char*)memalign(L4_PAGESIZE, HANDLER_STACK_SIZE);
                        _handler_sp    = top_of_handler_stack();
                        DEBUG() << "HANDLER STACK: " << (void*)_handler_stack;
                        _check(!_handler_stack, "could not allocate handler stack");

                        touch_stacks();
                        alloc_vcpu_cap();
                        alloc_vcpu_mem();

                        DEBUG() << "vCPU cap: " << std::hex << vcpu_cap();

                        DEBUG() << "STACK: " << std::hex << (void*)esp;
                        vcpu()->r()->ip = eip;
                        vcpu()->r()->sp = esp;
                        DEBUG() << "EIP " << (void*)eip << " ESP " << (void*)esp;
                }
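
                /*
                 * Construction sketch (illustrative): the master creates one
                 * App_thread per replica vCPU, passing the replica's entry
                 * point and stack pointer plus the handler and startup
                 * routines, e.g.:
                 *
                 *   Romain::App_thread *t =
                 *       new Romain::App_thread(binary_entry, stack_top,
                 *                              (l4_addr_t)&my_handler,
                 *                              (l4_addr_t)&thread_startup);
                 *
                 * binary_entry, stack_top, and thread_startup are placeholder
                 * names for values provided by the loader/setup code.
                 */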


                /*
                 * Manage fast lookup for the replica's UTCB address
                 */
                void remote_utcb(l4_addr_t a) { _remote_utcb = a; }
                l4_addr_t remote_utcb() const { return _remote_utcb; }

                /*
                 * Start the vCPU thread
                 */
                void start();


                l4_addr_t            handler_sp()     const { return _handler_sp; }
                void handler_sp(l4_addr_t sp) { _handler_sp = sp; }

                l4_addr_t            thread_sp()      const { return _thread_sp; }
                void thread_sp(l4_addr_t sp)  { _thread_sp = sp; }

                l4_addr_t            thread_entry()   const { return _thread_fn; }

                l4_umword_t           cpu()           const { return _cpu; }
                void                  cpu(l4_umword_t c)    { _cpu = c; }
                L4::Cap<L4::Thread>   vcpu_cap()      const { return _vcpu_cap; }
                void                  vcpu_cap(L4::Cap<L4::Thread> c) { _vcpu_cap = c; }
                L4vcpu::Vcpu         *vcpu()          const { return _vcpu; }
                l4_utcb_t            *vcpu_utcb()     const { return _vcpu_utcb; }

                unsigned long         ds()            const { return _master_ds; }
                unsigned long         fs()            const { return _master_fs; }
                unsigned long         gs()            const { return _master_gs; }
//              void                  gs(l4_addr_t a)       { _master_gs = a; }

                void *                gdt()           const
                {
                        return (void*)&_client_gdt[0];
                }
                unsigned              gdt_size()      const { return sizeof(_client_gdt); }

                /***********************************************************************
                 * GDT Handling Explained
                 *
                 * Fiasco uses the FS register to store the current UTCB address,
                 * libpthread uses GS for providing thread-local storage. Both require
                 * a valid entry in the GDT, which user space can access through the
                 * fiasco_gdt_set() system call. Furthermore, Fiasco provides a range
                 * of user-defined segment entries at offsets 0x48, 0x50, and 0x58.
                 *
                 * By default, the GDT entry for the UTCB address is 0x40. As Romain
                 * uses pthreads, the first user-defined segment is used for Romain's
                 * TLS address.
                 *
                 * Replicas use user-defined entries 2 and 3:
                 * - Entry 2 (0x50) contains the replica's UTCB address.
                 * - Entry 3 (0x58) can later be set up for thread-local storage.
                 *
                 * This means there are no free user-defined GDT entries anymore! If we
                 * wanted to fix this, we'd have to manually swap GDT entries every
                 * time we switch between replicas and master. This would require two
                 * additional system calls for modifying the GDT.
                 ***********************************************************************/

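                /*
                 * For reference (standard x86 selector arithmetic, not code
                 * from this file): a descriptor at GDT byte offset O has index
                 * O / 8, and its ring-3 selector is O | 3. The user-defined
                 * offsets mentioned above therefore correspond to:
                 *
                 *   0x48 -> index  9, selector 0x4b
                 *   0x50 -> index 10, selector 0x53
                 *   0x58 -> index 11, selector 0x5b
                 */
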
                /*
                 * Set up the initial GDT segment (e.g., UTCB address)
                 */
                void setup_utcb_segdesc(l4_addr_t base, l4_addr_t limit)
                {
                        memset(_client_gdt, 0, sizeof(_client_gdt));

                        _client_gdt[0].limit_low   = limit & 0xFFFF;
                        _client_gdt[0].base_low    = base & 0xFFFF;
                        _client_gdt[0].base_middle = (base >> 16) & 0xFF;
                        _client_gdt[0].base_high   = (base >> 24) & 0xFF;
                        _client_gdt[0].access      = 0xF2; // present, DPL 3, writable data segment
                        _client_gdt[0].granularity = 0x40; // 32-bit segment, byte granularity
                }


                /*
                 * Write the second GDT entry and commit it to the vCPU.
                 *
                 * The resulting segment selector is stored in the vCPU's GS
                 * register.
                 */
                void write_gdt_entry(l4_umword_t *src, l4_umword_t bytes)
                {
                        memcpy(&_client_gdt[1], src, bytes);
                        vcpu()->r()->gs = fiasco_gdt_set(vcpu_cap().cap(), &_client_gdt[1],
                                                         sizeof(_client_gdt[1]), 2, l4_utcb());
                        DEBUG() << "set " << std::hex << vcpu()->r()->gs;
                }


                /*
                 * Commit the first client GDT entry (the UTCB segment set up
                 * by setup_utcb_segdesc()) to the vCPU. The resulting segment
                 * selector is stored in the vCPU's FS register.
                 */
                void commit_client_gdt()
                {
                        vcpu()->r()->fs = fiasco_gdt_set(vcpu_cap().cap(), gdt(),
                                                         gdt_size()/2, 1, l4_utcb());
                        DEBUG() << "set " << std::hex << vcpu()->r()->fs;
                }

                /*
                 * Schedule a "virtual" trap
                 *
                 * This mechanism is used to mark pending events for future
                 * invocations of certain fault observers. These events currently
                 * include:
                 *
                 *   - unhandled page fault
                 */
                void set_pending_trap(unsigned no) { _pending_trap |= (1 << no); }

                void set_unhandled_pf()
                {
                        _events |= Unhandled_Page_Fault;
                        set_pending_trap(0xE);
                }

                void unset_unhandled_pf() { _events &= ~Unhandled_Page_Fault; }
                bool unhandled_pf()       { return _events & Unhandled_Page_Fault; }

                l4_umword_t events_pending() { return _events; }

                /*
                 * Get the next pending trap (and remove it from the pending set)
                 */
                unsigned get_pending_trap()
                {
                        // Note: the second argument of l4util_find_first_set_bit()
                        // is the size of the searched region in bits.
                        unsigned ret = l4util_find_first_set_bit(&_pending_trap,
                                                                 sizeof(_pending_trap) * 8);
                        if (ret >= sizeof(_pending_trap) * 8) {
                                return 0;
                        } else {
                                _pending_trap &= ~(1 << ret);
                        }

                        return ret;
                }
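
                /*
                 * Example (following directly from the code above):
                 * set_pending_trap(0xE) sets bit 14 in _pending_trap; a later
                 * get_pending_trap() finds that bit, clears it, and returns
                 * 14, so the caller can inject the corresponding trap exactly
                 * once.
                 */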


                void print_vcpu_state()
                {
                        char pref[32];
                        snprintf(pref, 32, "[VCPU %p] ", (void*)vcpu());
                        vcpu()->print_state(pref);
                }

                unsigned long csum_state();
};



} /* namespace Romain */

/*
 * Common prolog to be executed upon entry to an exception handler function.
 * It restores the master's ES, DS, FS, and GS segment registers before
 * continuing execution in the handler address space.
 */
#define handler_prolog(app_thread) \
        do {  \
                  asm volatile ( \
                      "mov %0, %%es;" \
                      "mov %0, %%ds;" \
                      "mov %1, %%fs;" \
                      "mov %2, %%gs;" \
                      : : \
                          "r"((app_thread)->ds()), "r"((app_thread)->fs()), \
                          "r"((app_thread)->gs())); \
        } while (0)
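
/*
 * Usage sketch (illustrative): an exception handler running on behalf of an
 * App_thread would restore the master's segment registers before touching
 * any TLS or UTCB state, e.g.:
 *
 *   void handle_fault(Romain::App_thread *t)
 *   {
 *       handler_prolog(t);
 *       // ... inspect t->vcpu(), handle the fault, resume the replica ...
 *   }
 *
 * handle_fault is a placeholder name; the actual entry path is established
 * via my_handler() and the handler stack laid out above.
 */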