]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/plr/server/src/app
Update
[l4.git] / l4 / pkg / plr / server / src / app
1 // vim: ft=cpp
2
3 /*
4  * app --
5  *
6  *    Definitions of applications, instances
7  *
8  * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
9  *     economic rights: Technische Universität Dresden (Germany)
10  * This file is part of TUD:OS and distributed under the terms of the
11  * GNU General Public License 2.
12  * Please see the COPYING-GPL-2 file for details.
13  */
14
15 #pragma once
16
17 #include <cstdio>
18 #include <iomanip>
19 #include <stdlib.h>
20 #include <malloc.h>
21 #include <map>
22 #include <semaphore.h>
23 #include <pthread-l4.h>
24 #include <atomic>
25
26 #include <l4/sys/types.h>
27 #include <l4/sys/utcb.h>
28 #include <l4/sys/factory>
29 #include <l4/sys/thread>
30 #include <l4/sys/task>
31 #include <l4/sys/scheduler>
32 #include <l4/sys/segment.h>
33 #include <l4/sys/debugger.h>
34
35 #include <l4/vcpu/vcpu>
36 #include <l4/plr/measurements.h>
37 #include <l4/util/util.h>
38 #include <l4/util/bitops.h>
39
40 #include <l4/re/error_helper>
41 #include <l4/re/util/cap_alloc>
42 #include <l4/re/util/kumem_alloc>
43
44 #include "log"
45 #include "exceptions"
46 #include "constants.h"
47 #include "memory"
48
49 using L4Re::chksys;
50 using L4Re::chkcap;
51
52 extern "C" void my_handler(void);
53
54 class Breakpoint;
55
56 namespace Romain {
57
58 /*
59  * Instance of an application
60  *
61  * Every instance of the app is run within a dedicated vCPU address space.
62  * Instances are created depending on the amount of redundancy/checking
63  * needed.
64  */
65 class App_instance
66 {
67         // XXX: For multithreading, we might consider having a vCPU task for
68         //      every thread of the app -> see papers on deterministic multithreading
69         L4::Cap<L4::Task>    _vcpu_task;
70         /*
71          * Instance ID
72          */
73         l4_umword_t          _id;
74
75         /*
76          * Map of addr -> addr mappings.
77          *
78          * This is a dirty trick keeping track of all pages in the master AS
79          * that are mapped to the replica AS. We need it, because the usual sequence
80          * for using a dataspace is:
81          *
82          *   ds.alloc()
83          *   ds.attach()
84          *   <raise page faults>
85          *   ds.detach()
86          *   unmap()
87          *
88          * And in the last unmap(), we cannot consult the region map for
89          * this mapping anymore.
90          *
91          * XXX: Real fix would be to slightly adapt the region map for our
92          *      purposes, because otherwise we are storing *a lot* of
93          *      page-to-page mappings here.
94          */
95         std::map<l4_addr_t, l4_addr_t> _mappings;
96
97         enum { debug_name_size = 16 };
98
99         public:
100                 explicit App_instance(char const *name = "", l4_umword_t const instanceID = 0)
101                         : _id(instanceID)
102                 {
103                         /*
104                          * Create instance vCPU
105                          */
106                         _vcpu_task = chkcap(L4Re::Util::cap_alloc.alloc<L4::Task>(),
107                                             "vCPU task alloc");
108                         chksys(L4Re::Env::env()->factory()->create_task(_vcpu_task,
109                                                                         l4_fpage_invalid()),
110                                "create vCPU task");
111
112
113                         /*
114                          * Every replica gets a name set as the debug ID
115                          */
116                         char namebuf[debug_name_size];
117                         snprintf(namebuf, debug_name_size, "V%ld %s", _id, name);
118                         l4_debugger_set_object_name(_vcpu_task.cap(), namebuf);
119                 }
120
121                 L4::Cap<L4::Task> vcpu_task()   const { return _vcpu_task; }
122                 l4_umword_t              id()   const { return _id; }
123
124                 /*
125                  * Map a flexpage in an aligned way.
126                  *
127                  * Current impl.: simply map the page as we indirectly assume that
128                  *                we are always called for a single page.
129                  *
130                  * Future: this should align the local and remote targets and use the
131                  *         largest possible mapping so that we can avoid a couple
132                  *         of page faults if possible. XXX
133                  */
134                 void map_aligned(l4_addr_t local, l4_addr_t remote, l4_umword_t shift, l4_umword_t flags)
135                 {
136                         //DEBUG() << "map_aligned(" << std::hex << local << ", " << remote
137                         //        << ", " << shift << ", " << flags << ")";
138                         l4_fpage_t fp = l4_fpage(local, shift, flags);
139                         //DEBUG() << "fp: " << fp.raw;
140                         l4_msgtag_t tag = vcpu_task()->map(L4Re::This_task, fp, remote);
141                         _check(l4_msgtag_has_error(tag), "error mapping page");
142                         //DEBUG() << "mapped " << std::hex << fp.raw << " : " << std::hex << tag.raw;
143                         for (l4_umword_t offs = 0; offs < (L4_PAGESIZE << (shift - L4_PAGESHIFT));
144                                   offs += L4_PAGESIZE) {
145                                 _mappings[remote + offs] = local + offs;
146                         }
147                 }
148
149
150                 /*
151                  * Unmap a flexpage from replica
152                  */
153                 void unmap(l4_umword_t fpraw)
154                 {
155                         l4_fpage_t fp;
156                         l4_addr_t remote;
157                         fp.raw = fpraw;
158                         remote = l4_fpage_page(fp) << L4_PAGESHIFT;
159
160                         l4_addr_t a = _mappings[remote];
161                         DEBUG() << "unmap @ " << std::hex << remote << " -> " << "0x" << a;
162                         vcpu_task()->unmap(l4_fpage(a, L4_PAGESIZE, L4_FPAGE_RO), L4_FP_ALL_SPACES);
163                         _mappings[remote] = 0;
164                         //enter_kdebug("unmapped");
165                 }
166 };
167
168 /*
169  * Representation of an application-level thread
170  *
171  * In fact, a vCPU is used for every such thread. This class also includes
172  * the stacks needed for setting up the thread and later on running the
173  * VCPU exception handlers.
174  */
class App_thread
{
	private:
		l4_addr_t _handler_fn; // pointer to exception handler code
		l4_addr_t _thread_fn;  // pointer to initial startup code

		/* Handler stack layout:
		 *
		 * +-------------------------------+ _handler_stack + sizeof(_handler_stack)
		 * | Instance Mgr pointer          |
		 * | App_instance pointer          |
		 * | App_thread pointer            |
		 * | Thread group pointer          |
		 * | App_model pointer             |
		 * +-------------------------------+ _initial stack ptr
		 * |   handler entry ebp           |
		 * |   ...                         |
		 * +-------------------------------+ _handler_stack
		 */
		char *_handler_stack;

		l4_addr_t _handler_sp; // current handler stack pointer (starts at top_of_handler_stack())
		l4_addr_t _thread_sp;  // stack pointer used by the initial startup code

		l4_umword_t         _cpu;          // CPU number this vCPU is scheduled on (default 1)
		L4::Cap<L4::Thread> _vcpu_cap;     // cap for vcpu object
		L4vcpu::Vcpu       *_vcpu;         // vcpu state area
		l4_utcb_t          *_vcpu_utcb;    // vcpu UTCB
		pthread_t           _pthread;      // pthread backing this VCPU
		l4_addr_t           _remote_utcb;  // address of remote UTCB

		/*
		 * Master segment registers. Restored whenever we enter the
		 * master through a VCPU fault.
		 */
		l4_umword_t         _master_ds;
		l4_umword_t         _master_fs;
		l4_umword_t         _master_gs;

		l4_umword_t         _pending_trap; // for injecting HW traps
		l4_umword_t         _events;       // keeping track of handle events (bitmask of eventpending)
		enum eventpending {
			/* Set if we detected a page fault that could not be handled.
			 * Thereby, the PF handler can then bail out if this fault is
			 * raised again. */
			Unhandled_Page_Fault = 1,
		};

		/* Raw x86 segment descriptor layout used for the replica's GDT
		 * entries (written via setup_utcb_segdesc()/write_gdt_entry()). */
		struct gdt_entry_struct
		{
			l4_uint16_t limit_low;      // The lower 16 bits of the limit.
			l4_uint16_t base_low;       // The lower 16 bits of the base.
			l4_uint8_t  base_middle;    // The next 8 bits of the base.
			l4_uint8_t  access;         // Access flags, determine what ring this segment can be used in.
			l4_uint8_t  granularity;
			l4_uint8_t  base_high;      // The last 8 bits of the base.
		} __attribute__((packed))
		_client_gdt[2];
		bool                _gdt_modified; // track if GDT was modified

#if WATCHDOG
		/*
		 * Watchdog: set on creation and defined in config file
		 */
		bool              _use_watchdog;
		int               _watchdog_timeout;

		/*
		 * Watchdog: interrupt object set on vcpu startup
		 */
		L4::Cap<L4::Irq>  _watchdog_irq;

		/*
		 * Watchdog: use single-stepping for synchronization
		 */
		bool              _watchdog_ss;
		unsigned          _watchdog_ss_count;

		/*
		 * Watchdog: use breakpoints for synchronization
		 */
		bool              _watchdog_breakpointing;
		Breakpoint       *_watchdog_breakpoint;

		/*
		 * Watchdog: am I the replica that passed the watchdog interrupt
		 * with another trap
		 */
		bool              _watchdog_passed;

		bool              _got_watchdog;
		bool              _got_other_trap;
		bool              _watchdog_suspended;
		bool              _watchdog_met_leader;
#endif

		/*
		 * Benchmarking: counters used to determine number and cycles spent in
		 * different parts of the master if BENCHMARKING is set to 1 in
		 * server/src/constants.h
		 *
		 * t_* -> accumulate cycles spent
		 * c_* -> count the number of times certain paths were entered
		 */
		unsigned long long t_lock, c_lock;           // lock observer
		unsigned long long t_pfh, c_pfh;             // page fault handling
		unsigned long long t_syscalls, c_syscalls;   // syscall observer
		unsigned long long t_kiptime, c_kiptime;     // KIP time observer
		unsigned long long t_traps, c_traps;         // trap handling

		unsigned long long t_handling;               // total handling time
		unsigned long long t_observer, c_observer;   // time in observers
		unsigned long long t_keepup, c_keepup;       // passive replicas: time to keep up with leader
		unsigned long long t_user;                   // time in user mode
		unsigned long long last_user_resume;         // timestamp of last ret to user

		unsigned long long t_sync_enter_all;
		unsigned long long t_sync_wait_for_active, c_sync_wait_for_active;
		unsigned long long t_sync_wait, t_sync_waitforarrival;
		unsigned long long t_sync_getdata, c_sync_getdata;
		unsigned long long t_sync_active_validate, c_sync_active_validate;

		//unsigned long long t_sync;
		unsigned long long t_sync_enter;             // TS before DMR::enter()
		unsigned long long t_sync_entered;           // TS before sleeping / validating replicas
		unsigned long long t_sync_leave;

		unsigned long long t_resume_active, c_resume_active;
		unsigned long long t_resume_passive, c_resume_passive;
		unsigned long long t_resume_enter;

		/*
		 * Tracks if we are the currently active
		 * trap handling replica
		 */
		bool active_handler;

		/*
		 * Get topmost address of exception handler/thread stacks
		 */
		l4_addr_t top_of_handler_stack() { return (l4_addr_t)(_handler_stack + HANDLER_STACK_SIZE); }

		/*
		 * Initialize handler and init thread stacks.
		 *
		 * This ensures that the handler stack is paged in correctly before we
		 * do anything. Otherwise the handler might raise a page fault upon
		 * first entry.
		 */
		void touch_stacks();


		/*
		 * Create the vCPU kernel object
		 */
		void alloc_vcpu_cap();


		/*
		 * Alloc and setup vCPU UTCB
		 *
		 * The setup code stores a pointer to this App_thread object on
		 * the handler's stack so that it can be found upon an exception.
		 */
		void alloc_vcpu_mem();

		/* Default- and copy-construction are private: an App_thread must
		 * always be created fully initialized via the public constructor. */
		App_thread() { }
		App_thread(const App_thread&) { }
343
	public:

		/*
		 * Construct an application thread (one vCPU).
		 *
		 * eip        - replica's initial instruction pointer
		 * esp        - replica's initial stack pointer
		 * handler_fn - entry point of the vCPU exception handler
		 * thread_fn  - entry point of the initial startup code
		 * use_watchdog / watchdog_timeout - only evaluated when compiled
		 *              with WATCHDOG support
		 */
		App_thread(l4_addr_t eip,
		           l4_addr_t esp,
		           l4_addr_t handler_fn,
		           l4_addr_t thread_fn,
		           bool use_watchdog = false,
		           l4_umword_t watchdog_timeout = 0)
			:
			  _handler_fn(handler_fn),
			  _thread_fn(thread_fn),
			  _cpu(1),
			  _vcpu(0),
			  _vcpu_utcb(0),
			  _remote_utcb(0xFFFFFFFF),
			  _pending_trap(0),
			  _events(0),
			  _gdt_modified(false)
#if WATCHDOG
			  ,
			  _use_watchdog(use_watchdog),
			  _watchdog_timeout(watchdog_timeout),
			  _watchdog_ss(false),
			  _watchdog_ss_count(0),
			  _watchdog_passed(false),
			  _watchdog_breakpointing(false),
			  _watchdog_suspended(false),
			  _got_watchdog(false),
			  _got_other_trap(false),
			  _watchdog_met_leader(false)
#endif
		     , t_lock(0ULL), c_lock(0ULL), t_pfh(0ULL), c_pfh(0ULL),
			   t_syscalls(0ULL), c_syscalls(0ULL), t_kiptime(0ULL),
			   c_kiptime(0ULL), t_traps(0ULL), c_traps(0ULL), t_handling(0ULL),
			   t_observer(0ULL), c_observer(0ULL),
			   t_keepup(0ULL), c_keepup(0ULL),
			   t_user(0ULL), last_user_resume(0ULL),
			   t_sync_enter_all(0ULL), t_sync_wait_for_active(0ULL), c_sync_wait_for_active(0ULL),
			   t_sync_wait(0ULL), t_sync_waitforarrival(0ULL),
			   t_sync_getdata(0ULL), c_sync_getdata(0ULL),
			   t_sync_active_validate(0ULL), c_sync_active_validate(0ULL),
			   t_resume_active(0ULL), c_resume_active(0ULL),
			   t_resume_passive(0ULL), c_resume_passive(0ULL),
			   t_resume_enter(0ULL)
		{
			/* Capture the master's segment registers; these are restored
			 * whenever a vCPU fault enters the master. */
			asm volatile (
			    "mov %%fs, %0\n\t"
			    "mov %%gs, %1\n\t"
			    "mov %%ds, %2\n\t"
			    : "=r" (_master_fs),
			      "=r" (_master_gs),
			      "=r" (_master_ds));

			/* Page-aligned handler stack; _check() aborts on alloc failure. */
			_handler_stack = (char*)memalign(L4_PAGESIZE, HANDLER_STACK_SIZE);
			_handler_sp    = top_of_handler_stack();
			DEBUG() << "HANDLER STACK: " << (void*)_handler_stack;
			_check(!_handler_stack, "could not allocate handler stack");

			touch_stacks();    // page in stacks before first handler entry
			alloc_vcpu_cap();  // create vCPU kernel object
			alloc_vcpu_mem();  // allocate vCPU state area and UTCB

			memset(gdt(), 0, gdt_size());

			DEBUG() << "vCPU cap: " << std::hex << vcpu_cap();

			DEBUG() << "STACK: " << std::hex << (void*)esp;
			/* Initial user-visible register state of the replica. */
			vcpu()->r()->ip = eip;
			vcpu()->r()->sp = esp;
			DEBUG() << "EIP " << (void*)eip << " ESP " << (void*)esp;
		}
415
#if WATCHDOG
		/* ---- Watchdog state accessors (trivial get/set pairs) ----
		 * NOTE(review): _watchdog_timeout is declared int but accessed as
		 * l4_umword_t here -- implicit conversion; confirm intended. */
		void use_watchdog(bool u) { _use_watchdog = u; }
		bool use_watchdog() { return _use_watchdog; }
		void watchdog_timeout(l4_umword_t p) { _watchdog_timeout = p; }
		l4_umword_t watchdog_timeout() { return _watchdog_timeout; }

		/* Single-stepping mode and its step counter */
		void watchdog_ss(bool ss) { _watchdog_ss = ss; }
		bool watchdog_ss() { return _watchdog_ss; }
		unsigned watchdog_ss_count() { return _watchdog_ss_count; }
		void increment_watchdog_ss_count() { ++_watchdog_ss_count; }
		void reset_watchdog_ss_count() { _watchdog_ss_count = 0; }

		/* Did this replica pass the watchdog interrupt with another trap? */
		void its_me_who_passed_the_watchdog(bool p) { _watchdog_passed = p; }
		bool its_me_who_passed_the_watchdog() { return _watchdog_passed; }

		/* IRQ object bound at vCPU startup */
		void watchdog_irq(L4::Cap<L4::Irq> irq) { _watchdog_irq = irq; }
		L4::Cap<L4::Irq> watchdog_irq() { return _watchdog_irq; }

		/* Breakpoint-based synchronization state */
		void watchdog_breakpoint(Breakpoint *b) { _watchdog_breakpoint = b; }
		Breakpoint *watchdog_breakpoint() { return _watchdog_breakpoint; }

		void watchdog_breakpointing(bool b) { _watchdog_breakpointing = b; }
		bool watchdog_breakpointing() { return _watchdog_breakpointing; }

		/* Event flags observed by the trap handling path */
		void got_watchdog(bool w) { _got_watchdog = w; }
		bool got_watchdog() { return _got_watchdog; }

		void got_other_trap(bool t) { _got_other_trap = t; }
		bool got_other_trap() { return _got_other_trap; }

		void watchdog_suspended(bool s) { _watchdog_suspended = s; }
		bool watchdog_suspended() { return _watchdog_suspended; }

		void i_have_met_the_leader(bool m) { _watchdog_met_leader = m; }
		bool i_have_met_the_leader() { return _watchdog_met_leader; }
#endif
452
		/* True while this vCPU is the replica currently performing the
		 * trap/fault handling on behalf of the replica group. */
		bool is_active() { return active_handler; }
		void activate() { active_handler = true; }
		void deactivate() { active_handler = false; }
456
457                 void count_lock(unsigned long long increment)
458                 {
459 #if BENCHMARKING
460                         t_lock += increment; c_lock++;
461 #endif
462                 }
463
464                 void count_pfh(unsigned long long increment)
465                 {
466 #if BENCHMARKING
467                         t_pfh += increment; c_pfh++;
468 #endif
469                 }
470                 void count_syscalls(unsigned long long increment)
471                 {
472 #if BENCHMARKING
473                         t_syscalls += increment; c_syscalls++;
474 #endif
475                 }
476
477                 void count_kiptime(unsigned long long increment)
478                 {
479 #if BENCHMARKING
480                         t_kiptime += increment; c_kiptime++;
481 #endif
482                 }
483
484                 void count_traps(unsigned long long increment)
485                 {
486 #if BENCHMARKING
487                         t_traps += increment; c_traps++;
488 #endif
489                 }
490
491                 void count_handling(unsigned long long increment)
492                 {
493 #if BENCHMARKING
494                         t_handling += increment;
495 #endif
496                 }
497
498                 void ts_from_user() {
499 #if BENCHMARKING
500                         t_sync_enter = l4_rdtsc();
501                         t_user += (t_sync_enter - last_user_resume);
502 #endif
503                 }
504
505                 void ts_sync_entered() {
506 #if BENCHMARKING
507                         t_sync_entered = l4_rdtsc();
508                         t_sync_enter_all += (t_sync_entered - t_sync_enter);
509 #endif
510                 }
511
512                 void ts_sync_leave() {
513 #if BENCHMARKING
514                         t_sync_leave = l4_rdtsc();
515                         if (is_active()) {
516                                 t_sync_active_validate +=  (t_sync_leave - t_sync_enter);
517                                 ++c_sync_active_validate;
518                         } else {
519                                 t_sync_wait_for_active +=  (t_sync_leave - t_sync_enter);
520                                 ++c_sync_wait_for_active;
521                         }
522 #endif
523                 }
524
525                 void ts_resume_start() {
526 #if BENCHMARKING
527                         t_resume_enter = l4_rdtsc();
528                         if (is_active()) {
529                                 t_observer += (t_resume_enter - t_sync_leave);
530                                 c_observer++;
531                         } else {
532                                 t_keepup += (t_resume_enter - t_sync_leave);
533                                 c_keepup++;
534                         }
535 #endif
536                 }
537
538                 void ts_user_resume(bool first = false) {
539 #if BENCHMARKING
540                         last_user_resume = l4_rdtsc();
541
542                         // the first call is only to set the resume TS, don't count
543                         // resume time there
544                         if (first)
545                                 return;
546
547                         if (is_active()) {
548                                 t_resume_active += (last_user_resume - t_resume_enter);
549                                 ++c_resume_active;
550                         } else {
551                                 t_resume_passive += (last_user_resume - t_resume_enter);
552                                 ++c_resume_passive;
553                         }
554                         deactivate(); // simply do this any time we resume
555 #endif
556                 }
557
558                 void inc_wait(unsigned long long increment)
559                 {
560 #if BENCHMARKING
561                         t_sync_wait += increment;
562 #endif
563                 }
564
565                 void inc_waitleader(unsigned long long inc)
566                 {
567 #if BENCHMARKING
568                         t_sync_waitforarrival += inc;
569 #endif
570                 }
571
572
573                 void inc_getdata(unsigned long long increment)
574                 {
575 #if BENCHMARKING
576                         c_sync_getdata++;
577                         t_sync_getdata += increment;
578 #endif
579                 }
580
581
582                 void print_helper(char const *msg, unsigned long long time,
583                                   unsigned long long count = 0, bool withCount = false)
584                 {
585                         if (withCount) {
586                                 INFO() << std::left << std::setw(32) << msg
587                                        << " : " << std::right << std::setw(16) << time
588                                            << " [ " << std::setw(10) << count << " ]";
589                         } else {
590                                 INFO() << std::left << std::setw(32) << msg
591                                        << " : " << std::right << std::setw(16) << time;
592                         }
593                 }
594
		/* Dump the full benchmarking table via print_helper(). Rows with a
		 * trailing 'true' also print their event counter. No-op unless
		 * BENCHMARKING is enabled. */
		void print_stats()
		{
#if BENCHMARKING
			print_helper(GREEN  "Clocks spent in user            " NOCOLOR, t_user);
			print_helper(GREEN  "Clocks spent in master          " NOCOLOR, t_handling);
			print_helper(YELLOW "       synchronization          " NOCOLOR, t_sync_enter_all + t_sync_active_validate + t_sync_wait_for_active);
			print_helper(       "           enter sync           ", t_sync_enter_all);
			print_helper(       "           active: check        ", t_sync_active_validate, c_sync_active_validate, true);
			print_helper(       "           passive: wait        ", t_sync_wait_for_active, c_sync_wait_for_active, true);
			print_helper(       "              (early wait)      ", t_sync_waitforarrival);
			print_helper(       "              (total wait)      ", t_sync_wait);
			print_helper(       "              (get data)        ", t_sync_getdata, c_sync_getdata, true);
			print_helper(YELLOW "       observers                " NOCOLOR, t_observer, c_observer, true);
			print_helper(       "           PFH                  ", t_pfh, c_pfh, true);
			print_helper(       "           Locking              ", t_lock, c_lock, true);
			print_helper(       "           Syscalls             ", t_syscalls, c_syscalls, true);
			print_helper(       "           gettime()            ", t_kiptime, c_kiptime, true);
			print_helper(       "           CPU Traps            ", t_traps, c_traps, true);
			print_helper(YELLOW "       keepup with leader       " NOCOLOR, t_keepup, c_keepup, true);
			print_helper(YELLOW "       resume                   " NOCOLOR, t_resume_active + t_resume_passive);
			print_helper(       "           active               ", t_resume_active,  c_resume_active, true);
			print_helper(       "           passive              ", t_resume_passive, c_resume_passive, true);
			INFO() << "    ------------------------------------------------------";
#endif
		}
620
		/*
		 * Manage fast lookup for the replica's UTCB address
		 */
		void remote_utcb(l4_addr_t a) { _remote_utcb = a; }
		l4_addr_t remote_utcb() const { return _remote_utcb; }

		/*
		 * Start the vCPU thread
		 */
		void start();


		/* Exception handler stack pointer */
		l4_addr_t            handler_sp()     const { return _handler_sp; }
		void handler_sp(l4_addr_t sp) { _handler_sp = sp; }

		/* Startup-code stack pointer */
		l4_addr_t            thread_sp()      const { return _thread_sp; }
		void thread_sp(l4_addr_t sp)  { _thread_sp = sp; }

		/* Entry point of the initial startup code */
		l4_addr_t            thread_entry()   const { return _thread_fn; }

		/* Scheduling / kernel object accessors */
		l4_umword_t           cpu()           const { return _cpu; }
		void                  cpu(l4_umword_t c)    { _cpu = c; }
		L4::Cap<L4::Thread>   vcpu_cap()      const { return _vcpu_cap; }
		void                  vcpu_cap(L4::Cap<L4::Thread> c) { _vcpu_cap = c; }
		L4vcpu::Vcpu         *vcpu()          const { return _vcpu; }
		l4_utcb_t            *vcpu_utcb()     const { return _vcpu_utcb; }

		/* Master segment register values captured at construction */
		l4_umword_t           ds()            const { return _master_ds; }
		l4_umword_t           fs()            const { return _master_fs; }
		l4_umword_t           gs()            const { return _master_gs; }
//              void                  gs(l4_addr_t a)       { _master_gs = a; }

		/* Raw pointer to / size of the replica's two GDT entries */
		void *                gdt()           const
		{
			return (void*)&_client_gdt[0];
		}
		l4_umword_t           gdt_size()      const { return sizeof(_client_gdt); }
658
659                 /***********************************************************************
660                  * GDT Handling Explained
661                  *
662                  * Fiasco uses the FS register to store the current UTCB address,
663                  * libpthread uses GS for providing thread-local storage. Both require
664                  * a valid entry in the GDT, which user space can access through the
665                  * fiasco_gdt_set() system call. Furthermore, Fiasco provides a range
666                  * of user-defined segment entries at offsets 0x48, 0x50, and 0x58.
667                  *
668                  * By default, the GDT entry for the UTCB address is 0x40. As Romain
669                  * uses pthreads, the first user-defined segment is used for Romain's
670                  * TLS address.
671                  *
672                  * Replicas use user-defined entries 2 and 3:
673                  * - Entry 2 (0x50) contains the replica's UTCB address.
674                  * - Entry 3 (0x58) can later be set up for thread-local storage.
675                  *
676                  * This means there are no free user-defined GDT entries anymore! If we
677                  * wanted to fix this, we'd have to manually swap GDT entries every
678                  * time we switch between replicas and master. This would require two
679                  * additional system calls for modifying the GDT.
680                  ***********************************************************************/
681
		/*
		 * Set up the initial GDT segment (e.g., UTCB address)
		 * XXX: rename!
		 *
		 * Fills entry 0 of the local GDT with a flat data-segment
		 * descriptor for the given base/limit and marks the GDT as
		 * modified so commit_client_gdt() will push it to the kernel.
		 *
		 * base  : segment base address (e.g., the replica's UTCB)
		 * limit : segment limit; NOTE(review): only the low 16 bits are
		 *         written (limit_low); the high limit nibble in the
		 *         granularity byte stays zero — confirm callers never
		 *         pass limits > 0xFFFF.
		 */
		void setup_utcb_segdesc(l4_addr_t base, l4_addr_t limit)
		{
			DEBUG() << "Base " << std::hex << base
			        << " Limit " << limit;
			memset(_client_gdt, 0, sizeof(_client_gdt));

			_client_gdt[0].limit_low   = limit & 0xFFFF;
			_client_gdt[0].base_low    = base & 0xFFFF;
			_client_gdt[0].base_middle = (base >> 16) & 0xFF;
			_client_gdt[0].base_high   = (base >> 24) & 0xFF;
			// 0xF2: present, DPL 3, writable data segment
			_client_gdt[0].access      = 0xF2;
			// 0x40: 32-bit segment, byte granularity, limit[19:16] = 0
			_client_gdt[0].granularity = 0x40;

			_gdt_modified = true;
		}
701
		/*
		 * Has the local GDT copy been modified since it was last
		 * committed to the kernel (see commit_client_gdt())?
		 */
		bool gdt_changed() { return _gdt_modified; }
705
706                 /*
707                  * Write the second entry, actually.
708                  * XXX: RENAME!
709                  */
710                 void write_gdt_entry(l4_umword_t *src, l4_umword_t bytes)
711                 {
712                         memcpy(&_client_gdt[1], src, bytes);
713                         _gdt_modified = true;
714                 }
715
716
		/*
		 * Write the user GDT entries
		 *
		 * Pushes the locally cached _client_gdt entries to the kernel
		 * (implemented out of line).
		 */
		void commit_client_gdt();
721
722                 /*
723                  * Schedule a "virtual" trap
724                  *
725                  * The whole thing is used to mark pending events for future
726                  * invocations of some fault observers. These events currently
727                  * include
728                  *
729                  *   - unhandled page fault
730                  */
731                 void set_pending_trap(l4_umword_t no) { _pending_trap |= (1 << no); }
732
		/*
		 * Mark an unhandled page fault: record the event flag and
		 * schedule a virtual trap 0xE (page fault) for later delivery.
		 */
		void set_unhandled_pf()
		{
			_events |= Unhandled_Page_Fault;
			set_pending_trap(0xE);
		}

		// Clear / query the unhandled-page-fault event flag.
		void unset_unhandled_pf() { _events &= ~Unhandled_Page_Fault; }
		bool unhandled_pf()       { return _events & Unhandled_Page_Fault; }

		// Bitmask of all currently recorded events.
		l4_umword_t events_pending() { return _events; }
743
744                 /*
745                  * Get the next pending trap (and remove it from pending list)
746                  */
747                 l4_umword_t get_pending_trap()
748                 {
749                         l4_umword_t ret = l4util_find_first_set_bit(&_pending_trap, sizeof(_pending_trap));
750                         if (ret >= sizeof(_pending_trap) * 8) {
751                                 return 0;
752                         } else {
753                                 _pending_trap &= ~(1 << ret);
754                         }
755                 
756                         return ret;
757                 }
758
759
760                 void print_vcpu_state()
761                 {
762                         char pref[32];
763                         snprintf(pref, 32, "[VCPU %p] ", vcpu());
764                         vcpu()->print_state(pref);
765                 }
		/*
		 * Checksum over this vCPU's register state (implemented out of
		 * line); presumably used to compare replica states — verify
		 * against callers.
		 */
		l4_umword_t csum_state();
768
769
770                 void halt()
771                 {
772                         INFO() << "   Halting VCPU " << std::hex << vcpu();
773                         l4_sched_param_t sp = l4_sched_param(0);
774                         if (pthread_l4_cap(pthread_self()) != vcpu_cap().cap()) {
775                         chksys(L4Re::Env::env()->scheduler()->run_thread(vcpu_cap(), sp));
776                 }
777                 }
778
		/*
		 * Emulate a RET instruction in the replica: pop the return-
		 * address slot off the stack and continue execution at 'ret'.
		 *
		 * ret : address to resume at
		 */
		void return_to(l4_addr_t ret)
		{
			vcpu()->r()->sp += sizeof(l4_umword_t); // RET: inc. ESP
			vcpu()->r()->ip  = ret;                 // RET: return addr
		}
785 };
786
787
788 }
789
/*
 * Common prolog to be executed upon entry to exception handler function. It
 * restores this VCPU's ES, DS, FS, and GS registers before continuing
 * execution in the handler address space.
 *
 * app_thread must provide ds(), fs() and gs() accessors returning the
 * master-side segment selector values (see the accessors above).
 *
 * NOTE(review): x86-32 specific — loads segment registers directly via
 * inline assembly; the "r" constraints force the selectors into
 * general-purpose registers before the moves.
 */
#define handler_prolog(app_thread) \
	do {  \
		  asm volatile ( \
		      "mov %0, %%es;" \
		      "mov %0, %%ds;" \
		      "mov %1, %%fs;" \
		      "mov %2, %%gs;" \
		      : : \
		          "r"((app_thread)->ds()), "r"((app_thread)->fs()), \
		          "r"((app_thread)->gs())); \
	} while (0)