// vi: ft=cpp

/*
 * app --
 *
 *    Definitions of application instances and threads
 *
 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
 *     economic rights: Technische Universität Dresden (Germany)
 * This file is part of TUD:OS and distributed under the terms of the
 * GNU General Public License 2.
 * Please see the COPYING-GPL-2 file for details.
 */

#pragma once

#include <cstdio>
#include <cstring>
#include <stdlib.h>
#include <malloc.h>
#include <map>
#include <pthread.h>
#include <semaphore.h>

#include <l4/sys/types.h>
#include <l4/sys/utcb.h>
#include <l4/sys/factory>
#include <l4/sys/thread>
#include <l4/sys/task>
#include <l4/sys/scheduler>
#include <l4/sys/segment.h>
#include <l4/sys/debugger.h>

#include <l4/vcpu/vcpu>
#include <l4/util/util.h>
#include <l4/util/bitops.h>

#include <l4/re/env>
#include <l4/re/error_helper>
#include <l4/re/util/cap_alloc>
#include <l4/re/util/kumem_alloc>

#include "log"
#include "exceptions"
#include "constants.h"
#include "memory"

using L4Re::chksys;
using L4Re::chkcap;

extern "C" void my_handler(void);

namespace Romain {

/*
 * Instance of an application
 *
 * Every instance of the app is run within a dedicated vCPU address space.
 * Instances are created depending on the amount of redundancy/checking
 * needed.
 */
class App_instance
{
        // XXX: For multithreading, we might consider having a vCPU task for
        //      every thread of the app -> see papers on deterministic multithreading
        L4::Cap<L4::Task>    _vcpu_task;
        /*
         * Instance ID
         */
        unsigned             _id;

        /*
         * Map of addr -> addr mappings.
         *
         * This is a dirty trick to keep track of all pages in the master AS
         * that are mapped to the replica AS. We need it because the usual
         * sequence for using a dataspace is:
         *
         *   ds.alloc()
         *   ds.attach()
         *   <raise page faults>
         *   ds.detach()
         *   unmap()
         *
         * and in the last unmap(), we cannot consult the region map for
         * this mapping anymore.
         *
         * XXX: The real fix would be to slightly adapt the region map for our
         *      purposes, because otherwise we are storing *a lot* of
         *      page-to-page mappings here.
         */
        std::map<l4_addr_t, l4_addr_t> _mappings;

        enum { debug_name_size = 16 };

        public:
                explicit App_instance(char const *name = "", unsigned const instanceID = 0)
                        : _id(instanceID)
                {
                        /*
                         * Create the replica's vCPU task
                         */
                        _vcpu_task = chkcap(L4Re::Util::cap_alloc.alloc<L4::Task>(),
                                            "vCPU task alloc");
                        chksys(L4Re::Env::env()->factory()->create_task(_vcpu_task,
                                                                        l4_fpage_invalid()),
                               "create vCPU task");


                        /*
                         * Every replica gets a name set as the debug ID
                         */
                        char namebuf[debug_name_size];
                        snprintf(namebuf, debug_name_size, "V%u %s", _id, name);
                        l4_debugger_set_object_name(_vcpu_task.cap(), namebuf);
                }
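
                /*
                 * Usage sketch (illustrative, not part of this header): the
                 * master creates one App_instance per replica, e.g. for
                 * triple-modular redundancy:
                 *
                 *   for (unsigned i = 0; i < 3; ++i)
                 *       instances[i] = new Romain::App_instance("pong", i);
                 *
                 * where 'instances' and the binary name are placeholders; each
                 * instance then owns its own vCPU task capability.
                 */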

                L4::Cap<L4::Task> vcpu_task()   const { return _vcpu_task; }
                unsigned                 id()   const { return _id; }

                /*
                 * Map a flexpage in an aligned way.
                 *
                 * Current implementation: simply map the page, as we implicitly
                 *                         assume that we are always called for a
                 *                         single page.
                 *
                 * Future: this should align the local and remote targets and use the
                 *         largest possible mapping so that we can avoid a couple
                 *         of page faults if possible. XXX
                 */
                void map_aligned(l4_addr_t local, l4_addr_t remote, unsigned shift, unsigned flags)
                {
                        //DEBUG() << "map_aligned(" << std::hex << local << ", " << remote
                        //        << ", " << shift << ", " << flags << ")";
                        l4_fpage_t fp = l4_fpage(local, shift, flags);
                        //DEBUG() << "fp: " << fp.raw;
                        l4_msgtag_t tag = vcpu_task()->map(L4Re::This_task, fp, remote);
                        _check(l4_msgtag_has_error(tag), "error mapping page");
                        //DEBUG() << "mapped " << std::hex << fp.raw << " : " << std::hex << tag.raw;
                        for (unsigned offs = 0; offs < (L4_PAGESIZE << (shift - L4_PAGESHIFT));
                                  offs += L4_PAGESIZE) {
                                _mappings[remote + offs] = local + offs;
                        }
                }


                /*
                 * Map a local region to a remote region using the smallest
                 * possible number of map operations (XXX: not implemented yet,
                 * see the sketch below).
                 */
                void map(l4_addr_t local_start, l4_addr_t remote_start,
                                 unsigned pageflags, l4_size_t size = L4_PAGESIZE)
                {
                        //DEBUG() << "map " << std::hex << local_start << " -> " << remote_start
                        //        << " size " << size;

                        while (size > 0) {
                                unsigned frame_l = local_start  >> L4_PAGESHIFT;
                                unsigned frame_r = remote_start >> L4_PAGESHIFT;
                                (void)frame_l; (void)frame_r;
                                unsigned shift   = 0;

/* Check whether both frames are aligned to x pages and at least x pages
 * remain to be mapped; if so, increase the mapping order by one. Meant to
 * be invoked with doubling arguments (2, 4, 8, ...); currently disabled. */
#define FOO(x) do { \
        if (!(frame_l & ((x)-1)) && !(frame_r & ((x)-1)) && (size >= ((x)*L4_PAGESIZE))) { \
                shift += 1; \
        } \
} while (0)
                                //FOO(2); FOO(4); FOO(8); FOO(16); FOO(32); FOO(64);
                                map_aligned(local_start, remote_start, L4_PAGESHIFT + shift, pageflags);
                                local_start  += (L4_PAGESIZE << shift);
                                remote_start += (L4_PAGESIZE << shift);
                                size -= (L4_PAGESIZE << shift);
                        }
                        //enter_kdebug("mapped");
                }
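
                /*
                 * Sketch only, not enabled in this version: the largest
                 * flexpage order usable for one map_aligned() call could be
                 * derived from the alignment of both addresses and the
                 * remaining size, roughly like this (assuming page-aligned
                 * start addresses):
                 *
                 *   unsigned order = L4_PAGESHIFT;
                 *   while (!(local_start  & ((1UL << (order + 1)) - 1)) &&
                 *          !(remote_start & ((1UL << (order + 1)) - 1)) &&
                 *          size >= (1UL << (order + 1)))
                 *       ++order;
                 *
                 * map_aligned(local_start, remote_start, order, pageflags)
                 * would then map up to 2^order bytes in a single operation.
                 */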


                /*
                 * Unmap a flexpage from the replica
                 */
                void unmap(l4_umword_t fpraw)
                {
                        l4_fpage_t fp;
                        l4_addr_t remote;
                        fp.raw = fpraw;
                        remote = l4_fpage_page(fp) << L4_PAGESHIFT;

                        l4_addr_t a = _mappings[remote];
                        DEBUG() << "unmap @ " << std::hex << remote << " -> " << "0x" << a;
                        vcpu_task()->unmap(l4_fpage(a, L4_PAGESHIFT, L4_FPAGE_RO), L4_FP_ALL_SPACES);
                        _mappings[remote] = 0;
                        //enter_kdebug("unmapped");
                }
};


/*
 * Representation of an application-level thread
 *
 * In fact, a vCPU is used for every such thread. This class also includes
 * the stacks needed for setting up the thread and later on running the
 * vCPU exception handlers.
 */
class App_thread
{
        private:
                l4_addr_t _handler_fn; // pointer to exception handler code
                l4_addr_t _thread_fn;  // pointer to initial startup code

                /* Handler stack layout:
                 *
                 * +-------------------------------+ _handler_stack + sizeof(_handler_stack)
                 * | Instance Mgr pointer          |
                 * | App_instance pointer          |
                 * | App_thread pointer            |
                 * | Thread group pointer          |
                 * | App_model pointer             |
                 * +-------------------------------+ initial stack ptr
                 * |   handler entry ebp           |
                 * |   ...                         |
                 * +-------------------------------+ _handler_stack
                 */
                char *_handler_stack;

                l4_addr_t _handler_sp;
                l4_addr_t _thread_sp;

                l4_umword_t         _cpu;
                L4::Cap<L4::Thread> _vcpu_cap;     // cap for vCPU object
                L4vcpu::Vcpu       *_vcpu;         // vCPU state area
                l4_utcb_t          *_vcpu_utcb;    // vCPU UTCB
                pthread_t           _pthread;      // pthread backing this vCPU
                l4_addr_t           _remote_utcb;  // address of remote UTCB

                /*
                 * Master segment registers. Restored whenever we enter the
                 * master through a vCPU fault.
                 */
                unsigned long       _master_ds;
                unsigned long       _master_fs;
                unsigned long       _master_gs;

                l4_umword_t         _pending_trap; // for injecting HW traps
                l4_umword_t         _events;       // keeps track of pending events
                enum eventpending {
                        /* Set if we detected a page fault that could not be handled.
                         * This way, the PF handler can bail out if the fault is
                         * raised again. */
                        Unhandled_Page_Fault = 1,
                };

                struct gdt_entry_struct
                {
                        unsigned short limit_low;      // The lower 16 bits of the limit.
                        unsigned short base_low;       // The lower 16 bits of the base.
                        unsigned char base_middle;     // The next 8 bits of the base.
                        unsigned char access;          // Access flags; determine which ring this segment can be used from.
                        unsigned char granularity;     // Granularity flags and the upper 4 bits of the limit.
                        unsigned char base_high;       // The last 8 bits of the base.
                } __attribute__((packed))
                _client_gdt[2];

                /*
                 * Get the topmost address of the exception handler stack
                 */
                l4_addr_t top_of_handler_stack() { return (l4_addr_t)(_handler_stack + HANDLER_STACK_SIZE); }

                /*
                 * Initialize handler and init thread stacks.
                 *
                 * This ensures that the handler stack is paged in correctly before we
                 * do anything. Otherwise the handler might raise a page fault upon
                 * first entry.
                 */
                void touch_stacks();


                /*
                 * Create the vCPU kernel object
                 */
                void alloc_vcpu_cap();


                /*
                 * Allocate and set up the vCPU UTCB
                 *
                 * The setup code stores a pointer to this App_thread object on
                 * the handler's stack so that it can be found upon an exception.
                 */
                void alloc_vcpu_mem();

                App_thread() { }
                App_thread(const App_thread&) { }

        public:

                App_thread(l4_addr_t eip,
                           l4_addr_t esp,
                           l4_addr_t handler_fn,
                           l4_addr_t thread_fn)
                        :
                          _handler_fn(handler_fn),
                          _thread_fn(thread_fn),
                          _cpu(1),
                          _vcpu(0),
                          _vcpu_utcb(0),
                          _remote_utcb(0xFFFFFFFF),
                          _pending_trap(0),
                          _events(0)
                {
                        asm volatile (
                            "mov %%fs, %0\n\t"
                            "mov %%gs, %1\n\t"
                            "mov %%ds, %2\n\t"
                            : "=r" (_master_fs),
                              "=r" (_master_gs),
                              "=r" (_master_ds));

                        _handler_stack = (char*)memalign(L4_PAGESIZE, HANDLER_STACK_SIZE);
                        _handler_sp    = top_of_handler_stack();
                        DEBUG() << "HANDLER STACK: " << (void*)_handler_stack;
                        _check(!_handler_stack, "could not allocate handler stack");

                        touch_stacks();
                        alloc_vcpu_cap();
                        alloc_vcpu_mem();

                        DEBUG() << "vCPU cap: " << std::hex << vcpu_cap();

                        DEBUG() << "STACK: " << std::hex << (void*)esp;
                        vcpu()->r()->ip = eip;
                        vcpu()->r()->sp = esp;
                        DEBUG() << "EIP " << (void*)eip << " ESP " << (void*)esp;
                }
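
                /*
                 * Construction sketch (illustrative): the master creates one
                 * App_thread per replica vCPU, passing the replica's entry
                 * point and stack pointer plus the handler and startup
                 * routines, e.g.:
                 *
                 *   Romain::App_thread *t =
                 *       new Romain::App_thread(binary_entry, stack_top,
                 *                              (l4_addr_t)&my_handler,
                 *                              (l4_addr_t)&thread_startup);
                 *
                 * binary_entry, stack_top, and thread_startup are placeholder
                 * names for values provided by the loader/setup code.
                 */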


                /*
                 * Manage fast lookup for the replica's UTCB address
                 */
                void remote_utcb(l4_addr_t a) { _remote_utcb = a; }
                l4_addr_t remote_utcb() const { return _remote_utcb; }

                /*
                 * Start the vCPU thread
                 */
                void start();


                l4_addr_t            handler_sp()     const { return _handler_sp; }
                void handler_sp(l4_addr_t sp) { _handler_sp = sp; }

                l4_addr_t            thread_sp()      const { return _thread_sp; }
                void thread_sp(l4_addr_t sp)  { _thread_sp = sp; }

                l4_addr_t            thread_entry()   const { return _thread_fn; }

                l4_umword_t           cpu()           const { return _cpu; }
                void                  cpu(l4_umword_t c)    { _cpu = c; }
                L4::Cap<L4::Thread>   vcpu_cap()      const { return _vcpu_cap; }
                void                  vcpu_cap(L4::Cap<L4::Thread> c) { _vcpu_cap = c; }
                L4vcpu::Vcpu         *vcpu()          const { return _vcpu; }
                l4_utcb_t            *vcpu_utcb()     const { return _vcpu_utcb; }

                unsigned long         ds()            const { return _master_ds; }
                unsigned long         fs()            const { return _master_fs; }
                unsigned long         gs()            const { return _master_gs; }
//              void                  gs(l4_addr_t a)       { _master_gs = a; }

                void *                gdt()           const
                {
                        return (void*)&_client_gdt[0];
                }
                unsigned              gdt_size()      const { return sizeof(_client_gdt); }

                /***********************************************************************
                 * GDT Handling Explained
                 *
                 * Fiasco uses the FS register to store the current UTCB address,
                 * libpthread uses GS for providing thread-local storage. Both require
                 * a valid entry in the GDT, which user space can access through the
                 * fiasco_gdt_set() system call. Furthermore, Fiasco provides a range
                 * of user-defined segment entries at offsets 0x48, 0x50, and 0x58.
                 *
                 * By default, the GDT entry for the UTCB address is 0x40. As Romain
                 * uses pthreads, the first user-defined segment is used for Romain's
                 * TLS address.
                 *
                 * Replicas use user-defined entries 2 and 3:
                 * - Entry 2 (0x50) contains the replica's UTCB address.
                 * - Entry 3 (0x58) can later be set up for thread-local storage.
                 *
                 * This means there are no free user-defined GDT entries anymore! If we
                 * wanted to fix this, we'd have to manually swap GDT entries every
                 * time we switch between replicas and master. This would require two
                 * additional system calls for modifying the GDT.
                 ***********************************************************************/

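                /*
                 * For reference (standard x86 selector arithmetic, not code
                 * from this file): a descriptor at GDT byte offset O has index
                 * O / 8, and its ring-3 selector is O | 3. The user-defined
                 * offsets mentioned above therefore correspond to:
                 *
                 *   0x48 -> index  9, selector 0x4b
                 *   0x50 -> index 10, selector 0x53
                 *   0x58 -> index 11, selector 0x5b
                 */
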
                /*
                 * Set up the initial GDT segment (e.g., UTCB address)
                 */
                void setup_utcb_segdesc(l4_addr_t base, l4_addr_t limit)
                {
                        memset(_client_gdt, 0, sizeof(_client_gdt));

                        _client_gdt[0].limit_low   = limit & 0xFFFF;
                        _client_gdt[0].base_low    = base & 0xFFFF;
                        _client_gdt[0].base_middle = (base >> 16) & 0xFF;
                        _client_gdt[0].base_high   = (base >> 24) & 0xFF;
                        _client_gdt[0].access      = 0xF2; // present, DPL 3, writable data segment
                        _client_gdt[0].granularity = 0x40; // 32-bit segment, byte granularity
                }


                /*
                 * Write the second GDT entry and commit it to the vCPU.
                 *
                 * The resulting segment selector is stored in the vCPU's GS
                 * register.
                 */
                void write_gdt_entry(l4_umword_t *src, l4_umword_t bytes)
                {
                        memcpy(&_client_gdt[1], src, bytes);
                        vcpu()->r()->gs = fiasco_gdt_set(vcpu_cap().cap(), &_client_gdt[1],
                                                         sizeof(_client_gdt[1]), 2, l4_utcb());
                        DEBUG() << "set " << std::hex << vcpu()->r()->gs;
                }


                /*
                 * Commit the first client GDT entry (the UTCB segment set up
                 * by setup_utcb_segdesc()) to the vCPU. The resulting segment
                 * selector is stored in the vCPU's FS register.
                 */
                void commit_client_gdt()
                {
                        vcpu()->r()->fs = fiasco_gdt_set(vcpu_cap().cap(), gdt(),
                                                         gdt_size()/2, 1, l4_utcb());
                        DEBUG() << "set " << std::hex << vcpu()->r()->fs;
                }

                /*
                 * Schedule a "virtual" trap
                 *
                 * This mechanism is used to mark pending events for future
                 * invocations of certain fault observers. These events currently
                 * include:
                 *
                 *   - unhandled page fault
                 */
                void set_pending_trap(unsigned no) { _pending_trap |= (1 << no); }

                void set_unhandled_pf()
                {
                        _events |= Unhandled_Page_Fault;
                        set_pending_trap(0xE);
                }

                void unset_unhandled_pf() { _events &= ~Unhandled_Page_Fault; }
                bool unhandled_pf()       { return _events & Unhandled_Page_Fault; }

                l4_umword_t events_pending() { return _events; }

                /*
                 * Get the next pending trap (and remove it from the pending set)
                 */
                unsigned get_pending_trap()
                {
                        // Note: the second argument of l4util_find_first_set_bit()
                        // is the size of the searched region in bits.
                        unsigned ret = l4util_find_first_set_bit(&_pending_trap,
                                                                 sizeof(_pending_trap) * 8);
                        if (ret >= sizeof(_pending_trap) * 8) {
                                return 0;
                        } else {
                                _pending_trap &= ~(1 << ret);
                        }

                        return ret;
                }
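
                /*
                 * Example (following directly from the code above):
                 * set_pending_trap(0xE) sets bit 14 in _pending_trap; a later
                 * get_pending_trap() finds that bit, clears it, and returns
                 * 14, so the caller can inject the corresponding trap exactly
                 * once.
                 */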


                void print_vcpu_state()
                {
                        char pref[32];
                        snprintf(pref, 32, "[VCPU %p] ", (void*)vcpu());
                        vcpu()->print_state(pref);
                }

                unsigned long csum_state();
};



} /* namespace Romain */

/*
 * Common prolog to be executed upon entry to an exception handler function.
 * It restores the master's ES, DS, FS, and GS segment registers before
 * continuing execution in the handler address space.
 */
#define handler_prolog(app_thread) \
        do {  \
                  asm volatile ( \
                      "mov %0, %%es;" \
                      "mov %0, %%ds;" \
                      "mov %1, %%fs;" \
                      "mov %2, %%gs;" \
                      : : \
                          "r"((app_thread)->ds()), "r"((app_thread)->fs()), \
                          "r"((app_thread)->gs())); \
        } while (0)
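
/*
 * Usage sketch (illustrative): an exception handler running on behalf of an
 * App_thread would restore the master's segment registers before touching
 * any TLS or UTCB state, e.g.:
 *
 *   void handle_fault(Romain::App_thread *t)
 *   {
 *       handler_prolog(t);
 *       // ... inspect t->vcpu(), handle the fault, resume the replica ...
 *   }
 *
 * handle_fault is a placeholder name; the actual entry path is established
 * via my_handler() and the handler stack laid out above.
 */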