4 * Instance manager implementation.
6 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
7 * economic rights: Technische Universität Dresden (Germany)
8 * This file is part of TUD:OS and distributed under the terms of the
9 * GNU General Public License 2.
10 * Please see the COPYING-GPL-2 file for details.
14 #include "app_loading"
15 #include "configuration"
17 #include <l4/sys/segment.h>
18 #include <l4/re/mem_alloc>
21 #include <l4/re/dataspace>
22 #include <l4/re/util/cap_alloc>
23 #include <l4/plr/uu.h>
25 #define MSG() DEBUGf(Romain::Log::Manager)
26 #include "fault_handlers/syscalls_factory.h"
28 Romain::Configuration Romain::globalconfig;
/*
 * Count the number of set bits in v.
 *
 * The value is treated as a plain bit pattern (reinterpreted as unsigned), so
 * negative inputs are counted by their two's-complement representation and no
 * signed right shift is involved.
 *
 * A clear-lowest-set-bit loop is used instead of the 32-bit parallel-mask
 * trick: it is correct for any width of long, and it cannot mis-mask the
 * per-byte partial sums (which over-counted inputs such as 0x11 or 0xFF).
 *
 * \param v  bit pattern to inspect
 * \return   number of bits set in v
 */
L4_INLINE l4_umword_t countbits(long v)
{
	unsigned long remaining = static_cast<unsigned long>(v);
	l4_umword_t count = 0;

	while (remaining) {
		remaining &= remaining - 1; // clears the lowest set bit
		++count;
	}

	return count;
}
/*
 * Ask the L4Re scheduler for CPU information and return the number of bits
 * set in the online-CPU map, capped at the maximum CPU count the scheduler
 * reported.
 */
39 L4_INLINE l4_umword_t count_online_cpus()
41 l4_umword_t maxcpu = 0;
42 l4_sched_cpu_set_t cpuonline = l4_sched_cpu_set(0, 0);
// info() fills in maxcpu and cpuonline; a negative l4_error() value is reported as an error.
43 if (l4_error(L4Re::Env::env()->scheduler()->info(&maxcpu, &cpuonline)) < 0) {
44 ERROR() << "reading CPU info\n";
47 INFO() << "Online " << countbits(cpuonline.map) << " / MAX " << maxcpu;
// Never return more than maxcpu, even if the bitmap has more bits set.
49 return countbits(cpuonline.map) > maxcpu ? maxcpu : countbits(cpuonline.map);
53 Romain::InstanceManager::InstanceManager(l4_umword_t argc,
55 l4_umword_t num_instances)
61 _num_inst(num_instances),
63 _argc(argc), // XXX: remove
64 _argv(argv) // XXX: remove
68 _gdt_min = fiasco_gdt_get_entry_offset(L4_INVALID_CAP, l4_utcb());
69 MSG() << "GDT MIN: " << _gdt_min;
71 _num_cpu = count_online_cpus();
73 * initial parameter is argv for the client program, this means
74 * *argv is the file name to load.
78 _am = new Romain::App_model(_name, argc, argv);
79 Romain::Elf_Ldr loader(_am);
83 _init_eip = _am->prog_info()->entry;
84 _init_esp = _am->prog_info()->stack_addr;
85 INFO() << "Program entry point at 0x" << std::hex << _init_eip;
86 INFO() << " stack at 0x" << std::hex << _init_esp;
89 l4_mword_t res = pthread_create(&_split_handler, 0, split_handler_fn, this);
90 _check(res != 0, "could not create split handler thread");
/*
 * Translate a comma-separated list of log flag names into Romain::Log::logFlags.
 *
 * The buffer behind 'flags' is modified in place: every ',' is overwritten
 * with a NUL byte so that each name becomes its own C string, which is then
 * compared against the known names (mem|memory, emulator, manager, faults,
 * redundancy, loader, swifi, gdb, mso, all).
 *
 * \param flags  writable, NUL-terminated list of flag names
 */
95 void Romain::InstanceManager::configure_logflags(char *flags)
97 printf("flags %p\n", flags);
// Start from an empty flag set; matching names are OR-ed in below.
99 Romain::Log::logFlags = 0;
// Remember the original length before the separators are replaced by NULs.
101 l4_umword_t max = strlen(flags);
102 for (l4_umword_t j = 0; j < max; ++j) {
103 if (flags[j] == ',') flags[j] = 0;
106 char const *c = flags;
107 while (c <= flags + max) {
109 if ((strcmp(c, "mem") == 0) || (strcmp(c, "memory") == 0)) {
110 Romain::Log::logFlags |= Romain::Log::Memory;
111 } else if (strcmp(c, "emulator") == 0) {
112 Romain::Log::logFlags |= Romain::Log::Emulator;
113 } else if (strcmp(c, "manager") == 0) {
114 Romain::Log::logFlags |= Romain::Log::Manager;
115 } else if (strcmp(c, "faults") == 0) {
116 Romain::Log::logFlags |= Romain::Log::Faults;
117 } else if (strcmp(c, "redundancy") == 0) {
118 Romain::Log::logFlags |= Romain::Log::Redundancy;
119 } else if (strcmp(c, "loader") == 0) {
120 Romain::Log::logFlags |= Romain::Log::Loader;
121 } else if (strcmp(c, "swifi") == 0) {
122 Romain::Log::logFlags |= Romain::Log::Swifi;
123 } else if (strcmp(c, "gdb") == 0) {
124 Romain::Log::logFlags |= Romain::Log::Gdb;
125 } else if (strcmp(c, "mso") == 0) {
126 Romain::Log::logFlags |= Romain::Log::MarkShared;
// "all" assigns the complete mask instead of OR-ing a single bit.
127 } else if (strcmp(c, "all") == 0) {
128 Romain::Log::logFlags = Romain::Log::All;
133 printf("Flags: %08lx\n", Romain::Log::logFlags);
138 void Romain::InstanceManager::configure_fault_observers()
141 * First, register those observers that don't interfere
142 * with anyone else and get notified all the time.
144 DEBUG() << "[observer] vcpu state.";
145 BoolObserverConfig("general:print_vcpu_state",
147 DEBUG() << "[observer] trap limit.";
148 ObserverConfig(this, "trap_limit");
151 * Always needed -- slightly ordered by the number of
152 * calls they are expected to see, so that we minimize
153 * the amount of unnecessary observer callbacks.
155 DEBUG() << "[observer] page faults.";
156 ObserverConfig(this, "pagefaults");
157 DEBUG() << "[observer] MarkShared.";
158 ObserverConfig(this, "mso");
159 DEBUG() << "[observer] syscalls.";
160 ObserverConfig(this, "syscalls");
161 DEBUG() << "[observer] threads.";
162 BoolObserverConfig("general:threads", this, "threads");
163 DEBUG() << "[observer] trap.";
164 ObserverConfig(this, "trap");
166 DEBUG() << "[observer] simpledbg.";
167 StringObserverConfig("general:debug", this);
168 DEBUG() << "[observer] intercept-kip.";
169 BoolObserverConfig("general:intercept_kip", this, "kip-time");
170 DEBUG() << "[observer] swifi.";
171 BoolObserverConfig("general:swifi", this, "swifi");
172 DEBUG() << "[observer] logreplica.";
173 BoolObserverConfig("general:logreplica", this, "replicalog");
/*
 * Read the watchdog settings ("watchdog:enable", "watchdog:timeout",
 * "watchdog:singlestepping") from the configuration and store the result in
 * _watchdog_enable, _watchdog_mode and _watchdog_timeout.
 */
177 void Romain::InstanceManager::configure_watchdog()
180 char const *enable = Romain::ConfigStringValue("watchdog:enable");
181 l4_mword_t timeout = Romain::ConfigIntValue("watchdog:timeout");
182 char const *mode = Romain::ConfigStringValue("watchdog:singlestepping");
185 _watchdog_enable = false;
// "y", "yes" and "true" are the accepted spellings for switching the watchdog on.
187 if (strcmp(enable, "y") == 0 ||
188 strcmp(enable, "yes") == 0 ||
189 strcmp(enable, "true") == 0) {
190 _watchdog_enable = true;
192 _watchdog_enable = false;
// The same spellings select single-stepping; the other visible assignments pick breakpointing.
197 if (strcmp(mode, "y") == 0 || strcmp(mode, "yes") == 0
198 || strcmp(mode, "true") == 0)
199 _watchdog_mode = Romain::Watchdog::SingleStepping;
201 _watchdog_mode = Romain::Watchdog::Breakpointing;
203 _watchdog_mode = Romain::Watchdog::Breakpointing;
// The configured timeout is taken over when the watchdog is enabled.
206 if (_watchdog_enable)
207 _watchdog_timeout = timeout;
209 _watchdog_timeout = 0;
/*
 * Evaluate the "general:redundancy" configuration string. An unset value is
 * treated as "none"; the recognized values are "none", "dual" and "triple".
 * Any other value is logged as an error and enter_kdebug() is called.
 */
214 void Romain::InstanceManager::configure_redundancy()
216 char const *redundancy = ConfigStringValue("general:redundancy");
// Fall back to "none" when the option is missing from the configuration.
217 if (!redundancy) redundancy = "none";
218 INFO() << "red: '" << redundancy << "'";
219 if (strcmp(redundancy, "none") == 0) {
221 } else if (strcmp(redundancy, "dual") == 0) {
223 } else if (strcmp(redundancy, "triple") == 0) {
226 ERROR() << "Invalid redundancy setting: " << redundancy << "\n";
227 enter_kdebug("Invalid redundancy setting");
/*
 * Set up the memory backing the global event log buffer: allocate and attach
 * a dataspace of sizeMB megabytes, zero it, and hand it to
 * Romain::globalLogBuf via set_buffer().
 *
 * \param sizeMB  requested buffer size in megabytes
 */
232 void Romain::InstanceManager::configure_logbuf(l4_mword_t sizeMB)
234 INFO() << "Log buffer size: " << sizeMB << " MB requested.";
// MB -> bytes
235 l4_umword_t size_in_bytes = sizeMB << 20;
237 L4::Cap<L4Re::Dataspace> ds;
// NOTE(review): the trailing L4_SUPERPAGESHIFT argument presumably requests superpage alignment -- confirm against allocate_and_attach().
239 l4_addr_t addr = Romain::Region_map::allocate_and_attach(&ds, size_in_bytes,
240 0, 0, L4_SUPERPAGESHIFT);
241 INFO() << "Log buffer attached to 0x" << std::hex << addr;
// Zero the whole buffer before it is registered with the event log.
243 memset((void*)addr, 0, size_in_bytes);
244 Romain::globalLogBuf->set_buffer(reinterpret_cast<l4_uint8_t*>(addr), size_in_bytes);
249 * Romain ini file settings
250 * =====================
255 * The 'general' section determines which fault handlers are registered.
257 * print_vcpu_state [bool]
258 * - Registers a handler printing the state of a VCPU upon every
261 * debug [string = {simple,gdb}]
262 * - Configures a debugger stub. 'simple' refers to builtin debugging,
263 * 'gdb' starts a gdb stub. Further configuration for the debuggers
264 * is done in separate INI sections.
266 * page_fault_handling [string = {ro}]
267 * - Specify the way in which paging is done.
268 * 'ro' means that client memory is mapped read-only and write
269 * accesses to the respective regions are emulated.
271 * redundancy [string = {none, dual, triple}]
272 * - configure the number of replicas that are started
276 * - comma-separated list of strings for configuring logging
277 * - available flags are:
278 * - mem|memory -> memory management
279 * - emulator -> instruction emulation
280 * - manager -> replica management
281 * - faults -> generic fault entry path
282 * - redundancy -> DMR/TMR-specific logs
283 * - swifi -> fault injection
284 * - gdb -> GDB stub logging
285 * - all -> everything
288 * - establish a log buffer with the given size in MB
289 * - runtime events are logged into this buffer and can later
290 * be dumped for postprocessing -> this is an alternative to
291 * printing a lot of stuff to the serial console
294 * - event generation needs a global timestamp. On real SMP hardware
295 * CPUs disagree on their local TSC values. As a workaround, we start
296 * a dedicated thread that busily writes its local TSC to a global timer
297 * variable that is then read by everyone else. This of course requires
298 * the thread to solely run on a dedicated CPU. This option sets the
301 * logrdtsc [bool] (false)
302 * - use local TSC instead of global time stamp counter for event timestamps
303 * -> use on Qemu where a dedicated timestamp thread does not work properly
305 * logreplica [bool] (false)
306 * - assign each replica a log buffer (mapped to REPLICA_LOG_ADDRESS)
308 * logtimeout [int] (-1)
309 * - run the replicated app for N seconds, then halt all threads and
310 * print replica stats (as if on termination)
312 * replicalogsize [int] (-1)
313 * - buffer size for the replica-specific log buffer
315 * swifi [bool] (false)
316 * - Perform fault injection experiments, details are configured
317 * in the [swifi] section.
319 * kip-time [bool] (false)
320 * - Turn on/off KIP timer access. This is used to turn replica
321 * accesses to the clock field of the KIP into traps (by placing
322 * software breakpoints on specifically configured instructions).
323 * Use this, if your application needs clock info from the KIP.
325 * max_traps [int] (-1)
326 * - Handle a maximum amount of traps before terminating the
327 * replicated application. Use as a debugging aid.
329 * print_time [bool] (true)
330 * - include timing information in printed output.
335 * This section configures the behavior of the GDB stub.
338 * - Configures the GDB stub to use a TCP/IP connection and wait
339 * for a remote GDB to connect on the port specified. If this
340 * option is _not_ set, the GDB stub will try to use a serial
341 * connection through COM2.
343 * XXX make COM port configurable
345 * 'simpledbg' section
346 * -------------------
348 * This section configures Romain's builtin debugger, which is programmed through
349 * INI file commands only and performs a narrow range of debugging tasks only.
352 * - Patches an INT3 breakpoint to the given address. Then executes the program
353 * until the breakpoint is hit and thereafter switches to single-stepping
359 * The KIP-time instrumentation needs a list of addresses that point to
360 * KIP->clock accessing instructions. These are supplied as a comma-separated
361 * list of hex values for the target command.
363 * target [comma-separated list of hex addresses]
368 * Configures fault-injection experiments that are performed on replicas.
369 * By default, SWIFI currently injects faults into replica #0.
372 * specifies an address to place a breakpoint on. Upon hitting this
373 * BP, a SWIFI injection is performed.
376 * specifies what kind of injection to perform when hitting the BP.
378 * - 'gpr' -> flip a random bit in a randomly selected
379 * general-purpose register
381 void Romain::InstanceManager::configure()
383 #define USE_SHARABLE_TIMESTAMP 1
386 l4_mword_t logMB = ConfigIntValue("general:logbuf");
387 #if USE_SHARABLE_TIMESTAMP
388 Romain::globalLogBuf = new Measurements::EventBuf(true);
389 L4::Cap<L4Re::Dataspace> tsds;
390 l4_addr_t ts_addr = Romain::Region_map::allocate_and_attach(&tsds, L4_PAGESIZE);
391 l4_touch_ro((void*)ts_addr, L4_PAGESIZE);
392 Romain::globalLogBuf->set_tsc_buffer(reinterpret_cast<l4_uint64_t*>(ts_addr));
394 Romain::globalLogBuf = new Measurements::EventBuf();
397 configure_logbuf(logMB);
400 Log::logLocalTSC = ConfigBoolValue("general:logrdtsc", false);
403 * These modes are exclusive: either we use the local TSC _xor_ we start a
404 * timer thread on a dedicated CPU.
406 if (!Log::logLocalTSC) {
407 l4_mword_t logCPU = ConfigIntValue("general:logcpu");
409 INFO() << "Starting counter thread on CPU " << logCPU;
410 INFO() << "Timestamp @ 0x" << std::hex << (l4_addr_t)Romain::globalLogBuf->timestamp;
411 Measurements::EventBuf::launchTimerThread((l4_addr_t)Romain::globalLogBuf->timestamp,
415 #endif // EVENT_LOGGING
417 char *log = strdup(ConfigStringValue("general:log", "none"));
418 configure_logflags(log);
420 Log::withtime = ConfigBoolValue("general:print_time", true);
422 configure_fault_observers();
423 configure_redundancy();
424 configure_watchdog();
/*
 * Dump the contents of the global event log buffer through
 * uu_dumpz_ringbuffer(), using "sampledump.txt" as the file name.
 *
 * The buffer is handled as a ring of Measurements::GenericEvent entries:
 * start offset and length of the dump are computed in bytes from oldest(),
 * index and size of Romain::globalLogBuf.
 */
429 void Romain::InstanceManager::logdump()
432 l4_mword_t logMB = ConfigIntValue("general:logbuf");
434 char const *filename = "sampledump.txt";
436 l4_umword_t oldest = Romain::globalLogBuf->oldest();
437 l4_umword_t dump_start, dump_size;
439 if (oldest == 0) { // half-full -> dump from 0 to index
441 dump_size = Romain::globalLogBuf->index * sizeof(Measurements::GenericEvent);
442 } else { // buffer completely full -> dump full size starting from oldest entry
443 dump_start = oldest * sizeof(Measurements::GenericEvent);
444 dump_size = Romain::globalLogBuf->size * sizeof(Measurements::GenericEvent);
// Arguments: file name, buffer base, total buffer size in bytes, start offset, number of bytes to dump.
447 uu_dumpz_ringbuffer(filename, Romain::globalLogBuf->buffer,
448 Romain::globalLogBuf->size * sizeof(Measurements::GenericEvent),
449 dump_start, dump_size);
456 * Prepare the stack that is used by the fault handler whenever a
457 * VCPU enters the master task.
459 * This pushes relevant pointers to the stack so that the handler
460 * functions can use them as parameters.
462 l4_addr_t Romain::InstanceManager::prepare_stack(l4_addr_t sp,
463 Romain::App_instance *inst,
464 Romain::App_thread *thread,
465 Romain::Thread_group *tgroup)
467 Romain::Stack st(sp);
474 st.push(0); // this would be the return address, but
475 // handlers return by vcpu_resume()
/*
 * Create _num_inst App_instance objects -- each constructed from _name and
 * its index -- and append them to _instances.
 */
481 void Romain::InstanceManager::create_instances()
483 for (l4_umword_t i = 0; i < _num_inst; ++i) {
484 _instances.push_back(new Romain::App_instance(_name, i));
490 Romain::InstanceManager::create_thread(l4_umword_t eip, l4_umword_t esp,
491 l4_umword_t instance_id, Romain::Thread_group *group)
493 Romain::App_thread *at = new Romain::App_thread(eip, esp,
494 reinterpret_cast<l4_addr_t>(VCPU_handler),
495 reinterpret_cast<l4_addr_t>(VCPU_startup)
497 , _watchdog_enable, _watchdog_timeout
502 * Set up the VCPU handler thread. It has been allocated in
503 * App_thread's constructor.
505 DEBUG() << "prepare: " << (void*)at->handler_sp();
506 at->handler_sp(prepare_stack(at->handler_sp(),
507 _instances[instance_id], at, group));
510 * phys. CPU assignment, currently done by mapping instances to dedicated
514 INFO() << "inst " << instance_id << " mod numcpu " << (instance_id+1) % _num_cpu
515 << " numcpu " << _num_cpu;
517 l4_umword_t logCPU = 1;
519 /* XXX REPLICAS PER CPU XXX */
520 //logCPU = logicalToCPU(group->uid % _num_cpu);
522 /* XXX INSTANCES PER CPU XXX */
523 //logCPU = logicalToCPU((instance_id + 1) % _num_cpu);
525 /* XXX OVERLAPPING REPLICAS XXX */
526 //logCPU = logicalToCPU((group->uid + instance_id) % _num_cpu);
528 /* XXX RANDOM PLACEMENT XXX */
529 //logCPU = logicalToCPU(random() % _num_cpu);
532 /* XXX Threads assigned RR to CPUs */
533 static l4_mword_t threadcount = 0;
534 //logCPU = logicalToCPU(threadcount % _num_cpu);
537 #define OPTIMIZE_REPLICA_PLACEMENT 1
539 /* XXX The hard-coded placement map:
540 * Manual optimization for pthreads applications. In our scenarios,
541 * pthreads starts a manager as the second thread and this manager
542 * often does nothing. Therefore, instead of placing each idle manager
543 * replica on its own CPU, we put them all on the same CPU and
544 * also add the subsequent real replica
546 l4_mword_t cpumap[3][15] = { // single -> 1:1 mapping
552 #if OPTIMIZE_REPLICA_PLACEMENT
553 // DMR - optimized placement
565 // DMR - sequential placement
572 #if OPTIMIZE_REPLICA_PLACEMENT
573 // TMR - optimized placement
585 // TMR - sequential placement
593 logCPU = logicalToCPU(cpumap[instance_count()-1][threadcount]);
/*
 * Create a new thread group with one replica thread per instance.
 *
 * The group gets a DMR redundancy callback and a Watchdog, which are linked
 * to each other. One App_thread per instance is then created through
 * create_thread() and added to the group as a replica, and the group is
 * appended to _threadgroups.
 *
 * \param eip  initial instruction pointer passed on to create_thread()
 * \param esp  initial stack pointer passed on to create_thread()
 * \param n    name handed to the Thread_group constructor
 * \param cap  capability index handed to the Thread_group constructor
 * \param uid  identifier handed to the Thread_group constructor
 */
606 Romain::Thread_group *
607 Romain::InstanceManager::create_thread_group(l4_umword_t eip, l4_umword_t esp, std::string n,
608 l4_umword_t cap, l4_umword_t uid)
610 Romain::Thread_group *group = new Romain::Thread_group(n, cap, uid);
611 group->set_redundancy_callback(new DMR(_num_inst));
// NOTE(review): each call allocates a new Watchdog and overwrites the _watchdog member -- confirm this is intended for multiple groups.
613 _watchdog = new Watchdog(_num_inst, _watchdog_enable, _watchdog_mode);
614 group->set_watchdog(_watchdog);
// Cross-link watchdog and redundancy callback so each can reach the other.
615 group->redundancyCB->set_watchdog(_watchdog);
616 group->watchdog->set_redundancy_callback(group->redundancyCB);
619 for (l4_umword_t i = 0; i < _num_inst; ++i) {
623 Romain::App_thread *at = create_thread(eip, esp, i, group);
624 group->add_replica(at);
627 _threadgroups.push_back(group);
632 void Romain::InstanceManager::run_instances()
634 Romain::Thread_group *group = create_thread_group(_init_eip, _init_esp, "init",
635 Romain::FIRST_REPLICA_CAP, 0);
636 DEBUG() << "created group object @ " << (void*)group;
637 theObjectFactory.register_thread_group(group, Romain::FIRST_REPLICA_CAP);
639 _check(group->threads.size() != _num_inst, "not enough threads created?");
641 for (l4_umword_t i = 0; i < _num_inst; ++i) {
643 App_thread *at = group->threads[i];
648 at->thread_sp((l4_addr_t)_am->stack()->relocate(_am->stack()->ptr()));
651 * The initial UTCB address is on top of the app's stack. This location
652 * is used for the first GDT entry, which L4Re later uses to find the
653 * thread's UTCB address.
655 at->setup_utcb_segdesc(_am->stack()->target_top() - 4, 4);
658 * Establish UTCB mapping
660 Romain::Region_handler &rh = const_cast<Romain::Region_handler&>(
661 _am->rm()->find(_am->prog_info()->utcbs_start)->second);
662 _check(_am->rm()->copy_existing_mapping(rh, 0, i) != true,
663 "could not create UTCB copy");
664 at->remote_utcb(rh.local_region(i).start());
667 * Notify handlers about an instance that has started
669 startup_notify(_instances[i], at, group, _am);
672 * Start the thread itself
674 at->vcpu()->r()->sp = at->thread_sp();