4 * n-way modular redundancy implementation
6 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
7 * economic rights: Technische Universität Dresden (Germany)
8 * This file is part of TUD:OS and distributed under the terms of the
9 * GNU General Public License 2.
10 * Please see the COPYING-GPL-2 file for details.
14 #include "../redundancy.h"
15 #include "../app_loading"
16 #include "../fault_observers"
18 #include "../fault_handlers/syscalls_handler.h"
// MSG(): debug output stream obtained from DEBUGf with the
// Romain::Log::Redundancy argument.
20 #define MSG() DEBUGf(Romain::Log::Redundancy)
// MSGi(inst): the same stream, with "[<inst->id()>] " written first so that
// each message carries the id of the instance it refers to.
21 #define MSGi(inst) MSG() << "[" << (inst)->id() << "] "
23 //extern char * __func__;
25 /* Replication protocol:
26 * =====================
28 * Everyone goes to sleep, except the last thread to enter. This thread becomes
29 * the 'leader'. The leader returns from this function with the First_syscall return
30 * value. It then goes on to execute the system call (in manager.cc). Depending on
33 * a) For replicatable calls: it stores its VCPU state after the system call using
34 * the function put(). All other replicas then use get() to obtain this state.
36 * b) For non-replicatable calls: it sets the other replicas' return value to
37 * Repeat_syscall. The replicas then perform handling themselves.
39 * After all the handling, everyone waits in resume() until the last replica reaches
40 * the resumption point. Then each VCPU goes back to where it came from.
43 * Detection and recovery:
44 * =======================
46 * Before executing the fault handler, the leader checksums all VCPU states. If a
47 * mismatch is found, it calls the recover() function. recover() sets things straight
48 * so that after the handler is done, everyone is in an identical state again. The leader
49 * then goes on to execute the call.
// DMR constructor for `instances` replicas.
// Initializer list: the enter/leave/block counters start at 0, _rv starts as
// RedundancyCallback::Invalid, _num_instances takes `instances`, and
// _num_instances_bak starts at 0.
52 Romain::DMR::DMR(unsigned instances)
53 : _enter_count(0), _leave_count(0), _block_count(0),
54 _rv(Romain::RedundancyCallback::Invalid),
55 _num_instances(instances), _num_instances_bak(0)
// Loop over every replica index.
// NOTE(review): the per-replica statement this loop is meant to run is not
// shown in this view -- confirm against the full file.
57 for (unsigned i = 0; i < _num_instances; ++i)
// Create the mutex/condvar pairs for the enter, leave and block phases with
// default attributes (NULL). For each call, _check receives the result of
// "init returned non-zero" together with an error message.
59 _check(pthread_mutex_init(&_enter_mtx, NULL) != 0, "error initializing mtx");
60 _check(pthread_cond_init(&_enter, NULL) != 0, "error initializing condvar");
61 _check(pthread_mutex_init(&_leave_mtx, NULL) != 0, "error initializing mtx");
62 _check(pthread_cond_init(&_leave, NULL) != 0, "error initializing condvar");
63 _check(pthread_mutex_init(&_block_mtx, NULL) != 0, "error initializing mtx");
64 _check(pthread_cond_init(&_block, NULL) != 0, "error initializing condvar");
// Captures the state of thread t in the replicator.
// Each PUT(field) copies one register from t->vcpu()->r() into _regs;
// afterwards L4_UTCB_OFFSET bytes starting at t->remote_utcb() are copied
// into _utcb. get() performs the copy in the opposite direction.
69 Romain::Replicator::put(Romain::App_thread *t)
71 //memset(&_regs, 0, sizeof(_regs)); // XXX
// Copy direction for this function: vCPU register -> _regs.
72 #define PUT(field) _regs.field = t->vcpu()->r()->field
73 PUT(es); PUT(ds); PUT(gs); PUT(fs);
74 PUT(di); PUT(si); PUT(bp); PUT(pfa);
75 PUT(ax); PUT(bx); PUT(cx); PUT(dx);
76 PUT(trapno); PUT(err); PUT(ip); PUT(flags);
// Save the UTCB contents found at the address returned by remote_utcb().
79 l4_utcb_t *addr = reinterpret_cast<l4_utcb_t*>(t->remote_utcb());
80 memcpy(&_utcb, addr, L4_UTCB_OFFSET);
// Writes the state stored in the replicator into thread t.
// Each PUT(field) copies one register from _regs into t->vcpu()->r();
// afterwards L4_UTCB_OFFSET bytes of _utcb are copied to t->remote_utcb().
// This is the inverse of put().
85 Romain::Replicator::get(Romain::App_thread *t)
// Copy direction for this function: _regs -> vCPU register. The macro name
// PUT is reused from put() with the assignment reversed.
87 #define PUT(field) t->vcpu()->r()->field = _regs.field
88 PUT(es); PUT(ds); PUT(gs); PUT(fs);
89 PUT(di); PUT(si); PUT(bp); PUT(pfa);
90 PUT(ax); PUT(bx); PUT(cx); PUT(dx);
91 PUT(trapno); PUT(err); PUT(ip); PUT(flags);
// Overwrite the UTCB at the address returned by remote_utcb() with the
// stored copy.
94 l4_utcb_t *addr = reinterpret_cast<l4_utcb_t*>(t->remote_utcb());
95 memcpy(addr, &_utcb, L4_UTCB_OFFSET);
// Computes one checksum per replica and checks that they all agree.
// On the first adjacent pair that differs, the state of every replica is
// reported through ERROR() and the kernel debugger is entered with the tag
// "checksum".
// NOTE(review): the return statements are not visible in this view. enter()
// tests the result with `!checksum_replicas()`, so the result is presumably
// true when all checksums match -- confirm.
99 Romain::DMR::checksum_replicas()
// One slot per possible replica, zero-initialised.
101 unsigned long csums[MAX_REPLICAS] = {0, };
// Pass 1: checksum every replica via csum_state().
105 for (idx = 0; idx < _num_instances; ++idx)
106 csums[idx] = _orig_vcpu[idx]->csum_state();
108 // validate checksums
// Pass 2: compare each checksum with its predecessor.
109 for (idx = 1; idx < _num_instances; ++idx)
110 if (csums[idx] != csums[idx-1]) {
112 ERROR() << "State mismatch detected!";
113 ERROR() << "=== vCPU states ===";
// Report every replica, not only the mismatching pair: vCPU pointer,
// checksum in hex, then the full vCPU state.
114 for (unsigned cnt = 0; cnt < _num_instances; ++cnt) {
115 ERROR() << "--- instance " << cnt << " @ "
116 << _orig_vcpu[cnt]->vcpu() << " (cs: "
117 << std::hex << csums[cnt] << ") ---";
119 _orig_vcpu[cnt]->vcpu()->print_state();
// idx is the index at which the first difference to the predecessor was seen.
121 ERROR() << "Instances: " << _num_instances << " this inst " << idx;
122 enter_kdebug("checksum");
// Abort path, declared noreturn: logs the failure, asks the instance manager
// to dump its log, enters the kernel debugger with the tag "abort", and then
// throws.
134 static __attribute__((noreturn)) void recover()
136 ERROR() << "Aborting after error.";
137 Romain::_the_instance_manager->logdump();
138 enter_kdebug("abort");
// The thrown object is a string literal, so a handler would have to catch
// const char*.
139 throw("ERROR -> abort");
// Helper class whose static recover() compares the checksums of all replicas
// to locate a mismatch.
144 class RedundancyAbort
// threads: replica threads to compare; count: number of entries in threads.
// good / bad: out-parameters.
// NOTE(review): the statements that write *good / *bad are not visible in
// this view -- presumably they receive the index of a correct and of the
// faulty replica; confirm.
147 static void recover(Romain::App_thread** threads, unsigned count,
148 unsigned *good, unsigned *bad)
// NOTE(review): the array size is a runtime value, i.e. a variable-length
// array, which is a compiler extension rather than standard C++.
150 unsigned long csums[count];
// Checksum every replica via csum_state().
154 for (idx = 0; idx < count; ++idx)
155 csums[idx] = threads[idx]->csum_state();
// Look for an adjacent pair of checksums that differ.
158 for (idx = 1; idx < count; ++idx)
159 if (csums[idx] != csums[idx-1]) { // mismatch
// Compare the entry at idx with its successor; the modulo wraps the
// successor index back to 0 at the end of the array.
// NOTE(review): presumably agreement here means idx-1 is the odd one out --
// the branch bodies are not visible, confirm.
160 if (csums[idx] == csums[(idx + 1) % count]) {
// Recovery entry point; `am` is the application model whose region manager
// (am->rm()) is asked to replicate from the good to the bad replica.
// With fewer than three replicas it calls RecoverAbort::recover(), which does
// not return. Otherwise it obtains a good and a bad replica index from
// RedundancyAbort::recover(), copies the good replica's register/UTCB state
// over the bad one through the replicator, and calls
// am->rm()->replicate(good, bad).
173 Romain::DMR::recover(Romain::App_model *am)
175 if (_num_instances < 3)
176 RecoverAbort::recover(); // noreturn
// Both indices start as ~0 (presumably a "not set" sentinel).
// NOTE(review): no check is visible here that the helper replaced these
// values before they are used as array indices below -- confirm.
178 unsigned good = ~0, bad = ~0;
179 RedundancyAbort::recover(_orig_vcpu, _num_instances, &good, &bad);
180 DEBUG() << "good " << good << ", bad " << bad;
182 // XXX: This does not suffice. We also need to copy memory content
183 // from a correct replica to the incorrect one
// put() snapshots the good replica; get() writes that snapshot into the bad one.
184 replicator().put(_orig_vcpu[good]);
185 replicator().get(_orig_vcpu[bad]);
186 am->rm()->replicate(good, bad);
// Log every replica's checksum again after the copy.
189 DEBUG() << "after recovery:";
190 for (unsigned i = 0; i < _num_instances; ++i)
191 DEBUG() << i << " " << std::hex << _orig_vcpu[i]->csum_state();
// Rendezvous point for a replica (instance i, thread t, app model a).
// Visible flow:
//  - the thread is recorded in _orig_vcpu at the slot given by i->id();
//  - under _enter_mtx, _enter_count is incremented; while the new count is
//    below _num_instances the caller sleeps on the _enter condition;
//  - otherwise (all replicas have arrived) the replica checksums are compared
//    via checksum_replicas();
//  - after unlocking, a result of Skip_syscall gets special handling.
// `ret` starts as First_syscall.
// NOTE(review): several statements are not visible in this view (reading _rv
// after the wait, the action taken on a checksum mismatch, the body of the
// Skip_syscall branch, the return) -- confirm against the full file.
196 Romain::RedundancyCallback::EnterReturnVal
197 Romain::DMR::enter(Romain::App_instance *i, Romain::App_thread *t,
198 Romain::App_model *a)
201 MSGi(i) << "DMR::enter act(" << _enter_count << ")";
203 Romain::RedundancyCallback::EnterReturnVal ret = Romain::RedundancyCallback::First_syscall;
205 // enter ourselves into the list of faulted threads
// This store happens before _enter_mtx is taken.
206 _orig_vcpu[i->id()] = t;
// The counter update and the wait below run with _enter_mtx held.
208 pthread_mutex_lock(&_enter_mtx);
210 /* TODO: select the first replica that makes the sum of all replicas
211 * larger than N/2, if all their states match.
213 if (++_enter_count < _num_instances) {
214 //MSGi(i) << "I'm not the last instance -> going to wait.";
215 // wait for the leader
// The _enter condition is broadcast from resume(), wait() and silence().
// NOTE(review): the wait is issued once, with no visible loop re-checking a
// predicate; POSIX permits spurious wakeups -- confirm this is handled.
216 pthread_cond_wait(&_enter, &_enter_mtx);
217 // get the return value set by the leader
220 // everyone is here, so checksum the VCPUs now
// NOTE(review): the statement guarded by this `if` is not visible; the
// comment that follows implies recovery runs when the check fails.
221 if (!checksum_replicas())
223 // at this point, recovery has made sure that all replicas
224 // are in the same state.
229 pthread_mutex_unlock(&_enter_mtx);
232 * If the leader told us to skip the syscall, get replicated VCPU and
235 if (ret == Romain::RedundancyCallback::Skip_syscall) {
// Leader hook: publishes Repeat_syscall in _rv.
// The three parameters are explicitly marked as unused.
243 void Romain::DMR::leader_repeat(Romain::App_instance *i, Romain::App_thread *t,
244 Romain::App_model *a)
246 (void)i; (void)t; (void)a;
248 _rv = Romain::RedundancyCallback::Repeat_syscall;
// Leader hook: publishes Skip_syscall in _rv.
// NOTE(review): the protocol description at the top of the file says the
// leader stores its VCPU state with put() for replicatable calls; no such call
// is visible in this block -- confirm whether a statement is missing from
// this view.
252 void Romain::DMR::leader_replicate(Romain::App_instance *i, Romain::App_thread *t,
253 Romain::App_model *a)
255 (void)i; (void)t; (void)a;
257 _rv = Romain::RedundancyCallback::Skip_syscall;
259 //t->print_vcpu_state();
// Barrier at the resumption point, run with _leave_mtx held.
// The first caller to arrive (_leave_count == 0) broadcasts the _enter
// condition while holding _enter_mtx. Every caller then increments
// _leave_count: while the new count is below _num_instances it sleeps on the
// _leave condition; the remaining path loops over all instances and
// broadcasts _leave.
// NOTE(review): the statements that reset state for the next round (e.g. the
// body of the per-instance loop) are not visible in this view -- confirm.
264 void Romain::DMR::resume(Romain::App_instance *i, Romain::App_thread *t,
265 Romain::App_model *a)
267 (void)i; (void)t; (void)a;
268 //MSGi(i) << "[l] acquiring leave mtx";
269 pthread_mutex_lock(&_leave_mtx);
// First arrival: wake the threads sleeping on _enter in enter().
270 if (_leave_count == 0) {
271 pthread_mutex_lock(&_enter_mtx);
272 pthread_cond_broadcast(&_enter);
273 pthread_mutex_unlock(&_enter_mtx);
276 //MSGi(i) << "++_leave_count " << _leave_count;
277 if (++_leave_count < _num_instances) {
278 MSGi(i) << "Waiting for other replicas to commit their syscall.";
279 //MSGi(i) << "cond_wait(leave)";
// NOTE(review): single wait with no visible predicate loop; POSIX permits
// spurious wakeups -- confirm this is handled.
280 pthread_cond_wait(&_leave, &_leave_mtx);
281 //MSGi(i) << "success: cond_wait(leave)";
// The loop variable below shadows the parameter `i` of this function.
283 for (unsigned i = 0; i < _num_instances; ++i)
// Release every replica sleeping on _leave.
285 pthread_cond_broadcast(&_leave);
287 //MSGi(i) << "counts @ resume: " << _enter_count << " " << _leave_count;
289 pthread_mutex_unlock(&_leave_mtx);
291 //enter_kdebug("DMR::resume");
// Parks the calling replica: with _block_mtx held it logs _block_count,
// broadcasts the _enter condition, and then sleeps on the _block condition
// until it is signalled (wakeup() broadcasts _block).
// NOTE(review): no visible statement in this block modifies _block_count,
// although silence() polls it -- an update is presumably missing from this
// view; confirm.
294 void Romain::DMR::wait(Romain::App_instance *i, Romain::App_thread *t,
295 Romain::App_model *a)
298 pthread_mutex_lock(&_block_mtx);
300 MSGi(i) << "going to wait. block_count: " << _block_count;
// The broadcast on _enter is issued here without _enter_mtx being taken.
301 pthread_cond_broadcast(&_enter);
302 pthread_cond_wait(&_block, &_block_mtx);
303 pthread_mutex_unlock(&_block_mtx);
// Called by one replica to get the others out of the way: it broadcasts the
// _enter condition, polls until _block_count reaches _num_instances - 1, and
// then saves _num_instances in _num_instances_bak (wakeup() restores it).
// NOTE(review): whatever is done to _num_instances after the backup is not
// visible in this view -- confirm.
306 void Romain::DMR::silence(Romain::App_instance *i, Romain::App_thread *t,
307 Romain::App_model *a)
310 // 1. Tell anyone who is still waiting to enter that he can now do so.
311 // These replicas will all run until they block on _block_mtx.
312 pthread_cond_broadcast(&_enter);
// Polling loop: sleep via l4_sleep(20) between checks.
// NOTE(review): _block_count is read here with no lock visible in this
// block -- confirm this is intended.
314 while (_block_count < (_num_instances - 1))
315 l4_sleep(20); // XXX handshake
// Keep the original replica count for wakeup().
317 _num_instances_bak = _num_instances;
// Counterpart of silence(): restores _num_instances from the backup taken
// there and broadcasts the _block condition, on which wait() sleeps.
// NOTE(review): the end of this function is not visible in this view; further
// statements may follow.
321 void Romain::DMR::wakeup(Romain::App_instance *i, Romain::App_thread *t,
322 Romain::App_model *a)
326 _num_instances = _num_instances_bak;
// The broadcast on _block is issued here without _block_mtx being taken.
327 pthread_cond_broadcast(&_block);