/*
 * dmr.cc --
 *
 *    n-way modular redundancy implementation
 *
 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
 *     economic rights: Technische Universität Dresden (Germany)
 * This file is part of TUD:OS and distributed under the terms of the
 * GNU General Public License 2.
 * Please see the COPYING-GPL-2 file for details.
 */

#include "../log"
#include "../redundancy.h"
#include "../app_loading"
#include "../fault_observers"
#include "../manager"
#include "../fault_handlers/syscalls_handler.h"

#define MSG() DEBUGf(Romain::Log::Redundancy)
#define MSGi(inst) MSG() << "[" << (inst)->id() << "] "

//extern char * __func__;

/* Replication protocol:
 * =====================
 *
 * Everyone goes to sleep, except the last thread to enter. This thread becomes
 * the 'leader'. The leader returns from this function with the First_syscall return
 * value. It then goes on to execute the system call (in manager.cc). Depending on
 * its return value,
 *
 * a) For replicatable calls: it stores its VCPU state after the system call using
 *    the function put(). All other replicas then use get() to obtain this state.
 *
 * b) For non-replicatable calls: it sets the other replicas' return value to
 *    Repeat_syscall. The replicas then perform handling themselves.
 *
 * After all the handling, everyone waits in resume() until the last replica reaches
 * the resumption point. Then each VCPU goes back to where it came from.
 *
 *
 * Detection and recovery:
 * =======================
 *
 * Before executing the fault handler, the leader checksums all VCPU states. If a
 * mismatch is found, it calls the recover() function. recover() sets things straight
 * so that after the handler is done, everyone is in an identical state again. The
 * leader then goes on to execute the call.
 */
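
/*
 * Illustrative sketch (not part of the implementation): under the protocol
 * described above, a replica's fault-handling path in manager.cc roughly
 * looks like
 *
 *   ret = redundancy->enter(instance, thread, model);
 *   switch (ret) {
 *   case First_syscall:  // leader: handle the call, then call
 *                        // leader_replicate() or leader_repeat()
 *   case Repeat_syscall: // follower: handle the call locally
 *   case Skip_syscall:   // follower: state was already imported in enter()
 *   }
 *   redundancy->resume(instance, thread, model);
 *
 * Names such as 'redundancy' are placeholders here; see manager.cc for the
 * actual dispatch.
 */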

Romain::DMR::DMR(unsigned instances)
        : _enter_count(0), _leave_count(0), _block_count(0),
          _rv(Romain::RedundancyCallback::Invalid),
          _num_instances(instances), _num_instances_bak(0)
{
        for (unsigned i = 0; i < _num_instances; ++i)
                _orig_vcpu[i] = 0;
        _check(pthread_mutex_init(&_enter_mtx, NULL) != 0, "error initializing mtx");
        _check(pthread_cond_init(&_enter, NULL) != 0,      "error initializing condvar");
        _check(pthread_mutex_init(&_leave_mtx, NULL) != 0, "error initializing mtx");
        _check(pthread_cond_init(&_leave, NULL) != 0,      "error initializing condvar");
        _check(pthread_mutex_init(&_block_mtx, NULL) != 0, "error initializing mtx");
        _check(pthread_cond_init(&_block, NULL) != 0,      "error initializing condvar");
}

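/*
 * Capture the register state of the faulting thread's vCPU and a copy of
 * its UTCB (L4_UTCB_OFFSET bytes) in the replicator, so that the other
 * replicas can later import this state through get().
 */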
void
Romain::Replicator::put(Romain::App_thread *t)
{
        //memset(&_regs, 0, sizeof(_regs)); // XXX
#define PUT(field) _regs.field = t->vcpu()->r()->field
        PUT(es); PUT(ds); PUT(gs); PUT(fs);
        PUT(di); PUT(si); PUT(bp); PUT(pfa);
        PUT(ax); PUT(bx); PUT(cx); PUT(dx);
        PUT(trapno); PUT(err); PUT(ip); PUT(flags);
        PUT(sp); PUT(ss);
#undef PUT
        l4_utcb_t *addr = reinterpret_cast<l4_utcb_t*>(t->remote_utcb());
        memcpy(&_utcb, addr, L4_UTCB_OFFSET);
}

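/*
 * Counterpart to put(): write the stored register state and UTCB copy back
 * into the given thread's vCPU and remote UTCB.
 */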
void
Romain::Replicator::get(Romain::App_thread *t)
{
#define GET(field) t->vcpu()->r()->field = _regs.field
        GET(es); GET(ds); GET(gs); GET(fs);
        GET(di); GET(si); GET(bp); GET(pfa);
        GET(ax); GET(bx); GET(cx); GET(dx);
        GET(trapno); GET(err); GET(ip); GET(flags);
        GET(sp); GET(ss);
#undef GET
        l4_utcb_t *addr = reinterpret_cast<l4_utcb_t*>(t->remote_utcb());
        memcpy(addr, &_utcb, L4_UTCB_OFFSET);
}

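/*
 * Checksum every replica's vCPU state and compare the checksums pairwise.
 * Returns true if all replicas agree. On a mismatch, all states are dumped
 * and false is returned so that the caller can start recovery.
 */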
bool
Romain::DMR::checksum_replicas()
{
        unsigned long csums[MAX_REPLICAS] = {0, };
        unsigned idx;

        // calc checksums
        for (idx = 0; idx < _num_instances; ++idx)
                csums[idx] = _orig_vcpu[idx]->csum_state();

        // validate checksums
        for (idx = 1; idx < _num_instances; ++idx)
                if (csums[idx] != csums[idx-1]) {
#if 1
                        ERROR() << "State mismatch detected!";
                        ERROR() << "=== vCPU states ===";
                        for (unsigned cnt = 0; cnt < _num_instances; ++cnt) {
                                ERROR() << "--- instance " << cnt << " @ "
                                        << _orig_vcpu[cnt]->vcpu() << " (cs: "
                                        << std::hex << csums[cnt] << ") ---";
                                if (_orig_vcpu[cnt])
                                        _orig_vcpu[cnt]->vcpu()->print_state();
                        }
                        ERROR() << "Instances: " << _num_instances << " this inst " << idx;
                        enter_kdebug("checksum");
#endif
                        return false;
                }

        return true;
}

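/*
 * Recovery strategy used when no majority is available (fewer than three
 * replicas): dump the instance manager's log and abort execution.
 */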
class RecoverAbort
{
        public:
                static __attribute__((noreturn)) void recover()
                {
                        ERROR() << "Aborting after error.";
                        Romain::_the_instance_manager->logdump();
                        enter_kdebug("abort");
                        throw("ERROR -> abort");
                }
};

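/*
 * Majority vote over the replicas' state checksums: locate a replica whose
 * state disagrees with its neighbors and report its index as 'bad', along
 * with the index of a replica holding the majority state as 'good'.
 */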
class RedundancyAbort
{
        public:
                static void recover(Romain::App_thread** threads, unsigned count,
                                    unsigned *good, unsigned *bad)
                {
                        unsigned long csums[count];
                        unsigned idx;

                        // calc checksums
                        for (idx = 0; idx < count; ++idx)
                                csums[idx] = threads[idx]->csum_state();

                        // find mismatch
                        for (idx = 1; idx < count; ++idx)
                                if (csums[idx] != csums[idx-1]) { // mismatch
                                        if (csums[idx] == csums[(idx + 1) % count]) {
                                                *good = idx;
                                                *bad  = idx-1;
                                        } else {
                                                *good = idx-1;
                                                *bad  = idx;
                                        }
                                }
                }
};

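/*
 * Recover from a detected state mismatch. With fewer than three replicas
 * there is no majority to vote on, so we abort. Otherwise the faulty
 * replica's register and UTCB state is overwritten with a copy from a
 * correct replica, and its memory is re-replicated through the app model.
 */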
void
Romain::DMR::recover(Romain::App_model *am)
{
        if (_num_instances < 3)
                RecoverAbort::recover(); // noreturn

        unsigned good = ~0, bad = ~0;
        RedundancyAbort::recover(_orig_vcpu, _num_instances, &good, &bad);
        DEBUG() << "good " << good << ", bad " << bad;

        // XXX: This does not suffice. We also need to copy memory content
        //      from a correct replica to the incorrect one
        replicator().put(_orig_vcpu[good]);
        replicator().get(_orig_vcpu[bad]);
        am->rm()->replicate(good, bad);

#if 0
        DEBUG() << "after recovery:";
        for (unsigned i = 0; i < _num_instances; ++i)
                DEBUG() << i << " " << std::hex << _orig_vcpu[i]->csum_state();
#endif
}

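/*
 * Rendezvous point for all replicas when a fault is intercepted. The last
 * replica to arrive becomes the leader: it checksums all vCPU states
 * (triggering recovery on a mismatch) and returns First_syscall. All other
 * replicas block until the leader has decided how the event is handled and
 * return the leader's verdict (Repeat_syscall or Skip_syscall); for
 * Skip_syscall the replicated state is imported right here.
 */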
Romain::RedundancyCallback::EnterReturnVal
Romain::DMR::enter(Romain::App_instance *i, Romain::App_thread *t,
                   Romain::App_model *a)
{
        (void)a;
        MSGi(i) << "DMR::enter act(" << _enter_count << ")";

        Romain::RedundancyCallback::EnterReturnVal ret = Romain::RedundancyCallback::First_syscall;

        // enter ourselves into the list of faulted threads
        _orig_vcpu[i->id()] = t;

        pthread_mutex_lock(&_enter_mtx);

        /* TODO: select the first replica that makes the sum of all replicas
         *       larger than N/2, if all their states match.
         */
        if (++_enter_count < _num_instances) {
                //MSGi(i) << "I'm not the last instance -> going to wait.";
                // wait for the leader
                pthread_cond_wait(&_enter, &_enter_mtx);
                // get the return value set by the leader
                ret = _rv;
        } else {
                // everyone is here, so checksum the VCPUs now
                if (!checksum_replicas())
                        recover(a);
                // at this point, recovery has made sure that all replicas
                // are in the same state.
        }

        --_enter_count;

        pthread_mutex_unlock(&_enter_mtx);

        /*
         * If the leader told us to skip the syscall, get replicated VCPU and
         * UTCB states here.
         */
        if (ret == Romain::RedundancyCallback::Skip_syscall) {
                replicator().get(t);
        }

        return ret;
}

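/*
 * Leader decision for a non-replicatable call: every replica has to repeat
 * the handling itself.
 */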
void Romain::DMR::leader_repeat(Romain::App_instance *i, Romain::App_thread *t,
                                Romain::App_model *a)
{
        (void)i; (void)t; (void)a;
        MSGi(i) << __func__;
        _rv = Romain::RedundancyCallback::Repeat_syscall;
}

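/*
 * Leader decision for a replicatable call: store the leader's post-syscall
 * state so that the other replicas can skip the call and import it instead.
 */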
void Romain::DMR::leader_replicate(Romain::App_instance *i, Romain::App_thread *t,
                                   Romain::App_model *a)
{
        (void)i; (void)t; (void)a;
        MSGi(i) << __func__;
        _rv = Romain::RedundancyCallback::Skip_syscall;

        //t->print_vcpu_state();
        replicator().put(t);
}

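/*
 * Resumption barrier: the first replica arriving here wakes up any replicas
 * still blocked in enter(); everyone then waits until the last replica has
 * committed the event, after which the bookkeeping is reset and all vCPUs
 * go back to where they came from.
 */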
void Romain::DMR::resume(Romain::App_instance *i, Romain::App_thread *t,
                         Romain::App_model *a)
{
        (void)i; (void)t; (void)a;
        //MSGi(i) << "[l] acquiring leave mtx";
        pthread_mutex_lock(&_leave_mtx);
        if (_leave_count == 0) {
                pthread_mutex_lock(&_enter_mtx);
                pthread_cond_broadcast(&_enter);
                pthread_mutex_unlock(&_enter_mtx);
        }

        //MSGi(i) << "++_leave_count " << _leave_count;
        if (++_leave_count < _num_instances) {
                MSGi(i) << "Waiting for other replicas to commit their syscall.";
                //MSGi(i) << "cond_wait(leave)";
                pthread_cond_wait(&_leave, &_leave_mtx);
                //MSGi(i) << "success: cond_wait(leave)";
        } else {
                for (unsigned cnt = 0; cnt < _num_instances; ++cnt)
                        _orig_vcpu[cnt] = 0;
                pthread_cond_broadcast(&_leave);
        }
        //MSGi(i) << "counts @ resume: " << _enter_count << " " << _leave_count;
        --_leave_count;
        pthread_mutex_unlock(&_leave_mtx);

        //enter_kdebug("DMR::resume");
}

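/*
 * Block the calling replica until wakeup(): account for it in _block_count,
 * release any replica still waiting in enter(), and sleep on _block.
 */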
void Romain::DMR::wait(Romain::App_instance *i, Romain::App_thread *t,
                       Romain::App_model *a)
{
        MSGi(i) << __func__;
        pthread_mutex_lock(&_block_mtx);
        ++_block_count;
        MSGi(i) << "going to wait. block_count: " << _block_count;
        pthread_cond_broadcast(&_enter);
        pthread_cond_wait(&_block, &_block_mtx);
        pthread_mutex_unlock(&_block_mtx);
}

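/*
 * Bring all other replicas into wait() so that only the caller keeps
 * running: wake up anyone still blocked in enter(), poll until the others
 * have blocked, then temporarily reduce the instance count to one.
 */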
void Romain::DMR::silence(Romain::App_instance *i, Romain::App_thread *t,
                          Romain::App_model *a)
{
        MSGi(i) << __func__;
        // 1. Tell every replica that is still waiting to enter that it can now
        //    do so. These replicas will all run until they block on _block_mtx.
        pthread_cond_broadcast(&_enter);

        while (_block_count < (_num_instances - 1))
                l4_sleep(20); // XXX handshake

        _num_instances_bak = _num_instances;
        _num_instances     = 1;
}

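/*
 * Undo silence(): restore the original instance count and release all
 * replicas that are blocked in wait().
 */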
void Romain::DMR::wakeup(Romain::App_instance *i, Romain::App_thread *t,
                         Romain::App_model *a)
{
        MSGi(i) << __func__;
        _block_count   = 0;
        _num_instances = _num_instances_bak;
        pthread_cond_broadcast(&_block);
}