/*
 * dmr.cc --
 *
 *    n-way modular redundancy implementation
 *
 * (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
 *     economic rights: Technische Universität Dresden (Germany)
 * This file is part of TUD:OS and distributed under the terms of the
 * GNU General Public License 2.
 * Please see the COPYING-GPL-2 file for details.
 */

#include "../log"
#include "../redundancy.h"
#include "../app_loading"
#include "../fault_observers"
#include "../manager"
#include "../fault_handlers/syscalls_handler.h"

#define MSG() DEBUGf(Romain::Log::Redundancy)
#define MSGi(inst) MSG() << "[" << (inst)->id() << "] "

//extern char * __func__;

/* Replication protocol:
 * =====================
 *
 * Everyone goes to sleep, except the last thread to enter. This thread becomes
 * the 'leader'. The leader returns from this function with the First_syscall return
 * value. It then goes on to execute the system call (in manager.cc). Depending on
 * its return value,
 *
 * a) For replicatable calls: it stores its VCPU state after the system call using
 *    the function put(). All other replicas then use get() to obtain this state.
 *
 * b) For non-replicatable calls: it sets the other replicas' return value to
 *    Repeat_syscall. The replicas then perform handling themselves.
 *
 * After all the handling, everyone waits in resume() until the last replica reaches
 * the resumption point. Then each VCPU goes back to where it came from.
 *
 *
 * Detection and recovery:
 * =======================
 *
 * Before executing the fault handler, the leader checksums all VCPU states. If a
 * mismatch is found, it calls the recover() function. recover() sets things straight
 * so that after the handler is done, everyone is in an identical state again. The
 * leader then goes on to execute the call.
 */
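
/*
 * Illustrative sketch (not part of the implementation): under the protocol
 * described above, a replica's fault-handling path in manager.cc roughly
 * looks like
 *
 *   ret = redundancy->enter(instance, thread, model);
 *   switch (ret) {
 *   case First_syscall:  // leader: handle the call, then call
 *                        // leader_replicate() or leader_repeat()
 *   case Repeat_syscall: // follower: handle the call locally
 *   case Skip_syscall:   // follower: state was already imported in enter()
 *   }
 *   redundancy->resume(instance, thread, model);
 *
 * Names such as 'redundancy' are placeholders here; see manager.cc for the
 * actual dispatch.
 */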

Romain::DMR::DMR(unsigned instances)
        : _enter_count(0), _leave_count(0), _block_count(0),
          _rv(Romain::RedundancyCallback::Invalid),
          _num_instances(instances), _num_instances_bak(0)
{
        for (unsigned i = 0; i < _num_instances; ++i)
                _orig_vcpu[i] = 0;
        _check(pthread_mutex_init(&_enter_mtx, NULL) != 0, "error initializing mtx");
        _check(pthread_cond_init(&_enter, NULL) != 0,      "error initializing condvar");
        _check(pthread_mutex_init(&_leave_mtx, NULL) != 0, "error initializing mtx");
        _check(pthread_cond_init(&_leave, NULL) != 0,      "error initializing condvar");
        _check(pthread_mutex_init(&_block_mtx, NULL) != 0, "error initializing mtx");
        _check(pthread_cond_init(&_block, NULL) != 0,      "error initializing condvar");
}

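/*
 * Capture the register state of the faulting thread's vCPU and a copy of
 * its UTCB (L4_UTCB_OFFSET bytes) in the replicator, so that the other
 * replicas can later import this state through get().
 */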
void
Romain::Replicator::put(Romain::App_thread *t)
{
        //memset(&_regs, 0, sizeof(_regs)); // XXX
#define PUT(field) _regs.field = t->vcpu()->r()->field
        PUT(es); PUT(ds); PUT(gs); PUT(fs);
        PUT(di); PUT(si); PUT(bp); PUT(pfa);
        PUT(ax); PUT(bx); PUT(cx); PUT(dx);
        PUT(trapno); PUT(err); PUT(ip); PUT(flags);
        PUT(sp); PUT(ss);
#undef PUT
        l4_utcb_t *addr = reinterpret_cast<l4_utcb_t*>(t->remote_utcb());
        memcpy(&_utcb, addr, L4_UTCB_OFFSET);
}

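/*
 * Counterpart to put(): write the stored register state and UTCB copy back
 * into the given thread's vCPU and remote UTCB.
 */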
void
Romain::Replicator::get(Romain::App_thread *t)
{
#define GET(field) t->vcpu()->r()->field = _regs.field
        GET(es); GET(ds); GET(gs); GET(fs);
        GET(di); GET(si); GET(bp); GET(pfa);
        GET(ax); GET(bx); GET(cx); GET(dx);
        GET(trapno); GET(err); GET(ip); GET(flags);
        GET(sp); GET(ss);
#undef GET
        l4_utcb_t *addr = reinterpret_cast<l4_utcb_t*>(t->remote_utcb());
        memcpy(addr, &_utcb, L4_UTCB_OFFSET);
}

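/*
 * Checksum every replica's vCPU state and compare the checksums pairwise.
 * Returns true if all replicas agree. On a mismatch, all states are dumped
 * and false is returned so that the caller can start recovery.
 */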
bool
Romain::DMR::checksum_replicas()
{
        unsigned long csums[MAX_REPLICAS] = {0, };
        unsigned idx;

        // calc checksums
        for (idx = 0; idx < _num_instances; ++idx)
                csums[idx] = _orig_vcpu[idx]->csum_state();

        // validate checksums
        for (idx = 1; idx < _num_instances; ++idx)
                if (csums[idx] != csums[idx-1]) {
#if 1
                        ERROR() << "State mismatch detected!";
                        ERROR() << "=== vCPU states ===";
                        for (unsigned cnt = 0; cnt < _num_instances; ++cnt) {
                                ERROR() << "--- instance " << cnt << " @ "
                                        << _orig_vcpu[cnt]->vcpu() << " (cs: "
                                        << std::hex << csums[cnt] << ") ---";
                                if (_orig_vcpu[cnt])
                                        _orig_vcpu[cnt]->vcpu()->print_state();
                        }
                        ERROR() << "Instances: " << _num_instances << " this inst " << idx;
                        enter_kdebug("checksum");
#endif
                        return false;
                }

        return true;
}

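/*
 * Recovery strategy used when no majority is available (fewer than three
 * replicas): dump the instance manager's log and abort execution.
 */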
class RecoverAbort
{
        public:
                static __attribute__((noreturn)) void recover()
                {
                        ERROR() << "Aborting after error.";
                        Romain::_the_instance_manager->logdump();
                        enter_kdebug("abort");
                        throw("ERROR -> abort");
                }
};

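/*
 * Majority vote over the replicas' state checksums: locate a replica whose
 * state disagrees with its neighbors and report its index as 'bad', along
 * with the index of a replica holding the majority state as 'good'.
 */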
class RedundancyAbort
{
        public:
                static void recover(Romain::App_thread** threads, unsigned count,
                                    unsigned *good, unsigned *bad)
                {
                        unsigned long csums[count];
                        unsigned idx;

                        // calc checksums
                        for (idx = 0; idx < count; ++idx)
                                csums[idx] = threads[idx]->csum_state();

                        // find mismatch
                        for (idx = 1; idx < count; ++idx)
                                if (csums[idx] != csums[idx-1]) { // mismatch
                                        if (csums[idx] == csums[(idx + 1) % count]) {
                                                *good = idx;
                                                *bad  = idx-1;
                                        } else {
                                                *good = idx-1;
                                                *bad  = idx;
                                        }
                                }
                }
};

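/*
 * Recover from a detected state mismatch. With fewer than three replicas
 * there is no majority to vote on, so we abort. Otherwise the faulty
 * replica's register and UTCB state is overwritten with a copy from a
 * correct replica, and its memory is re-replicated through the app model.
 */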
void
Romain::DMR::recover(Romain::App_model *am)
{
        if (_num_instances < 3)
                RecoverAbort::recover(); // noreturn

        unsigned good = ~0, bad = ~0;
        RedundancyAbort::recover(_orig_vcpu, _num_instances, &good, &bad);
        DEBUG() << "good " << good << ", bad " << bad;

        // XXX: This does not suffice. We also need to copy memory content
        //      from a correct replica to the incorrect one
        replicator().put(_orig_vcpu[good]);
        replicator().get(_orig_vcpu[bad]);
        am->rm()->replicate(good, bad);

#if 0
        DEBUG() << "after recovery:";
        for (unsigned i = 0; i < _num_instances; ++i)
                DEBUG() << i << " " << std::hex << _orig_vcpu[i]->csum_state();
#endif
}

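/*
 * Rendezvous point for all replicas when a fault is intercepted. The last
 * replica to arrive becomes the leader: it checksums all vCPU states
 * (triggering recovery on a mismatch) and returns First_syscall. All other
 * replicas block until the leader has decided how the event is handled and
 * return the leader's verdict (Repeat_syscall or Skip_syscall); for
 * Skip_syscall the replicated state is imported right here.
 */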
Romain::RedundancyCallback::EnterReturnVal
Romain::DMR::enter(Romain::App_instance *i, Romain::App_thread *t,
                   Romain::App_model *a)
{
        (void)a;
        MSGi(i) << "DMR::enter act(" << _enter_count << ")";

        Romain::RedundancyCallback::EnterReturnVal ret = Romain::RedundancyCallback::First_syscall;

        // enter ourselves into the list of faulted threads
        _orig_vcpu[i->id()] = t;

        pthread_mutex_lock(&_enter_mtx);

        /* TODO: select the first replica that makes the sum of all replicas
         *       larger than N/2, if all their states match.
         */
        if (++_enter_count < _num_instances) {
                //MSGi(i) << "I'm not the last instance -> going to wait.";
                // wait for the leader
                pthread_cond_wait(&_enter, &_enter_mtx);
                // get the return value set by the leader
                ret = _rv;
        } else {
                // everyone is here, so checksum the VCPUs now
                if (!checksum_replicas())
                        recover(a);
                // at this point, recovery has made sure that all replicas
                // are in the same state.
        }

        --_enter_count;

        pthread_mutex_unlock(&_enter_mtx);

        /*
         * If the leader told us to skip the syscall, get replicated VCPU and
         * UTCB states here.
         */
        if (ret == Romain::RedundancyCallback::Skip_syscall) {
                replicator().get(t);
        }

        return ret;
}

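/*
 * Leader decision for a non-replicatable call: every replica has to repeat
 * the handling itself.
 */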
void Romain::DMR::leader_repeat(Romain::App_instance *i, Romain::App_thread *t,
                                Romain::App_model *a)
{
        (void)i; (void)t; (void)a;
        MSGi(i) << __func__;
        _rv = Romain::RedundancyCallback::Repeat_syscall;
}

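/*
 * Leader decision for a replicatable call: store the leader's post-syscall
 * state so that the other replicas can skip the call and import it instead.
 */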
void Romain::DMR::leader_replicate(Romain::App_instance *i, Romain::App_thread *t,
                                   Romain::App_model *a)
{
        (void)i; (void)t; (void)a;
        MSGi(i) << __func__;
        _rv = Romain::RedundancyCallback::Skip_syscall;

        //t->print_vcpu_state();
        replicator().put(t);
}

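/*
 * Resumption barrier: the first replica arriving here wakes up any replicas
 * still blocked in enter(); everyone then waits until the last replica has
 * committed the event, after which the bookkeeping is reset and all vCPUs
 * go back to where they came from.
 */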
void Romain::DMR::resume(Romain::App_instance *i, Romain::App_thread *t,
                         Romain::App_model *a)
{
        (void)i; (void)t; (void)a;
        //MSGi(i) << "[l] acquiring leave mtx";
        pthread_mutex_lock(&_leave_mtx);
        if (_leave_count == 0) {
                pthread_mutex_lock(&_enter_mtx);
                pthread_cond_broadcast(&_enter);
                pthread_mutex_unlock(&_enter_mtx);
        }

        //MSGi(i) << "++_leave_count " << _leave_count;
        if (++_leave_count < _num_instances) {
                MSGi(i) << "Waiting for other replicas to commit their syscall.";
                //MSGi(i) << "cond_wait(leave)";
                pthread_cond_wait(&_leave, &_leave_mtx);
                //MSGi(i) << "success: cond_wait(leave)";
        } else {
                for (unsigned cnt = 0; cnt < _num_instances; ++cnt)
                        _orig_vcpu[cnt] = 0;
                pthread_cond_broadcast(&_leave);
        }
        //MSGi(i) << "counts @ resume: " << _enter_count << " " << _leave_count;
        --_leave_count;
        pthread_mutex_unlock(&_leave_mtx);

        //enter_kdebug("DMR::resume");
}

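/*
 * Block the calling replica until wakeup(): account for it in _block_count,
 * release any replica still waiting in enter(), and sleep on _block.
 */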
void Romain::DMR::wait(Romain::App_instance *i, Romain::App_thread *t,
                       Romain::App_model *a)
{
        MSGi(i) << __func__;
        pthread_mutex_lock(&_block_mtx);
        ++_block_count;
        MSGi(i) << "going to wait. block_count: " << _block_count;
        pthread_cond_broadcast(&_enter);
        pthread_cond_wait(&_block, &_block_mtx);
        pthread_mutex_unlock(&_block_mtx);
}

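/*
 * Bring all other replicas into wait() so that only the caller keeps
 * running: wake up anyone still blocked in enter(), poll until the others
 * have blocked, then temporarily reduce the instance count to one.
 */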
void Romain::DMR::silence(Romain::App_instance *i, Romain::App_thread *t,
                          Romain::App_model *a)
{
        MSGi(i) << __func__;
        // 1. Tell every replica that is still waiting to enter that it can now
        //    do so. These replicas will all run until they block on _block_mtx.
        pthread_cond_broadcast(&_enter);

        while (_block_count < (_num_instances - 1))
                l4_sleep(20); // XXX handshake

        _num_instances_bak = _num_instances;
        _num_instances     = 1;
}

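/*
 * Undo silence(): restore the original instance count and release all
 * replicas that are blocked in wait().
 */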
void Romain::DMR::wakeup(Romain::App_instance *i, Romain::App_thread *t,
                         Romain::App_model *a)
{
        MSGi(i) << __func__;
        _block_count   = 0;
        _num_instances = _num_instances_bak;
        pthread_cond_broadcast(&_block);
}