kernel/fiasco/src/kern/thread-ipc.cpp
1 INTERFACE:
2
3 #include "l4_buf_iter.h"
4 #include "l4_error.h"
5
6 class Syscall_frame;
7
8 EXTENSION class Thread
9 {
10 protected:
11   struct Log_pf_invalid
12   {
13     Mword pfa;
14     Mword cap_idx;
15     Mword err;
16   };
17
18   struct Log_exc_invalid
19   {
20     Mword cap_idx;
21   };
22
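  // Result of a sender/receiver handshake (see check_sender() and
  // handshake_receiver() below): Ok means the transfer may start right away,
  // Queued means the sender got enqueued and must wait, Done means the
  // message has already been transferred (remote/X-CPU path), and Failed
  // signals an error that has normally been recorded in the sender's UTCB.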
23   enum Check_sender_result
24   {
25     Ok = 0,
26     Queued = 2,
27     Done = 4,
28     Failed = 1,
29   };
30
31   Syscall_frame *_snd_regs;
32   unsigned char _ipc_send_rights;
33 };
34
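/**
 * Save and restore the receive-buffer state of a UTCB (the buffer
 * descriptor and the first two buffer registers) around an in-kernel IPC.
 */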
35 class Buf_utcb_saver
36 {
37 public:
38   Buf_utcb_saver(Utcb const *u);
39   void restore(Utcb *u);
40 private:
41   L4_buf_desc buf_desc;
42   Mword buf[2];
43 };
44
45 /**
46  * Save critical contents of UTCB during nested IPC.
47  */
48 class Pf_msg_utcb_saver : public Buf_utcb_saver
49 {
50 public:
51   Pf_msg_utcb_saver(Utcb const *u);
52   void restore(Utcb *u);
53 private:
54   Mword msg[2];
55 };
56
57 // ------------------------------------------------------------------------
58 INTERFACE [debug]:
59
60 #include "tb_entry.h"
61
62 EXTENSION class Thread
63 {
64 protected:
65   static unsigned log_fmt_pf_invalid(Tb_entry *, int max, char *buf) asm ("__fmt_page_fault_invalid_pager");
66   static unsigned log_fmt_exc_invalid(Tb_entry *, int max, char *buf) asm ("__fmt_exception_invalid_handler");
67 };
68
69 // ------------------------------------------------------------------------
70 IMPLEMENTATION:
71
72 // IPC setup, and handling of ``short IPC'' and page-fault IPC
73
74 // IDEAS for enhancing this implementation: 
75
76 // Volkmar has suggested a possible optimization for
77 // short-flexpage-to-long-message-buffer transfers: Currently, we have
78 // to resort to long IPC in that case because the message buffer might
79 // contain a receive-flexpage option.  An easy optimization would be
80 // to cache the receive-flexpage option in the TCB for that case.
81 // This would save us the long-IPC setup because we wouldn't have to
82 // touch the receiver's user memory in that case.  Volkmar argues that
83 // cases like that are quite common -- for example, imagine a pager
84 // which at the same time is also a server for ``normal'' requests.
85
86 // The handling of cancel and timeout conditions could be improved as
87 // follows: Cancel and Timeout should not reset the ipc_in_progress
88 // flag.  Instead, they should just set and/or reset a flag of their
89 // own that is checked every time an (IPC) system call wants to go to
90 // sleep.  That would mean that IPCs that do not block are not
91 // cancelled or aborted.
92 //-
93
94 #include <cstdlib>              // panic()
95
96 #include "l4_types.h"
97 #include "l4_msg_item.h"
98
99 #include "config.h"
100 #include "cpu_lock.h"
101 #include "ipc_timeout.h"
102 #include "lock_guard.h"
103 #include "logdefs.h"
104 #include "map_util.h"
105 #include "processor.h"
106 #include "timer.h"
107 #include "kdb_ke.h"
108 #include "warn.h"
109
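/**
 * Callback invoked when the receiver aborts the IPC: dequeue this sender
 * from the receiver's sender list, update the receiver's vCPU IRQ state,
 * drop the receiver reference, and re-enqueue this thread on its (possibly
 * remote) ready queue.
 */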
110 PUBLIC
111 virtual void
112 Thread::ipc_receiver_aborted()
113 {
114   assert_kdb (receiver());
115
116   sender_dequeue(receiver()->sender_list());
117   receiver()->vcpu_update_state();
118   set_receiver(0);
119
120   remote_ready_enqueue();
121 }
122
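/**
 * Switch this thread into the active receive state: disable vCPU IRQ
 * delivery and replace the IPC state bits with Thread_receive_in_progress.
 */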
123 PUBLIC inline
124 void
125 Thread::ipc_receiver_ready()
126 {
127   vcpu_disable_irqs();
128   state_change_dirty(~Thread_ipc_mask, Thread_receive_in_progress);
129 }
130
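/**
 * Sender callback: the receiver recv is ready, so transfer the message held
 * in _snd_regs, dequeue from the sender list, flag an error in the message
 * tag if the transfer failed, and wake this thread up again -- directly if
 * it runs on the current CPU, via DRQ otherwise.
 */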
131 PRIVATE
132 void
133 Thread::ipc_send_msg(Receiver *recv)
134 {
135   Syscall_frame *regs = _snd_regs;
136   bool success = transfer_msg(regs->tag(), nonull_static_cast<Thread*>(recv), regs,
137                               _ipc_send_rights);
138   sender_dequeue(recv->sender_list());
139   recv->vcpu_update_state();
140   //printf("  done\n");
141   regs->tag(L4_msg_tag(regs->tag(), success ? 0 : L4_msg_tag::Error));
142
143   Mword state_del = Thread_ipc_mask | Thread_ipc_transfer;
144   Mword state_add = Thread_ready;
145   if (Receiver::prepared())
146     // same as in Receiver::prepare_receive_dirty_2
147     state_add |= Thread_receive_wait;
148
149   if (cpu() == current_cpu())
150     {
151       state_change_dirty(~state_del, state_add);
152       if (current_sched()->deblock(cpu(), current_sched(), true))
153         recv->switch_to_locked(this);
154     }
155   else
156     {
157       drq_state_change(~state_del, state_add);
158       current()->schedule_if(current()->handle_drq());
159     }
160 }
161
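/**
 * Rewrite the label stored in _snd_regs while the message is still queued.
 * The todo array holds cnt quadruples (test mask, test value, delete mask,
 * add mask); the first matching quadruple is applied.  Presumably used when
 * the label of the IPC gate a queued sender sent through is changed.
 */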
162 PUBLIC virtual
163 void
164 Thread::modify_label(Mword const *todo, int cnt)
165 {
166   assert_kdb (_snd_regs);
167   Mword l = _snd_regs->from_spec();
168   for (int i = 0; i < cnt*4; i += 4)
169     {
170       Mword const test_mask = todo[i];
171       Mword const test      = todo[i+1];
172       if ((l & test_mask) == test)
173         {
174           Mword const del_mask = todo[i+2];
175           Mword const add_mask = todo[i+3];
176
177           l = (l & ~del_mask) | add_mask;
178           _snd_regs->from(l);
179           return;
180         }
181     }
182 }
183
184 PRIVATE inline
185 void
186 Thread::snd_regs(Syscall_frame *r)
187 { _snd_regs = r; }
188
189
190 /** Page fault handler.
191     This handler suspends any ongoing IPC, then sets up page-fault IPC.
192     Finally, the ongoing IPC's state (if any) is restored.
193     @param pfa page-fault virtual address
194     @param error_code page-fault error code.
195  */
196 PRIVATE
197 bool
198 Thread::handle_page_fault_pager(Thread_ptr const &_pager,
199                                 Address pfa, Mword error_code,
200                                 L4_msg_tag::Protocol protocol)
201 {
202 #ifndef NDEBUG
203   // do not handle user space page faults from kernel mode if we're
204   // already handling a request
205   if (EXPECT_FALSE(!PF::is_usermode_error(error_code)
206                    && thread_lock()->test() == Thread_lock::Locked))
207     {
208       kdb_ke("Fiasco BUG: page fault, under lock");
209       panic("page fault in locked operation");
210     }
211 #endif
212
213   if (EXPECT_FALSE((state() & Thread_alien)))
214     return false;
215
216   Lock_guard<Cpu_lock> guard(&cpu_lock);
217
218   unsigned char rights;
219   Kobject_iface *pager = _pager.ptr(space(), &rights);
220
221   if (!pager)
222     {
223       WARN("CPU%d: Pager of %lx is invalid (pfa=" L4_PTR_FMT
224            ", errorcode=" L4_PTR_FMT ") to %lx (pc=%lx)\n",
225            current_cpu(), dbg_id(), pfa, error_code,
226            _pager.raw(), regs()->ip());
227
228
229       LOG_TRACE("Page fault invalid pager", "pf", this,
230                 __fmt_page_fault_invalid_pager,
231                 Log_pf_invalid *l = tbe->payload<Log_pf_invalid>();
232                 l->cap_idx = _pager.raw();
233                 l->err     = error_code;
234                 l->pfa     = pfa);
235
236       pager = this; // block on ourselves
237     }
238
239   // set up a register block used as an IPC parameter block for the
240   // page fault IPC
241   Syscall_frame r;
242   Utcb *utcb = this->utcb().access(true);
243
244   // save the UTCB fields affected by PF IPC
245   Pf_msg_utcb_saver saved_utcb_fields(utcb);
246
247
248   utcb->buf_desc = L4_buf_desc(0, 0, 0, L4_buf_desc::Inherit_fpu);
249   utcb->buffers[0] = L4_msg_item::map(0).raw();
250   utcb->buffers[1] = L4_fpage::all_spaces().raw();
251
252   utcb->values[0] = PF::addr_to_msgword0 (pfa, error_code);
253   utcb->values[1] = regs()->ip(); //PF::pc_to_msgword1 (regs()->ip(), error_code));
254
255   L4_timeout_pair timeout(L4_timeout::Never, L4_timeout::Never);
256
257   L4_msg_tag tag(2, 0, 0, protocol);
258
259   r.timeout(timeout);
260   r.tag(tag);
261   r.from(0);
262   r.ref(L4_obj_ref(_pager.raw() << L4_obj_ref::Cap_shift, L4_obj_ref::Ipc_call_ipc));
263   pager->invoke(r.ref(), rights, &r, utcb);
264
265
266   bool success = true;
267
268   if (EXPECT_FALSE(r.tag().has_error()))
269     {
270       if (Config::conservative)
271         {
272           printf(" page fault %s error = 0x%lx\n",
273                  utcb->error.snd_phase() ? "send" : "rcv",
274                  utcb->error.raw());
275           kdb_ke("ipc to pager failed");
276         }
277
278       if (utcb->error.snd_phase()
279           && (utcb->error.error() == L4_error::Not_existent)
280           && PF::is_usermode_error(error_code)
281           && !(state() & Thread_cancel))
282         {
283           success = false;
284         }
285     }
286   else // no error
287     {
288       // If the pager rejects the mapping, it replies -1 in msg.w0
289       if (EXPECT_FALSE (utcb->values[0] == Mword(-1)))
290         success = false;
291     }
292
293   // restore previous IPC state
294
295   saved_utcb_fields.restore(utcb);
296   return success;
297 }
298
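/**
 * Check whether sender may start a transfer to this receiver right now.
 * Returns Failed (with the error already stored in the sender's UTCB) if
 * this thread is invalid, or if the sender may not send yet and has a zero
 * timeout; returns Queued after enqueueing the sender to wait; returns Ok
 * if the transfer can begin immediately.
 */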
299 PRIVATE inline
300 Mword
301 Thread::check_sender(Thread *sender, bool timeout)
302 {
303   if (EXPECT_FALSE(is_invalid()))
304     {
305       sender->utcb().access()->error = L4_error::Not_existent;
306       return Failed;
307     }
308
309   if (EXPECT_FALSE(!sender_ok(sender)))
310     {
311       if (!timeout)
312         {
313           sender->utcb().access()->error = L4_error::Timeout;
314           return Failed;
315         }
316
317       sender->set_receiver(this);
318       sender->sender_enqueue(sender_list(), sender->sched_context()->prio());
319       vcpu_set_irq_pending();
320       return Queued;
321     }
322
323   return Ok;
324 }
325
326
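/**
 * Block in the receive phase until a sender arrives or the timeout t hits.
 * A finite timeout is programmed into an IPC_timeout on the local CPU; if
 * it has already expired, the thread stays ready with Thread_timeout set.
 */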
327 PRIVATE inline NEEDS["timer.h"]
328 void Thread::goto_sleep(L4_timeout const &t, Sender *sender, Utcb *utcb)
329 {
330   IPC_timeout timeout;
331
332   if (EXPECT_FALSE(t.is_finite() && !_timeout))
333     {
334       state_del_dirty(Thread_ready);
335
336       Unsigned64 sysclock = Timer::system_clock();
337       Unsigned64 tval = t.microsecs(sysclock, utcb);
338
339       if (EXPECT_TRUE((tval > sysclock)))
340         {
341           set_timeout(&timeout);
342           timeout.set(tval, cpu());
343         }
344       else // timeout already hit
345         state_change_dirty(~Thread_ipc_mask, Thread_ready | Thread_timeout);
346     }
347   else
348     {
349       if (EXPECT_TRUE(t.is_never()))
350         state_del_dirty(Thread_ready);
351       else
352         state_change_dirty(~Thread_ipc_mask, Thread_ready | Thread_timeout);
353     }
354
355   if (sender == this)
356     switch_sched(sched());
357
358   schedule();
359
360   if (EXPECT_FALSE((long)_timeout))
361     {
362       timeout.reset();
363       set_timeout(0);
364     }
365
366   assert_kdb (state() & Thread_ready);
367 }
368
369
370
371 /**
372  * @pre cpu_lock must be held
373  */
374 PRIVATE inline NEEDS["logdefs.h"]
375 unsigned
376 Thread::handshake_receiver(Thread *partner, L4_timeout snd_t)
377 {
378   assert_kdb(cpu_lock.test());
379
380   switch (__builtin_expect(partner->check_sender(this, !snd_t.is_zero()), Ok))
381     {
382     case Failed:
383       return Failed;
384     case Queued:
385       state_add_dirty(Thread_send_wait);
386       return Queued;
387     default:
388       partner->state_change_dirty(~(Thread_ipc_mask | Thread_ready), Thread_ipc_transfer);
389       return Ok;
390     }
391 }
392
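/**
 * Record the IPC error e for both partners: the send-phase error in this
 * thread's UTCB and the corresponding receive-phase error in rcv's UTCB.
 */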
393 PRIVATE inline
394 void
395 Thread::set_ipc_error(L4_error const &e, Thread *rcv)
396 {
397   utcb().access()->error = e;
398   rcv->utcb().access()->error = L4_error(e, L4_error::Rcv);
399 }
400
401
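/**
 * Pick the sender to receive from.  For a closed wait, return the given
 * sender only if it is already queued on us; for an open wait, take the
 * head of the sender list and record it as partner.  Returns 0 if nothing
 * is pending.
 */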
402 PRIVATE inline
403 Sender *
404 Thread::get_next_sender(Sender *sender)
405 {
406   if (sender_list()->head())
407     {
408       if (sender) // closed wait
409         {
410           if (sender->in_sender_list() && this == sender->receiver())
411             return sender;
412         }
413       else // open wait
414         {
415           Sender *next = Sender::cast(sender_list()->head());
416           assert_kdb (next->in_sender_list());
417           set_partner(next);
418           return next;
419         }
420     }
421   return 0;
422 }
423
424
425 /**
426  * Carry out an IPC operation (optional send phase, optional receive phase).
427  * Block until we can send the message or the timeout hits.
428  * @param partner the receiver of our message
429  * @param t a timeout pair (send and receive timeout)
430  * @param regs sender's IPC registers; the IPC result is reported in their
431  *             message tag, since the function itself returns nothing
432  * @pre cpu_lock must be held
433  */
434 PUBLIC
435 void
436 Thread::do_ipc(L4_msg_tag const &tag, bool have_send, Thread *partner,
437                bool have_receive, Sender *sender,
438                L4_timeout_pair t, Syscall_frame *regs,
439                unsigned char rights)
440 {
441   assert_kdb (cpu_lock.test());
442   assert_kdb (this == current());
443
444   bool do_switch = false;
445
446   assert_kdb (!(state() & Thread_ipc_mask));
447
448   prepare_receive(sender, have_receive ? regs : 0);
449   bool activate_partner = false;
450
451   if (have_send)
452     {
453       assert_kdb(!in_sender_list());
454       do_switch = tag.do_switch();
455
456       bool ok;
457       unsigned result;
458
459       set_ipc_send_rights(rights);
460
461       if (EXPECT_FALSE(partner->cpu() != current_cpu()) ||
462           ((result = handshake_receiver(partner, t.snd)) == Failed
463            && partner->drq_pending()))
464         {
465           // either this is a genuine cross-CPU (X-CPU) IPC, or we ran into
466           // an IPC during migration (indicated by the pending DRQ)
467           do_switch = false;
468           result = remote_handshake_receiver(tag, partner, have_receive, t.snd,
469                                              regs, rights);
470         }
471
472       switch (__builtin_expect(result, Ok))
473         {
474         case Done:
475           ok = true;
476           break;
477
478         case Queued:
479           // set _snd_regs, to enable active receiving
480           snd_regs(regs);
481           ok = do_send_wait(partner, t.snd); // --- blocking point ---
482           break;
483
484         case Failed:
485           state_del_dirty(Thread_ipc_mask);
486           ok = false;
487           break;
488
489         default:
490           // We can reset the receiver's timeout here.  Ping-pong IPC with
491           // timeouts profits from this, because it avoids most of the
492           // sorting overhead: if we don't reset the timeout, it is very
493           // likely that the receiver's timeout is still sitting in the
494           // timeout queue.
495           partner->reset_timeout();
496
497           ok = transfer_msg(tag, partner, regs, rights);
498
499           // switch to receiving state
500           state_del_dirty(Thread_ipc_mask);
501           if (ok && have_receive)
502             state_add_dirty(Thread_receive_wait);
503
504           activate_partner = partner != this;
505           break;
506         }
507
508       if (EXPECT_FALSE(!ok))
509         {
510           // send failed, so do not switch to receiver directly and skip receive phase
511           have_receive = false;
512           regs->tag(L4_msg_tag(0, 0, L4_msg_tag::Error, 0));
513         }
514     }
515   else
516     {
517       assert_kdb (have_receive);
518       state_add_dirty(Thread_receive_wait);
519     }
520
521   // only do direct switch on closed wait (call) or if we run on a foreign
522   // scheduling context
523   Sender *next = 0;
524
525   have_receive = state() & Thread_receive_wait;
526
527   if (have_receive)
528     {
529       assert_kdb (!in_sender_list());
530       assert_kdb (!(state() & Thread_send_wait));
531       next = get_next_sender(sender);
532     }
533
534   if (activate_partner)
535     {
536       if (partner->cpu() == current_cpu())
537         {
538           Sched_context *cs = Sched_context::rq(cpu()).current_sched();
539           do_switch = do_switch && ((have_receive && sender) || cs->context() != this)
540                       && !(next && current_sched()->dominates(cs));
541           partner->state_change_dirty(~Thread_ipc_transfer, Thread_ready);
542           if (do_switch)
543             schedule_if(handle_drq() || switch_exec_locked(partner, Context::Not_Helping));
544           else if (partner->current_sched()->deblock(current_cpu(), current_sched(), true))
545             switch_to_locked(partner);
546         }
547       else
548         partner->drq_state_change(~Thread_ipc_transfer, Thread_ready);
549     }
550
551   if (next)
552     {
553       ipc_receiver_ready();
554       next->ipc_send_msg(this);
555       state_del_dirty(Thread_ipc_mask);
556     }
557   else if (have_receive)
558     {
559       if ((state() & Thread_full_ipc_mask) == Thread_receive_wait)
560         goto_sleep(t.rcv, sender, utcb().access(true));
561     }
562
563   if (EXPECT_TRUE (!(state() & Thread_full_ipc_mask)))
564     return;
565
566   while (EXPECT_FALSE(state() & Thread_ipc_transfer))
567     {
568       state_del_dirty(Thread_ready);
569       schedule();
570     }
571
572   if (EXPECT_TRUE (!(state() & Thread_full_ipc_mask)))
573     return;
574
575   Utcb *utcb = this->utcb().access(true);
576   // The IPC has not finished; the cause is either a timeout or a cancel.
577   // XXX should only modify the error-code part of the status code
578
579   if (EXPECT_FALSE(state() & Thread_cancel))
580     {
581       // we've presumably been reset!
582       regs->tag(commit_error(utcb, L4_error::R_canceled, regs->tag()));
583     }
584   else
585     regs->tag(commit_error(utcb, L4_error::R_timeout, regs->tag()));
586   state_del(Thread_full_ipc_mask);
587 }
588
589
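/**
 * Copy the message described by tag into the receiver's UTCB and IPC
 * registers, set the receiver's message tag and sender label, and, when the
 * sender performs a call to this receiver, install the reply capability via
 * set_caller().
 */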
590 PRIVATE inline NEEDS ["map_util.h", Thread::copy_utcb_to]
591 bool
592 Thread::transfer_msg(L4_msg_tag tag, Thread *receiver,
593                      Syscall_frame *sender_regs, unsigned char rights)
594 {
595   Syscall_frame* dst_regs = receiver->rcv_regs();
596
597   bool success = copy_utcb_to(tag, receiver, rights);
598   tag.set_error(!success);
599   dst_regs->tag(tag);
600   dst_regs->from(sender_regs->from_spec());
601
602   // set up the reply capability in case of a call
603   if (success && partner() == receiver)
604     receiver->set_caller(this, rights);
605
606   return success;
607 }
608
609
610
611 IMPLEMENT inline
612 Buf_utcb_saver::Buf_utcb_saver(const Utcb *u)
613 {
614   buf_desc = u->buf_desc;
615   buf[0] = u->buffers[0];
616   buf[1] = u->buffers[1];
617 }
618
619 IMPLEMENT inline
620 void
621 Buf_utcb_saver::restore(Utcb *u)
622 {
623   u->buf_desc = buf_desc;
624   u->buffers[0] = buf[0];
625   u->buffers[1] = buf[1];
626 }
627
628 IMPLEMENT inline
629 Pf_msg_utcb_saver::Pf_msg_utcb_saver(Utcb const *u) : Buf_utcb_saver(u)
630 {
631   msg[0] = u->values[0];
632   msg[1] = u->values[1];
633 }
634
635 IMPLEMENT inline
636 void
637 Pf_msg_utcb_saver::restore(Utcb *u)
638 {
639   Buf_utcb_saver::restore(u);
640   u->values[0] = msg[0];
641   u->values[1] = msg[1];
642 }
643
644
645 /**
646  * \pre must run with local IRQs disabled (CPU lock held)
647  * to ensure that the handler does not disappear meanwhile.
648  */
649 PRIVATE
650 bool
651 Thread::exception(Kobject_iface *handler, Trap_state *ts, Mword rights)
652 {
653   Syscall_frame r;
654   L4_timeout_pair timeout(L4_timeout::Never, L4_timeout::Never);
655
656   CNT_EXC_IPC;
657
658   void *old_utcb_handler = _utcb_handler;
659   _utcb_handler = ts;
660
661   // fill registers for IPC
662   Utcb *utcb = this->utcb().access(true);
663   Buf_utcb_saver saved_state(utcb);
664
665   utcb->buf_desc = L4_buf_desc(0, 0, 0, L4_buf_desc::Inherit_fpu);
666   utcb->buffers[0] = L4_msg_item::map(0).raw();
667   utcb->buffers[1] = L4_fpage::all_spaces().raw();
668
669   // clear regs
670   L4_msg_tag tag(L4_exception_ipc::Msg_size, 0, L4_msg_tag::Transfer_fpu,
671                  L4_msg_tag::Label_exception);
672
673   r.tag(tag);
674   r.timeout(timeout);
675   r.from(0);
676   r.ref(L4_obj_ref(_exc_handler.raw() << L4_obj_ref::Cap_shift, L4_obj_ref::Ipc_call_ipc));
677   spill_user_state();
678   handler->invoke(r.ref(), rights, &r, utcb);
679   fill_user_state();
680
681   saved_state.restore(utcb);
682
683   if (EXPECT_FALSE(r.tag().has_error()))
684     {
685       if (Config::conservative)
686         {
687           printf(" exception fault %s error = 0x%lx\n",
688                  utcb->error.snd_phase() ? "send" : "rcv",
689                  utcb->error.raw());
690           kdb_ke("ipc to pager failed");
691         }
692
693       state_del(Thread_in_exception);
694     }
695    else if (r.tag().proto() == L4_msg_tag::Label_allow_syscall)
696      state_add(Thread_dis_alien);
697
698   // restore original utcb_handler
699   _utcb_handler = old_utcb_handler;
700
701   // FIXME: handle a non-existing pager properly;
702   // for now, just ignore any errors
703   return 1;
704 }
705
706 /* Return 1 if the exception could be handled, 0 if not; in the latter
707  * case the caller of send_exception halts the thread.
708  */
709 PUBLIC inline NEEDS["task.h", "trap_state.h",
710                     Thread::fast_return_to_user,
711                     Thread::save_fpu_state_to_utcb]
712 int
713 Thread::send_exception(Trap_state *ts)
714 {
715   assert(cpu_lock.test());
716
717   Vcpu_state *vcpu = vcpu_state().access();
718
719   if (vcpu_exceptions_enabled(vcpu))
720     {
721       // do not reflect debug exceptions to the VCPU but handle them in
722       // Fiasco
723       if (EXPECT_FALSE(ts->is_debug_exception()
724                        && !(vcpu->state & Vcpu_state::F_debug_exc)))
725         return 0;
726
727       if (_exc_cont.valid())
728         return 1;
729       if (vcpu_enter_kernel_mode(vcpu))
730         {
731           // enter_kernel_mode has switched the address space from user to
732           // kernel space, so reevaluate the address of the VCPU state area
733           vcpu = vcpu_state().access();
734         }
735
736       spill_user_state();
737       LOG_TRACE("VCPU events", "vcpu", this, __context_vcpu_log_fmt,
738           Vcpu_log *l = tbe->payload<Vcpu_log>();
739           l->type = 2;
740           l->state = vcpu->_saved_state;
741           l->ip = ts->ip();
742           l->sp = ts->sp();
743           l->trap = ts->trapno();
744           l->err = ts->error();
745           l->space = vcpu_user_space() ? static_cast<Task*>(vcpu_user_space())->dbg_id() : ~0;
746           );
747       memcpy(&vcpu->_ts, ts, sizeof(Trap_state));
748       save_fpu_state_to_utcb(ts, utcb().access());
749       fast_return_to_user(vcpu->_entry_ip, vcpu->_sp, vcpu_state().usr().get());
750     }
751
752   // local IRQs must be disabled because we dereference a Thread_ptr
753   if (EXPECT_FALSE(_exc_handler.is_kernel()))
754     return 0;
755
756   if (!send_exception_arch(ts))
757     return 0; // do not send exception
758
759   unsigned char rights = 0;
760   Kobject_iface *pager = _exc_handler.ptr(space(), &rights);
761
762   if (EXPECT_FALSE(!pager))
763     {
764       /* no pager (anymore), just ignore the exception, return success */
765       LOG_TRACE("Exception invalid handler", "exc", this,
766                 __fmt_exception_invalid_handler,
767                 Log_exc_invalid *l = tbe->payload<Log_exc_invalid>();
768                 l->cap_idx = _exc_handler.raw());
769       if (EXPECT_FALSE(space() == sigma0_task))
770         {
771           WARNX(Error, "Sigma0 raised an exception --> HALT\n");
772           panic("...");
773         }
774
775       pager = this; // block on ourselves
776     }
777
778   state_change(~Thread_cancel, Thread_in_exception);
779
780   return exception(pager, ts, rights);
781 }
782
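/**
 * Handle receive buffers that request a local ID instead of a mapping: if
 * sender and receiver share the same space, hand the flexpage through
 * verbatim; otherwise, if the referenced object is already local to the
 * receiver's space, deliver its local object ID together with the rights.
 * Returns false if a real mapping has to be established instead.
 */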
783 PRIVATE static
784 bool
785 Thread::try_transfer_local_id(L4_buf_iter::Item const *const buf,
786                               L4_fpage sfp, Mword *rcv_word, Thread* snd,
787                               Thread *rcv)
788 {
789   if (buf->b.is_rcv_id())
790     {
791       if (snd->space() == rcv->space())
792         {
793           rcv_word[-2] |= 6;
794           rcv_word[-1] = sfp.raw();
795           return true;
796         }
797       else
798         {
799           unsigned char rights = 0;
800           Obj_space::Capability cap = snd->space()->obj_space()->lookup(sfp.obj_index());
801           Kobject_iface *o = cap.obj();
802           rights = cap.rights();
803           if (EXPECT_TRUE(o && o->is_local(rcv->space())))
804             {
805               rcv_word[-2] |= 4;
806               rcv_word[-1] = o->obj_id() | Mword(rights);
807               return true;
808             }
809         }
810     }
811   return false;
812 }
813
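/**
 * Copy the untyped message words from the sender's to the receiver's UTCB
 * (truncated to Utcb::Max_words), transfer typed items if the tag announces
 * any, and hand over the FPU state when the sender requests it, the
 * receiver accepts it, and the capability carries write rights.
 */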
814 PRIVATE static inline
815 bool FIASCO_WARN_RESULT
816 Thread::copy_utcb_to_utcb(L4_msg_tag const &tag, Thread *snd, Thread *rcv,
817                           unsigned char rights)
818 {
819   assert (cpu_lock.test());
820
821   Utcb *snd_utcb = snd->utcb().access();
822   Utcb *rcv_utcb = rcv->utcb().access();
823   Mword s = tag.words();
824   Mword r = Utcb::Max_words;
825
826   Mem::memcpy_mwords(rcv_utcb->values, snd_utcb->values, r < s ? r : s);
827
828   bool success = true;
829   if (tag.items())
830     success = transfer_msg_items(tag, snd, snd_utcb, rcv, rcv_utcb, rights);
831
832   if (tag.transfer_fpu() && rcv_utcb->inherit_fpu() && (rights & L4_fpage::W))
833     snd->transfer_fpu(rcv);
834
835   return success;
836 }
837
838
839 PUBLIC inline NEEDS[Thread::copy_utcb_to_ts, Thread::copy_utcb_to_utcb,
840                     Thread::copy_ts_to_utcb]
841 bool FIASCO_WARN_RESULT
842 Thread::copy_utcb_to(L4_msg_tag const &tag, Thread* receiver,
843                      unsigned char rights)
844 {
845   // we cannot copy trap state to trap state!
846   assert_kdb (!this->_utcb_handler || !receiver->_utcb_handler);
847   if (EXPECT_FALSE(this->_utcb_handler != 0))
848     return copy_ts_to_utcb(tag, this, receiver, rights);
849   else if (EXPECT_FALSE(receiver->_utcb_handler != 0))
850     return copy_utcb_to_ts(tag, this, receiver, rights);
851   else
852     return copy_utcb_to_utcb(tag, this, receiver, rights);
853 }
854
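/**
 * Transfer the typed items of a message: walk the send items, match each
 * against the receiver's buffer descriptors (memory, I/O, or object space),
 * and establish the requested mappings via fpage_map().  A missing or
 * mismatched receive buffer aborts the transfer with L4_error::Overflow.
 */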
855 PRIVATE static
856 bool
857 Thread::transfer_msg_items(L4_msg_tag const &tag, Thread* snd, Utcb *snd_utcb,
858                            Thread *rcv, Utcb *rcv_utcb,
859                            unsigned char rights)
860 {
861   // LOG_MSG_3VAL(current(), "map bd=", rcv_utcb->buf_desc.raw(), 0, 0);
862   L4_buf_iter mem_buffer(rcv_utcb, rcv_utcb->buf_desc.mem());
863   L4_buf_iter io_buffer(rcv_utcb, rcv_utcb->buf_desc.io());
864   L4_buf_iter obj_buffer(rcv_utcb, rcv_utcb->buf_desc.obj());
865   L4_snd_item_iter snd_item(snd_utcb, tag.words());
866   register int items = tag.items();
867   Mword *rcv_word = rcv_utcb->values + tag.words();
868
869   // XXX: damn X-CPU state modification
870   // snd->prepare_long_ipc(rcv);
871   Reap_list rl;
872
873   for (;items > 0 && snd_item.more();)
874     {
875       if (EXPECT_FALSE(!snd_item.next()))
876         {
877           snd->set_ipc_error(L4_error::Overflow, rcv);
878           return false;
879         }
880
881       L4_snd_item_iter::Item const *const item = snd_item.get();
882
883       if (item->b.is_void())
884         { // XXX: not sure if void fpages are needed
885           // skip send item and current rcv_buffer
886           --items;
887           continue;
888         }
889
890       L4_buf_iter *buf_iter = 0;
891
892       switch (item->b.type())
893         {
894         case L4_msg_item::Map:
895           switch (L4_fpage(item->d).type())
896             {
897             case L4_fpage::Memory: buf_iter = &mem_buffer; break;
898             case L4_fpage::Io:     buf_iter = &io_buffer; break;
899             case L4_fpage::Obj:    buf_iter = &obj_buffer; break;
900             default: break;
901             }
902           break;
903         default:
904           break;
905         }
906
907       if (EXPECT_FALSE(!buf_iter))
908         {
909           // LOG_MSG_3VAL(snd, "lIPCm0", 0, 0, 0);
910           snd->set_ipc_error(L4_error::Overflow, rcv);
911           return false;
912         }
913
914       L4_buf_iter::Item const *const buf = buf_iter->get();
915
916       if (EXPECT_FALSE(buf->b.is_void() || buf->b.type() != item->b.type()))
917         {
918           // LOG_MSG_3VAL(snd, "lIPCm1", buf->b.raw(), item->b.raw(), 0);
919           snd->set_ipc_error(L4_error::Overflow, rcv);
920           return false;
921         }
922
923         {
924           assert_kdb (item->b.type() == L4_msg_item::Map);
925           L4_fpage sfp(item->d);
926           *rcv_word = (item->b.raw() & ~0x0ff7) | (sfp.raw() & 0x0ff0);
927
928           rcv_word += 2;
929
930           if (!try_transfer_local_id(buf, sfp, rcv_word, snd, rcv))
931             {
932               // we need to do a real mapping
933
934               // diminish the rights when sending via restricted IPC gates
935               if (sfp.type() == L4_fpage::Obj)
936                 sfp.mask_rights(L4_fpage::Rights(rights | L4_fpage::RX));
937
938               L4_error err = fpage_map(snd->space(), sfp,
939                   rcv->space(), L4_fpage(buf->d), item->b.raw(), &rl);
940
941               if (EXPECT_FALSE(!err.ok()))
942                 {
943                   snd->set_ipc_error(err, rcv);
944                   return false;
945                 }
946             }
947         }
948
949       --items;
950
951       if (!item->b.compund())
952         buf_iter->next();
953     }
954
955   if (EXPECT_FALSE(items))
956     {
957       snd->set_ipc_error(L4_error::Overflow, rcv);
958       return false;
959     }
960
961   return true;
962 }
963
964
965 /**
966  * \pre Runs on the sender CPU
967  */
968 PRIVATE inline
969 bool
970 Thread::abort_send(L4_error const &e, Thread *partner)
971 {
972   state_del_dirty(Thread_full_ipc_mask);
973
974   if (_timeout && _timeout->is_set())
975     _timeout->reset();
976
977   set_timeout(0);
978   Abort_state abt = Abt_ipc_done;
979
980   if (partner->cpu() == current_cpu())
981     {
982       if (in_sender_list())
983         {
984           sender_dequeue(partner->sender_list());
985           partner->vcpu_update_state();
986           abt = Abt_ipc_cancel;
987
988         }
989       else if (partner->in_ipc(this))
990         abt = Abt_ipc_in_progress;
991     }
992   else
993     abt = partner->Receiver::abort_send(this);
994
995   switch (abt)
996     {
997     default:
998     case Abt_ipc_done:
999       return true;
1000     case Abt_ipc_cancel:
1001       utcb().access()->error = e;
1002       return false;
1003     case Abt_ipc_in_progress:
1004       state_add_dirty(Thread_ipc_transfer);
1005       while (state() & Thread_ipc_transfer)
1006         {
1007           state_del_dirty(Thread_ready);
1008           schedule();
1009         }
1010       return true;
1011     }
1012 }
1013
1014
1015
1016 /**
1017  * \pre Runs on the sender CPU
1018  */
1019 PRIVATE inline
1020 bool
1021 Thread::do_send_wait(Thread *partner, L4_timeout snd_t)
1022 {
1023   IPC_timeout timeout;
1024
1025   if (EXPECT_FALSE(snd_t.is_finite()))
1026     {
1027       Unsigned64 tval = snd_t.microsecs(Timer::system_clock(), utcb().access(true));
1028       // Zero timeout or timeout expired already -- give up
1029       if (tval == 0)
1030         return abort_send(L4_error::Timeout, partner);
1031
1032       set_timeout(&timeout);
1033       timeout.set(tval, cpu());
1034     }
1035
1036   register Mword ipc_state;
1037
1038   while (((ipc_state = state() & (Thread_send_wait | Thread_ipc_abort_mask))) == Thread_send_wait)
1039     {
1040       state_del_dirty(Thread_ready);
1041       schedule();
1042     }
1043
1044   if (EXPECT_FALSE(ipc_state == (Thread_cancel | Thread_send_wait)))
1045     return abort_send(L4_error::Canceled, partner);
1046
1047   if (EXPECT_FALSE(ipc_state == (Thread_timeout | Thread_send_wait)))
1048     return abort_send(L4_error::Timeout, partner);
1049
1050   timeout.reset();
1051   set_timeout(0);
1052
1053   return true;
1054 }
1055
1056 PRIVATE inline
1057 void
1058 Thread::set_ipc_send_rights(unsigned char c)
1059 {
1060   _ipc_send_rights = c;
1061 }
1062
1063 //---------------------------------------------------------------------
1064 IMPLEMENTATION [!mp]:
1065
1066 PRIVATE inline NEEDS ["l4_types.h"]
1067 unsigned
1068 Thread::remote_handshake_receiver(L4_msg_tag const &, Thread *,
1069                                   bool, L4_timeout, Syscall_frame *, unsigned char)
1070 {
1071   kdb_ke("Remote IPC in UP kernel");
1072   return Failed;
1073 }
1074
1075 //---------------------------------------------------------------------
1076 INTERFACE [mp]:
1077
1078 struct Ipc_remote_request;
1079
1080 struct Ipc_remote_request
1081 {
1082   L4_msg_tag tag;
1083   Thread *partner;
1084   Syscall_frame *regs;
1085   unsigned char rights;
1086   bool timeout;
1087   bool have_rcv;
1088
1089   unsigned result;
1090 };
1091
1092 struct Ready_queue_request
1093 {
1094   Thread *thread;
1095   Mword state_add;
1096   Mword state_del;
1097
1098   enum Result { Done, Wrong_cpu, Not_existent };
1099   Result result;
1100 };
1101
1102 //---------------------------------------------------------------------
1103 IMPLEMENTATION [mp]:
1104
1105
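/**
 * Runs on the partner's CPU in DRQ context: perform the sender handshake
 * there and, for messages without typed items, the transfer itself.
 * Messages carrying typed items are left to the sender's side by putting
 * the partner into Thread_ipc_transfer state, because the mapping code may
 * need to take locks that must not be taken in a DRQ handler.
 */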
1106 PRIVATE inline NOEXPORT
1107 bool
1108 Thread::remote_ipc_send(Context *src, Ipc_remote_request *rq)
1109 {
1110   (void)src;
1111   // LOG_MSG_3VAL(this, "rse", current_cpu(), (Mword)src, (Mword)this);
1112 #if 0
1113   LOG_MSG_3VAL(this, "rsend", (Mword)src, 0, 0);
1114   printf("CPU[%u]: remote IPC send ...\n"
1115          "  partner=%p [%u]\n"
1116          "  sender =%p [%u] regs=%p\n"
1117          "  timeout=%u\n",
1118          current_cpu(),
1119          rq->partner, rq->partner->cpu(),
1120          src, src->cpu(),
1121          rq->regs,
1122          rq->timeout);
1123 #endif
1124
1125   switch (__builtin_expect(rq->partner->check_sender(this, rq->timeout), Ok))
1126     {
1127     case Failed:
1128       rq->result = Failed;
1129       return false;
1130     case Queued:
1131       rq->result = Queued;
1132       return false;
1133     default:
1134       break;
1135     }
1136
1137   // Trigger the remote_ipc_receiver_ready path, because we may need to grab
1138   // locks and that is forbidden in a DRQ handler.  So transfer the IPC in the
1139   // usual thread code.  However, this induces an overhead of two extra IPIs.
1140   if (rq->tag.items())
1141     {
1142       //LOG_MSG_3VAL(rq->partner, "pull", dbg_id(), 0, 0);
1143       rq->partner->state_change_dirty(~(Thread_ipc_mask | Thread_ready), Thread_ipc_transfer);
1144       rq->result = Ok;
1145       return true;
1146     }
1147   rq->partner->vcpu_disable_irqs();
1148   bool success = transfer_msg(rq->tag, rq->partner, rq->regs, _ipc_send_rights);
1149   rq->result = success ? Done : Failed;
1150
1151   rq->partner->state_change_dirty(~Thread_ipc_mask, Thread_ready);
1152   // hm, should be done by lazy queueing: rq->partner->ready_enqueue();
1153   return true;
1154 }
1155
1156 PRIVATE static
1157 unsigned
1158 Thread::handle_remote_ipc_send(Drq *src, Context *, void *_rq)
1159 {
1160   Ipc_remote_request *rq = (Ipc_remote_request*)_rq;
1161   bool r = nonull_static_cast<Thread*>(src->context())->remote_ipc_send(src->context(), rq);
1162   //LOG_MSG_3VAL(src, "rse<", current_cpu(), (Mword)src, r);
1163   return r ? Drq::Need_resched : 0;
1164 }
1165
1166 /**
1167  * \pre Runs on the sender CPU
1168  */
1169 PRIVATE //inline NEEDS ["mp_request.h"]
1170 unsigned
1171 Thread::remote_handshake_receiver(L4_msg_tag const &tag, Thread *partner,
1172                                   bool have_receive,
1173                                   L4_timeout snd_t, Syscall_frame *regs,
1174                                   unsigned char rights)
1175 {
1176   // Flag that there must be no switch in the receive path.
1177   // This flag also prevents the receive path from accessing
1178   // the thread state of a remote sender.
1179   Ipc_remote_request rq;
1180   rq.tag = tag;
1181   rq.have_rcv = have_receive;
1182   rq.partner = partner;
1183   rq.timeout = !snd_t.is_zero();
1184   rq.regs = regs;
1185   rq.rights = rights;
1186   _snd_regs = regs;
1187
1188   set_receiver(partner);
1189
1190   state_add_dirty(Thread_send_wait);
1191
1192   partner->drq(handle_remote_ipc_send, &rq,
1193                remote_prepare_receive);
1194
1195   return rq.result;
1196 }
1197
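/**
 * DRQ reply handler acting on the sender thread after the remote handshake:
 * if the sender got queued or the transfer is still to be done locally (Ok),
 * keep the send state; otherwise clear the IPC state and, when the send
 * succeeded and a receive phase was requested, enter the receive wait.
 */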
1198 PRIVATE static
1199 unsigned
1200 Thread::remote_prepare_receive(Drq *src, Context *, void *arg)
1201 {
1202   Context *c = src->context();
1203   Ipc_remote_request *rq = (Ipc_remote_request*)arg;
1204   //printf("CPU[%2u:%p]: remote_prepare_receive (err=%x)\n", current_cpu(), c, rq->err.error());
1205
1206   // No atomic switch to the receive state if we got queued or if the IPC
1207   // still has to be carried out by the sender's CPU
1208   if (EXPECT_FALSE(rq->result == Queued || rq->result == Ok))
1209     return 0;
1210
1211   c->state_del(Thread_ipc_mask);
1212   if (EXPECT_FALSE((rq->result & Failed) || !rq->have_rcv))
1213     return 0;
1214
1215   c->state_add_dirty(Thread_receive_wait);
1216   return 0;
1217 }
1218
1219 //---------------------------------------------------------------------------
1220 IMPLEMENTATION [debug]:
1221
1222 IMPLEMENT
1223 unsigned
1224 Thread::log_fmt_pf_invalid(Tb_entry *e, int max, char *buf)
1225 {
1226   Log_pf_invalid *l = e->payload<Log_pf_invalid>();
1227   return snprintf(buf, max, "InvCap C:%lx pfa=%lx err=%lx", l->cap_idx, l->pfa, l->err);
1228 }
1229
1230 IMPLEMENT
1231 unsigned
1232 Thread::log_fmt_exc_invalid(Tb_entry *e, int max, char *buf)
1233 {
1234   Log_exc_invalid *l = e->payload<Log_exc_invalid>();
1235   return snprintf(buf, max, "InvCap C:%lx", l->cap_idx);
1236 }