3 * Shared between UX and native IA32.
5 INTERFACE [ia32,amd64,ux]:
7 #include "trap_state.h"
11 EXTENSION class Thread
15 * Return code segment used for exception reflection to user mode
17 static Mword exception_cs();
// Number of Idt_entry slots covered by the limit a thread passed to the
// emulated "lidt"; the slow trap path writes it and later uses it to bound
// the trap number before reading a descriptor from _idt.
21 Unsigned16 _idt_limit;
// Optional nested trap handler. The slow trap path tests it: while it is
// null the trap goes to handle_not_nested_trap(), otherwise
// call_nested_trap_handler() is used.
23 static Trap_state::Handler nested_trap_handler FIASCO_FASTCALL;
27 //----------------------------------------------------------------------------
28 INTERFACE [ia32,amd64]:
32 EXTENSION class Thread
// Hook called for trap 3 (int3) when is_privileged_for_debug() holds; it is
// replaced through set_int3_handler(). May be null -- callers test it first.
35 static int (*int3_handler)(Trap_state*);
39 //----------------------------------------------------------------------------
40 IMPLEMENTATION [ia32,amd64,ux]:
48 #include "mem_layout.h"
51 #include "processor.h" // for cli/sti
53 #include "std_macros.h"
56 #include "trap_state.h"
57 #include "vmem_alloc.h"
// Storage for the static member Thread::nested_trap_handler declared in the
// interface section of this file.
59 Trap_state::Handler Thread::nested_trap_handler FIASCO_FASTCALL;
// Constructor fragment: member-initializer list followed by the visible part
// of the constructor body (the signature is not part of this excerpt).
63 : Receiver(&_thread_lock),
64 Sender(0), // select optimized version of constructor
65 _pager(Thread_ptr::Invalid),
66 _exc_handler(Thread_ptr::Invalid),
69 //assert (current() == thread_lock()->lock_owner());
// A thread may only be constructed while its state is still Thread_invalid.
70 assert (state() == Thread_invalid);
// With Config::stack_depth set, fill the area behind the Thread object (all
// but the last 64 bytes of the thread block) with the byte '5'.
// NOTE(review): presumably a marker pattern for measuring stack usage -- confirm.
74 if (Config::stack_depth)
75 std::memset((char*)this + sizeof(Thread), '5',
76 Config::thread_block_size-sizeof(Thread)-64);
// Push the address of user_invoke onto the kernel stack (pre-decrementing
// _kernel_sp).
83 *reinterpret_cast<void(**)()> (--_kernel_sp) = user_invoke;
// The new thread starts out with Thread_dead and Thread_suspended set.
87 state_add_dirty(Thread_dead | Thread_suspended);
89 // ok, we're ready to go!
// Returns the user instruction pointer: the one held in _exc_cont when
// exception_triggered() is true, otherwise the one in the frame returned by
// regs().
92 IMPLEMENT inline NEEDS[Thread::exception_triggered]
94 Thread::user_ip() const
95 { return exception_triggered()?_exc_cont.ip():regs()->ip(); }
// Returns the flags value stored in the frame returned by regs().
99 Thread::user_flags() const
100 { return regs()->flags(); }
// Decides whether the trapping context may use the debug facilities
// (int3 extensions, kernel debugger entry).
// NOTE(review): the parameter name is commented out although the statement
// below refers to ts -- presumably that statement is compiled out by
// preprocessor lines not visible in this excerpt; confirm.
106 Thread::is_privileged_for_debug(Trap_state * /*ts*/)
// True when the IOPL field of the saved flags equals EFLAGS_IOPL_U.
109 return ((ts->flags() & EFLAGS_IOPL) == EFLAGS_IOPL_U);
115 /** Check if the pagefault occurred at a special place: At some places in the
116 kernel we want to ensure that a specific address is mapped. The regular
117 case is "mapped", the exception or slow case is "not mapped". The fastest
118 way to check this is to touch into the memory. If there is no mapping for
119 the address we get a pagefault. Now the pagefault exception handler can
120 recognize that situation by scanning the code. The trick is that the
121 assembler instruction "andl $0xffffffff, %ss:(%ecx)" _clears_ the carry
122 flag normally (see Intel reference manual). The pager wants to inform the
123 code that there was a pagefault and therefore _sets_ the carry flag. So
124 the code has only to check if the carry flag is set. If yes, there was
125 a pagefault at this instruction.
126 @param regs return frame of the faulting context; its saved ip is examined */
129 Thread::pagein_tcb_request(Return_frame *regs)
131 unsigned long new_ip = regs->ip();
// A leading 0x48 byte is a REX.W prefix; the handling of that case is on
// lines not visible in this excerpt.
132 if (*(Unsigned8*)new_ip == 0x48) // REX.W
// op holds two instruction bytes: the low byte is the first one in memory.
135 register Unsigned16 op = *(Unsigned16*)new_ip;
136 //LOG_MSG_3VAL(current(),"TCB", op, new_ip, 0);
// Match a first byte of 0x8b whose following byte has its top two bits clear.
137 if ((op & 0xc0ff) == 0x8b) // Context::is_tcb_mapped() and Context::state()
// Resume execution behind the two matched bytes.
139 regs->ip(new_ip + 2);
// reg addresses a word located below the return frame.
// NOTE(review): presumably the saved eax slot of the page-fault entry
// frame -- confirm against the entry code.
147 Mword *reg = ((Mword*)regs) - 2 - Return_frame::Pf_ax_offset;
149 LOG_MSG_3VAL(current(),"TCB", op, regs->ip(), (Mword)reg);
150 LOG_MSG_3VAL(current(),"TCBX", reg[-3], reg[-4], reg[-5]);
151 LOG_MSG_3VAL(current(),"TCB0", reg[0], reg[-1], reg[-2]);
152 LOG_MSG_3VAL(current(),"TCB1", reg[1], reg[2], reg[3]);
// With the top two bits known to be zero, op >> 11 is bits 3..5 of the
// second byte; only the values 0..2 are supported here.
154 assert((op >> 11) <= 2);
// Zero the saved register that the faulting load would have written.
155 reg[-(op>>11)] = 0; // op==0 => eax, op==1 => ecx, op==2 => edx
157 // tell program that a pagefault occurred we cannot handle
158 regs->flags(regs->flags() | 0x41); // set carry and zero flag in EFLAGS
// Second recognised pattern: the four bytes at ip equal 0xff01f636. Skip
// them and report the fault through the carry flag only.
161 else if (*(Unsigned32*)regs->ip() == 0xff01f636) // used in shortcut.S
163 regs->ip(regs->ip() + 4);
164 regs->flags(regs->flags() | 1); // set carry flag in EFLAGS
// C-linkage entry point that forwards to restore_exc_state() of the current
// thread.
172 extern "C" FIASCO_FASTCALL
174 thread_restore_exc_state()
176 current_thread()->restore_exc_state();
181 Thread::print_page_fault_error(Mword e)
187 * The global trap handler switch.
188 * This function handles CPU-exception reflection, emulation of CPU
189 * instructions (LIDT, WRMSR, RDMSR), int3 debug messages,
190 * kernel-debugger invocation, and thread crashes (if a trap cannot be handled otherwise).
192 * @param ts trap state
193 * @return 0 if trap has been consumed by handler;
194 * -1 if trap could not be handled.
// Central dispatcher for traps that have no dedicated fast path. Visible
// stages, in order: debug IPI, vCPU user-mode exception reflection, kernel-mode
// traps, NMI/debugger interrupt, I/O page faults, instruction emulation
// ("lock; nop", lidt, wrmsr, rdmsr), exception IPC, user IDT trampoline, int3
// extensions, and finally the kill-thread / debugger path.
198 Thread::handle_slow_trap(Trap_state *ts)
// Non-zero when the trap came from a code segment with RPL != 0.
201 int from_user = ts->cs() & 3;
203 if (EXPECT_FALSE(ts->_trapno == 0xee)) //debug IPI
205 Ipi::eoi(Ipi::Debug);
// A thread running in vCPU user mode gets the trap offered to
// send_exception() before anything else is tried.
209 if (from_user && (state() & Thread_vcpu_user_mode) && send_exception(ts))
212 // XXX We might be forced to raise an exception. In this case, our return
213 // CS:IP points to leave_by_trigger_exception() which will trigger the
214 // exception just before returning to userland. But if we were inside an
215 // IPC while we were ex-regs'd, we will generate the 'exception after the
216 // syscall' _before_ we leave the kernel.
// Trap 13 whose error code has bits 1 and 2 both set goes straight to the
// exception path.
217 if (ts->_trapno == 13 && (ts->_err & 6) == 6)
218 goto check_exception;
221 // XXX: need to do in a different way, if on debug stack e.g.
223 if (EXPECT_FALSE(!in_context_area((void*)Proc::stack_pointer())))
224 goto generic_debug; // we're in the GDB stub or in jdb
225 // -- let generic handler handle it
230 if (!check_trap13_kernel (ts))
// Traps taken in kernel mode: only page faults (14) are turned into
// exceptions, everything else goes to the debugger path.
233 if (EXPECT_FALSE(!from_user))
235 // get also here if a pagefault was not handled by the user level pager
236 if (ts->_trapno == 14)
237 goto check_exception;
239 goto generic_debug; // we were in kernel mode -- nothing to emulate
242 if (EXPECT_FALSE(ts->_trapno == 2))
243 goto generic_debug; // NMI always enters kernel debugger
245 if (EXPECT_FALSE(ts->_trapno == 0xffffffff))
246 goto generic_debug; // debugger interrupt
250 // so we were in user mode -- look for something to emulate
252 // We continue running with interrupts off -- no sti() here. But
253 // interrupts may be enabled by the pagefault handler if we get a
254 // pagefault in peek_user().
256 // Set up exception handling. If we suffer an un-handled user-space
257 // page fault, kill the thread.
// setjmp() returns non-zero only when control comes back through
// pf_recovery; that value is kept in error.
260 if (EXPECT_FALSE ((error = setjmp(pf_recovery)) != 0) )
263 "\033[1mUnhandled page fault, code=%08x\033[m\n",
268 _recover_jmpbuf = &pf_recovery;
// A return value of 1 from handle_io_page_fault() means the trap is done.
270 switch (handle_io_page_fault(ts, from_user))
272 case 1: goto success;
278 // check for "invalid opcode" exception
279 if (EXPECT_FALSE (Config::Kip_syscalls && ts->_trapno == 6))
282 // Check "lock; nop" opcode
283 if (mem_space()->peek ((Unsigned16*) ip, from_user) == 0x90f0)
// Skip the two-byte sequence and hand back the result of map_kip() as the
// trap's value.
285 ts->consume_instruction(2);
286 ts->value(task()->map_kip());
292 // just print out some warning, we do the normal exception handling
293 handle_sysenter_trap(ts, ip, from_user);
295 // check for general protection exception
296 if (ts->_trapno == 13 && (ts->_err & 0xffff) == 0)
298 // find out if we are a privileged task
299 bool is_privileged = trap_is_privileged (ts);
301 // check for "lidt (%eax)"
303 (ip < Kmem::mem_user_max - 4 &&
304 (mem_space()->peek((Mword*) ip, from_user) & 0xffffff) == 0x18010f))
306 // emulate "lidt (%eax)"
// The operand (16-bit limit at value(), table pointer at value() + 2) must
// lie below mem_user_max.
309 if (ts->value() >= Kmem::mem_user_max - 6)
313 = mem_space()->peek((Idt_entry**)(ts->value() + 2), from_user);
314 Unsigned16 limit = mem_space()->peek ((Unsigned16*) ts->value(), from_user);
// The table itself (limit + 1 bytes starting at idt) must also end below
// mem_user_max.
316 if ((Address)idt >= (Address)Kmem::mem_user_max-limit-1)
319 // OK; store descriptor
// Convert the byte limit into a number of Idt_entry slots.
321 _idt_limit = (limit + 1) / sizeof(Idt_entry);
323 // consume instruction and continue
324 ts->consume_instruction(3);
329 // check for "wrmsr (%eax)"
332 && (ip < Kmem::mem_user_max - 2)
333 && (mem_space()->peek((Unsigned16*) ip, from_user)) == 0x300f
334 && (Cpu::cpus.cpu(cpu()).can_wrmsr())))
336 printf("Detected wrmsr at %lx\n", ip);
339 do_wrmsr_in_kernel (ts);
341 // consume instruction and continue
342 ts->consume_instruction(2);
348 // check for "rdmsr (%eax)"
// NOTE(review): the rdmsr emulation is gated on can_wrmsr() as well --
// confirm that no separate read capability check is intended.
351 && (ip < Kmem::mem_user_max - 2)
352 && (mem_space()->peek((Unsigned16*) ip, from_user)) == 0x320f
353 && (Cpu::cpus.cpu(cpu()).can_wrmsr())))
355 printf("Detected rdmsr at %lx\n", ip);
358 do_rdmsr_in_kernel(ts);
360 // consume instruction and continue
361 ts->consume_instruction(2);
371 // send exception IPC if requested
372 if (send_exception(ts))
375 // let's see if we have a trampoline to invoke
// Only the first 0x20 trap numbers are eligible, and only if the user IDT
// registered through the lidt emulation covers them.
376 if (ts->_trapno < 0x20 && ts->_trapno < _idt_limit)
// Read the descriptor for this trap from the user-supplied table.
378 Idt_entry e = mem_space()->peek(_idt + ts->_trapno, 1);
380 if ((e.ist() & 0xe0) == 0x00
381 && (e.access() & 0x1f) == 0x0f) // gate descriptor ok?
383 Address handler = e.offset();
386 && handler < Kmem::mem_user_max // in user space?
387 && ts->sp() <= Kmem::mem_user_max
388 && ts->sp() > 5 * sizeof (Mword)) // enough space on user stack?
390 // OK, reflect the trap to user mode
// NOTE(review): the condition is the constant 1 (the raise_exception()
// call is commented out), so the branch below is always taken.
391 if (1 /* !raise_exception (ts, handler) */)
393 // someone interfered and changed our state
394 assert (state() & Thread_cancel);
395 state_del(Thread_cancel);
398 goto success; // we've consumed the trap
403 // backward compatibility cruft: check for those insane "int3" debug
404 // messaging command sequences
405 if (ts->_trapno == 3 && is_privileged_for_debug(ts))
// int3_handler may be unset; a non-zero result is tested here.
407 if (int3_handler && int3_handler(ts))
413 // privileged tasks also may invoke the kernel debugger with a debug
415 if (ts->_trapno == 1 && is_privileged_for_debug (ts))
420 // can't handle trap -- kill the thread
422 "\033[1mUnhandled trap \033[m\n",
426 if (Config::warn_level >= Warning)
429 if (Config::conservative)
430 kdb_ke("thread killed");
// Final fallback: without a nested trap handler use the built-in one.
441 if (!nested_trap_handler)
442 return handle_not_nested_trap(ts);
444 return call_nested_trap_handler(ts);
448 * The low-level page fault handler called from entry.S. We're invoked with
449 * interrupts turned off. Apart from turning on interrupts in almost
450 * all cases (except for kernel page faults in TCB area), just forwards
451 * the call to Thread::handle_page_fault().
452 * @param pfa page-fault virtual address
453 * @param error_code CPU error code
454 * @return true if page fault could be resolved, false otherwise
// C-linkage page-fault entry point. Filters special cases (fault taken
// outside the context area, vCPU page faults, faults with interrupts
// disabled) and then forwards to handle_page_fault() of the current thread.
456 extern "C" FIASCO_FASTCALL
458 thread_page_fault(Address pfa, Mword error_code, Address ip, Mword flags,
462 // XXX: need to do in a different way, if on debug stack e.g.
464 // If we're in the GDB stub -- let generic handler handle it
465 if (EXPECT_FALSE (!in_context_area((void*)Proc::stack_pointer())))
469 // Pagefault in user mode or interrupts were enabled
470 if (PF::is_usermode_error(error_code))
// Give the vCPU machinery of the current thread the first chance.
472 if (current_thread()->vcpu_pagefault(pfa, error_code, ip))
// The saved flags show that interrupts were enabled when the fault happened.
477 else if(flags & EFLAGS_IF)
480 // Pagefault in kernel mode and interrupts were disabled
483 // page fault in kernel memory region
484 if (Kmem::is_kmem_page_fault(pfa, error_code))
486 // We've interrupted a context in the kernel with disabled interrupts,
487 // the page fault address is in the kernel region, the error code is
488 // "not mapped" (as opposed to "access error"), and the region is
489 // actually valid (that is, mapped in Kmem's shared page directory,
490 // just not in the currently active page directory)
493 else if (!Config::conservative &&
494 !Kmem::is_kmem_page_fault(pfa, error_code))
496 // No error -- just enable interrupts.
501 // Error: We interrupted a cli'd kernel context touching kernel space
// Print the fault unless page-fault logging is already active, then stop in
// the kernel debugger.
502 if (!Thread::log_page_fault())
503 printf("*P[%lx,%lx,%lx] ", pfa, error_code & 0xffff, ip);
505 kdb_ke ("page fault in cli mode");
509 return current_thread()->handle_page_fault(pfa, error_code, ip, regs);
512 /** The catch-all trap entry point. Called by assembly code when a
513 CPU trap (that's not specially handled, such as system calls) occurs.
514 Just forwards the call to Thread::handle_slow_trap().
515 @param ts trap state
516 @return 0 if trap has been consumed by handler;
517 -1 if trap could not be handled.
// C-linkage trap entry point: passes ts to handle_slow_trap() of the current
// thread and returns its result. The second (unnamed) parameter is unused.
519 extern "C" FIASCO_FASTCALL
521 thread_handle_trap(Trap_state *ts, unsigned)
523 return current_thread()->handle_slow_trap(ts);
// Resolves a page fault by inserting an identity mapping (physical address ==
// virtual address) for pfa into the thread's memory space, writable and
// user-accessible. A superpage is used when the CPU supports it, except on UX
// for addresses below SUPERPAGE_SIZE. The result is false only when
// v_insert() reports Insert_err_nomem.
533 Thread::handle_sigma0_page_fault(Address pfa)
537 // Check if mapping a superpage doesn't exceed the size of physical memory
538 if (Cpu::have_superpages()
539 // Some distributions do not allow to mmap below a certain threshold
540 // (like 64k on Ubuntu 8.04) so we cannot map a superpage at 0 if
542 && (!Config::Is_ux || !(pfa < Config::SUPERPAGE_SIZE)))
// Superpage case: align the address down to a superpage boundary.
544 pfa &= Config::SUPERPAGE_MASK;
545 size = Config::SUPERPAGE_SIZE;
// Regular case: align the address down to a page boundary.
549 pfa &= Config::PAGE_MASK;
550 size = Config::PAGE_SIZE;
553 return mem_space()->v_insert(Mem_space::Phys_addr(pfa), Mem_space::Addr(pfa),
554 Mem_space::Size(size),
555 Mem_space::Page_writable
556 | Mem_space::Page_user_accessible)
557 != Mem_space::Insert_err_nomem;
560 PRIVATE static inline
562 Thread::save_fpu_state_to_utcb(Trap_state *, Utcb *)
565 /* return 1 if this exception should be sent, return 0 if not
// Architecture-specific filter deciding whether the trap in ts is to be sent
// as an exception: returns 1 to send it, 0 to suppress it. For int3 traps that
// are sent, the saved ip is moved back by one byte.
567 PUBLIC inline NEEDS["trap_state.h"]
569 Thread::send_exception_arch(Trap_state *ts)
571 // Do not send exception IPC but return 'not for us' if thread is a normal
572 // thread (not alien) and it's a debug trap,
573 // debug traps for aliens are always reflected as exception IPCs
574 if (!(state() & Thread_alien)
575 && (ts->_trapno == 1 || ts->_trapno == 3))
576 return 0; // we do not handle this
578 if (ts->_trapno == 3)
// Thread_dis_alien suppresses exactly one int3: the flag is cleared and
// the trap is not reflected.
580 if (state() & Thread_dis_alien)
582 state_del(Thread_dis_alien);
583 return 0; // no exception
586 // set IP back on the int3 instruction
587 ts->ip(ts->ip() - 1);
590 return 1; // make it an exception
594 //----------------------------------------------------------------------------
595 IMPLEMENTATION [ia32 || amd64]:
599 Thread::vcpu_resume_user_arch()
602 //----------------------------------------------------------------------------
// Architecture hook for resuming vCPU user mode: calls
// switch_gdt_user_entries() for this thread.
607 Thread::vcpu_resume_user_arch()
609 switch_gdt_user_entries(this);
612 //----------------------------------------------------------------------------
613 IMPLEMENTATION [ux || amd64]:
// Setter counterpart of user_ip(): branches on exception_triggered() and
// otherwise works on the frame returned by regs(). The statements that store
// ip are on lines not visible in this excerpt.
615 IMPLEMENT inline NEEDS[Thread::exception_triggered]
617 Thread::user_ip(Mword ip)
619 if (exception_triggered())
623 Entry_frame *r = regs();
628 //----------------------------------------------------------------------------
629 IMPLEMENTATION [ia32]:
631 #include "utcb_init.h"
// ia32 variant: initialises both _gs and _fs with the selector returned by
// Utcb_init::utcb_segment().
633 PROTECTED inline NEEDS["utcb_init.h"]
635 Thread::arch_init_seg()
637 _gs = _fs = Utcb_init::utcb_segment();
640 //----------------------------------------------------------------------------
641 IMPLEMENTATION [amd64]:
645 Thread::arch_init_seg()
649 //----------------------------------------------------------------------------
650 IMPLEMENTATION [(ia32,amd64,ux) && !io]:
654 Thread::handle_io_page_fault(Trap_state *, bool)
659 Thread::get_ioport(Address /*eip*/, Trap_state * /*ts*/,
660 unsigned * /*port*/, unsigned * /*size*/)
664 //---------------------------------------------------------------------------
665 IMPLEMENTATION[ia32 || amd64]:
668 #include "fpu_alloc.h"
669 #include "fpu_state.h"
671 #include "globalconfig.h"
673 #include "simpleio.h"
674 #include "static_init.h"
675 #include "terminate.h"
// Storage for the static members Thread::int3_handler and Thread::dbg_stack.
677 int (*Thread::int3_handler)(Trap_state*);
678 Per_cpu<Thread::Dbg_stack> DEFINE_PER_CPU Thread::dbg_stack;
// Registers int3_handler_init as a static initializer with priority
// KDB_INIT_PRIO.
680 STATIC_INITIALIZER_P (int3_handler_init, KDB_INIT_PRIO);
// Installs Thread::handle_int3 as the int3 handler.
// NOTE(review): presumably the body of int3_handler_init -- the enclosing
// function header is not visible here.
686 Thread::set_int3_handler(Thread::handle_int3);
// Returns the selector built from Gdt::gdt_code_user combined with
// Gdt::Selector_user.
689 IMPLEMENT static inline NEEDS ["gdt.h"]
691 Thread::exception_cs()
693 return Gdt::gdt_code_user | Gdt::Selector_user;
697 * The ia32 specific part of the thread constructor.
// Initialises the entry frame of a new thread: flags, code segment and stack
// segment. (The function header is not visible in this excerpt.)
699 PRIVATE inline NEEDS ["gdt.h"]
703 // clear out user regs that can be returned from the thread_ex_regs
704 // system call to prevent covert channel
705 Entry_frame *r = regs();
// With I/O protection enabled the thread starts with EFLAGS_IOPL_K, otherwise
// with EFLAGS_IOPL_U; EFLAGS_IF and the constant 2 are set in both cases.
706 if (Config::enable_io_protection)
707 r->flags(EFLAGS_IOPL_K | EFLAGS_IF | 2); // ei
709 r->flags(EFLAGS_IOPL_U | EFLAGS_IF | 2); // XXX iopl=kernel
// Same selector value that exception_cs() returns.
710 r->cs(Gdt::gdt_code_user | Gdt::Selector_user);
711 r->ss(Gdt::gdt_data_user | Gdt::Selector_user);
714 // after cs initialisation as ip() requires proper cs
721 /** A C interface for Context::handle_fpu_trap, callable from assembly code.
724 // The "FPU not available" trap entry point
// Returns whatever switchin_fpu() of the current thread returns.
727 thread_handle_fputrap()
731 return current_thread()->switchin_fpu();
// Replaces the int3 hook stored in the static member int3_handler.
736 Thread::set_int3_handler(int (*handler)(Trap_state *ts))
738 int3_handler = handler;
742 * Default handler for int3 extensions if JDB is disabled. If the JDB is
743 * available, Jdb::handle_int3_threadctx is called instead.
744 * @return 0 not handled, wait for user response
745 * 1 successfully handled
// Decodes the bytes that follow an int3 and performs the requested debug
// output (character, strings, hex and decimal numbers) or watchdog operation.
// All reads of the caller's memory go through Mem_space::peek().
749 Thread::handle_int3(Trap_state *ts)
751 Mem_space *s = current_mem_space();
// Non-zero when the trap came from a code segment with RPL != 0.
752 int from_user = ts->cs() & 3;
753 Address ip = ts->ip();
// The byte at the saved ip selects the operation.
754 Unsigned8 todo = s->peek((Unsigned8*)ip, from_user);
761 case 0xeb: // jmp == enter_kdebug()
// The byte after the jmp opcode is the length of the text that starts at
// ip + 2.
762 len = s->peek((Unsigned8*)(ip+1), from_user);
763 str = (Unsigned8*)(ip + 2);
769 putchar(s->peek(str++, from_user));
774 case 0x90: // nop == l4kd_display()
// A nop must be followed by a jmp whose length byte is positive; the text
// then starts at ip + 3.
775 if ( s->peek((Unsigned8*)(ip+1), from_user) != 0xeb /*jmp*/
776 || (len = s->peek((Unsigned8*)(ip+2), from_user)) <= 0)
779 str = (Unsigned8*)(ip + 3);
781 putchar(s->peek(str++, from_user));
// Re-read the selector from the byte at ip + 1; the cases below switch on it.
785 todo = s->peek((Unsigned8*)(ip+1), from_user);
788 case 0: // l4kd_outchar
789 putchar(ts->value() & 0xff);
791 case 1: // l4kd_outnstring
792 str = (Unsigned8*)ts->value();
794 for(; len > 0; len--)
795 putchar(s->peek(str++, from_user));
797 case 2: // l4kd_outstr
// Characters are read until a zero byte is found.
798 str = (Unsigned8*)ts->value();
799 for (; (c=s->peek(str++, from_user)); )
802 case 5: // l4kd_outhex32
803 printf("%08lx", ts->value() & 0xffffffff);
805 case 6: // l4kd_outhex20
806 printf("%05lx", ts->value() & 0xfffff);
808 case 7: // l4kd_outhex16
809 printf("%04lx", ts->value() & 0xffff);
811 case 8: // l4kd_outhex12
812 printf("%03lx", ts->value() & 0xfff);
814 case 9: // l4kd_outhex8
815 printf("%02lx", ts->value() & 0xff);
817 case 11: // l4kd_outdec
818 printf("%ld", ts->value());
// Watchdog sub-functions are selected by value2().
821 switch (ts->value2())
825 Watchdog::user_enable();
829 Watchdog::user_disable();
832 // user takes over the control of watchdog and is from now on
833 // responsible for calling "I'm still alive" events (function 5)
834 Watchdog::user_takeover_control();
837 // user returns control of watchdog to kernel
838 Watchdog::user_giveback_control();
// Rewrites a page fault whose fault address lies inside the IDT into the trap
// number of the IDT slot that was touched (slots are 8 bytes apart).
866 Thread::check_f00f_bug(Trap_state *ts)
868 // If we page fault on the IDT, it must be because of the F00F bug.
869 // Figure out exception slot and raise the corresponding exception.
870 // XXX: Should we also modify the error code?
871 if (ts->_trapno == 14 // page fault?
872 && ts->_cr2 >= Idt::idt()
873 && ts->_cr2 < Idt::idt() + Idt::_idt_max * 8)
874 ts->_trapno = (ts->_cr2 - Idt::idt()) / 8;
// Handles a supervisor-mode page fault on the byte directly behind the I/O
// bitmap (Io_bitmap + Io_port_max / 8) by mapping the delimiter page at that
// address in the thread's memory space.
880 Thread::check_io_bitmap_delimiter_fault(Trap_state *ts)
882 // check for page fault at the byte following the IO bitmap
883 if (ts->_trapno == 14 // page fault?
884 && (ts->_err & 4) == 0 // in supervisor mode?
885 && ts->ip() < Kmem::mem_user_max // delimiter byte accessed?
886 && (ts->_cr2 == Mem_layout::Io_bitmap + Mem_layout::Io_port_max / 8))
888 // page fault in the first byte following the IO bitmap
889 // map in the cpu_page read_only at the place
// One page is inserted; its physical address is obtained by translating
// Kmem::io_bitmap_delimiter_page() with virt_to_phys_s0().
890 Mem_space::Status result =
891 mem_space()->v_insert(
892 Mem_space::Phys_addr(mem_space()->virt_to_phys_s0((void*)Kmem::io_bitmap_delimiter_page())),
893 Mem_space::Addr::create(Mem_layout::Io_bitmap + Mem_layout::Io_port_max / 8),
894 Mem_space::Size::create(Config::PAGE_SIZE),
899 case Mem_space::Insert_ok:
901 case Mem_space::Insert_err_nomem:
902 // kernel failure, translate this into a general protection
903 // violation and hope that somebody handles it
908 // no other error code possible
// Emits warnings when trap 6 or 13 with a zero error code was caused by the
// two-byte sequence 0x0f 0x34 at eip. It only reports; per the caller's
// comment the normal exception handling continues afterwards.
918 Thread::handle_sysenter_trap(Trap_state *ts, Address eip, bool from_user)
921 ((ts->_trapno == 6 || ts->_trapno == 13)
922 && (ts->_err & 0xffff) == 0
923 && (eip < Kmem::mem_user_max - 2)
924 && (mem_space()->peek((Unsigned16*) eip, from_user)) == 0x340f))
926 // somebody tried to do sysenter on a machine without support for it
927 WARN("tcb=%p killed:\n"
928 "\033[1;31mSYSENTER not supported on this machine\033[0m",
// Distinguish a mis-configured SYSENTER MSR from a CPU without the feature.
931 if (Cpu::have_sysenter())
932 // GP exception if sysenter is not correctly set up..
933 WARN("MSR_SYSENTER_CS: %llx", Cpu::rdmsr(MSR_SYSENTER_CS));
935 // We get UD exception on processors without SYSENTER/SYSEXIT.
936 WARN("SYSENTER/EXIT not available.");
// A trap counts as privileged when the thread's space reports I/O
// privileges; the trap state argument is not consulted.
946 Thread::trap_is_privileged(Trap_state *)
947 { return space()->has_io_privileges(); }
// Executes wrmsr on behalf of the trapping thread, taking all three operands
// from the trap state: value() and value3() form the data, value2() selects
// the MSR (eax, edx and ecx respectively, per the comment below).
951 Thread::do_wrmsr_in_kernel(Trap_state *ts)
953 // do "wrmsr (msr[ecx], edx:eax)" in kernel
954 Cpu::wrmsr (ts->value(), ts->value3(), ts->value2());
// Executes rdmsr on behalf of the trapping thread: reads the MSR selected by
// value2() and stores the low 32 bits through value() and the high 32 bits
// through value3().
959 Thread::do_rdmsr_in_kernel(Trap_state *ts)
961 // do "rdmsr (msr[ecx], edx:eax)" in kernel
962 Unsigned64 msr = Cpu::rdmsr(ts->value2());
963 ts->value((Unsigned32) msr);
964 ts->value3((Unsigned32) (msr >> 32));
// Fallback used when no nested trap handler is installed: prints the thread
// pointer, the trapping ip and the trap number, then polls the kernel console
// for a key press.
969 Thread::handle_not_nested_trap(Trap_state *ts)
971 // no kernel debugger present
// The format string is built from adjacent literals and the L4_PTR_FMT macro.
// They must be separated by whitespace: since C++11 a literal directly
// followed by an identifier is lexed as a user-defined-literal suffix.
972 printf(" %p IP=" L4_PTR_FMT " Trap=%02lx [Ret/Esc]\n",
973 this, ts->ip(), ts->_trapno);
976 // cannot use normal getchar because it may block with hlt and irq's
// Non-blocking poll: getchar(false) yields -1 while no character is available.
978 while ((r=Kconsole::console()->getchar(false)) == -1)
989 Thread::sys_control_arch(Utcb *)
995 //---------------------------------------------------------------------------
996 IMPLEMENTATION [(ia32 |amd64) & !(debug | kdb)]:
998 /** There is no nested trap handler if both jdb and kdb are disabled.
999 * Important: We don't need the nested_handler_stack here.
1001 PRIVATE static inline
1003 Thread::call_nested_trap_handler(Trap_state *)