3 * Shared between UX and native IA32.
5 INTERFACE [ia32,amd64,ux]:
7 #include "trap_state.h"
11 EXTENSION class Thread
15 * Return code segment used for exception reflection to user mode
17 static Mword exception_cs();
21 Unsigned16 _idt_limit;
23 static Trap_state::Handler nested_trap_handler FIASCO_FASTCALL;
27 //----------------------------------------------------------------------------
28 INTERFACE [ia32,amd64]:
32 EXTENSION class Thread
35 static int (*int3_handler)(Trap_state*);
39 //----------------------------------------------------------------------------
40 IMPLEMENTATION [ia32,amd64,ux]:
48 #include "mem_layout.h"
51 #include "processor.h" // for cli/sti
53 #include "std_macros.h"
56 #include "trap_state.h"
57 #include "vmem_alloc.h"
59 Trap_state::Handler Thread::nested_trap_handler FIASCO_FASTCALL;
63 : Receiver(&_thread_lock),
64 Sender(0), // select optimized version of constructor
65 _pager(Thread_ptr::Invalid),
66 _exc_handler(Thread_ptr::Invalid),
69 //assert (current() == thread_lock()->lock_owner());
70 assert (state() == Thread_invalid);
74 if (Config::stack_depth)
75 std::memset((char*)this + sizeof(Thread), '5',
76 Config::thread_block_size-sizeof(Thread)-64);
83 *reinterpret_cast<void(**)()> (--_kernel_sp) = user_invoke;
87 state_add_dirty(Thread_dead | Thread_suspended);
89 // ok, we're ready to go!
// Read accessor for the thread's instruction pointer.
// While exception_triggered() reports true the value comes from _exc_cont;
// otherwise it is the IP stored in the frame returned by regs().
92 IMPLEMENT inline NEEDS[Thread::exception_triggered]
94 Thread::user_ip() const
95 { return exception_triggered()?_exc_cont.ip():regs()->ip(); }
// Read accessor: returns the flags value stored in the frame returned by
// regs().
99 Thread::user_flags() const
100 { return regs()->flags(); }
106 Thread::is_privileged_for_debug(Trap_state * /*ts*/)
109 return ((ts->flags() & EFLAGS_IOPL) == EFLAGS_IOPL_U);
115 /** Check if the pagefault occurred at a special place: At some places in the
116 kernel we want to ensure that a specific address is mapped. The regular
117 case is "mapped", the exception or slow case is "not mapped". The fastest
118 way to check this is to touch into the memory. If there is no mapping for
119 the address we get a pagefault. Now the pagefault exception handler can
120 recognize that situation by scanning the code. The trick is that the
121 assembler instruction "andl $0xffffffff, %ss:(%ecx)" _clears_ the carry
122 flag normally (see Intel reference manual). The pager wants to inform the
123 code that there was a pagefault and therefore _sets_ the carry flag. So
124 the code has only to check if the carry flag is set. If yes, there was
125 a pagefault at this instruction.
126 @param regs return frame of the faulting context (its ip() is inspected) */
// Inspect the instruction at the faulting IP of `regs` and, when it matches
// one of the known probe encodings, patch the frame (IP, flags and, for the
// first pattern, one saved register slot) so that the interrupted code can
// detect the fault through the CPU flags.
// NOTE(review): several lines of this function are not visible in this
// excerpt (e.g. what happens to new_ip after a REX.W prefix is seen, and the
// return statements) -- confirm against the full source.
129 Thread::pagein_tcb_request(Return_frame *regs)
131 unsigned long new_ip = regs->ip();
// First byte at the faulting IP: 0x48 is treated as a REX.W prefix.
132 if (*(Unsigned8*)new_ip == 0x48) // REX.W
// Two bytes read at new_ip, used for the opcode pattern match below.
135 register Unsigned16 op = *(Unsigned16*)new_ip;
136 //LOG_MSG_3VAL(current(),"TCB", op, new_ip, 0);
// Pattern 1: low byte is 0x8b and bits 14..15 of the 16-bit word are clear.
137 if ((op & 0xc0ff) == 0x8b) // Context::is_tcb_mapped() and Context::state()
// Continue execution behind the two matched bytes.
139 regs->ip(new_ip + 2);
// Pointer into the saved-register area, computed relative to the frame;
// reg[0], reg[-1] and reg[-2] are the slots indexed by (op >> 11) below.
147 Mword *reg = ((Mword*)regs) - 2 - Return_frame::Pf_ax_offset;
149 LOG_MSG_3VAL(current(),"TCB", op, regs->ip(), (Mword)reg);
150 LOG_MSG_3VAL(current(),"TCBX", reg[-3], reg[-4], reg[-5]);
151 LOG_MSG_3VAL(current(),"TCB0", reg[0], reg[-1], reg[-2]);
152 LOG_MSG_3VAL(current(),"TCB1", reg[1], reg[2], reg[3]);
// Only the three register indices 0..2 are supported.
154 assert((op >> 11) <= 2);
// Zero the saved register slot selected by (op >> 11).
155 reg[-(op>>11)] = 0; // (op>>11)==0 => eax, ==1 => ecx, ==2 => edx
157 // tell program that a pagefault occurred we cannot handle
158 regs->flags(regs->flags() | 0x41); // set carry and zero flag in EFLAGS
// Pattern 2: a specific four-byte instruction encoding; skip it and report
// the fault through the carry flag only.
161 else if (*(Unsigned32*)regs->ip() == 0xff01f636) // used in shortcut.S
163 regs->ip(regs->ip() + 4);
164 regs->flags(regs->flags() | 1); // set carry flag in EFLAGS
// C-linkage entry point (FIASCO_FASTCALL calling convention) that forwards
// to restore_exc_state() on the thread returned by current_thread().
172 extern "C" FIASCO_FASTCALL
174 thread_restore_exc_state()
176 current_thread()->restore_exc_state();
181 Thread::print_page_fault_error(Mword e)
187 * The global trap handler switch.
188 * This function handles CPU-exception reflection, emulation of CPU
189 * instructions (LIDT, WRMSR, RDMSR), int3 debug messages,
190 * kernel-debugger invocation, and thread crashes (if a trap cannot be
192 * @param ts trap state
193 * @return 0 if trap has been consumed by handler;
194 * -1 if trap could not be handled.
198 Thread::handle_slow_trap(Trap_state *ts)
201 int from_user = ts->cs() & 3;
203 if (EXPECT_FALSE(ts->_trapno == 0xee)) //debug IPI
205 Ipi::eoi(Ipi::Debug);
209 if (from_user && (state() & Thread_vcpu_user_mode) && send_exception(ts))
212 // XXX We might be forced to raise an exception. In this case, our return
213 // CS:IP points to leave_by_trigger_exception() which will trigger the
214 // exception just before returning to userland. But if we were inside an
215 // IPC while we were ex-regs'd, we will generate the 'exception after the
216 // syscall' _before_ we leave the kernel.
217 if (ts->_trapno == 13 && (ts->_err & 6) == 6)
218 goto check_exception;
221 // XXX: need to do in a different way, if on debug stack e.g.
223 if (EXPECT_FALSE(!in_context_area((void*)Proc::stack_pointer())))
224 goto generic_debug; // we're in the GDB stub or in jdb
225 // -- let generic handler handle it
230 if (!check_trap13_kernel (ts))
233 if (EXPECT_FALSE(!from_user))
235 // get also here if a pagefault was not handled by the user level pager
236 if (ts->_trapno == 14)
237 goto check_exception;
239 goto generic_debug; // we were in kernel mode -- nothing to emulate
242 if (EXPECT_FALSE(ts->_trapno == 2))
243 goto generic_debug; // NMI always enters kernel debugger
245 if (EXPECT_FALSE(ts->_trapno == 0xffffffff))
246 goto generic_debug; // debugger interrupt
250 // so we were in user mode -- look for something to emulate
252 // We continue running with interrupts off -- no sti() here. But
253 // interrupts may be enabled by the pagefault handler if we get a
254 // pagefault in peek_user().
256 // Set up exception handling. If we suffer an un-handled user-space
257 // page fault, kill the thread.
260 if (EXPECT_FALSE ((error = setjmp(pf_recovery)) != 0) )
263 "\033[1mUnhandled page fault, code=%08x\033[m\n",
268 _recover_jmpbuf = &pf_recovery;
270 switch (handle_io_page_fault(ts, from_user))
272 case 1: goto success;
278 // check for "invalid opcode" exception
279 if (EXPECT_FALSE (Config::Kip_syscalls && ts->_trapno == 6))
282 // Check "lock; nop" opcode
283 if (mem_space()->peek ((Unsigned16*) ip, from_user) == 0x90f0)
285 ts->consume_instruction(2);
286 ts->value(task()->map_kip());
292 // just print out some warning, we do the normal exception handling
293 handle_sysenter_trap(ts, ip, from_user);
295 // check for general protection exception
296 if (ts->_trapno == 13 && (ts->_err & 0xffff) == 0)
298 // find out if we are a privileged task
299 bool is_privileged = trap_is_privileged (ts);
301 // check for "lidt (%eax)"
303 (ip < Kmem::mem_user_max - 4 &&
304 (mem_space()->peek((Mword*) ip, from_user) & 0xffffff) == 0x18010f))
306 // emulate "lidt (%eax)"
309 if (ts->value() >= Kmem::mem_user_max - 6)
313 = mem_space()->peek((Idt_entry**)(ts->value() + 2), from_user);
314 Unsigned16 limit = mem_space()->peek ((Unsigned16*) ts->value(), from_user);
316 if ((Address)idt >= (Address)Kmem::mem_user_max-limit-1)
319 // OK; store descriptor
321 _idt_limit = (limit + 1) / sizeof(Idt_entry);
323 // consume instruction and continue
324 ts->consume_instruction(3);
329 // check for "wrmsr (%eax)"
332 && (ip < Kmem::mem_user_max - 2)
333 && (mem_space()->peek((Unsigned16*) ip, from_user)) == 0x300f
334 && (Cpu::cpus.cpu(cpu()).can_wrmsr())))
336 printf("Detected wrmsr at %lx\n", ip);
339 do_wrmsr_in_kernel (ts);
341 // consume instruction and continue
342 ts->consume_instruction(2);
348 // check for "rdmsr (%eax)"
351 && (ip < Kmem::mem_user_max - 2)
352 && (mem_space()->peek((Unsigned16*) ip, from_user)) == 0x320f
353 && (Cpu::cpus.cpu(cpu()).can_wrmsr())))
355 printf("Detected rdmsr at %lx\n", ip);
358 do_rdmsr_in_kernel(ts);
360 // consume instruction and continue
361 ts->consume_instruction(2);
371 // send exception IPC if requested
372 if (send_exception(ts))
375 // let's see if we have a trampoline to invoke
376 if (ts->_trapno < 0x20 && ts->_trapno < _idt_limit)
378 Idt_entry e = mem_space()->peek(_idt + ts->_trapno, 1);
380 if ((e.ist() & 0xe0) == 0x00
381 && (e.access() & 0x1f) == 0x0f) // gate descriptor ok?
383 Address handler = e.offset();
386 && handler < Kmem::mem_user_max // in user space?
387 && ts->sp() <= Kmem::mem_user_max
388 && ts->sp() > 5 * sizeof (Mword)) // enough space on user stack?
390 // OK, reflect the trap to user mode
391 if (1 /* !raise_exception (ts, handler) */)
393 // someone interfered and changed our state
394 assert (state() & Thread_cancel);
395 state_del(Thread_cancel);
398 goto success; // we've consumed the trap
403 // backward compatibility cruft: check for those insane "int3" debug
404 // messaging command sequences
405 if (ts->_trapno == 3 && is_privileged_for_debug(ts))
407 if (int3_handler && int3_handler(ts))
413 // privileged tasks also may invoke the kernel debugger with a debug
415 if (ts->_trapno == 1 && is_privileged_for_debug (ts))
420 // can't handle trap -- kill the thread
422 "\033[1mUnhandled trap \033[m\n",
426 if (Config::warn_level >= Warning)
429 if (Config::conservative)
430 kdb_ke("thread killed");
441 if (!nested_trap_handler)
442 return handle_not_nested_trap(ts);
444 return call_nested_trap_handler(ts);
448 * The low-level page fault handler called from entry.S. We're invoked with
449 * interrupts turned off. Apart from turning on interrupts in almost
450 * all cases (except for kernel page faults in TCB area), just forwards
451 * the call to Thread::handle_page_fault().
452 * @param pfa page-fault virtual address
453 * @param error_code CPU error code
454 * @return true if page fault could be resolved, false otherwise
456 extern "C" FIASCO_FASTCALL
458 thread_page_fault(Address pfa, Mword error_code, Address ip, Mword flags,
462 // XXX: need to do in a different way, if on debug stack e.g.
464 // If we're in the GDB stub -- let generic handler handle it
465 if (EXPECT_FALSE (!in_context_area((void*)Proc::stack_pointer())))
469 // Pagefault in user mode or interrupts were enabled
470 if (PF::is_usermode_error(error_code))
472 if (current_thread()->vcpu_pagefault(pfa, error_code, ip))
477 else if(flags & EFLAGS_IF)
480 // Pagefault in kernel mode and interrupts were disabled
483 // page fault in kernel memory region
484 if (Kmem::is_kmem_page_fault(pfa, error_code))
486 // We've interrupted a context in the kernel with disabled interrupts,
487 // the page fault address is in the kernel region, the error code is
488 // "not mapped" (as opposed to "access error"), and the region is
489 // actually valid (that is, mapped in Kmem's shared page directory,
490 // just not in the currently active page directory)
493 else if (!Config::conservative &&
494 !Kmem::is_kmem_page_fault(pfa, error_code))
496 // No error -- just enable interrupts.
501 // Error: We interrupted a cli'd kernel context touching kernel space
502 if (!Thread::log_page_fault())
503 printf("*P[%lx,%lx,%lx] ", pfa, error_code & 0xffff, ip);
505 kdb_ke ("page fault in cli mode");
509 return current_thread()->handle_page_fault(pfa, error_code, ip, regs);
512 /** The catch-all trap entry point. Called by assembly code when a
513 CPU trap (that's not specially handled, such as system calls) occurs.
514 Just forwards the call to Thread::handle_slow_trap().
515 @param ts trap state
516 @return 0 if trap has been consumed by handler;
517 -1 if trap could not be handled.
519 extern "C" FIASCO_FASTCALL
521 thread_handle_trap(Trap_state *ts, unsigned)
523 return current_thread()->handle_slow_trap(ts);
// Resolve a page fault at `pfa` by inserting a mapping into mem_space()
// whose virtual address equals its physical address (pfa is passed as both
// Phys_addr and Addr). The mapping is writable and user accessible.
// A superpage-sized mapping is used when the CPU offers superpages and, on
// UX, pfa is not below SUPERPAGE_SIZE; otherwise a single page is mapped.
// The result is false only when v_insert() reports Insert_err_nomem.
533 Thread::handle_sigma0_page_fault(Address pfa)
537 // Check if mapping a superpage doesn't exceed the size of physical memory
538 if (Cpu::have_superpages()
539 // Some distributions do not allow to mmap below a certain threshold
540 // (like 64k on Ubuntu 8.04) so we cannot map a superpage at 0 if
542 && (!Config::Is_ux || !(pfa < Config::SUPERPAGE_SIZE)))
// Superpage case: round the fault address down to a superpage boundary.
544 pfa &= Config::SUPERPAGE_MASK;
545 size = Config::SUPERPAGE_SIZE;
// Fallback: round the fault address down to a normal page boundary.
549 pfa &= Config::PAGE_MASK;
550 size = Config::PAGE_SIZE;
// Any status other than Insert_err_nomem counts as success.
553 return mem_space()->v_insert(Mem_space::Phys_addr(pfa), Mem_space::Addr(pfa),
554 Mem_space::Size(size),
555 Mem_space::Page_writable
556 | Mem_space::Page_user_accessible)
557 != Mem_space::Insert_err_nomem;
562 Thread::access_utcb() const
565 PRIVATE static inline
567 Thread::save_fpu_state_to_utcb(Trap_state *, Utcb *)
570 /* return 1 if this exception should be sent, return 0 if not
// Architecture-specific filter deciding whether the trap in `ts` is turned
// into an exception: returns 1 to send it, 0 to leave it to other handling.
// May modify the thread state (clears Thread_dis_alien) and move ts->ip()
// back by one byte for trap 3.
572 PUBLIC inline NEEDS["trap_state.h"]
574 Thread::send_exception_arch(Trap_state *ts)
576 // Do not send exception IPC but return 'not for us' if thread is a normal
577 // thread (not alien) and it's a debug trap,
578 // debug traps for aliens are always reflected as exception IPCs
579 if (!(state() & Thread_alien)
580 && (ts->_trapno == 1 || ts->_trapno == 3))
581 return 0; // we do not handle this
// From here on, a trap 1 or 3 can only come from a thread with Thread_alien
// set.
583 if (ts->_trapno == 3)
// A pending Thread_dis_alien flag is cleared and this one trap 3 is not
// reported.
585 if (state() & Thread_dis_alien)
587 state_del(Thread_dis_alien);
588 return 0; // no exception
591 // set IP back on the int3 instruction
592 ts->ip(ts->ip() - 1);
595 return 1; // make it an exception
599 //----------------------------------------------------------------------------
600 IMPLEMENTATION [ia32 || amd64]:
604 Thread::vcpu_resume_user_arch()
607 //----------------------------------------------------------------------------
612 Thread::vcpu_resume_user_arch()
614 switch_gdt_user_entries(this);
617 //----------------------------------------------------------------------------
618 IMPLEMENTATION [ux || amd64]:
620 IMPLEMENT inline NEEDS[Thread::exception_triggered]
622 Thread::user_ip(Mword ip)
624 if (exception_triggered())
628 Entry_frame *r = regs();
634 //----------------------------------------------------------------------------
635 IMPLEMENTATION [(ia32,amd64,ux) && !io]:
639 Thread::handle_io_page_fault(Trap_state *, bool)
644 Thread::get_ioport(Address /*eip*/, Trap_state * /*ts*/,
645 unsigned * /*port*/, unsigned * /*size*/)
649 //---------------------------------------------------------------------------
650 IMPLEMENTATION[ia32 || amd64]:
653 #include "fpu_alloc.h"
654 #include "fpu_state.h"
656 #include "globalconfig.h"
658 #include "simpleio.h"
659 #include "static_init.h"
660 #include "terminate.h"
661 #include "utcb_init.h"
663 int (*Thread::int3_handler)(Trap_state*);
664 Per_cpu<Thread::Dbg_stack> DEFINE_PER_CPU Thread::dbg_stack;
666 STATIC_INITIALIZER_P (int3_handler_init, KDB_INIT_PRIO);
672 Thread::set_int3_handler(Thread::handle_int3);
// Code-segment selector value for user mode: the user code GDT entry
// combined with the user selector bits.
675 IMPLEMENT static inline NEEDS ["gdt.h"]
677 Thread::exception_cs()
679 return Gdt::gdt_code_user | Gdt::Selector_user;
683 * The ia32 specific part of the thread constructor.
685 PRIVATE inline NEEDS ["gdt.h"]
689 // clear out user regs that can be returned from the thread_ex_regs
690 // system call to prevent covert channel
691 Entry_frame *r = regs();
692 if (Config::enable_io_protection)
693 r->flags(EFLAGS_IOPL_K | EFLAGS_IF | 2); // ei
695 r->flags(EFLAGS_IOPL_U | EFLAGS_IF | 2); // XXX iopl=kernel
696 r->cs(Gdt::gdt_code_user | Gdt::Selector_user);
697 r->ss(Gdt::gdt_data_user | Gdt::Selector_user);
700 // after cs initialisation as ip() requires proper cs
703 #ifdef CONFIG_HANDLE_SEGMENTS
704 _gs = _fs = Utcb_init::utcb_segment();
706 Cpu::set_gs(Utcb_init::utcb_segment());
707 Cpu::set_fs(Utcb_init::utcb_segment());
712 /** A C interface for Context::handle_fpu_trap, callable from assembly code.
715 // The "FPU not available" trap entry point
718 thread_handle_fputrap()
722 return current_thread()->switchin_fpu();
// Install `handler` as the int3 hook by storing it in the static
// int3_handler pointer.
727 Thread::set_int3_handler(int (*handler)(Trap_state *ts))
729 int3_handler = handler;
733 * Default handler for int3 extensions if JDB is disabled. If the JDB is
734 * available, Jdb::handle_int3_threadctx is called instead.
735 * @return 0 not handled, wait for user response
736 * 1 successfully handled
740 Thread::handle_int3(Trap_state *ts)
742 Mem_space *s = current_mem_space();
743 int from_user = ts->cs() & 3;
744 Address ip = ts->ip();
745 Unsigned8 todo = s->peek((Unsigned8*)ip, from_user);
752 case 0xeb: // jmp == enter_kdebug()
753 len = s->peek((Unsigned8*)(ip+1), from_user);
754 str = (Unsigned8*)(ip + 2);
760 putchar(s->peek(str++, from_user));
765 case 0x90: // nop == l4kd_display()
766 if ( s->peek((Unsigned8*)(ip+1), from_user) != 0xeb /*jmp*/
767 || (len = s->peek((Unsigned8*)(ip+2), from_user)) <= 0)
770 str = (Unsigned8*)(ip + 3);
772 putchar(s->peek(str++, from_user));
776 todo = s->peek((Unsigned8*)(ip+1), from_user);
779 case 0: // l4kd_outchar
780 putchar(ts->value() & 0xff);
782 case 1: // l4kd_outnstring
783 str = (Unsigned8*)ts->value();
785 for(; len > 0; len--)
786 putchar(s->peek(str++, from_user));
788 case 2: // l4kd_outstr
789 str = (Unsigned8*)ts->value();
790 for (; (c=s->peek(str++, from_user)); )
793 case 5: // l4kd_outhex32
794 printf("%08lx", ts->value() & 0xffffffff);
796 case 6: // l4kd_outhex20
797 printf("%05lx", ts->value() & 0xfffff);
799 case 7: // l4kd_outhex16
800 printf("%04lx", ts->value() & 0xffff);
802 case 8: // l4kd_outhex12
803 printf("%03lx", ts->value() & 0xfff);
805 case 9: // l4kd_outhex8
806 printf("%02lx", ts->value() & 0xff);
808 case 11: // l4kd_outdec
809 printf("%ld", ts->value());
812 switch (ts->value2())
816 Watchdog::user_enable();
820 Watchdog::user_disable();
823 // user takes over the control of watchdog and is from now on
824 // responsible for calling "I'm still alive" events (function 5)
825 Watchdog::user_takeover_control();
828 // user returns control of watchdog to kernel
829 Watchdog::user_giveback_control();
// Rewrite a page fault whose fault address (ts->_cr2) lies inside the IDT
// into the trap number of the IDT slot that was hit. Entries are taken to be
// 8 bytes wide, so the slot index is (cr2 - IDT base) / 8. Trap states that
// do not match are left unchanged.
857 Thread::check_f00f_bug(Trap_state *ts)
859 // If we page fault on the IDT, it must be because of the F00F bug.
860 // Figure out exception slot and raise the corresponding exception.
861 // XXX: Should we also modify the error code?
862 if (ts->_trapno == 14 // page fault?
863 && ts->_cr2 >= Idt::idt()
864 && ts->_cr2 < Idt::idt() + Idt::_idt_max * 8)
865 ts->_trapno = (ts->_cr2 - Idt::idt()) / 8;
871 Thread::check_io_bitmap_delimiter_fault(Trap_state *ts)
873 // check for page fault at the byte following the IO bitmap
874 if (ts->_trapno == 14 // page fault?
875 && (ts->_err & 4) == 0 // in supervisor mode?
876 && ts->ip() < Kmem::mem_user_max // delimiter byte accessed?
877 && (ts->_cr2 == Mem_layout::Io_bitmap + Mem_layout::Io_port_max / 8))
879 // page fault in the first byte following the IO bitmap
880 // map in the cpu_page read_only at the place
881 Mem_space::Status result =
882 mem_space()->v_insert(
883 Mem_space::Phys_addr(mem_space()->virt_to_phys_s0((void*)Kmem::io_bitmap_delimiter_page())),
884 Mem_space::Addr::create(Mem_layout::Io_bitmap + Mem_layout::Io_port_max / 8),
885 Mem_space::Size::create(Config::PAGE_SIZE),
890 case Mem_space::Insert_ok:
892 case Mem_space::Insert_err_nomem:
893 // kernel failure, translate this into a general protection
894 // violation and hope that somebody handles it
899 // no other error code possible
909 Thread::handle_sysenter_trap(Trap_state *ts, Address eip, bool from_user)
912 ((ts->_trapno == 6 || ts->_trapno == 13)
913 && (ts->_err & 0xffff) == 0
914 && (eip < Kmem::mem_user_max - 2)
915 && (mem_space()->peek((Unsigned16*) eip, from_user)) == 0x340f))
917 // somebody tried to do sysenter on a machine without support for it
918 WARN("tcb=%p killed:\n"
919 "\033[1;31mSYSENTER not supported on this machine\033[0m",
922 if (Cpu::have_sysenter())
923 // GP exception if sysenter is not correctly set up..
924 WARN("MSR_SYSENTER_CS: %llx", Cpu::rdmsr(MSR_SYSENTER_CS));
926 // We get UD exception on processors without SYSENTER/SYSEXIT.
927 WARN("SYSENTER/EXIT not available.");
// Privilege check used by the trap emulation code: the answer is whatever
// space()->has_io_privileges() reports; the trap state argument is unused.
937 Thread::trap_is_privileged(Trap_state *)
938 { return space()->has_io_privileges(); }
// Perform a WRMSR using values from the trap state: Cpu::wrmsr() is called
// with ts->value(), ts->value3() and ts->value2(), in that order.
// NOTE(review): presumably value()/value2()/value3() are the saved
// eax/ecx/edx, matching the comment below -- confirm in Trap_state.
942 Thread::do_wrmsr_in_kernel(Trap_state *ts)
944 // do "wrmsr (msr[ecx], edx:eax)" in kernel
945 Cpu::wrmsr (ts->value(), ts->value3(), ts->value2());
// Perform an RDMSR using values from the trap state: the MSR index is
// ts->value2(), and the 64-bit result is split across two trap-state values.
950 Thread::do_rdmsr_in_kernel(Trap_state *ts)
952 // do "rdmsr (msr[ecx], edx:eax)" in kernel
953 Unsigned64 msr = Cpu::rdmsr(ts->value2());
// Low 32 bits go to value(), high 32 bits to value3().
954 ts->value((Unsigned32) msr);
955 ts->value3((Unsigned32) (msr >> 32));
// Fallback trap report used when no nested trap handler is installed:
// prints this thread's address, the trap IP and the trap number, then polls
// the kernel console for a key press.
// NOTE(review): the handling of the key that was read is not visible in
// this excerpt.
960 Thread::handle_not_nested_trap(Trap_state *ts)
962 // no kernel debugger present
963 printf(" %p IP="L4_PTR_FMT" Trap=%02lx [Ret/Esc]\n",
964 this, ts->ip(), ts->_trapno);
967 // cannot use normal getchar because it may block with hlt and irq's
// Spin until getchar(false) returns something other than -1.
969 while ((r=Kconsole::console()->getchar(false)) == -1)
980 Thread::sys_control_arch(Utcb *)
986 //---------------------------------------------------------------------------
987 IMPLEMENTATION [(ia32 |amd64) & !(debug | kdb)]:
989 /** There is no nested trap handler if both jdb and kdb are disabled.
990 * Important: We don't need the nested_handler_stack here.
992 PRIVATE static inline
994 Thread::call_nested_trap_handler(Trap_state *)