 * (c) 2009 Adam Lackorzynski <adam@os.inf.tu-dresden.de>,
 *          Frank Mehnert <fm3@os.inf.tu-dresden.de>
 *          economic rights: Technische Universität Dresden (Germany)
 *
 * This file is part of TUD:OS and distributed under the terms of the
 * GNU General Public License 2.
 * Please see the COPYING-GPL-2 file for details.
17 #include "boot_paging.h"
// GDT selector (byte offset) of the 64-bit kernel code segment; see the
// fill_descriptor() call that populates base_gdt[KERNEL_CS_64 / 8] below.
// NOTE(review): non-static and non-const — presumably also referenced from
// assembly; confirm before changing.
20 unsigned KERNEL_CS_64 = 0x20; // XXX
// ---- x86 long-mode page-table entry (PTE, level 1) bits ----
33 INTEL_PTE_VALID = 0x0000000000000001LL,
34 INTEL_PTE_WRITE = 0x0000000000000002LL,
35 INTEL_PTE_USER = 0x0000000000000004LL,
36 INTEL_PTE_WTHRU = 0x00000008,
37 INTEL_PTE_NCACHE = 0x00000010,
38 INTEL_PTE_REF = 0x00000020, // "accessed" bit
39 INTEL_PTE_MOD = 0x00000040, // "dirty" bit
40 INTEL_PTE_GLOBAL = 0x00000100,
41 INTEL_PTE_AVAIL = 0x00000e00, // bits 9-11: available to software
42 INTEL_PTE_PFN = 0x000ffffffffff000LL, // physical frame number mask
// ---- page-directory entry (PDE, level 2) bits ----
44 INTEL_PDE_VALID = 0x0000000000000001LL,
45 INTEL_PDE_WRITE = 0x0000000000000002LL,
46 INTEL_PDE_USER = 0x0000000000000004LL,
47 INTEL_PDE_WTHRU = 0x00000008,
48 INTEL_PDE_NCACHE = 0x00000010,
49 INTEL_PDE_REF = 0x00000020,
50 INTEL_PDE_MOD = 0x00000040,
51 INTEL_PDE_SUPERPAGE = 0x0000000000000080LL, // PS bit: entry maps a 2MB page
52 INTEL_PDE_GLOBAL = 0x00000100,
53 INTEL_PDE_AVAIL = 0x00000e00,
54 INTEL_PDE_PFN = 0x000ffffffffff000LL,
// ---- page-directory-pointer entry (PDPE, level 3) bits ----
56 INTEL_PDPE_VALID = 0x0000000000000001LL,
57 INTEL_PDPE_WRITE = 0x0000000000000002LL,
58 INTEL_PDPE_USER = 0x0000000000000004LL,
59 INTEL_PDPE_PFN = 0x000ffffffffff000LL,
// ---- PML4 entry (level 4) bits ----
61 INTEL_PML4E_VALID = 0x0000000000000001LL,
62 INTEL_PML4E_WRITE = 0x0000000000000002LL,
63 INTEL_PML4E_USER = 0x0000000000000004LL,
64 INTEL_PML4E_PFN = 0x000ffffffffff000LL,
// CPUID feature flag: large-page support (PSE); see the XXX in pdir_map_range.
66 CPUF_4MB_PAGES = 0x00000008,
// EFER MSR: Long Mode Enable bit (bit 8); set by enable_longmode().
73 EFER_LME = 0x00000100,
// GDT selector of the double-fault task gate TSS.
79 DBF_TSS = 0x28, // XXX check this value
// descriptor "sizebits" value selecting a 64-bit (L-bit) code segment.
90 SZ_CODE_64 = 0x2, // XXX 64 Bit Code Segment
// number of 8-byte entries in base_gdt (selectors up to 0x28 + 64-bit CS).
92 GDTSZ = (0x30/8), // XXX check this value
// Memory operand for LGDT/LIDT: 16-bit limit followed by 32-bit linear base.
// NOTE(review): the limit member's line is not visible in this excerpt.
97 struct pseudo_descriptor
101 l4_uint32_t linear_base;
// 8-byte x86 segment descriptor, split into its architectural bit-fields.
106 l4_uint16_t limit_low; /* limit 0..15 */
107 l4_uint16_t base_low; /* base 0..15 */
108 l4_uint8_t base_med; /* base 16..23 */
109 l4_uint8_t access; /* access byte */
110 l4_uint8_t limit_high:4; /* limit 16..19 */
111 l4_uint8_t granularity:4; /* granularity */
112 l4_uint8_t base_high; /* base 24..31 */
113 } __attribute__((packed));
// 8-byte x86 gate descriptor (interrupt/trap/task gate) for the IDT.
117 l4_uint16_t offset_low; /* offset 0..15 */
118 l4_uint16_t selector;
119 l4_uint8_t word_count;
121 l4_uint16_t offset_high; /* offset 16..31 */
122 } __attribute__((packed));
// 32-bit x86 Task State Segment layout (used for the base TSS and the
// double-fault task gate).
126 l4_uint32_t back_link;
127 l4_uint32_t esp0, ss0;
128 l4_uint32_t esp1, ss1;
129 l4_uint32_t esp2, ss2;
131 l4_uint32_t eip, eflags;
132 l4_uint32_t eax, ecx, edx, ebx, esp, ebp, esi, edi;
133 l4_uint32_t es, cs, ss, ds, fs, gs;
135 l4_uint16_t trace_trap;
136 l4_uint16_t io_bit_map_offset;
// One entry of the assembly-generated IDT init table (boot_idt_inittab):
// handler entry point plus vector/type (consumed by the IDT-init loop below).
139 struct gate_init_entry
141 l4_uint32_t entrypoint;
// Register frame pushed by the trap stubs; layout must match the assembly
// entry code (see trap_dump_panic, which reads it field by field).
148 l4_uint32_t gs, fs, es, ds;
149 l4_uint32_t edi, esi, ebp, cr2, ebx, edx, ecx, eax;
150 l4_uint32_t trapno, err;
151 l4_uint32_t eip, cs, eflags, esp, ss;
// CPUID feature bits (EDX of leaf 1), filled by the feature-detect code below.
154 static l4_uint32_t cpu_feature_flags;
// Physical address of the boot PML4, set by base_paging_init().
155 static l4_uint32_t base_pml4_pa;
156 static struct x86_tss base_tss;
157 static struct x86_desc base_gdt[GDTSZ];
158 static struct x86_gate base_idt[IDTSZ];
// Double-fault handling: a dedicated task gate with its own stack so a
// kernel-stack fault can still be reported.
160 static void handle_dbf(void);
161 static char dbf_stack[2048];
162 static struct x86_tss dbf_tss =
165 0/*esp0*/, 0/*ss0*/, 0/*esp1*/, 0/*ss1*/, 0/*esp2*/, 0/*ss2*/,
167 (l4_uint32_t)handle_dbf/*eip*/, 0x00000082/*eflags*/,
168 0/*eax*/, 0/*ecx*/, 0/*edx*/, 0/*ebx*/,
169 (l4_uint32_t)dbf_stack + sizeof(dbf_stack)/*esp*/,
170 0/*ebp*/, 0/*esi*/, 0/*edi*/,
171 KERNEL_DS/*es*/, KERNEL_CS/*cs*/, KERNEL_DS/*ss*/,
172 KERNEL_DS/*ds*/, KERNEL_DS/*fs*/, KERNEL_DS/*gs*/,
173 0/*ldt*/, 0/*trace_trap*/, 0x8000/*io_bit_map_offset*/
176 static inline l4_uint64_t* find_pml4e(l4_uint32_t pml4_pa, l4_uint64_t la)
177 { return (&((l4_uint64_t*)pml4_pa)[(la >> PML4ESHIFT) & PML4EMASK]); }
179 static inline l4_uint64_t* find_pdpe(l4_uint32_t pdp_pa, l4_uint64_t la)
180 { return (&((l4_uint64_t*)pdp_pa)[(la >> PDPESHIFT) & PDPEMASK]); }
182 static inline l4_uint64_t* find_pde(l4_uint32_t pdir_pa, l4_uint64_t la)
183 { return (&((l4_uint64_t*)pdir_pa)[(la >> PDESHIFT) & PDEMASK]); }
185 static inline l4_uint64_t* find_pte(l4_uint32_t ptab_pa, l4_uint64_t la)
186 { return (&((l4_uint64_t*)ptab_pa)[(la >> PTESHIFT) & PTEMASK]); }
// Read the EFLAGS register (32-bit: pushf/popl).
188 static inline l4_uint32_t get_eflags(void)
189 { l4_uint32_t efl; asm volatile("pushf ; popl %0" : "=r" (efl)); return efl; }
// Write the EFLAGS register; "memory" clobber orders it against other code.
191 static inline void set_eflags(l4_uint32_t efl)
192 { asm volatile("pushl %0 ; popf" : : "r" (efl) : "memory"); }
// Segment-register accessors: load a 16-bit selector into the respective
// segment register (used to reload segments after the GDT is switched).
194 static inline void set_ds(l4_uint16_t ds)
195 { asm volatile("movw %w0,%%ds" : : "r" (ds)); }
197 static inline void set_es(l4_uint16_t es)
198 { asm volatile("movw %w0,%%es" : : "r" (es)); }
200 static inline void set_fs(l4_uint16_t fs)
201 { asm volatile("movw %w0,%%fs" : : "r" (fs)); }
203 static inline void set_gs(l4_uint16_t gs)
204 { asm volatile("movw %w0,%%gs" : : "r" (gs)); }
206 static inline void set_ss(l4_uint16_t ss)
207 { asm volatile("movw %w0,%%ss" : : "r" (ss)); }
// Read the current stack-segment selector (used by trap_dump_panic).
209 static inline l4_uint16_t get_ss(void)
210 { l4_uint16_t ss; asm volatile("movw %%ss,%w0" : "=r" (ss)); return ss; }
// Load the IDT register from a struct pseudo_descriptor (takes the address
// of its `limit` member, i.e. the start of the 6-byte lidt operand).
212 #define set_idt(pseudo_desc) \
213 asm volatile("lidt %0" : : "m" ((pseudo_desc)->limit) : "memory")
// Load the GDT register from a struct pseudo_descriptor (same layout trick).
215 #define set_gdt(pseudo_desc) \
216 asm volatile("lgdt %0" : : "m" ((pseudo_desc)->limit) : "memory")
// Load the task register with the given TSS selector.
218 #define set_tr(seg) \
219 asm volatile("ltr %0" : : "rm" ((l4_uint16_t)(seg)))
222 ({ register l4_uint32_t _temp__; \
223 asm("movl %%esp, %0" : "=r" (_temp__)); _temp__; })
226 ({ register l4_uint32_t _temp__; \
227 asm volatile("mov %%cr0, %0" : "=r" (_temp__)); _temp__; })
// Load CR3 (page-table base register) with `value`.
229 #define set_cr3(value) \
230 ({ register l4_uint32_t _temp__ = (value); \
231 asm volatile("mov %0, %%cr3" : : "r" (_temp__)); })
234 ({ register l4_uint32_t _temp__; \
235 asm volatile("mov %%cr4, %0" : "=r" (_temp__)); _temp__; })
// Load CR4 with `value` (used to set CR4.PAE before entering long mode).
237 #define set_cr4(value) \
238 ({ register l4_uint32_t _temp__ = (value); \
239 asm volatile("mov %0, %%cr4" : : "r" (_temp__)); })
// Set EFER.LME: read MSR 0xC0000080 (EFER), set bit 8 (= EFER_LME above)
// via `bts`, write it back. Long mode becomes active once paging is enabled.
242 static inline void enable_longmode(void)
245 asm volatile("rdmsr; bts $8, %%eax; wrmsr"
246 :"=a"(dummy), "=d"(dummy) : "c"(0xc0000080));
// Fill an 8-byte GDT segment descriptor from base/limit/access/sizebits.
// ACC_P (present) is OR-ed into the access byte unconditionally.
// NOTE(review): lines that may rescale `limit` for page granularity are not
// visible in this excerpt — confirm against the full source.
250 fill_descriptor(struct x86_desc *desc, l4_uint32_t base, l4_uint32_t limit,
251 l4_uint8_t access, l4_uint8_t sizebits)
258 desc->limit_low = limit & 0xffff;
259 desc->base_low = base & 0xffff;
260 desc->base_med = (base >> 16) & 0xff;
261 desc->access = access | ACC_P;
262 desc->limit_high = limit >> 16;
263 desc->granularity = sizebits;
264 desc->base_high = base >> 24;
// Fill an 8-byte IDT gate descriptor: handler offset split low/high, code
// segment selector, and access byte (ACC_P forced on).
268 fill_gate(struct x86_gate *gate, l4_uint32_t offset,
269 l4_uint16_t selector, l4_uint8_t access)
271 gate->offset_low = offset & 0xffff;
272 gate->selector = selector;
273 gate->word_count = 0;
274 gate->access = access | ACC_P;
275 gate->offset_high = (offset >> 16) & 0xffff;
// Switch the CPU into long mode: enable PAE, load CR3 with the PML4, set
// EFER.LME, then set CR0.PG. The jump after the CR0 write flushes the
// prefetch queue as required by the architecture.
279 paging_enable(l4_uint32_t pml4)
281 /* Enable Physical l4_uint64_t Extension (PAE). */
282 set_cr4(get_cr4() | CR4_PAE);
284 /* Load the page map level 4. */
287 /* Enable long mode. */
290 /* Turn on paging and switch to long mode. */
291 asm volatile("movl %0,%%cr0 ; jmp 1f ; 1:" : : "r" (get_cr0() | CR0_PG));
// Print a fatal boot error and stop (the non-returning tail of this
// function is outside the visible excerpt).
295 panic(const char *str)
297 printf("PANIC: %s\n", str);
// CPU feature detection (body fragment; function header not visible):
// probe CPUID support by toggling EFLAGS.AC (386 vs. 486) and EFLAGS.ID
// (CPUID present), then cache CPUID leaf 1 EDX in cpu_feature_flags.
306 int orig_eflags = get_eflags();
308 /* Check for a dumb old 386 by trying to toggle the AC flag. */
309 set_eflags(orig_eflags ^ EFL_AC);
310 if ((get_eflags() ^ orig_eflags) & EFL_AC)
312 /* It's a 486 or better. Now try toggling the ID flag. */
313 set_eflags(orig_eflags ^ EFL_ID);
314 if ((get_eflags() ^ orig_eflags) & EFL_ID)
316 int highest_val, dummy;
// CPUID leaf 0: highest supported leaf in EAX.
319 : "a" (0) : "ebx", "ecx", "edx");
321 if (highest_val >= 1)
325 "=d" (cpu_feature_flags)
// Restore the caller's original EFLAGS after probing.
332 set_eflags(orig_eflags);
// IDT initialization: walk the assembly-generated boot_idt_inittab (ends at
// a zero entrypoint) and fill one IDT gate per entry.
335 extern struct gate_init_entry boot_idt_inittab[];
339 struct x86_gate *dst = base_idt;
340 const struct gate_init_entry *src = boot_idt_inittab;
342 while (src->entrypoint)
// Type 0x05 is a task gate: the TSS selector goes in the selector field
// and the offset is unused, hence the swapped argument order.
344 if ((src->type & 0x1f) == 0x05)
346 fill_gate(&dst[src->vector], 0, src->entrypoint, src->type);
349 fill_gate(&dst[src->vector], src->entrypoint, KERNEL_CS, src->type);
// GDT initialization: TSS descriptors for the base and double-fault TSS,
// flat 4GB 32-bit kernel code/data segments, and the 64-bit code segment.
357 /* Initialize the base TSS descriptor. */
358 fill_descriptor(&base_gdt[BASE_TSS / 8],
359 (l4_uint32_t)&base_tss, sizeof(base_tss) - 1,
360 ACC_PL_K | ACC_TSS, 0);
361 /* Initialize the TSS descriptor for the double fault handler */
362 fill_descriptor(&base_gdt[DBF_TSS / 8],
363 (l4_uint32_t)&dbf_tss, sizeof(dbf_tss) - 1,
364 ACC_PL_K | ACC_TSS, 0);
365 /* Initialize the 32-bit kernel code and data segment descriptors
366 to point to the base of the kernel linear space region. */
367 fill_descriptor(&base_gdt[KERNEL_CS / 8], 0, 0xffffffff,
368 ACC_PL_K | ACC_CODE_R, SZ_32);
369 fill_descriptor(&base_gdt[KERNEL_DS / 8], 0, 0xffffffff,
370 ACC_PL_K | ACC_DATA_W, SZ_32);
371 /* XXX Initialize the 64-bit kernel code segment descriptor */
372 fill_descriptor(&base_gdt[KERNEL_CS_64 / 8], 0, 0xffffffff,
373 ACC_PL_K | ACC_CODE_R, SZ_CODE_64);
// Base TSS setup: ring-0 stack, and an io_bit_map_offset past the TSS end
// (meaning: no I/O permission bitmap).
379 base_tss.ss0 = KERNEL_DS;
380 base_tss.esp0 = get_esp(); /* only temporary */
381 base_tss.io_bit_map_offset = sizeof(base_tss);
// Load the new GDT and reload CS via a far jump (the other segment
// register reloads are outside the visible excerpt).
387 struct pseudo_descriptor pdesc;
389 /* Create a pseudo-descriptor describing the GDT. */
390 pdesc.limit = sizeof(base_gdt) - 1;
391 pdesc.linear_base = (l4_uint32_t)&base_gdt;
393 /* Load it into the CPU. */
396 /* Reload all the segment registers from the new GDT. */
397 asm volatile("ljmp %0,$1f ; 1:" : : "i" (KERNEL_CS));
// Load the IDT register with a pseudo-descriptor for base_idt.
408 struct pseudo_descriptor pdesc;
410 /* Create a pseudo-descriptor describing the GDT. */
411 pdesc.limit = sizeof(base_idt) - 1;
412 pdesc.linear_base = (l4_uint32_t)&base_idt;
// Before LTR: clear the busy bit in the TSS descriptor (ltr faults on a
// busy TSS). The empty asm statements are compiler barriers that force the
// descriptor-table writes to memory first.
419 /* Make sure the TSS isn't marked busy. */
420 base_gdt[BASE_TSS / 8].access &= ~ACC_TSS_BUSY;
421 asm volatile ("" : : : "memory");
432 // force tables to memory before loading segment registers
433 asm volatile ("" : : : "memory");
// Exported record of the page-table pool (address/size), filled by
// ptab_alloc() on first use — presumably consumed by later boot stages.
439 struct ptab64_mem_info_t ptab64_mem_info;
// Bump allocator for boot page tables: hands out zeroed, page-aligned 4KB
// chunks from a fixed static pool (6 pages). Panics when the pool is
// exhausted. Memory is identity-mapped, so the returned address doubles as
// a physical address.
442 ptab_alloc(l4_uint32_t *out_ptab_pa)
444 static char pool[6 << 12] __attribute__((aligned(4096)));
445 static l4_uint32_t pdirs;
446 static int initialized;
// One-time init: publish pool location, zero it, round the cursor up to
// the next page boundary.
451 ptab64_mem_info.addr = (l4_uint32_t)pool;
452 ptab64_mem_info.size = sizeof(pool);
453 memset(pool, 0, sizeof(pool));
454 pdirs = ((l4_uint32_t)pool + PAGE_SIZE - 1) & ~PAGE_MASK;
457 if (pdirs > (l4_uint32_t)pool + sizeof(pool))
458 panic("Cannot allocate page table -- increase ptab_alloc::pool");
460 *out_ptab_pa = pdirs;
// Map the physical range [pa, pa+size) at linear address la in the 4-level
// page table rooted at pml4_pa, applying `mapping_bits` to the leaf
// entries. Walks/creates PML4 -> PDP -> PD levels, uses 2MB superpages
// where la, pa and the remaining size allow it, and 4KB pages otherwise.
465 pdir_map_range(l4_uint32_t pml4_pa, l4_uint64_t la, l4_uint64_t pa,
466 l4_uint64_t size, l4_uint32_t mapping_bits)
469 assert(la+size-1 > la); // avoid 4GB wrap around
473 l4_uint64_t *pml4e = find_pml4e(pml4_pa, la);
475 /* Create new pml4e with corresponding pdp (page directory pointer)
476 * if no valid entry exists. */
477 if (!(*pml4e & INTEL_PML4E_VALID))
481 /* Allocate new page for pdp. */
484 /* Set the pml4 to point to it. */
485 *pml4e = (pdp_pa & INTEL_PML4E_PFN)
486 | INTEL_PML4E_VALID | INTEL_PML4E_USER | INTEL_PML4E_WRITE;
491 l4_uint64_t *pdpe = find_pdpe(*pml4e & INTEL_PML4E_PFN, la);
493 /* Create new pdpe with corresponding pd (page directory)
494 * if no valid entry exists. */
495 if (!(*pdpe & INTEL_PDPE_VALID))
499 /* Allocate new page for pd. */
502 /* Set the pdpe to point to it. */
503 *pdpe = (pd_pa & INTEL_PDPE_PFN)
504 | INTEL_PDPE_VALID | INTEL_PDPE_USER | INTEL_PDPE_WRITE;
509 l4_uint64_t *pde = find_pde(*pdpe & INTEL_PDPE_PFN, la);
511 /* Use a 2MB page if we can. */
512 if (superpage_aligned(la) && superpage_aligned(pa)
513 && (size >= SUPERPAGE_SIZE))
514 //&& (cpu_feature_flags & CPUF_4MB_PAGES)) XXX
516 /* a failed assertion here may indicate a memory wrap
518 assert(!(*pde & INTEL_PDE_VALID));
519 /* XXX what if an empty page table exists
520 from previous finer-granularity mappings? */
521 *pde = pa | mapping_bits | INTEL_PDE_SUPERPAGE;
522 la += SUPERPAGE_SIZE;
523 pa += SUPERPAGE_SIZE;
524 size -= SUPERPAGE_SIZE;
528 /* Find the page table, creating one if necessary. */
529 if (!(*pde & INTEL_PDE_VALID))
533 /* Allocate a new page table. */
534 ptab_alloc(&ptab_pa);
536 /* Set the pde to point to it. */
// NOTE(review): masks with INTEL_PTE_PFN rather than INTEL_PDE_PFN —
// harmless since both masks have the same value, but inconsistent.
537 *pde = (ptab_pa & INTEL_PTE_PFN)
538 | INTEL_PDE_VALID | INTEL_PDE_USER | INTEL_PDE_WRITE;
540 assert(!(*pde & INTEL_PDE_SUPERPAGE));
543 /* Use normal 4KB page mappings. */
546 l4_uint64_t *pte = find_pte(*pde & INTEL_PDE_PFN, la);
547 assert(!(*pte & INTEL_PTE_VALID));
549 /* Insert the mapping. */
550 *pte = pa | mapping_bits;
552 /* Advance to the next page. */
// Inner loops run until the next superpage / PD / PDP boundary, so the
// outer levels re-walk the tables only when a boundary is crossed.
558 while ((size > 0) && !superpage_aligned(la));
561 while ((size > 0) && !pd_aligned(la));
563 while ((size > 0) && !pdp_aligned(la));
// Build the boot page tables: allocate the PML4, identity-map physical
// memory [0, phys_mem_max) writable, then switch paging/long mode on.
568 base_paging_init(l4_uint64_t phys_mem_max)
570 ptab_alloc(&base_pml4_pa);
572 // Establish one-to-one mappings for the physical memory
573 pdir_map_range(base_pml4_pa, 0, 0, phys_mem_max,
574 INTEL_PDE_VALID | INTEL_PDE_WRITE | INTEL_PDE_USER);
576 //dbf_tss.cr3 = base_pml4_pa;
578 // XXX Turn on paging and activate long mode
579 paging_enable(base_pml4_pa);
// Dump the trap_state register frame and panic. Decodes #GP error codes
// (IDT vs. GDT/LDT, external vs. internal) and prints CR2 for page faults,
// plus a raw dump of the stack words above the frame.
582 void trap_dump_panic(const struct trap_state *st);
583 void trap_dump_panic(const struct trap_state *st)
// Low two bits of CS = privilege level; nonzero means trap came from user
// mode, in which case SS:ESP were pushed by the CPU and are in the frame.
585 int from_user = st->cs & 3;
588 printf("EAX %08x EBX %08x ECX %08x EDX %08x\n",
589 st->eax, st->ebx, st->ecx, st->edx);
590 printf("ESI %08x EDI %08x EBP %08x ESP %08x\n",
591 st->esi, st->edi, st->ebp,
592 from_user ? st->esp : (l4_uint32_t)&st->esp);
593 printf("EIP %08x EFLAGS %08x\n", st->eip, st->eflags);
594 printf("CS %04x SS %04x DS %04x ES %04x FS %04x GS %04x\n",
595 st->cs & 0xffff, from_user ? st->ss & 0xffff : get_ss(),
596 st->ds & 0xffff, st->es & 0xffff,
597 st->fs & 0xffff, st->gs & 0xffff);
598 printf("trapno %d, error %08x, from %s mode\n",
599 st->trapno, st->err, from_user ? "user" : "kernel");
// Trap 13 = general protection fault: decode the selector error code.
601 if (st->trapno == 0x0d)
604 printf("(external event");
606 printf("(internal event");
608 printf(" regarding IDT gate descriptor no. 0x%02x)\n", st->err >> 3);
610 printf(" regarding %s entry no. 0x%02x)\n",
611 st->err & 4 ? "LDT" : "GDT", st->err >> 3);
// Trap 14 = page fault: the faulting linear address was saved in cr2.
613 else if (st->trapno == 0x0e)
614 printf("page fault linear address %08x\n", st->cr2);
// Dump 32 words starting at the frame's esp slot, 8 per line.
617 for (i = 0; i < 32; i++)
618 printf("%08x%c", (&st->esp)[i], ((i & 7) == 7) ? '\n' : ' ');
620 panic("Unexpected trap while booting Fiasco!");
// Double-fault task-gate handler (body fragment; function header not
// visible): dump the register state the CPU saved into base_tss during the
// task switch, then panic.
628 "EAX %08x EBX %08x ECX %08x EDX %08x\n"
629 "ESI %08x EDI %08x EBP %08x ESP %08x\n"
630 "EIP %08x EFLAGS %08x\n"
631 "CS %04x SS %04x DS %04x ES %04x FS %04x GS %04x\n\n",
632 base_tss.eax, base_tss.ebx, base_tss.ecx, base_tss.edx,
633 base_tss.esi, base_tss.edi, base_tss.ebp, base_tss.esp,
634 base_tss.eip, base_tss.eflags,
635 base_tss.cs & 0xffff, base_tss.ss & 0xffff, base_tss.ds & 0xffff,
636 base_tss.es & 0xffff, base_tss.fs & 0xffff, base_tss.gs & 0xffff);
638 panic("Unexpected DOUBLE FAULT while booting Fiasco!");