diff --git a/arch/x86/32/mmu.c b/arch/x86/32/mmu.c index ff36507f..e00a28c1 100644 --- a/arch/x86/32/mmu.c +++ b/arch/x86/32/mmu.c @@ -23,7 +23,12 @@ #include #include +// TODO: +// - proper tlb flush (local and SMP) +// - synchronization of top level page tables for user space aspaces + #define LOCAL_TRACE 0 +#define TRACE_CONTEXT_SWITCH 0 /* top level kernel page tables, initialized in start.S */ #if X86_LEGACY @@ -309,8 +314,6 @@ static status_t x86_mmu_unmap(map_addr_t * const init_table, const vaddr_t vaddr } int arch_mmu_unmap(arch_aspace_t * const aspace, const vaddr_t vaddr, const uint count) { - map_addr_t init_table_from_cr3; - LTRACEF("aspace %p, vaddr %#lx, count %u\n", aspace, vaddr, count); DEBUG_ASSERT(aspace); @@ -321,10 +324,7 @@ int arch_mmu_unmap(arch_aspace_t * const aspace, const vaddr_t vaddr, const uint if (count == 0) return NO_ERROR; - DEBUG_ASSERT(x86_get_cr3()); - init_table_from_cr3 = x86_get_cr3(); - - return (x86_mmu_unmap(paddr_to_kvaddr(init_table_from_cr3), vaddr, count)); + return (x86_mmu_unmap(aspace->cr3, vaddr, count)); } /** @@ -372,12 +372,9 @@ status_t arch_mmu_query(arch_aspace_t * const aspace, const vaddr_t vaddr, paddr if (!paddr) return ERR_INVALID_ARGS; - DEBUG_ASSERT(x86_get_cr3()); - uint32_t current_cr3_val = (map_addr_t)x86_get_cr3(); - arch_flags_t ret_flags; uint32_t ret_level; - status_t stat = x86_mmu_get_mapping(paddr_to_kvaddr(current_cr3_val), vaddr, &ret_level, &ret_flags, paddr); + status_t stat = x86_mmu_get_mapping(aspace->cr3, vaddr, &ret_level, &ret_flags, paddr); if (stat) return stat; @@ -404,15 +401,12 @@ int arch_mmu_map(arch_aspace_t * const aspace, const vaddr_t vaddr, const paddr_ if (count == 0) return NO_ERROR; - DEBUG_ASSERT(x86_get_cr3()); - uint32_t current_cr3_val = (map_addr_t)x86_get_cr3(); - struct map_range range; range.start_vaddr = vaddr; range.start_paddr = (map_addr_t)paddr; range.size = count * PAGE_SIZE; - return (x86_mmu_map_range(paddr_to_kvaddr(current_cr3_val), &range, 
flags)); + return (x86_mmu_map_range(aspace->cr3, &range, flags)); } bool arch_mmu_supports_nx_mappings(void) { return false; } @@ -447,8 +441,42 @@ void x86_mmu_init(void) { status_t arch_mmu_init_aspace(arch_aspace_t * const aspace, const vaddr_t base, const size_t size, const uint flags) { DEBUG_ASSERT(aspace); - if ((flags & ARCH_ASPACE_FLAG_KERNEL) == 0) { - return ERR_NOT_SUPPORTED; + TRACEF("aspace %p, base %#lx, size %#zx, flags %#x\n", aspace, base, size, flags); + + /* validate that the base + size is sane and doesn't wrap */ + DEBUG_ASSERT(size > PAGE_SIZE); + DEBUG_ASSERT(base + size - 1 > base); + + aspace->flags = flags; + if (flags & ARCH_ASPACE_FLAG_KERNEL) { + /* at the moment we can only deal with address spaces as globally defined */ + DEBUG_ASSERT(base == KERNEL_ASPACE_BASE); + DEBUG_ASSERT(size == KERNEL_ASPACE_SIZE); + + aspace->base = base; + aspace->size = size; + aspace->cr3 = kernel_pd; + aspace->cr3_phys = vaddr_to_paddr(aspace->cr3); + } else { + DEBUG_ASSERT(base == USER_ASPACE_BASE); + DEBUG_ASSERT(size == USER_ASPACE_SIZE); + + aspace->base = base; + aspace->size = size; + + map_addr_t *va = pmm_alloc_kpages(1, NULL); + if (!va) { + return ERR_NO_MEMORY; + } + + aspace->cr3 = va; + aspace->cr3_phys = vaddr_to_paddr(aspace->cr3); + + /* copy the top entries from the kernel top table */ + memcpy(aspace->cr3 + NO_OF_PT_ENTRIES/2, kernel_pd + NO_OF_PT_ENTRIES/2, PAGE_SIZE/2); + + /* zero out the rest */ + memset(aspace->cr3, 0, PAGE_SIZE/2); } return NO_ERROR; @@ -459,8 +487,22 @@ status_t arch_mmu_destroy_aspace(arch_aspace_t * const aspace) { } void arch_mmu_context_switch(arch_aspace_t * const aspace) { - if (aspace != NULL) { - PANIC_UNIMPLEMENTED; + if (TRACE_CONTEXT_SWITCH) + TRACEF("aspace %p\n", aspace); + + uint64_t cr3; + if (aspace) { + DEBUG_ASSERT((aspace->flags & ARCH_ASPACE_FLAG_KERNEL) == 0); + + cr3 = aspace->cr3_phys; + } else { + // TODO save copy of this + cr3 = vaddr_to_paddr(kernel_pd); } + 
if (TRACE_CONTEXT_SWITCH) { + TRACEF("cr3 %#llx\n", cr3); + } + + x86_set_cr3(cr3); } diff --git a/arch/x86/64/mmu.c b/arch/x86/64/mmu.c index 8038d7a9..fd08fb1a 100644 --- a/arch/x86/64/mmu.c +++ b/arch/x86/64/mmu.c @@ -26,6 +26,10 @@ #define LOCAL_TRACE 0 #define TRACE_CONTEXT_SWITCH 0 +// TODO: +// - proper tlb flush (local and SMP) +// - synchronization of top level page tables for user space aspaces + /* Address width including virtual/physical address*/ static uint8_t vaddr_width = 0; static uint8_t paddr_width = 0; @@ -672,7 +676,7 @@ void x86_mmu_init(void) { status_t arch_mmu_init_aspace(arch_aspace_t * const aspace, const vaddr_t base, const size_t size, const uint flags) { DEBUG_ASSERT(aspace); - LTRACEF("aspace %p, base %#lx, size %zu, flags %#x\n", aspace, base, size, flags); + LTRACEF("aspace %p, base %#lx, size %#zx, flags %#x\n", aspace, base, size, flags); /* validate that the base + size is sane and doesn't wrap */ DEBUG_ASSERT(size > PAGE_SIZE); diff --git a/arch/x86/faults.c b/arch/x86/faults.c index a59ec33e..2d1fbcd4 100644 --- a/arch/x86/faults.c +++ b/arch/x86/faults.c @@ -26,6 +26,7 @@ extern enum handler_return platform_irq(x86_iframe_t *frame); static void dump_fault_frame(x86_iframe_t *frame) { + dprintf(CRITICAL, "cpu %u:\n", arch_curr_cpu_num()); #if ARCH_X86_32 dprintf(CRITICAL, " CS: %04hx EIP: %08x EFL: %08x CR2: %08lx\n", frame->cs, frame->ip, frame->flags, x86_get_cr2()); diff --git a/arch/x86/mp.c b/arch/x86/mp.c index 36c11e88..c45cd592 100644 --- a/arch/x86/mp.c +++ b/arch/x86/mp.c @@ -52,7 +52,7 @@ void x86_configure_percpu_early(uint cpu_num, uint apic_id) { write_msr(X86_MSR_IA32_GS_BASE, (uint64_t)percpu); #else // set up a gs descriptor for this cpu - uint16_t selector = PERCPU_SELECTOR_BASE + cpu_num; + uint16_t selector = PERCPU_SELECTOR_BASE + cpu_num * 8; x86_set_gdt_descriptor(selector, percpu, sizeof(*percpu), 1, 0, 1, SEG_TYPE_DATA_RW, 0, 1); x86_set_gs(selector); #endif @@ -84,8 +84,7 @@ status_t 
arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) { return NO_ERROR; } -void arch_mp_init_percpu(void) { -} +void arch_mp_init_percpu(void) {} uint32_t x86_get_apic_id_from_hardware(void) { // read the apic id out of cpuid leaf 1, which should be present if SMP is enabled. diff --git a/platform/pc/mp-boot.S b/platform/pc/mp-boot.S index a3d15bc9..c902153c 100644 --- a/platform/pc/mp-boot.S +++ b/platform/pc/mp-boot.S @@ -1,26 +1,32 @@ #include #include +#if WITH_SMP + #define LOAD_ADDRESS 0x4000 #define MSR_EFER 0xc0000080 #define EFER_LME 0x00000100 #define ARGS_ADDRESS (LOAD_ADDRESS + 0x1000) #define ARGS_CR3 (ARGS_ADDRESS + 0x00) +#if ARCH_X86_64 #define ARGS_STACK (ARGS_ADDRESS + 0x08) +#else +#define ARGS_STACK (ARGS_ADDRESS + 0x04) +#endif .text .code16 // secondary cpu boot entry point and switch to protected mode // enters with the following state: // real mode, CS 0x0400, PC 0 (physical address 0x4000) +// LOAD_ADDRESS (physical) == mp_boot_start (virtual) FUNCTION(mp_boot_start) // jump over the temp GDT below and switch to a flat memory segment (0) - ljmp $0, $(LOAD_ADDRESS + 0x28) + ljmp $0, $(LOAD_ADDRESS + (.Lafter_gdt - mp_boot_start)) .org 0x8 .Lgdt: - // temporary GDT to get us into protected mode // stuff the GDTR in the first entry .short (8*4) .int (LOAD_ADDRESS + 0x8) // address of .Lgdt @@ -50,7 +56,7 @@ FUNCTION(mp_boot_start) .byte 0b10101111 /* G(1) D(0) L(1) AVL(0) limit 19:16 */ .byte 0x0 /* base 31:24 */ -.org 0x28 // 0x08 + 0x20 +.Lafter_gdt: // load the above GDT lgdt (LOAD_ADDRESS + 0x08) @@ -60,10 +66,9 @@ FUNCTION(mp_boot_start) movl %eax, %cr0 // jump to 32bit mode - ljmpl $0x8, $(LOAD_ADDRESS + 0x40) -.org 0x40 - .code32 + ljmpl $0x8, $(LOAD_ADDRESS + (.Lprot - mp_boot_start)) .Lprot: + .code32 // we're now in 32bit mode, set up the 32bit data segment registers mov $0x10, %ax mov %ax, %ss @@ -94,16 +99,16 @@ FUNCTION(mp_boot_start) btsl $(31), %eax mov %eax, %cr0 + // load a very temporary stack pointer movl $(LOAD_ADDRESS + 
0x800), %esp // Use a far jump to get into 64bit mode pushl $0x18 - pushl $(LOAD_ADDRESS + 0x90) + pushl $(LOAD_ADDRESS + (.Lfarjump64 - mp_boot_start)) lret -.org 0x90 .code64 -farjump64: +.Lfarjump64: /* branch to our high address */ movq (.Lhigh_addr), %rax jmp *%rax @@ -118,15 +123,18 @@ farjump64: or $(1<<4), %eax mov %eax, %cr4 - // XXX load trampoline page table + // load trampoline page table + movl (ARGS_CR3), %eax + mov %eax, %cr3 - // get into high address + // enable paging + mov %cr0, %eax + btsl $(31), %eax + mov %eax, %cr0 - // set up stack pointer - - // call into C - cld - jmp . + // Branch to the high address + lea mp_boot_start_high, %eax + jmp *%eax #endif DATA(mp_boot_end) @@ -155,13 +163,38 @@ FUNCTION(mp_boot_start_high) // call into C cld - mov $(ARGS_ADDRESS), %rdi + mov $ARGS_ADDRESS, %rdi call secondary_entry jmp . #else // ARCH_X86_32 + // set up stack pointer + mov (ARGS_STACK), %esp + // load the real GDT + lgdt _gdtr + + push $CODE_SELECTOR + lea .Lnext, %eax + push %eax + lret +.Lnext: + + // Load the real segment registers + mov $DATA_SELECTOR, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + mov %ax, %ss + + // call into C + cld + push $ARGS_ADDRESS + call secondary_entry jmp . #endif -END_FUNCTION(mp_boot_start_high) \ No newline at end of file +END_FUNCTION(mp_boot_start_high) + +#endif // WITH_SMP \ No newline at end of file diff --git a/platform/pc/mp.c b/platform/pc/mp.c index 85ffdd7f..d60ebe0f 100644 --- a/platform/pc/mp.c +++ b/platform/pc/mp.c @@ -30,6 +30,7 @@ struct bootstrap_args { uintptr_t trampoline_cr3; uintptr_t stack_top; + // referenced in C, okay to move uintptr_t cpu_num; volatile uint32_t *boot_completed_ptr; // set by the secondary cpu when it's done };