[arch][x86] get SMP working on x86-32

- Added very basic user page table support (needed to bootstrap the
  secondary cpus)
- Added MP bootup code for 32bit.
Author: Travis Geiselbrecht
Date:   2025-04-06 19:09:32 -07:00
Commit: 71e795de19 (parent 8fdadd9b33)

6 changed files with 121 additions and 40 deletions
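
A note on the first bullet: "very basic" here means each user address space
gets its own top-level page table whose kernel half simply mirrors the global
kernel_pd, so reloading CR3 never drops the kernel mappings that the
secondary cpus (and everything else) rely on. A rough sketch of the idea,
using the names from the diff below and eliding error handling:

    // allocate one page for the new top-level table; the high half aliases
    // the kernel's page directory entries, the low half starts out empty
    map_addr_t *top = pmm_alloc_kpages(1, NULL);
    memcpy(top + NO_OF_PT_ENTRIES/2, kernel_pd + NO_OF_PT_ENTRIES/2, PAGE_SIZE/2);
    memset(top, 0, PAGE_SIZE/2);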

View File

@@ -23,7 +23,12 @@
 #include <string.h>
 #include <sys/types.h>
 
+// TODO:
+// - proper tlb flush (local and SMP)
+// - synchronization of top level page tables for user space aspaces
+
 #define LOCAL_TRACE 0
+#define TRACE_CONTEXT_SWITCH 0
 
 /* top level kernel page tables, initialized in start.S */
 #if X86_LEGACY
@@ -309,8 +314,6 @@ static status_t x86_mmu_unmap(map_addr_t * const init_table, const vaddr_t vaddr
 }
 
 int arch_mmu_unmap(arch_aspace_t * const aspace, const vaddr_t vaddr, const uint count) {
-    map_addr_t init_table_from_cr3;
-
     LTRACEF("aspace %p, vaddr %#lx, count %u\n", aspace, vaddr, count);
 
     DEBUG_ASSERT(aspace);
@@ -321,10 +324,7 @@ int arch_mmu_unmap(arch_aspace_t * const aspace, const vaddr_t vaddr, const uint
     if (count == 0)
         return NO_ERROR;
 
-    DEBUG_ASSERT(x86_get_cr3());
-    init_table_from_cr3 = x86_get_cr3();
-
-    return (x86_mmu_unmap(paddr_to_kvaddr(init_table_from_cr3), vaddr, count));
+    return (x86_mmu_unmap(aspace->cr3, vaddr, count));
 }
 
 /**
@@ -372,12 +372,9 @@ status_t arch_mmu_query(arch_aspace_t * const aspace, const vaddr_t vaddr, paddr
     if (!paddr)
         return ERR_INVALID_ARGS;
 
-    DEBUG_ASSERT(x86_get_cr3());
-    uint32_t current_cr3_val = (map_addr_t)x86_get_cr3();
-
     arch_flags_t ret_flags;
     uint32_t ret_level;
-    status_t stat = x86_mmu_get_mapping(paddr_to_kvaddr(current_cr3_val), vaddr, &ret_level, &ret_flags, paddr);
+    status_t stat = x86_mmu_get_mapping(aspace->cr3, vaddr, &ret_level, &ret_flags, paddr);
     if (stat)
         return stat;
@@ -404,15 +401,12 @@ int arch_mmu_map(arch_aspace_t * const aspace, const vaddr_t vaddr, const paddr_
     if (count == 0)
         return NO_ERROR;
 
-    DEBUG_ASSERT(x86_get_cr3());
-    uint32_t current_cr3_val = (map_addr_t)x86_get_cr3();
-
     struct map_range range;
     range.start_vaddr = vaddr;
     range.start_paddr = (map_addr_t)paddr;
    range.size = count * PAGE_SIZE;
 
-    return (x86_mmu_map_range(paddr_to_kvaddr(current_cr3_val), &range, flags));
+    return (x86_mmu_map_range(aspace->cr3, &range, flags));
 }
 
 bool arch_mmu_supports_nx_mappings(void) { return false; }
@@ -447,8 +441,43 @@ void x86_mmu_init(void) {
 status_t arch_mmu_init_aspace(arch_aspace_t * const aspace, const vaddr_t base, const size_t size, const uint flags) {
     DEBUG_ASSERT(aspace);
 
-    if ((flags & ARCH_ASPACE_FLAG_KERNEL) == 0) {
-        return ERR_NOT_SUPPORTED;
-    }
-    aspace->flags = flags;
+    TRACEF("aspace %p, base %#lx, size %#zx, flags %#x\n", aspace, base, size, flags);
+
+    /* validate that the base + size is sane and doesn't wrap */
+    DEBUG_ASSERT(size > PAGE_SIZE);
+    DEBUG_ASSERT(base + size - 1 > base);
+
+    aspace->flags = flags;
+    if (flags & ARCH_ASPACE_FLAG_KERNEL) {
+        /* at the moment we can only deal with address spaces as globally defined */
+        DEBUG_ASSERT(base == KERNEL_ASPACE_BASE);
+        DEBUG_ASSERT(size == KERNEL_ASPACE_SIZE);
+
+        aspace->base = base;
+        aspace->size = size;
+        aspace->cr3 = kernel_pd;
+        aspace->cr3_phys = vaddr_to_paddr(aspace->cr3);
+    } else {
+        DEBUG_ASSERT(base == USER_ASPACE_BASE);
+        DEBUG_ASSERT(size == USER_ASPACE_SIZE);
+
+        aspace->base = base;
+        aspace->size = size;
+
+        map_addr_t *va = pmm_alloc_kpages(1, NULL);
+        if (!va) {
+            return ERR_NO_MEMORY;
+        }
+
+        aspace->cr3 = va;
+        aspace->cr3_phys = vaddr_to_paddr(aspace->cr3);
+
+        /* copy the top entries from the kernel top table */
+        memcpy(aspace->cr3 + NO_OF_PT_ENTRIES/2, kernel_pd + NO_OF_PT_ENTRIES/2, PAGE_SIZE/2);
+
+        /* zero out the rest */
+        memset(aspace->cr3, 0, PAGE_SIZE/2);
+    }
 
     return NO_ERROR;
@@ -459,8 +488,22 @@ status_t arch_mmu_destroy_aspace(arch_aspace_t * const aspace) {
 }
 
 void arch_mmu_context_switch(arch_aspace_t * const aspace) {
-    if (aspace != NULL) {
-        PANIC_UNIMPLEMENTED;
+    if (TRACE_CONTEXT_SWITCH)
+        TRACEF("aspace %p\n", aspace);
+
+    uint64_t cr3;
+    if (aspace) {
+        DEBUG_ASSERT((aspace->flags & ARCH_ASPACE_FLAG_KERNEL) == 0);
+        cr3 = aspace->cr3_phys;
+    } else {
+        // TODO save copy of this
+        cr3 = vaddr_to_paddr(kernel_pd);
     }
+
+    if (TRACE_CONTEXT_SWITCH) {
+        TRACEF("cr3 %#llx\n", cr3);
+    }
+
+    x86_set_cr3(cr3);
 }

View File

@@ -26,6 +26,10 @@
#define LOCAL_TRACE 0 #define LOCAL_TRACE 0
#define TRACE_CONTEXT_SWITCH 0 #define TRACE_CONTEXT_SWITCH 0
// TODO:
// - proper tlb flush (local and SMP)
// - synchronization of top level page tables for user space aspaces
/* Address width including virtual/physical address*/ /* Address width including virtual/physical address*/
static uint8_t vaddr_width = 0; static uint8_t vaddr_width = 0;
static uint8_t paddr_width = 0; static uint8_t paddr_width = 0;
@@ -672,7 +676,7 @@ void x86_mmu_init(void) {
 status_t arch_mmu_init_aspace(arch_aspace_t * const aspace, const vaddr_t base, const size_t size, const uint flags) {
     DEBUG_ASSERT(aspace);
 
-    LTRACEF("aspace %p, base %#lx, size %zu, flags %#x\n", aspace, base, size, flags);
+    LTRACEF("aspace %p, base %#lx, size %#zx, flags %#x\n", aspace, base, size, flags);
 
     /* validate that the base + size is sane and doesn't wrap */
     DEBUG_ASSERT(size > PAGE_SIZE);

View File

@@ -26,6 +26,7 @@
 extern enum handler_return platform_irq(x86_iframe_t *frame);
 
 static void dump_fault_frame(x86_iframe_t *frame) {
+    dprintf(CRITICAL, "cpu %u:\n", arch_curr_cpu_num());
 #if ARCH_X86_32
     dprintf(CRITICAL, " CS: %04hx EIP: %08x EFL: %08x CR2: %08lx\n",
             frame->cs, frame->ip, frame->flags, x86_get_cr2());

View File

@@ -52,7 +52,7 @@ void x86_configure_percpu_early(uint cpu_num, uint apic_id) {
     write_msr(X86_MSR_IA32_GS_BASE, (uint64_t)percpu);
 #else
     // set up a gs descriptor for this cpu
-    uint16_t selector = PERCPU_SELECTOR_BASE + cpu_num;
+    uint16_t selector = PERCPU_SELECTOR_BASE + cpu_num * 8;
     x86_set_gdt_descriptor(selector, percpu, sizeof(*percpu), 1, 0, 1, SEG_TYPE_DATA_RW, 0, 1);
     x86_set_gs(selector);
 #endif
@@ -84,8 +84,7 @@ status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) {
     return NO_ERROR;
 }
 
-void arch_mp_init_percpu(void) {
-}
+void arch_mp_init_percpu(void) {}
 
 uint32_t x86_get_apic_id_from_hardware(void) {
     // read the apic id out of cpuid leaf 1, which should be present if SMP is enabled.
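
The selector fix above is byte arithmetic: GDT descriptors are 8 bytes wide,
and a selector is the descriptor's byte offset into the table (its low three
bits carry the RPL and table indicator), so per-cpu slots must be spaced
cpu_num * 8 apart, not cpu_num. Illustratively:

    // selector = byte offset of the descriptor in the GDT;
    // bits 0-1 = RPL (0), bit 2 = TI (0 = GDT), bits 3+ = index
    uint16_t selector = PERCPU_SELECTOR_BASE + cpu_num * 8; // one 8-byte descriptor per cpu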

View File

@@ -1,26 +1,32 @@
 #include <lk/asm.h>
 #include <arch/x86/descriptor.h>
 
+#if WITH_SMP
 #define LOAD_ADDRESS 0x4000
 
 #define MSR_EFER 0xc0000080
 #define EFER_LME 0x00000100
 
 #define ARGS_ADDRESS (LOAD_ADDRESS + 0x1000)
 #define ARGS_CR3 (ARGS_ADDRESS + 0x00)
+#if ARCH_X86_64
 #define ARGS_STACK (ARGS_ADDRESS + 0x08)
+#else
+#define ARGS_STACK (ARGS_ADDRESS + 0x04)
+#endif
 
 .text
 .code16
 
 // secondary cpu boot entry point and switch to protected mode
 // enters with the following state:
 // real mode, CS 0x0400, PC 0 (physical address 0x4000)
+// LOAD_ADDRESS (physical) == mp_boot_start (virtual)
 FUNCTION(mp_boot_start)
     // jump over the temp GDT below and switch to a flat memory segment (0)
-    ljmp $0, $(LOAD_ADDRESS + 0x28)
+    ljmp $0, $(LOAD_ADDRESS + (.Lafter_gdt - mp_boot_start))
 
 .org 0x8
 .Lgdt:
+    // temporary GDT to get us into protected mode
     // stuff the GDTR in the first entry
     .short (8*4)
     .int (LOAD_ADDRESS + 0x8) // address of .Lgdt
@@ -50,7 +56,7 @@ FUNCTION(mp_boot_start)
     .byte 0b10101111 /* G(1) D(0) L(1) AVL(0) limit 19:16 */
     .byte 0x0 /* base 31:24 */
 
-.org 0x28 // 0x08 + 0x20
+.Lafter_gdt:
 
     // load the above GDT
     lgdt (LOAD_ADDRESS + 0x08)
@@ -60,10 +66,9 @@ FUNCTION(mp_boot_start)
     movl %eax, %cr0
 
     // jump to 32bit mode
-    ljmpl $0x8, $(LOAD_ADDRESS + 0x40)
+    ljmpl $0x8, $(LOAD_ADDRESS + (.Lprot - mp_boot_start))
 
-.org 0x40
-.code32
 .Lprot:
+.code32
     // we're now in 32bit mode, set up the 32bit data segment registers
     mov $0x10, %ax
     mov %ax, %ss
@@ -94,16 +99,16 @@ FUNCTION(mp_boot_start)
     btsl $(31), %eax
     mov %eax, %cr0
 
+    // load a very temporary stack pointer
     movl $(LOAD_ADDRESS + 0x800), %esp
 
     // Use a far jump to get into 64bit mode
     pushl $0x18
-    pushl $(LOAD_ADDRESS + 0x90)
+    pushl $(LOAD_ADDRESS + (.Lfarjump64 - mp_boot_start))
     lret
 
-.org 0x90
 .code64
-farjump64:
+.Lfarjump64:
     /* branch to our high address */
     movq (.Lhigh_addr), %rax
     jmp *%rax
@@ -118,15 +123,18 @@ farjump64:
     or $(1<<4), %eax
     mov %eax, %cr4
 
-    // XXX load trampoline page table
+    // load trampoline page table
+    movl (ARGS_CR3), %eax
+    mov %eax, %cr3
 
-    // get into high address
+    // enable paging
+    mov %cr0, %eax
+    btsl $(31), %eax
+    mov %eax, %cr0
 
-    // set up stack pointer
-
-    // call into C
-    cld
-    jmp .
+    // Branch to the high address
+    lea mp_boot_start_high, %eax
+    jmp *%eax
 
 #endif
 
 DATA(mp_boot_end)
@@ -155,13 +163,38 @@ FUNCTION(mp_boot_start_high)
     // call into C
     cld
-    mov $(ARGS_ADDRESS), %rdi
+    mov $ARGS_ADDRESS, %rdi
     call secondary_entry
     jmp .
 #else // ARCH_X86_32
+    // set up stack pointer
+    mov (ARGS_STACK), %esp
+
+    // load the real GDT
+    lgdt _gdtr
+
+    push $CODE_SELECTOR
+    lea .Lnext, %eax
+    push %eax
+    lret
+.Lnext:
+    // Load the real segment registers
+    mov $DATA_SELECTOR, %ax
+    mov %ax, %ds
+    mov %ax, %es
+    mov %ax, %fs
+    mov %ax, %gs
+    mov %ax, %ss
+
+    // call into C
+    cld
+    push $ARGS_ADDRESS
+    call secondary_entry
     jmp .
 #endif
 END_FUNCTION(mp_boot_start_high)
+
+#endif // WITH_SMP
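
Both word sizes hand off to C the same way: secondary_entry receives a
pointer to the argument block at ARGS_ADDRESS, in %rdi for the x86-64 SysV
call and pushed on the stack for the 32-bit cdecl call. A plausible shape for
the C side, assuming the bootstrap_args fields shown in the next file (the
body is an illustrative guess, not this commit's actual implementation):

    void secondary_entry(struct bootstrap_args *args) {
        // running on the secondary cpu, on the stack the boot cpu provided
        x86_configure_percpu_early(args->cpu_num, x86_get_apic_id_from_hardware());
        // ... remaining per-cpu init ...
        *args->boot_completed_ptr = 1;  // report back to the boot cpu
        // ... fall into the scheduler / idle loop ...
    }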

View File

@@ -30,6 +30,7 @@ struct bootstrap_args {
     uintptr_t trampoline_cr3;
     uintptr_t stack_top;
 
+    // referenced in C, okay to move
     uintptr_t cpu_num;
     volatile uint32_t *boot_completed_ptr; // set by the secondary cpu when it's done
 };
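
The trampoline reads everything it needs from this block at a fixed physical
address (ARGS_ADDRESS = LOAD_ADDRESS + 0x1000), so the boot cpu must stage
both the code and the arguments before sending the startup IPIs. A sketch of
that sequence; start_secondary_cpu and apic_send_init_sipi are assumed helper
names, not part of this commit:

    static volatile uint32_t boot_completed;

    void start_secondary_cpu(uint cpu, uint apic_id, uintptr_t stack_top) {
        extern char mp_boot_start[], mp_boot_end[];

        // copy the real-mode trampoline to its fixed load address
        memcpy(paddr_to_kvaddr(LOAD_ADDRESS), mp_boot_start, mp_boot_end - mp_boot_start);

        // fill in the argument block the trampoline reads at ARGS_ADDRESS
        struct bootstrap_args *args = paddr_to_kvaddr(ARGS_ADDRESS);
        args->trampoline_cr3 = x86_get_cr3();   // tables that also map the trampoline
        args->stack_top = stack_top;
        args->cpu_num = cpu;
        args->boot_completed_ptr = &boot_completed;

        // INIT/SIPI/SIPI; the SIPI vector is the trampoline's physical page number
        apic_send_init_sipi(apic_id, LOAD_ADDRESS >> 12);

        while (boot_completed == 0)
            ;  // wait for the secondary cpu to check in
    }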