diff --git a/arch/arm/arm/arch.c b/arch/arm/arm/arch.c
index ffe82cba..ae9fb49d 100644
--- a/arch/arm/arm/arch.c
+++ b/arch/arm/arm/arch.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <arch/arm/mp.h>
 #include
 #include
 #include
diff --git a/arch/arm/arm/include/arch/arm/mp.h b/arch/arm/arm/include/arch/arm/mp.h
new file mode 100644
index 00000000..d375b68c
--- /dev/null
+++ b/arch/arm/arm/include/arch/arm/mp.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2025 Travis Geiselbrecht
+ *
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT
+ */
+#pragma once
+
+#include <lk/compiler.h>
+
+__BEGIN_CDECLS
+
+void arch_mp_init_percpu(void);
+
+__END_CDECLS
diff --git a/arch/arm64/arch.c b/arch/arm64/arch.c
index 140dc00c..46d03414 100644
--- a/arch/arm64/arch.c
+++ b/arch/arm64/arch.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <arch/arm64/mp.h>
 #include
 #include
 #include
@@ -20,16 +21,12 @@
 #include
 #include
 
+#include "arm64_priv.h"
+
 #define LOCAL_TRACE 0
 
-#if WITH_SMP
-/* smp boot lock */
-static spin_lock_t arm_boot_cpu_lock = 1;
-static volatile int secondaries_to_init = 0;
-#endif
-
 // initial setup per cpu immediately after entering C code
-static void arm64_early_init_percpu(void) {
+void arm64_early_init_percpu(void) {
     // set the vector base
     ARM64_WRITE_SYSREG(VBAR_EL1, (uint64_t)&arm64_exception_base);
 
@@ -75,21 +72,7 @@
 // called after the kernel has been initialized and threading is enabled on the boot cpu
 void arch_init(void) {
 #if WITH_SMP
-    arch_mp_init_percpu();
-
-    LTRACEF("midr_el1 0x%llx\n", ARM64_READ_SYSREG(midr_el1));
-
-    secondaries_to_init = SMP_MAX_CPUS - 1; /* TODO: get count from somewhere else, or add cpus as they boot */
-
-    lk_init_secondary_cpus(secondaries_to_init);
-
-    LTRACEF("releasing %d secondary cpus\n", secondaries_to_init);
-
-    /* release the secondary cpus */
-    spin_unlock(&arm_boot_cpu_lock);
-
-    /* flush the release of the lock, since the secondary cpus are running without cache on */
-    arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock));
+    arm64_mp_init();
 #endif
 }
@@ -162,32 +145,3 @@ void arch_stacktrace(uint64_t fp, uint64_t pc) {
         frame.fp = *((uint64_t *)frame.fp);
     }
 }
-
-#if WITH_SMP
-/* called from assembly */
-void arm64_secondary_entry(ulong);
-void arm64_secondary_entry(ulong asm_cpu_num) {
-    uint cpu = arch_curr_cpu_num();
-    if (cpu != asm_cpu_num) {
-        return;
-    }
-
-    arm64_early_init_percpu();
-
-    spin_lock(&arm_boot_cpu_lock);
-    spin_unlock(&arm_boot_cpu_lock);
-
-    /* run early secondary cpu init routines up to the threading level */
-    lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
-
-    arch_mp_init_percpu();
-
-    LTRACEF("cpu num %d\n", cpu);
-
-    /* we're done, tell the main cpu we're up */
-    atomic_add(&secondaries_to_init, -1);
-    __asm__ volatile("sev");
-
-    lk_secondary_cpu_entry();
-}
-#endif
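Context for the arch.c deletions above: the old bring-up released every secondary at once by unlocking a spinlock they were polling with the MMU and caches still off, which is why the unlock had to be chased with an explicit cache clean. A condensed sketch of the removed handshake, using only names from the deleted code:

    // boot cpu, formerly in arch_init():
    spin_unlock(&arm_boot_cpu_lock);
    // secondaries poll with caches off, so force the store out to memory
    arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock));

    // each secondary, formerly in arm64_secondary_entry():
    spin_lock(&arm_boot_cpu_lock);    // spins until the boot cpu releases it
    spin_unlock(&arm_boot_cpu_lock);  // pass the release along to the next cpu

The replacement model starts cores individually through PSCI (see the start.S and lib/fdtwalk changes below), so there is no longer a global release point or an uncached lock to flush.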
diff --git a/arch/arm64/arm64_priv.h b/arch/arm64/arm64_priv.h
new file mode 100644
index 00000000..c35644d6
--- /dev/null
+++ b/arch/arm64/arm64_priv.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2025 Travis Geiselbrecht
+ *
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT
+ */
+#pragma once
+
+#include <lk/compiler.h>
+#include <sys/types.h>
+
+__BEGIN_CDECLS
+
+void arm64_early_init_percpu(void);
+
+__END_CDECLS
diff --git a/arch/arm64/asm.S b/arch/arm64/asm.S
index a5dd607a..1bb28114 100644
--- a/arch/arm64/asm.S
+++ b/arch/arm64/asm.S
@@ -8,19 +8,21 @@
 #include
 #include
 
+// stay in sync with arm64/thread.c arm64_context_switch()
+
 /* void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp); */
 FUNCTION(arm64_context_switch)
     /* save old frame */
-    push x28, x29
-    push x26, x27
-    push x24, x25
-    push x22, x23
-    push x20, x21
-    push x18, x19
-    mrs x18, tpidr_el0
-    mrs x19, tpidrro_el0
-    push x18, x19
-    push x30, xzr
+    push x29, lr
+    push x27, x28
+    push x25, x26
+    push x23, x24
+    push x21, x22
+    push x19, x20
+    // skip x18, it is our per cpu pointer
+    mrs x16, tpidr_el0
+    mrs x17, tpidrro_el0
+    push x16, x17
 
     /* save old sp */
     mov x15, sp
@@ -30,16 +32,15 @@
     mov sp, x1
 
     /* restore new frame */
-    pop x30, xzr
-    pop x18, x19
-    msr tpidr_el0, x18
-    msr tpidrro_el0, x19
-    pop x18, x19
-    pop x20, x21
-    pop x22, x23
-    pop x24, x25
-    pop x26, x27
-    pop x28, x29
+    pop x16, x17
+    msr tpidr_el0, x16
+    msr tpidrro_el0, x17
+    pop x19, x20
+    pop x21, x22
+    pop x23, x24
+    pop x25, x26
+    pop x27, x28
+    pop x29, lr
 
     ret
 
diff --git a/arch/arm64/exceptions.S b/arch/arm64/exceptions.S
index 57a691b7..f5943d9f 100644
--- a/arch/arm64/exceptions.S
+++ b/arch/arm64/exceptions.S
@@ -15,6 +15,8 @@
 #define regsave_long_offset 0xf0
 #define regsave_short_offset 0x90
 
+// NOTE: stay in sync with arm64_iframe_long et al. in include/arch/arm64.h
+
 .macro regsave_long
 sub sp, sp, #32
 push x28, x29
@@ -72,7 +74,7 @@ pop x10, x11
 pop x12, x13
 pop x14, x15
 pop x16, x17
-pop x18, x19
+pop xzr, x19 // Do not restore x18, it is our per cpu pointer
 pop x20, x21
 pop x22, x23
 pop x24, x25
diff --git a/arch/arm64/include/arch/arch_ops.h b/arch/arm64/include/arch/arch_ops.h
index e69482b3..7c0889b4 100644
--- a/arch/arm64/include/arch/arch_ops.h
+++ b/arch/arm64/include/arch/arch_ops.h
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <arch/arm64/mp.h>
 
 __BEGIN_CDECLS
 
@@ -89,17 +90,6 @@ static inline void arch_set_current_thread(struct thread *t) {
     ARM64_WRITE_SYSREG(tpidr_el1, (uint64_t)t);
 }
 
-#if WITH_SMP
-static inline uint arch_curr_cpu_num(void) {
-    uint64_t mpidr = ARM64_READ_SYSREG(mpidr_el1);
-    return ((mpidr & ((1U << SMP_CPU_ID_BITS) - 1)) >> 8 << SMP_CPU_CLUSTER_SHIFT) | (mpidr & 0xff);
-}
-#else
-static inline uint arch_curr_cpu_num(void) {
-    return 0;
-}
-#endif
-
 __END_CDECLS
 
 #endif // ASSEMBLY
diff --git a/arch/arm64/include/arch/arm64.h b/arch/arm64/include/arch/arm64.h
index 2ae97499..67a170d8 100644
--- a/arch/arm64/include/arch/arm64.h
+++ b/arch/arm64/include/arch/arm64.h
@@ -62,7 +62,7 @@ void arm64_fpu_exception(struct arm64_iframe_long *iframe);
 void arm64_fpu_save_state(struct thread *thread);
 
 static inline void arm64_fpu_pre_context_switch(struct thread *thread) {
-    uint32_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
+    uint64_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
     if ((cpacr >> 20) & 3) {
         arm64_fpu_save_state(thread);
         cpacr &= ~(3 << 20);
diff --git a/arch/arm64/include/arch/arm64/mp.h b/arch/arm64/include/arch/arm64/mp.h
new file mode 100644
index 00000000..4a2668b9
--- /dev/null
+++ b/arch/arm64/include/arch/arm64/mp.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025 Travis Geiselbrecht
+ *
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+__BEGIN_CDECLS
+
+void arm64_mp_init(void);
+void arm64_mp_init_percpu(void);
+
+// Tell the ARM64 code how many secondary cpus to expect, which
+// will cause it to allocate percpu structures for them.
+void arm64_set_secondary_cpu_count(int count);
+
+struct arm64_percpu {
+    uint cpu_num;
+    uint64_t mpidr;
+} __CPU_ALIGN;
+
+static inline void arm64_set_percpu(struct arm64_percpu *pc) {
+    __asm__ volatile("mov x18, %0" ::"r"(pc));
+}
+
+static inline struct arm64_percpu *arm64_get_percpu(void) {
+    struct arm64_percpu *pc;
+    __asm__ volatile("mov %0, x18" : "=r"(pc));
+    return pc;
+}
+
+static inline uint arch_curr_cpu_num(void) {
+#if WITH_SMP
+    const struct arm64_percpu *pc = arm64_get_percpu();
+    return pc->cpu_num;
+#else
+    return 0;
+#endif
+}
+
+__END_CDECLS
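The heart of the change is the header above: a pointer to each cpu's struct arm64_percpu lives permanently in x18, which rules.mk (below) reserves with -ffixed-x18 so compiled code never allocates it; the context-switch and exception-return paths skip x18 for the same reason. A minimal usage sketch, assuming kernel context on a cpu whose entry path has already called arm64_set_percpu(), and assuming LK's usual dprintf is in scope; the cast is only there to keep the format string portable:

    #include <arch/arm64/mp.h>

    // Identify the current cpu without touching a system register:
    // both values come straight out of the x18-based percpu record.
    static void log_cpu_identity(void) {
        struct arm64_percpu *pc = arm64_get_percpu();
        uint cpu = arch_curr_cpu_num();   // reads pc->cpu_num on SMP builds
        dprintf(INFO, "cpu %u mpidr %#llx\n", cpu,
                (unsigned long long)pc->mpidr);
    }

Compared with the deleted MPIDR-based arch_curr_cpu_num(), this makes the cpu number a plain load and frees the kernel from assuming any particular affinity-field layout.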
diff --git a/arch/arm64/mp.c b/arch/arm64/mp.c
index ac2b72c7..5e44ce25 100644
--- a/arch/arm64/mp.c
+++ b/arch/arm64/mp.c
@@ -5,12 +5,21 @@
  * license that can be found in the LICENSE file or at
  * https://opensource.org/licenses/MIT
  */
-#include
+#include "arch/arm64/mp.h"
+#include
+#include
 #include
+#include
 #include
+#include
+#include
 #include
 #include
+#include
+#include
+
+#include "arm64_priv.h"
 
 #if WITH_DEV_INTERRUPT_ARM_GIC
 #include
 #endif
@@ -25,6 +34,11 @@
 extern void bcm28xx_send_ipi(uint irq, uint cpu_mask);
 
 #define GIC_IPI_BASE (14)
 
+// percpu structures for the boot cpu and secondaries
+static struct arm64_percpu boot_percpu;
+static struct arm64_percpu *secondary_percpu;
+static uint secondaries_to_init = 0;
+
 status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) {
     LTRACEF("target 0x%x, ipi %u\n", target, ipi);
@@ -60,10 +74,79 @@ static enum handler_return arm_ipi_reschedule_handler(void *arg) {
     return mp_mbx_reschedule_irq();
 }
 
-void arch_mp_init_percpu(void) {
+void arm64_mp_init_percpu(void) {
     register_int_handler(MP_IPI_GENERIC + GIC_IPI_BASE, &arm_ipi_generic_handler, 0);
     register_int_handler(MP_IPI_RESCHEDULE + GIC_IPI_BASE, &arm_ipi_reschedule_handler, 0);
 
     // unmask_interrupt(MP_IPI_GENERIC + GIC_IPI_BASE);
     // unmask_interrupt(MP_IPI_RESCHEDULE + GIC_IPI_BASE);
 }
+
+// Special case, called from start.S code on the boot cpu, which will always be numbered 0
+// called from assembly
+void arm64_init_boot_percpu(void);
+void arm64_init_boot_percpu(void) {
+    arm64_set_percpu(&boot_percpu);
+    boot_percpu.cpu_num = 0;
+    boot_percpu.mpidr = ARM64_READ_SYSREG(mpidr_el1);
+}
+
+static void arm64_init_secondary_percpu(uint cpu_num) {
+    // If we're out of range, just hang
+    if (cpu_num > secondaries_to_init) {
+        for (;;) {
+            __asm__ volatile("wfi");
+        }
+    }
+
+    struct arm64_percpu *percpu = &secondary_percpu[cpu_num - 1];
+    arm64_set_percpu(percpu);
+    percpu->cpu_num = cpu_num;
+    percpu->mpidr = ARM64_READ_SYSREG(mpidr_el1);
+}
+
+void arm64_set_secondary_cpu_count(int count) {
+    secondaries_to_init = count;
+
+    DEBUG_ASSERT(secondary_percpu == NULL);
+
+    // clamp the secondary cpu count to SMP_MAX_CPUS - 1
+    if (secondaries_to_init > (SMP_MAX_CPUS - 1)) {
+        dprintf(INFO, "ARM64: clamping secondary cpu count from %u to %d\n", secondaries_to_init, SMP_MAX_CPUS - 1);
+        secondaries_to_init = SMP_MAX_CPUS - 1;
+    }
+
+    // Allocate percpu structures for the secondaries
+    if (secondaries_to_init > 0) {
+        const size_t len = sizeof(struct arm64_percpu) * secondaries_to_init;
+        secondary_percpu = memalign(CACHE_LINE, len);
+        DEBUG_ASSERT(secondary_percpu);
+        memset(secondary_percpu, 0, len);
+    }
+}
+
+void arm64_mp_init(void) {
+    arm64_mp_init_percpu();
+}
+
+/* called from assembly */
+void arm64_secondary_entry(ulong);
+void arm64_secondary_entry(ulong asm_cpu_num) {
+    arm64_init_secondary_percpu(asm_cpu_num);
+
+    uint cpu = arch_curr_cpu_num();
+    if (cpu != asm_cpu_num) {
+        return;
+    }
+
+    arm64_early_init_percpu();
+
+    /* run early secondary cpu init routines up to the threading level */
+    lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
+
+    arm64_mp_init_percpu();
+
+    LTRACEF("cpu num %d\n", cpu);
+
+    lk_secondary_cpu_entry();
+}
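A note on the indexing scheme mp.c establishes: the boot cpu always owns the static boot_percpu record and is cpu 0, while secondary cpu N (N >= 1) lives at secondary_percpu[N - 1] in the cache-line-aligned array. That means arm64_set_secondary_cpu_count() must run on the boot cpu before any secondary is released; a secondary that arrives with no record allocated (or out of range) parks itself in the wfi loop rather than indexing unowned memory. A hedged sketch of the expected ordering, with the count of 4 purely illustrative:

    // boot cpu, before any secondaries are started:
    uint cpu_count = 4;                           // e.g. parsed from the device tree
    arm64_set_secondary_cpu_count(cpu_count - 1); // allocates secondary_percpu[]
    lk_init_secondary_cpus(cpu_count - 1);        // upper-layer bookkeeping

    // later, core 2 enters arm64_secondary_entry(2) from start.S:
    //   x18 = &secondary_percpu[1], so arch_curr_cpu_num() returns 2

The memalign(CACHE_LINE, ...) allocation plus the __CPU_ALIGN attribute on the struct keep each record on its own cache line, so cpus never false-share their percpu state.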
diff --git a/arch/arm64/rules.mk b/arch/arm64/rules.mk
index f4145af5..b944e5b3 100644
--- a/arch/arm64/rules.mk
+++ b/arch/arm64/rules.mk
@@ -18,17 +18,13 @@ MODULE_SRCS += \
 	$(LOCAL_DIR)/start.S \
 	$(LOCAL_DIR)/cache-ops.S \
 
-# if its requested we build with SMP, default to 4 cpus
+# if it's requested we build with SMP, default to 8 cpus
 ifeq (true,$(call TOBOOL,$(WITH_SMP)))
-SMP_MAX_CPUS ?= 4
-SMP_CPU_CLUSTER_SHIFT ?= 8
-SMP_CPU_ID_BITS ?= 24 # Ignore aff3 bits for now since they are not next to aff2
+SMP_MAX_CPUS ?= 8
 
 GLOBAL_DEFINES += \
     WITH_SMP=1 \
-    SMP_MAX_CPUS=$(SMP_MAX_CPUS) \
-    SMP_CPU_CLUSTER_SHIFT=$(SMP_CPU_CLUSTER_SHIFT) \
-    SMP_CPU_ID_BITS=$(SMP_CPU_ID_BITS)
+    SMP_MAX_CPUS=$(SMP_MAX_CPUS)
 
 MODULE_SRCS += \
     $(LOCAL_DIR)/mp.c
@@ -105,6 +101,7 @@ include $(LOCAL_DIR)/toolchain.mk
 TOOLCHAIN_PREFIX := $(ARCH_$(ARCH)_TOOLCHAIN_PREFIX)
 
 ARCH_COMPILEFLAGS += $(ARCH_$(ARCH)_COMPILEFLAGS)
+ARCH_COMPILEFLAGS += -ffixed-x18
 ARCH_COMPILEFLAGS += -fno-omit-frame-pointer
 ARCH_COMPILEFLAGS_NOFLOAT := -mgeneral-regs-only
 ARCH_COMPILEFLAGS_FLOAT :=
@@ -140,4 +137,6 @@ $(BUILDDIR)/system-onesegment.ld: $(LOCAL_DIR)/system-onesegment.ld $(wildcard a
 linkerscript.phony:
 .PHONY: linkerscript.phony
 
+MODULE_OPTIONS := extra_warnings
+
 include make/module.mk
diff --git a/arch/arm64/start.S b/arch/arm64/start.S
index 6c04a85c..2a775b99 100644
--- a/arch/arm64/start.S
+++ b/arch/arm64/start.S
@@ -61,7 +61,7 @@ arm_reset:
 #if WITH_SMP
     /* if the cpu id is != 0 it's a secondary cpu */
     mrs cpuid, mpidr_el1
-    ubfx cpuid, cpuid, #0, #SMP_CPU_ID_BITS
+    ubfx cpuid, cpuid, #0, #24 /* ignore aff3 bits for now since they are not next to aff2 */
 
 #if WITH_KERNEL_VM
     cbnz cpuid, .Lmmu_enable_secondary
@@ -382,22 +382,24 @@ arm_reset:
     cbnz tmp2, .L__bss_loop
 .L__bss_loop_done:
 
+#if WITH_SMP
+    bl arm64_init_boot_percpu
+#endif
+
     /* load the boot args we had saved previously */
     adrp tmp, arm64_boot_args
     add tmp, tmp, :lo12:arm64_boot_args
     ldp x0, x1, [tmp], #16
     ldp x2, x3, [tmp]
 
-	bl lk_main
-	b .
+    bl lk_main
+    b .
 
 #if WITH_SMP
 .Lsecondary_boot:
-    and tmp, cpuid, #0xff
-    cmp tmp, #(1 << SMP_CPU_CLUSTER_SHIFT)
-    bge .Lunsupported_cpu_trap
-    bic cpuid, cpuid, #0xff
-    orr cpuid, tmp, cpuid, LSR #(8 - SMP_CPU_CLUSTER_SHIFT)
+    // if we came in from PSCI x0 has the cpu number
+    // TODO: more cleanly handle other boot paths
+    mov cpuid, x0
 
     cmp cpuid, #SMP_MAX_CPUS
     bge .Lunsupported_cpu_trap
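The rewritten .Lsecondary_boot drops the MPIDR affinity arithmetic because it now assumes the core was started via PSCI CPU_ON, which copies the caller-supplied context_id argument into x0 at the entry point; the boot cpu passes the logical cpu number as that context_id. A sketch of the matching call, where psci_cpu_on() stands in for whatever SMC wrapper the platform uses and kernel_entry_paddr is illustrative (an assumed helper and name, not something this patch adds):

    // Bring-up loop on the boot cpu: target each core by MPIDR and pass its
    // logical cpu number as the PSCI context_id, which lands in x0 in start.S.
    for (uint cpu = 1; cpu < cpu_count; cpu++) {
        ulong target_mpidr = cpu;   // assumes cores are numbered MPIDR 0:0:0:N
        psci_cpu_on(target_mpidr, kernel_entry_paddr, /*context_id=*/cpu);
    }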
diff --git a/arch/arm64/thread.c b/arch/arm64/thread.c
index c93526e0..0f71288b 100644
--- a/arch/arm64/thread.c
+++ b/arch/arm64/thread.c
@@ -6,6 +6,7 @@
  * https://opensource.org/licenses/MIT
  */
 #include
+#include <assert.h>
 #include
 #include
 #include
@@ -16,11 +17,8 @@
 #define LOCAL_TRACE 0
 
 struct context_switch_frame {
-    vaddr_t lr;
-    vaddr_t pad;                // Padding to keep frame size a multiple of
     vaddr_t tpidr_el0;          // sp alignment requirements (16 bytes)
     vaddr_t tpidrro_el0;
-    vaddr_t r18;
     vaddr_t r19;
     vaddr_t r20;
     vaddr_t r21;
@@ -32,7 +30,9 @@
     vaddr_t r27;
     vaddr_t r28;
     vaddr_t r29;
+    vaddr_t lr;                 // x30
 };
+static_assert(sizeof(struct context_switch_frame) % 16 == 0, "context_switch_frame size must be multiple of 16");
 
 static void initial_thread_func(void) __NO_RETURN;
 static void initial_thread_func(void) {
diff --git a/arch/include/arch/mp.h b/arch/include/arch/mp.h
index 7312b757..068ce3e0 100644
--- a/arch/include/arch/mp.h
+++ b/arch/include/arch/mp.h
@@ -16,6 +16,4 @@ __BEGIN_CDECLS
 
 /* send inter processor interrupt, if supported */
 status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi);
 
-void arch_mp_init_percpu(void);
-
 __END_CDECLS
diff --git a/lib/fdtwalk/helpers.cpp b/lib/fdtwalk/helpers.cpp
index 4a6965fc..ceb8621b 100644
--- a/lib/fdtwalk/helpers.cpp
+++ b/lib/fdtwalk/helpers.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <lk/init.h>
 #include
 #include
 #include
@@ -27,6 +28,9 @@
 #if ARCH_ARM || ARCH_ARM64
 #include
 #endif
+#if ARCH_ARM64
+#include <arch/arm64/mp.h>
+#endif
 #if WITH_DEV_BUS_PCI
 #include
 #endif
@@ -217,6 +221,15 @@ status_t fdtwalk_setup_cpus_arm(const void *fdt) {
 
     LTRACEF("booting %zu cpus\n", cpu_count);
 
+    // TODO: revamp the ARM32 path so we do not need the special case here
+#if ARCH_ARM64
+    // tell the arm64 layer how many cores we have to start
+    arm64_set_secondary_cpu_count(cpu_count - 1);
+
+    // have the upper layer prepare for the secondary cpus
+    lk_init_secondary_cpus(cpu_count - 1);
+#endif
+
     /* boot the secondary cpus using the Power State Coordination Interface */
     for (size_t i = 1; i < cpu_count; i++) {
         /* note: assumes cpuids are numbered like MPIDR 0:0:0:N */
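One cross-check worth spelling out: the new "stay in sync" comments tie struct context_switch_frame in thread.c to the push/pop order in asm.S. LK's push stores the first-named register at the lower address and the pairs are pushed last to first, so the final push (the tpidr pair) lands at offset 0 and the frame reads tpidr_el0, tpidrro_el0, x19..x29, lr from the stack pointer up. A sketch of the offsets this implies, assuming an 8-byte vaddr_t; these asserts are illustrative and would only compile inside thread.c, where the struct is visible:

    #include <assert.h>
    #include <stddef.h>

    // 14 slots of 8 bytes = 0x70, a multiple of 16 as required for sp.
    static_assert(offsetof(struct context_switch_frame, tpidr_el0) == 0x00, "");
    static_assert(offsetof(struct context_switch_frame, r19)       == 0x10, "");
    static_assert(offsetof(struct context_switch_frame, r29)       == 0x60, "");
    static_assert(offsetof(struct context_switch_frame, lr)        == 0x68, "");
    static_assert(sizeof(struct context_switch_frame)  == 0x70, "");

Dropping x18 from the frame (and padding with the lr slot moved to the end) is what lets the per-cpu pointer survive context switches untouched.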