From 91128ad729f3efcdb3baadfa6966c2fc3f4a4238 Mon Sep 17 00:00:00 2001
From: Travis Geiselbrecht
Date: Sat, 4 Oct 2025 14:32:50 -0700
Subject: [PATCH] [arch][arm64] clean up how secondary cpus are initialized and tracked

- Add a percpu structure for each cpu, akin to x86-64 and riscv. It is
  pointed to by x18, which is now reserved for this in the kernel.
  Tweaked the exception and context switch routines to leave x18 alone.
- Remove the cpu-trapping spinlock logic, which is probably unused in
  mainline. (A new version of it can be added back if necessary.)
- Switch the fdtwalk helper to the newer, cleaner way of initializing
  secondaries, passing the cpu number via the PSCI CPU_ON argument,
  which should be fairly standard on modern implementations. (Possibly
  an issue with old firmware.)
- Remove the notion of computing the cpu ID from the Affinity levels,
  which doesn't work properly on modern ARM CPUs, since they have more
  or less abandoned the logical meaning of AFFn.
---
 arch/arm/arm/arch.c                |  1 +
 arch/arm/arm/include/arch/arm/mp.h | 16 ++++++
 arch/arm64/arch.c                  | 56 ++-----------------
 arch/arm64/arm64_priv.h            | 25 +++++++++
 arch/arm64/asm.S                   | 41 +++++++-------
 arch/arm64/exceptions.S            |  4 +-
 arch/arm64/include/arch/arch_ops.h | 12 +----
 arch/arm64/include/arch/arm64.h    |  2 +-
 arch/arm64/include/arch/arm64/mp.h | 50 +++++++++++++++++
 arch/arm64/mp.c                    | 87 +++++++++++++++++++++++++++++-
 arch/arm64/rules.mk                | 13 +++--
 arch/arm64/start.S                 | 18 ++++---
 arch/arm64/thread.c                |  6 +--
 arch/include/arch/mp.h             |  2 -
 lib/fdtwalk/helpers.cpp            | 13 +++++
 15 files changed, 240 insertions(+), 106 deletions(-)
 create mode 100644 arch/arm/arm/include/arch/arm/mp.h
 create mode 100644 arch/arm64/arm64_priv.h
 create mode 100644 arch/arm64/include/arch/arm64/mp.h

diff --git a/arch/arm/arm/arch.c b/arch/arm/arm/arch.c
index ffe82cba..ae9fb49d 100644
--- a/arch/arm/arm/arch.c
+++ b/arch/arm/arm/arch.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/arch/arm/arm/include/arch/arm/mp.h b/arch/arm/arm/include/arch/arm/mp.h
new file mode 100644
index 00000000..d375b68c
--- /dev/null
+++ b/arch/arm/arm/include/arch/arm/mp.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2025 Travis Geiselbrecht
+ *
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT
+ */
+#pragma once
+
+#include
+
+__BEGIN_CDECLS
+
+void arch_mp_init_percpu(void);
+
+__END_CDECLS
diff --git a/arch/arm64/arch.c b/arch/arm64/arch.c
index 140dc00c..46d03414 100644
--- a/arch/arm64/arch.c
+++ b/arch/arm64/arch.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -20,16 +21,12 @@
 #include
 #include
 
+#include "arm64_priv.h"
+
 #define LOCAL_TRACE 0
 
-#if WITH_SMP
-/* smp boot lock */
-static spin_lock_t arm_boot_cpu_lock = 1;
-static volatile int secondaries_to_init = 0;
-#endif
-
 // initial setup per cpu immediately after entering C code
-static void arm64_early_init_percpu(void) {
+void arm64_early_init_percpu(void) {
     // set the vector base
     ARM64_WRITE_SYSREG(VBAR_EL1, (uint64_t)&arm64_exception_base);
 
@@ -75,21 +72,7 @@ void arch_early_init(void) {
 
 // called after the kernel has been initialized and threading is enabled on the boot cpu
 void arch_init(void) {
 #if WITH_SMP
-    arch_mp_init_percpu();
-
-    LTRACEF("midr_el1 0x%llx\n", ARM64_READ_SYSREG(midr_el1));
-
-    secondaries_to_init = SMP_MAX_CPUS - 1; /* TODO: get count from somewhere else, or add cpus as they boot */
-
-    lk_init_secondary_cpus(secondaries_to_init);
-
-    LTRACEF("releasing %d secondary cpus\n", secondaries_to_init);
-
-    /* release the secondary cpus */
-    spin_unlock(&arm_boot_cpu_lock);
-
-    /* flush the release of the lock, since the secondary cpus are running without cache on */
-    arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock));
+    arm64_mp_init();
 #endif
 }
 
@@ -162,32 +145,3 @@ void arch_stacktrace(uint64_t fp, uint64_t pc) {
         frame.fp = *((uint64_t *)frame.fp);
     }
 }
-
-#if WITH_SMP
-/* called from assembly */
-void arm64_secondary_entry(ulong);
-void arm64_secondary_entry(ulong asm_cpu_num) {
-    uint cpu = arch_curr_cpu_num();
-    if (cpu != asm_cpu_num) {
-        return;
-    }
-
-    arm64_early_init_percpu();
-
-    spin_lock(&arm_boot_cpu_lock);
-    spin_unlock(&arm_boot_cpu_lock);
-
-    /* run early secondary cpu init routines up to the threading level */
-    lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
-
-    arch_mp_init_percpu();
-
-    LTRACEF("cpu num %d\n", cpu);
-
-    /* we're done, tell the main cpu we're up */
-    atomic_add(&secondaries_to_init, -1);
-    __asm__ volatile("sev");
-
-    lk_secondary_cpu_entry();
-}
-#endif
diff --git a/arch/arm64/arm64_priv.h b/arch/arm64/arm64_priv.h
new file mode 100644
index 00000000..c35644d6
--- /dev/null
+++ b/arch/arm64/arm64_priv.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2025 Travis Geiselbrecht
+ *
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT
+ */
+#pragma once
+
+#include
+#include
+
+__BEGIN_CDECLS
+
+void arm64_early_init_percpu(void);
+
+// void riscv_early_init_percpu(void);
+// void riscv_init_percpu(void);
+// void riscv_boot_secondaries(void);
+// void riscv_configure_percpu_mp_early(uint hart_id, uint cpu_num);
+// void riscv_early_mmu_init(void);
+// void riscv_mmu_init(void);
+// void riscv_mmu_init_secondaries(void);
+
+__END_CDECLS
diff --git a/arch/arm64/asm.S b/arch/arm64/asm.S
index a5dd607a..1bb28114 100644
--- a/arch/arm64/asm.S
+++ b/arch/arm64/asm.S
@@ -8,19 +8,21 @@
 #include
 #include
 
+// stay in sync with arm64/thread.c arm64_context_switch()
+
 /* void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp); */
 FUNCTION(arm64_context_switch)
     /* save old frame */
-    push x28, x29
-    push x26, x27
-    push x24, x25
-    push x22, x23
-    push x20, x21
-    push x18, x19
-    mrs  x18, tpidr_el0
-    mrs  x19, tpidrro_el0
-    push x18, x19
-    push x30, xzr
+    push x29, lr
+    push x27, x28
+    push x25, x26
+    push x23, x24
+    push x21, x22
+    push x19, x20
+    // skip x18, it is our per cpu pointer
+    mrs  x16, tpidr_el0
+    mrs  x17, tpidrro_el0
+    push x16, x17
 
     /* save old sp */
     mov  x15, sp
@@ -30,16 +32,15 @@ FUNCTION(arm64_context_switch)
     mov  sp, x1
 
     /* restore new frame */
-    pop  x30, xzr
-    pop  x18, x19
-    msr  tpidr_el0, x18
-    msr  tpidrro_el0, x19
-    pop  x18, x19
-    pop  x20, x21
-    pop  x22, x23
-    pop  x24, x25
-    pop  x26, x27
-    pop  x28, x29
+    pop  x16, x17
+    msr  tpidr_el0, x16
+    msr  tpidrro_el0, x17
+    pop  x19, x20
+    pop  x21, x22
+    pop  x23, x24
+    pop  x25, x26
+    pop  x27, x28
+    pop  x29, lr
 
     ret
 
diff --git a/arch/arm64/exceptions.S b/arch/arm64/exceptions.S
index 57a691b7..f5943d9f 100644
--- a/arch/arm64/exceptions.S
+++ b/arch/arm64/exceptions.S
@@ -15,6 +15,8 @@
 #define regsave_long_offset 0xf0
 #define regsave_short_offset 0x90
 
+// NOTE: stay in sync with arm64_iframe_long et al. in include/arch/arm64.h
+
 .macro regsave_long
 sub sp, sp, #32
 push x28, x29
@@ -72,7 +74,7 @@ pop x10, x11
 pop x12, x13
 pop x14, x15
 pop x16, x17
-pop x18, x19
+pop xzr, x19 // Do not restore x18, it is our per cpu pointer
 pop x20, x21
 pop x22, x23
 pop x24, x25
diff --git a/arch/arm64/include/arch/arch_ops.h b/arch/arm64/include/arch/arch_ops.h
index e69482b3..7c0889b4 100644
--- a/arch/arm64/include/arch/arch_ops.h
+++ b/arch/arm64/include/arch/arch_ops.h
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 
 __BEGIN_CDECLS
 
@@ -89,17 +90,6 @@ static inline void arch_set_current_thread(struct thread *t) {
     ARM64_WRITE_SYSREG(tpidr_el1, (uint64_t)t);
 }
 
-#if WITH_SMP
-static inline uint arch_curr_cpu_num(void) {
-    uint64_t mpidr = ARM64_READ_SYSREG(mpidr_el1);
-    return ((mpidr & ((1U << SMP_CPU_ID_BITS) - 1)) >> 8 << SMP_CPU_CLUSTER_SHIFT) | (mpidr & 0xff);
-}
-#else
-static inline uint arch_curr_cpu_num(void) {
-    return 0;
-}
-#endif
-
 __END_CDECLS
 
 #endif // ASSEMBLY
diff --git a/arch/arm64/include/arch/arm64.h b/arch/arm64/include/arch/arm64.h
index 2ae97499..67a170d8 100644
--- a/arch/arm64/include/arch/arm64.h
+++ b/arch/arm64/include/arch/arm64.h
@@ -62,7 +62,7 @@ void arm64_fpu_exception(struct arm64_iframe_long *iframe);
 void arm64_fpu_save_state(struct thread *thread);
 
 static inline void arm64_fpu_pre_context_switch(struct thread *thread) {
-    uint32_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
+    uint64_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
     if ((cpacr >> 20) & 3) {
         arm64_fpu_save_state(thread);
         cpacr &= ~(3 << 20);
diff --git a/arch/arm64/include/arch/arm64/mp.h b/arch/arm64/include/arch/arm64/mp.h
new file mode 100644
index 00000000..4a2668b9
--- /dev/null
+++ b/arch/arm64/include/arch/arm64/mp.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025 Travis Geiselbrecht
+ *
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+__BEGIN_CDECLS
+
+void arm64_mp_init(void);
+void arm64_mp_init_percpu(void);
+
+// Tell the ARM64 code how many secondary cpus to expect, which
+// will cause it to allocate percpu structures for them.
+void arm64_set_secondary_cpu_count(int count);
+
+struct arm64_percpu {
+    uint cpu_num;
+    uint64_t mpidr;
+} __CPU_ALIGN;
+
+static inline void arm64_set_percpu(struct arm64_percpu *pc) {
+    __asm__ volatile("mov x18, %0" ::"r"(pc));
+}
+
+static inline struct arm64_percpu *arm64_get_percpu(void) {
+    struct arm64_percpu *pc;
+    __asm__ volatile("mov %0, x18" : "=r"(pc));
+    return pc;
+}
+
+static inline uint arch_curr_cpu_num(void) {
+#if WITH_SMP
+    const struct arm64_percpu *pc = arm64_get_percpu();
+    return pc->cpu_num;
+#else
+    return 0;
+#endif
+}
+
+__END_CDECLS
diff --git a/arch/arm64/mp.c b/arch/arm64/mp.c
index ac2b72c7..5e44ce25 100644
--- a/arch/arm64/mp.c
+++ b/arch/arm64/mp.c
@@ -5,12 +5,21 @@
  * license that can be found in the LICENSE file or at
  * https://opensource.org/licenses/MIT
  */
-#include
+#include "arch/arm64/mp.h"
+#include
+#include
 #include
+#include
 #include
+#include
+#include
 #include
 #include
+#include
+#include
+
+#include "arm64_priv.h"
 
 #if WITH_DEV_INTERRUPT_ARM_GIC
 #include
 
@@ -25,6 +34,11 @@ extern void bcm28xx_send_ipi(uint irq, uint cpu_mask);
 
 #define GIC_IPI_BASE (14)
 
+// percpu structures for the boot cpu and secondaries
+static struct arm64_percpu boot_percpu;
+static struct arm64_percpu *secondary_percpu;
+static uint secondaries_to_init = 0;
+
 status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) {
     LTRACEF("target 0x%x, ipi %u\n", target, ipi);
 
@@ -60,10 +74,79 @@ static enum handler_return arm_ipi_reschedule_handler(void *arg) {
     return mp_mbx_reschedule_irq();
 }
 
-void arch_mp_init_percpu(void) {
+void arm64_mp_init_percpu(void) {
     register_int_handler(MP_IPI_GENERIC + GIC_IPI_BASE, &arm_ipi_generic_handler, 0);
     register_int_handler(MP_IPI_RESCHEDULE + GIC_IPI_BASE, &arm_ipi_reschedule_handler, 0);
 
     // unmask_interrupt(MP_IPI_GENERIC + GIC_IPI_BASE);
     // unmask_interrupt(MP_IPI_RESCHEDULE + GIC_IPI_BASE);
 }
+
+// Special case, called from start.S code on the boot cpu, which will always be numbered 0
+// called from assembly
+void arm64_init_boot_percpu(void);
+void arm64_init_boot_percpu(void) {
+    arm64_set_percpu(&boot_percpu);
+    boot_percpu.cpu_num = 0;
+    boot_percpu.mpidr = ARM64_READ_SYSREG(mpidr_el1);
+}
+
+static void arm64_init_secondary_percpu(uint cpu_num) {
+    // If we're out of range, just hang
+    if (cpu_num > secondaries_to_init) {
+        for (;;) {
+            __asm__ volatile("wfi");
+        }
+    }
+
+    struct arm64_percpu *percpu = &secondary_percpu[cpu_num - 1];
+    arm64_set_percpu(percpu);
+    percpu->cpu_num = cpu_num;
+    percpu->mpidr = ARM64_READ_SYSREG(mpidr_el1);
+}
+
+void arm64_set_secondary_cpu_count(int count) {
+    secondaries_to_init = count;
+
+    DEBUG_ASSERT(secondary_percpu == NULL);
+
+    // clamp the secondary cpu count to SMP_MAX_CPUS - 1
+    if (secondaries_to_init > (SMP_MAX_CPUS - 1)) {
+        dprintf(INFO, "ARM64: clamping secondary cpu count from %d to %d\n", secondaries_to_init, SMP_MAX_CPUS - 1);
+        secondaries_to_init = SMP_MAX_CPUS - 1;
+    }
+
+    // Allocate percpu structures for the secondaries
+    if (secondaries_to_init > 0) {
+        const size_t len = sizeof(struct arm64_percpu) * secondaries_to_init;
+        secondary_percpu = memalign(CACHE_LINE, len);
+        DEBUG_ASSERT(secondary_percpu);
+        memset(secondary_percpu, 0, len);
+    }
+}
+
+void arm64_mp_init(void) {
+    arm64_mp_init_percpu();
+}
+
+/* called from assembly */
+void arm64_secondary_entry(ulong);
+void arm64_secondary_entry(ulong asm_cpu_num) {
+    arm64_init_secondary_percpu(asm_cpu_num);
+
+    uint cpu = arch_curr_cpu_num();
+    if (cpu != asm_cpu_num) {
+        return;
+    }
+
+    arm64_early_init_percpu();
+
+    /* run early secondary cpu init routines up to the threading level */
+    lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
+
+    arm64_mp_init_percpu();
+
+    LTRACEF("cpu num %d\n", cpu);
+
+    lk_secondary_cpu_entry();
+}
diff --git a/arch/arm64/rules.mk b/arch/arm64/rules.mk
index f4145af5..b944e5b3 100644
--- a/arch/arm64/rules.mk
+++ b/arch/arm64/rules.mk
@@ -18,17 +18,13 @@ MODULE_SRCS += \
     $(LOCAL_DIR)/start.S \
     $(LOCAL_DIR)/cache-ops.S \
 
-# if its requested we build with SMP, default to 4 cpus
+# if its requested we build with SMP, default to 8 cpus
 ifeq (true,$(call TOBOOL,$(WITH_SMP)))
-SMP_MAX_CPUS ?= 4
-SMP_CPU_CLUSTER_SHIFT ?= 8
-SMP_CPU_ID_BITS ?= 24 # Ignore aff3 bits for now since they are not next to aff2
+SMP_MAX_CPUS ?= 8
 
 GLOBAL_DEFINES += \
     WITH_SMP=1 \
-    SMP_MAX_CPUS=$(SMP_MAX_CPUS) \
-    SMP_CPU_CLUSTER_SHIFT=$(SMP_CPU_CLUSTER_SHIFT) \
-    SMP_CPU_ID_BITS=$(SMP_CPU_ID_BITS)
+    SMP_MAX_CPUS=$(SMP_MAX_CPUS)
 
 MODULE_SRCS += \
     $(LOCAL_DIR)/mp.c
@@ -105,6 +101,7 @@
 include $(LOCAL_DIR)/toolchain.mk
 TOOLCHAIN_PREFIX := $(ARCH_$(ARCH)_TOOLCHAIN_PREFIX)
 ARCH_COMPILEFLAGS += $(ARCH_$(ARCH)_COMPILEFLAGS)
+ARCH_COMPILEFLAGS += -ffixed-x18
 ARCH_COMPILEFLAGS += -fno-omit-frame-pointer
 ARCH_COMPILEFLAGS_NOFLOAT := -mgeneral-regs-only
 ARCH_COMPILEFLAGS_FLOAT :=
@@ -140,4 +137,6 @@ $(BUILDDIR)/system-onesegment.ld: $(LOCAL_DIR)/system-onesegment.ld $(wildcard a
 linkerscript.phony:
 .PHONY: linkerscript.phony
 
+MODULE_OPTIONS := extra_warnings
+
 include make/module.mk
diff --git a/arch/arm64/start.S b/arch/arm64/start.S
index 6c04a85c..2a775b99 100644
--- a/arch/arm64/start.S
+++ b/arch/arm64/start.S
@@ -61,7 +61,7 @@ arm_reset:
 #if WITH_SMP
     /* if the cpu id is != 0 it's a secondary cpu */
     mrs     cpuid, mpidr_el1
-    ubfx    cpuid, cpuid, #0, #SMP_CPU_ID_BITS
+    ubfx    cpuid, cpuid, #0, #24 /* ignore aff3 bits for now since they are not next to aff2 */
 
 #if WITH_KERNEL_VM
     cbnz    cpuid, .Lmmu_enable_secondary
@@ -382,22 +382,24 @@ arm_reset:
     cbnz    tmp2, .L__bss_loop
 .L__bss_loop_done:
 
+#if WITH_SMP
+    bl      arm64_init_boot_percpu
+#endif
+
     /* load the boot args we had saved previously */
     adrp    tmp, arm64_boot_args
     add     tmp, tmp, :lo12:arm64_boot_args
     ldp     x0, x1, [tmp], #16
     ldp     x2, x3, [tmp]
-    bl  lk_main
-    b   .
+    bl      lk_main
+    b       .
 
 #if WITH_SMP
 .Lsecondary_boot:
-    and     tmp, cpuid, #0xff
-    cmp     tmp, #(1 << SMP_CPU_CLUSTER_SHIFT)
-    bge     .Lunsupported_cpu_trap
-    bic     cpuid, cpuid, #0xff
-    orr     cpuid, tmp, cpuid, LSR #(8 - SMP_CPU_CLUSTER_SHIFT)
+    // if we came in from PSCI x0 has the cpu number
+    // TODO: more cleanly handle other boot paths
+    mov     cpuid, x0
 
     cmp     cpuid, #SMP_MAX_CPUS
     bge     .Lunsupported_cpu_trap
 
diff --git a/arch/arm64/thread.c b/arch/arm64/thread.c
index c93526e0..0f71288b 100644
--- a/arch/arm64/thread.c
+++ b/arch/arm64/thread.c
@@ -6,6 +6,7 @@
  * https://opensource.org/licenses/MIT
  */
 #include
+#include
 #include
 #include
 #include
@@ -16,11 +17,8 @@
 #define LOCAL_TRACE 0
 
 struct context_switch_frame {
-    vaddr_t lr;
-    vaddr_t pad; // Padding to keep frame size a multiple of
     vaddr_t tpidr_el0; // sp alignment requirements (16 bytes)
     vaddr_t tpidrro_el0;
-    vaddr_t r18;
     vaddr_t r19;
     vaddr_t r20;
     vaddr_t r21;
@@ -32,7 +30,9 @@
     vaddr_t r27;
     vaddr_t r28;
     vaddr_t r29;
+    vaddr_t lr; // x30
 };
+static_assert(sizeof(struct context_switch_frame) % 16 == 0, "context_switch_frame size must be multiple of 16");
 
 static void initial_thread_func(void) __NO_RETURN;
 static void initial_thread_func(void) {
diff --git a/arch/include/arch/mp.h b/arch/include/arch/mp.h
index 7312b757..068ce3e0 100644
--- a/arch/include/arch/mp.h
+++ b/arch/include/arch/mp.h
@@ -16,6 +16,4 @@ __BEGIN_CDECLS
 
 /* send inter processor interrupt, if supported */
 status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi);
-void arch_mp_init_percpu(void);
-
 __END_CDECLS
diff --git a/lib/fdtwalk/helpers.cpp b/lib/fdtwalk/helpers.cpp
index 4a6965fc..ceb8621b 100644
--- a/lib/fdtwalk/helpers.cpp
+++ b/lib/fdtwalk/helpers.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -27,6 +28,9 @@
 #if ARCH_ARM || ARCH_ARM64
 #include
 #endif
+#if ARCH_ARM64
+#include
+#endif
 #if WITH_DEV_BUS_PCI
 #include
 #endif
@@ -217,6 +221,15 @@ status_t fdtwalk_setup_cpus_arm(const void *fdt) {
 
     LTRACEF("booting %zu cpus\n", cpu_count);
 
+    // TODO: revamp the ARM32 path so we do not need the special case here
+#if ARCH_ARM64
+    // tell the arm64 layer how many cores we have to start
+    arm64_set_secondary_cpu_count(cpu_count - 1);
+
+    // have the upper layer prepare for the secondary cpus
+    lk_init_secondary_cpus(cpu_count - 1);
+#endif
+
     /* boot the secondary cpus using the Power State Coordintion Interface */
     for (size_t i = 1; i < cpu_count; i++) {
         /* note: assumes cpuids are numbered like MPIDR 0:0:0:N */