[arch][arm64] clean up how secondary cpus are initialized and tracked

- Add a percpu structure for each cpu, akin to x86-64 and riscv. It is
  pointed to by x18, which is now reserved for this purpose in the
  kernel. Tweaked the exception and context switch routines to leave
  x18 alone.
- Remove the cpu-trapping spinlock logic, which is probably unused in
  mainline. (A new version of it can be added back if necessary.)
- Switch the fdtwalk helper to the newer, cleaner way of initializing
  secondaries via the PSCI CPU_ON context argument, which should be
  fairly standard on modern implementations. (Possibly an issue with
  old firmware; see the sketch after this list.)
- Remove the notion of computing the cpu ID from the MPIDR affinity
  levels, which doesn't work reliably on modern ARM CPUs, since they
  have more or less abandoned the logical meaning of AFFn. (A worked
  example follows below.)
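
For reference, the handoff the CPU_ON path relies on looks roughly like
the sketch below. This is illustrative only: psci_smc_call() is a
hypothetical trampoline name, and lk's real wrapper lives in
dev/power/psci with a possibly different signature. The key point is the
third CPU_ON argument (the context id), which firmware hands to the
secondary cpu in x0 at kernel entry.

#include <stdint.h>

#define PSCI64_CPU_ON 0xC4000003u  // SMC64 function id from the PSCI spec

// Hypothetical SMC trampoline: function id plus three arguments in
// x0..x3, result returned in x0.
extern uint64_t psci_smc_call(uint64_t fn, uint64_t arg0, uint64_t arg1,
                              uint64_t arg2);

// Start one secondary cpu. Passing the logical cpu number as the context
// id is what lets start.S read its cpu number straight out of x0 instead
// of decoding MPIDR affinity fields.
static int boot_secondary(uint64_t target_mpidr, uint64_t entry_paddr,
                          uint64_t cpu_num) {
    return (int)psci_smc_call(PSCI64_CPU_ON, target_mpidr, entry_paddr,
                              cpu_num);
}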
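
And a worked example of why the affinity-derived cpu ID had to go. With
the old defaults (SMP_CPU_ID_BITS=24, SMP_CPU_CLUSTER_SHIFT=8), a part
that numbers cores in Aff1 and keeps Aff0 at zero, as many recent
DynamIQ-era designs do, produces wildly sparse IDs. The MPIDR values
here are assumed for illustration:

#include <stdint.h>
#include <stdio.h>

// The removed derivation, with the old default SMP_CPU_ID_BITS=24 and
// SMP_CPU_CLUSTER_SHIFT=8 substituted in.
static unsigned old_cpu_id(uint64_t mpidr) {
    return (unsigned)((((mpidr & ((1u << 24) - 1)) >> 8) << 8) | (mpidr & 0xff));
}

int main(void) {
    // Assumed MPIDR_EL1 values for a 4-core part with cores in Aff1.
    const uint64_t mpidrs[] = { 0x80000000, 0x80000100, 0x80000200, 0x80000300 };
    for (int i = 0; i < 4; i++) {
        // prints 0, 256, 512, 768: far past any reasonable SMP_MAX_CPUS,
        // so every cpu but the boot cpu would end up trapped at startup
        printf("core %d -> old cpu id %u\n", i, old_cpu_id(mpidrs[i]));
    }
    return 0;
}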
Travis Geiselbrecht
2025-10-04 14:32:50 -07:00
parent 1684855b9a
commit 91128ad729
15 changed files with 240 additions and 106 deletions

View File

@@ -18,6 +18,7 @@
#include <arch/mmu.h>
#include <arch/arm.h>
#include <arch/arm/mmu.h>
#include <arch/arm/mp.h>
#include <arch/mp.h>
#include <kernel/spinlock.h>
#include <kernel/thread.h>

View File

@@ -0,0 +1,16 @@
/*
* Copyright (c) 2025 Travis Geiselbrecht
*
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file or at
* https://opensource.org/licenses/MIT
*/
#pragma once
#include <lk/compiler.h>
__BEGIN_CDECLS
void arch_mp_init_percpu(void);
__END_CDECLS

View File

@@ -8,6 +8,7 @@
#include <arch.h>
#include <arch/arm64.h>
#include <arch/arm64/mmu.h>
#include <arch/arm64/mp.h>
#include <arch/atomic.h>
#include <arch/mp.h>
#include <arch/ops.h>
@@ -20,16 +21,12 @@
#include <platform.h>
#include <stdlib.h>
#include "arm64_priv.h"
#define LOCAL_TRACE 0
#if WITH_SMP
/* smp boot lock */
static spin_lock_t arm_boot_cpu_lock = 1;
static volatile int secondaries_to_init = 0;
#endif
// initial setup per cpu immediately after entering C code
static void arm64_early_init_percpu(void) {
void arm64_early_init_percpu(void) {
// set the vector base
ARM64_WRITE_SYSREG(VBAR_EL1, (uint64_t)&arm64_exception_base);
@@ -75,21 +72,7 @@ void arch_early_init(void) {
// called after the kernel has been initialized and threading is enabled on the boot cpu
void arch_init(void) {
#if WITH_SMP
arch_mp_init_percpu();
LTRACEF("midr_el1 0x%llx\n", ARM64_READ_SYSREG(midr_el1));
secondaries_to_init = SMP_MAX_CPUS - 1; /* TODO: get count from somewhere else, or add cpus as they boot */
lk_init_secondary_cpus(secondaries_to_init);
LTRACEF("releasing %d secondary cpus\n", secondaries_to_init);
/* release the secondary cpus */
spin_unlock(&arm_boot_cpu_lock);
/* flush the release of the lock, since the secondary cpus are running without cache on */
arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock));
arm64_mp_init();
#endif
}
@@ -162,32 +145,3 @@ void arch_stacktrace(uint64_t fp, uint64_t pc) {
frame.fp = *((uint64_t *)frame.fp);
}
}
#if WITH_SMP
/* called from assembly */
void arm64_secondary_entry(ulong);
void arm64_secondary_entry(ulong asm_cpu_num) {
uint cpu = arch_curr_cpu_num();
if (cpu != asm_cpu_num) {
return;
}
arm64_early_init_percpu();
spin_lock(&arm_boot_cpu_lock);
spin_unlock(&arm_boot_cpu_lock);
/* run early secondary cpu init routines up to the threading level */
lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
arch_mp_init_percpu();
LTRACEF("cpu num %d\n", cpu);
/* we're done, tell the main cpu we're up */
atomic_add(&secondaries_to_init, -1);
__asm__ volatile("sev");
lk_secondary_cpu_entry();
}
#endif

arch/arm64/arm64_priv.h Normal file
View File

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2025 Travis Geiselbrecht
*
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file or at
* https://opensource.org/licenses/MIT
*/
#pragma once
#include <sys/types.h>
#include <lk/compiler.h>
__BEGIN_CDECLS
void arm64_early_init_percpu(void);
__END_CDECLS

View File

@@ -8,19 +8,21 @@
#include <lk/asm.h>
#include <arch/asm_macros.h>
// stay in sync with arm64/thread.c arm64_context_switch()
/* void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp); */
FUNCTION(arm64_context_switch)
/* save old frame */
push x28, x29
push x26, x27
push x24, x25
push x22, x23
push x20, x21
push x18, x19
mrs x18, tpidr_el0
mrs x19, tpidrro_el0
push x18, x19
push x30, xzr
push x29, lr
push x27, x28
push x25, x26
push x23, x24
push x21, x22
push x19, x20
// skip x18, it is our per cpu pointer
mrs x16, tpidr_el0
mrs x17, tpidrro_el0
push x16, x17
/* save old sp */
mov x15, sp
@@ -30,16 +32,15 @@ FUNCTION(arm64_context_switch)
mov sp, x1
/* restore new frame */
pop x30, xzr
pop x18, x19
msr tpidr_el0, x18
msr tpidrro_el0, x19
pop x18, x19
pop x20, x21
pop x22, x23
pop x24, x25
pop x26, x27
pop x28, x29
pop x16, x17
msr tpidr_el0, x16
msr tpidrro_el0, x17
pop x19, x20
pop x21, x22
pop x23, x24
pop x25, x26
pop x27, x28
pop x29, lr
ret

View File

@@ -15,6 +15,8 @@
#define regsave_long_offset 0xf0
#define regsave_short_offset 0x90
// NOTE: stay in sync with arm64_iframe_long et al. in include/arch/arm64.h
.macro regsave_long
sub sp, sp, #32
push x28, x29
@@ -72,7 +74,7 @@ pop x10, x11
pop x12, x13
pop x14, x15
pop x16, x17
pop x18, x19
pop xzr, x19 // Do not restore x18, it is our per cpu pointer
pop x20, x21
pop x22, x23
pop x24, x25

View File

@@ -14,6 +14,7 @@
#include <lk/compiler.h>
#include <lk/reg.h>
#include <arch/arm64.h>
#include <arch/arm64/mp.h>
__BEGIN_CDECLS
@@ -89,17 +90,6 @@ static inline void arch_set_current_thread(struct thread *t) {
ARM64_WRITE_SYSREG(tpidr_el1, (uint64_t)t);
}
#if WITH_SMP
static inline uint arch_curr_cpu_num(void) {
uint64_t mpidr = ARM64_READ_SYSREG(mpidr_el1);
return ((mpidr & ((1U << SMP_CPU_ID_BITS) - 1)) >> 8 << SMP_CPU_CLUSTER_SHIFT) | (mpidr & 0xff);
}
#else
static inline uint arch_curr_cpu_num(void) {
return 0;
}
#endif
__END_CDECLS
#endif // ASSEMBLY

View File

@@ -62,7 +62,7 @@ void arm64_fpu_exception(struct arm64_iframe_long *iframe);
void arm64_fpu_save_state(struct thread *thread);
static inline void arm64_fpu_pre_context_switch(struct thread *thread) {
uint32_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
uint64_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
if ((cpacr >> 20) & 3) {
arm64_fpu_save_state(thread);
cpacr &= ~(3 << 20);

View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2025 Travis Geiselbrecht
*
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file or at
* https://opensource.org/licenses/MIT
*/
#pragma once
#include <arch/arm64.h>
#include <arch/defines.h>
#include <arch/ops.h>
#include <lk/compiler.h>
#include <stdint.h>
#include <sys/types.h>
__BEGIN_CDECLS
void arm64_mp_init(void);
void arm64_mp_init_percpu(void);
// Tell the ARM64 code how many secondary cpus to expect, which
// will cause it to allocate percpu structures for them.
void arm64_set_secondary_cpu_count(int count);
struct arm64_percpu {
uint cpu_num;
uint64_t mpidr;
} __CPU_ALIGN;
static inline void arm64_set_percpu(struct arm64_percpu *pc) {
__asm__ volatile("mov x18, %0" ::"r"(pc));
}
static inline struct arm64_percpu *arm64_get_percpu(void) {
struct arm64_percpu *pc;
__asm__ volatile("mov %0, x18" : "=r"(pc));
return pc;
}
static inline uint arch_curr_cpu_num(void) {
#if WITH_SMP
const struct arm64_percpu *pc = arm64_get_percpu();
return pc->cpu_num;
#else
return 0;
#endif
}
__END_CDECLS

View File

@@ -5,12 +5,21 @@
* license that can be found in the LICENSE file or at
* https://opensource.org/licenses/MIT
*/
#include <arch/mp.h>
#include "arch/arm64/mp.h"
#include <arch/atomic.h>
#include <arch/mp.h>
#include <arch/ops.h>
#include <assert.h>
#include <lk/err.h>
#include <lk/init.h>
#include <lk/main.h>
#include <lk/trace.h>
#include <platform/interrupts.h>
#include <stdlib.h>
#include <string.h>
#include "arm64_priv.h"
#if WITH_DEV_INTERRUPT_ARM_GIC
#include <dev/interrupt/arm_gic.h>
@@ -25,6 +34,11 @@ extern void bcm28xx_send_ipi(uint irq, uint cpu_mask);
#define GIC_IPI_BASE (14)
// percpu structures for the boot cpu and secondaries
static struct arm64_percpu boot_percpu;
static struct arm64_percpu *secondary_percpu;
static uint secondaries_to_init = 0;
status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) {
LTRACEF("target 0x%x, ipi %u\n", target, ipi);
@@ -60,10 +74,79 @@ static enum handler_return arm_ipi_reschedule_handler(void *arg) {
return mp_mbx_reschedule_irq();
}
void arch_mp_init_percpu(void) {
void arm64_mp_init_percpu(void) {
register_int_handler(MP_IPI_GENERIC + GIC_IPI_BASE, &arm_ipi_generic_handler, 0);
register_int_handler(MP_IPI_RESCHEDULE + GIC_IPI_BASE, &arm_ipi_reschedule_handler, 0);
// unmask_interrupt(MP_IPI_GENERIC + GIC_IPI_BASE);
// unmask_interrupt(MP_IPI_RESCHEDULE + GIC_IPI_BASE);
}
// Special case, called from start.S code on the boot cpu, which will always be numbered 0
// called from assembly
void arm64_init_boot_percpu(void);
void arm64_init_boot_percpu(void) {
arm64_set_percpu(&boot_percpu);
boot_percpu.cpu_num = 0;
boot_percpu.mpidr = ARM64_READ_SYSREG(mpidr_el1);
}
static void arm64_init_secondary_percpu(uint cpu_num) {
// If we're out of range, just hang
if (cpu_num > secondaries_to_init) {
for (;;) {
__asm__ volatile("wfi");
}
}
struct arm64_percpu *percpu = &secondary_percpu[cpu_num - 1];
arm64_set_percpu(percpu);
percpu->cpu_num = cpu_num;
percpu->mpidr = ARM64_READ_SYSREG(mpidr_el1);
}
void arm64_set_secondary_cpu_count(int count) {
secondaries_to_init = count;
DEBUG_ASSERT(secondary_percpu == NULL);
// clamp the secondary cpu count to SMP_MAX_CPUS - 1
if (secondaries_to_init > (SMP_MAX_CPUS - 1)) {
dprintf(INFO, "ARM64: clamping secondary cpu count from %d to %d\n", secondaries_to_init, SMP_MAX_CPUS - 1);
secondaries_to_init = SMP_MAX_CPUS - 1;
}
// Allocate percpu structures for the secondaries
if (secondaries_to_init > 0) {
const size_t len = sizeof(struct arm64_percpu) * secondaries_to_init;
secondary_percpu = memalign(CACHE_LINE, len);
DEBUG_ASSERT(secondary_percpu);
memset(secondary_percpu, 0, len);
}
}
void arm64_mp_init(void) {
arm64_mp_init_percpu();
}
/* called from assembly */
void arm64_secondary_entry(ulong);
void arm64_secondary_entry(ulong asm_cpu_num) {
arm64_init_secondary_percpu(asm_cpu_num);
uint cpu = arch_curr_cpu_num();
if (cpu != asm_cpu_num) {
return;
}
arm64_early_init_percpu();
/* run early secondary cpu init routines up to the threading level */
lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
arm64_mp_init_percpu();
LTRACEF("cpu num %d\n", cpu);
lk_secondary_cpu_entry();
}

View File

@@ -18,17 +18,13 @@ MODULE_SRCS += \
$(LOCAL_DIR)/start.S \
$(LOCAL_DIR)/cache-ops.S \
# if its requested we build with SMP, default to 4 cpus
# if it's requested we build with SMP, default to 8 cpus
ifeq (true,$(call TOBOOL,$(WITH_SMP)))
SMP_MAX_CPUS ?= 4
SMP_CPU_CLUSTER_SHIFT ?= 8
SMP_CPU_ID_BITS ?= 24 # Ignore aff3 bits for now since they are not next to aff2
SMP_MAX_CPUS ?= 8
GLOBAL_DEFINES += \
WITH_SMP=1 \
SMP_MAX_CPUS=$(SMP_MAX_CPUS) \
SMP_CPU_CLUSTER_SHIFT=$(SMP_CPU_CLUSTER_SHIFT) \
SMP_CPU_ID_BITS=$(SMP_CPU_ID_BITS)
SMP_MAX_CPUS=$(SMP_MAX_CPUS)
MODULE_SRCS += \
$(LOCAL_DIR)/mp.c
@@ -105,6 +101,7 @@ include $(LOCAL_DIR)/toolchain.mk
TOOLCHAIN_PREFIX := $(ARCH_$(ARCH)_TOOLCHAIN_PREFIX)
ARCH_COMPILEFLAGS += $(ARCH_$(ARCH)_COMPILEFLAGS)
ARCH_COMPILEFLAGS += -ffixed-x18
ARCH_COMPILEFLAGS += -fno-omit-frame-pointer
ARCH_COMPILEFLAGS_NOFLOAT := -mgeneral-regs-only
ARCH_COMPILEFLAGS_FLOAT :=
@@ -140,4 +137,6 @@ $(BUILDDIR)/system-onesegment.ld: $(LOCAL_DIR)/system-onesegment.ld $(wildcard a
linkerscript.phony:
.PHONY: linkerscript.phony
MODULE_OPTIONS := extra_warnings
include make/module.mk

View File

@@ -61,7 +61,7 @@ arm_reset:
#if WITH_SMP
/* if the cpu id is != 0 it's a secondary cpu */
mrs cpuid, mpidr_el1
ubfx cpuid, cpuid, #0, #SMP_CPU_ID_BITS
ubfx cpuid, cpuid, #0, #24 /* ignore aff3 bits for now since they are not next to aff2 */
#if WITH_KERNEL_VM
cbnz cpuid, .Lmmu_enable_secondary
@@ -382,22 +382,24 @@ arm_reset:
cbnz tmp2, .L__bss_loop
.L__bss_loop_done:
#if WITH_SMP
bl arm64_init_boot_percpu
#endif
/* load the boot args we had saved previously */
adrp tmp, arm64_boot_args
add tmp, tmp, :lo12:arm64_boot_args
ldp x0, x1, [tmp], #16
ldp x2, x3, [tmp]
bl lk_main
b .
bl lk_main
b .
#if WITH_SMP
.Lsecondary_boot:
and tmp, cpuid, #0xff
cmp tmp, #(1 << SMP_CPU_CLUSTER_SHIFT)
bge .Lunsupported_cpu_trap
bic cpuid, cpuid, #0xff
orr cpuid, tmp, cpuid, LSR #(8 - SMP_CPU_CLUSTER_SHIFT)
// if we came in from PSCI x0 has the cpu number
// TODO: more cleanly handle other boot paths
mov cpuid, x0
cmp cpuid, #SMP_MAX_CPUS
bge .Lunsupported_cpu_trap

View File

@@ -6,6 +6,7 @@
* https://opensource.org/licenses/MIT
*/
#include <arch/arm64.h>
#include <assert.h>
#include <kernel/thread.h>
#include <lk/debug.h>
#include <lk/trace.h>
@@ -16,11 +17,8 @@
#define LOCAL_TRACE 0
struct context_switch_frame {
vaddr_t lr;
vaddr_t pad; // Padding to keep frame size a multiple of
vaddr_t tpidr_el0; // sp alignment requirements (16 bytes)
vaddr_t tpidrro_el0;
vaddr_t r18;
vaddr_t r19;
vaddr_t r20;
vaddr_t r21;
@@ -32,7 +30,9 @@ struct context_switch_frame {
vaddr_t r27;
vaddr_t r28;
vaddr_t r29;
vaddr_t lr; // x30
};
static_assert(sizeof(struct context_switch_frame) % 16 == 0, "context_switch_frame size must be multiple of 16");
static void initial_thread_func(void) __NO_RETURN;
static void initial_thread_func(void) {

View File

@@ -16,6 +16,4 @@ __BEGIN_CDECLS
/* send inter processor interrupt, if supported */
status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi);
void arch_mp_init_percpu(void);
__END_CDECLS

View File

@@ -12,6 +12,7 @@
#include <libfdt.h>
#include <lk/cpp.h>
#include <lk/err.h>
#include <lk/main.h>
#include <lk/trace.h>
#include <stdio.h>
#include <sys/types.h>
@@ -27,6 +28,9 @@
#if ARCH_ARM || ARCH_ARM64
#include <dev/power/psci.h>
#endif
#if ARCH_ARM64
#include <arch/arm64/mp.h>
#endif
#if WITH_DEV_BUS_PCI
#include <dev/bus/pci.h>
#endif
@@ -217,6 +221,15 @@ status_t fdtwalk_setup_cpus_arm(const void *fdt) {
LTRACEF("booting %zu cpus\n", cpu_count);
// TODO: revamp the ARM32 path so we do not need the special case here
#if ARCH_ARM64
// tell the arm64 layer how many cores we have to start
arm64_set_secondary_cpu_count(cpu_count - 1);
// have the upper layer prepare for the secondary cpus
lk_init_secondary_cpus(cpu_count - 1);
#endif
/* boot the secondary cpus using the Power State Coordination Interface */
for (size_t i = 1; i < cpu_count; i++) {
/* note: assumes cpuids are numbered like MPIDR 0:0:0:N */