From acfe991c7f18c20c5e7350e631225cd85218e8bc Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Wed, 4 Dec 2019 09:08:57 -0800 Subject: [PATCH] [arch][riscv] Expose RISC-V mp kernel start Support mp lk start on RISC-V. Several changes throughout were required: - Add signal in asm start to force secondary harts to wait for bss to be cleared. - Use mhartid in arch_curr_cpu_num, PLIC, and CLINT - Use tp register as thread pointer instead of global variable. - Support sending IPIs between harts using CLINT - Add spinlock implementation --- arch/riscv/arch.c | 49 ++++++++++++++ arch/riscv/asm.S | 50 +++++++------- arch/riscv/clint.c | 19 +++++- arch/riscv/exceptions.c | 6 +- arch/riscv/include/arch/arch_ops.h | 11 ++-- arch/riscv/include/arch/arch_thread.h | 1 - arch/riscv/include/arch/riscv.h | 2 + arch/riscv/include/arch/spinlock.h | 27 +++++--- arch/riscv/linker-onesegment.ld | 2 + arch/riscv/mp.c | 66 +++++++++++++++++++ arch/riscv/rules.mk | 9 ++- arch/riscv/start.S | 32 ++++++++- platform/sifive/plic.c | 24 +++---- target/sifive-e/include/platform/sifive.h | 1 + .../include/platform/sifive.h | 2 + target/sifive-unleashed/rules.mk | 3 + 16 files changed, 243 insertions(+), 61 deletions(-) create mode 100644 arch/riscv/mp.c diff --git a/arch/riscv/arch.c b/arch/riscv/arch.c index 1b70d28d..a563c32d 100644 --- a/arch/riscv/arch.c +++ b/arch/riscv/arch.c @@ -11,9 +11,17 @@ #include #include #include +#include +#include +#include #define LOCAL_TRACE 0 +#if WITH_SMP +static spin_lock_t boot_cpu_lock = 1; +static volatile int secondaries_to_init = SMP_MAX_CPUS - 1; +#endif + void arch_early_init(void) { // set the top level exception handler riscv_csr_write(mtvec, (uintptr_t)&riscv_exception_entry); @@ -35,8 +43,49 @@ void arch_init(void) { // enable external interrupts riscv_csr_set(mie, RISCV_MIE_MEIE); + +#if WITH_SMP + arch_mp_init_percpu(); + + lk_init_secondary_cpus(secondaries_to_init); + + LTRACEF("RISCV: Waiting for %d secondary harts to come up\n", 
secondaries_to_init); + /* release the secondary cpus */ + spin_unlock(&boot_cpu_lock); + // while (secondaries_to_init) arch_idle(); + // spin_lock(&boot_cpu_lock); +#endif } +#if WITH_SMP +void riscv_secondary_entry(void) { + arch_early_init(); + + if (unlikely(riscv_csr_read(mhartid) >= SMP_MAX_CPUS)) + while (1) arch_idle(); + + spin_lock(&boot_cpu_lock); + spin_unlock(&boot_cpu_lock); + + // enable external interrupts + riscv_csr_set(mie, RISCV_MIE_MEIE); + + /* run early secondary cpu init routines up to the threading level */ + lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1); + + arch_mp_init_percpu(); + + dprintf(INFO, "RISCV: secondary hart coming up: mvendorid %#lx marchid %#lx mimpid %#lx mhartid %#lx\n", + riscv_csr_read(mvendorid), riscv_csr_read(marchid), + riscv_csr_read(mimpid), riscv_csr_read(mhartid)); + + // atomic_add(&secondaries_to_init, -1); + // arch_mp_send_ipi(1 << 0, MP_IPI_GENERIC); // wake up hart0 to let it know this CPU has come up + + lk_secondary_cpu_entry(); +} +#endif + void arch_idle(void) { // disabled for now, QEMU seems to have some trouble emulating wfi properly // also have trouble breaking into sifive-e board with openocd when wfi diff --git a/arch/riscv/asm.S b/arch/riscv/asm.S index 33873be1..3a10d47d 100644 --- a/arch/riscv/asm.S +++ b/arch/riscv/asm.S @@ -27,33 +27,31 @@ FUNCTION(riscv_context_switch) # a1 = newcs STR ra, REGOFF(0)(a0) STR sp, REGOFF(1)(a0) - STR tp, REGOFF(2)(a0) - STR s0, REGOFF(3)(a0) - STR s1, REGOFF(4)(a0) - STR s2, REGOFF(5)(a0) - STR s3, REGOFF(6)(a0) - STR s4, REGOFF(7)(a0) - STR s5, REGOFF(8)(a0) - STR s6, REGOFF(9)(a0) - STR s7, REGOFF(10)(a0) - STR s8, REGOFF(11)(a0) - STR s9, REGOFF(12)(a0) - STR s10, REGOFF(13)(a0) - STR s11, REGOFF(14)(a0) + STR s0, REGOFF(2)(a0) + STR s1, REGOFF(3)(a0) + STR s2, REGOFF(4)(a0) + STR s3, REGOFF(5)(a0) + STR s4, REGOFF(6)(a0) + STR s5, REGOFF(7)(a0) + STR s6, REGOFF(8)(a0) + STR s7, REGOFF(9)(a0) + STR s8, 
REGOFF(10)(a0) + STR s9, REGOFF(11)(a0) + STR s10, REGOFF(12)(a0) + STR s11, REGOFF(13)(a0) - LDR s11, REGOFF(14)(a1) - LDR s10, REGOFF(13)(a1) - LDR s9, REGOFF(12)(a1) - LDR s8, REGOFF(11)(a1) - LDR s7, REGOFF(10)(a1) - LDR s6, REGOFF(9)(a1) - LDR s5, REGOFF(8)(a1) - LDR s4, REGOFF(7)(a1) - LDR s3, REGOFF(6)(a1) - LDR s2, REGOFF(5)(a1) - LDR s1, REGOFF(4)(a1) - LDR s0, REGOFF(3)(a1) - LDR tp, REGOFF(2)(a1) + LDR s11, REGOFF(13)(a1) + LDR s10, REGOFF(12)(a1) + LDR s9, REGOFF(11)(a1) + LDR s8, REGOFF(10)(a1) + LDR s7, REGOFF(9)(a1) + LDR s6, REGOFF(8)(a1) + LDR s5, REGOFF(7)(a1) + LDR s4, REGOFF(6)(a1) + LDR s3, REGOFF(5)(a1) + LDR s2, REGOFF(4)(a1) + LDR s1, REGOFF(3)(a1) + LDR s0, REGOFF(2)(a1) LDR sp, REGOFF(1)(a1) LDR ra, REGOFF(0)(a1) diff --git a/arch/riscv/clint.c b/arch/riscv/clint.c index b01c90f6..64983460 100644 --- a/arch/riscv/clint.c +++ b/arch/riscv/clint.c @@ -25,9 +25,24 @@ #endif #define CLINT_MSIP(x) (ARCH_RISCV_CLINT_BASE + 4 * (x)) -#define CLINT_MTIMECMP(x) (ARCH_RISCV_CLINT_BASE + 0x4000 + 4 * (x)) +#define CLINT_MTIMECMP(x) (ARCH_RISCV_CLINT_BASE + 0x4000 + 8 * (x)) #define CLINT_MTIME (ARCH_RISCV_CLINT_BASE + 0xbff8) +void clint_ipi_send(unsigned long target_hart) { + if (target_hart >= SMP_MAX_CPUS) + return; + + *REG32(CLINT_MSIP(target_hart)) = 1; /* MSIP slots are 4 bytes apart: a 32-bit store touches only this hart */ +} + + +void clint_ipi_clear(unsigned long target_hart) { + if (target_hart >= SMP_MAX_CPUS) + return; + + *REG32(CLINT_MSIP(target_hart)) = 0; /* 32-bit store so the next hart's pending IPI is not cleared */ +} + lk_bigtime_t current_time_hires(void) { #if ARCH_RISCV_MTIME_RATE < 10000000 return current_time() * 1000llu; // hack to deal with slow clocks @@ -54,7 +69,7 @@ status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg // convert interval to ticks uint64_t ticks = (interval * ARCH_RISCV_MTIME_RATE) / 1000u; - *REG64(CLINT_MTIMECMP(0)) = *REG64(CLINT_MTIME) + ticks; + *REG64(CLINT_MTIMECMP(riscv_current_hart())) = *REG64(CLINT_MTIME) + ticks; // enable the timer riscv_csr_set(mie, RISCV_MIE_MTIE); diff --git 
a/arch/riscv/exceptions.c b/arch/riscv/exceptions.c index 7b22f7f7..bedf9bf1 100644 --- a/arch/riscv/exceptions.c +++ b/arch/riscv/exceptions.c @@ -36,18 +36,22 @@ struct riscv_short_iframe { }; extern enum handler_return riscv_platform_irq(void); +extern enum handler_return riscv_software_exception(void); void riscv_exception_handler(ulong cause, ulong epc, struct riscv_short_iframe *frame) { LTRACEF("cause %#lx epc %#lx mstatus %#lx\n", cause, epc, frame->mstatus); DEBUG_ASSERT(arch_ints_disabled()); - DEBUG_ASSERT(frame->mstatus & RISCV_STATUS_MPIE); + // DEBUG_ASSERT(frame->mstatus & RISCV_STATUS_MPIE); // top bit of the cause register determines if it's an interrupt or not const ulong int_bit = (__riscv_xlen == 32) ? (1ul<<31) : (1ul<<63); enum handler_return ret = INT_NO_RESCHEDULE; switch (cause) { + case int_bit | 0x3: // machine software interrupt + ret = riscv_software_exception(); + break; case int_bit | 0x7: // machine timer interrupt ret = riscv_timer_exception(); break; diff --git a/arch/riscv/include/arch/arch_ops.h b/arch/riscv/include/arch/arch_ops.h index 1ab01112..b8eeb0e1 100644 --- a/arch/riscv/include/arch/arch_ops.h +++ b/arch/riscv/include/arch/arch_ops.h @@ -40,15 +40,14 @@ static inline int atomic_swap(volatile int *ptr, int val) { return __atomic_exchange_n(ptr, val, __ATOMIC_RELAXED); } -/* use a global pointer to store the current_thread */ -extern struct thread *_current_thread; - static inline struct thread *get_current_thread(void) { - return _current_thread; + struct thread *current_thread; + __asm__ volatile("mv %0,tp" : "=r"(current_thread)); + return current_thread; } static inline void set_current_thread(struct thread *t) { - _current_thread = t; + __asm__ volatile("mv tp,%0" : : "r"(t) : "tp"); } static inline uint32_t arch_cycle_count(void) { @@ -60,6 +59,6 @@ static inline uint32_t arch_cycle_count(void) { } static inline uint arch_curr_cpu_num(void) { - return 0; + return riscv_csr_read(mhartid); } diff --git 
a/arch/riscv/include/arch/arch_thread.h b/arch/riscv/include/arch/arch_thread.h index fa16a79c..78291792 100644 --- a/arch/riscv/include/arch/arch_thread.h +++ b/arch/riscv/include/arch/arch_thread.h @@ -12,7 +12,6 @@ struct riscv_context_switch_frame { unsigned long ra; // return address (x1) unsigned long sp; // stack pointer (x2) - unsigned long tp; // thread pointer (x4) unsigned long s0; // x8-x9 unsigned long s1; diff --git a/arch/riscv/include/arch/riscv.h b/arch/riscv/include/arch/riscv.h index 5540d191..7cd28c54 100644 --- a/arch/riscv/include/arch/riscv.h +++ b/arch/riscv/include/arch/riscv.h @@ -73,5 +73,7 @@ __val; \ }) +#define riscv_current_hart() riscv_csr_read(mhartid) + void riscv_exception_entry(void); enum handler_return riscv_timer_exception(void); diff --git a/arch/riscv/include/arch/spinlock.h b/arch/riscv/include/arch/spinlock.h index bddae5ad..6c76bba3 100644 --- a/arch/riscv/include/arch/spinlock.h +++ b/arch/riscv/include/arch/spinlock.h @@ -10,23 +10,32 @@ #include #include -#if WITH_SMP -#error microblaze does not support SMP -#endif - #define SPIN_LOCK_INITIAL_VALUE (0) -typedef unsigned int spin_lock_t; +typedef volatile unsigned int spin_lock_t; typedef unsigned long spin_lock_saved_state_t; typedef unsigned int spin_lock_save_flags_t; -static inline void arch_spin_lock(spin_lock_t *lock) { - *lock = 1; +static inline int arch_spin_trylock(spin_lock_t *lock) { + int tmp = 1, busy; + + __asm__ __volatile__( + " amoswap.w %0, %2, %1\n" + " fence r , rw\n" + : "=r"(busy), "+A"(*lock) + : "r" (tmp) + : "memory" + ); + + return !busy; } -static inline int arch_spin_trylock(spin_lock_t *lock) { - return 0; +static inline void arch_spin_lock(spin_lock_t *lock) { + while (1) { + if (*lock) continue; + if (arch_spin_trylock(lock)) break; + } } static inline void arch_spin_unlock(spin_lock_t *lock) { diff --git a/arch/riscv/linker-onesegment.ld b/arch/riscv/linker-onesegment.ld index 7759166d..d5932d36 100644 --- 
a/arch/riscv/linker-onesegment.ld +++ b/arch/riscv/linker-onesegment.ld @@ -96,6 +96,8 @@ SECTIONS __bss_end = .; } + /* Align the end to ensure anything after the kernel ends up on its own pages */ + . = ALIGN(4096); _end = .; . = %KERNEL_BASE% + %MEMSIZE%; diff --git a/arch/riscv/mp.c b/arch/riscv/mp.c new file mode 100644 index 00000000..15f47a7d --- /dev/null +++ b/arch/riscv/mp.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Elliot Berman + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ + +#include +#include +#include +#include + +#include + +#define LOCAL_TRACE 0 + +static mp_ipi_t ipi_data[SMP_MAX_CPUS]; + +extern void clint_ipi_send(unsigned long target_hart); +extern void clint_ipi_clear(unsigned long target_hart); + +status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) { + LTRACEF("target 0x%x, ipi %u\n", target, ipi); + + unsigned long cur_hart = riscv_csr_read(mhartid); + unsigned long h = 0; + mp_cpu_mask_t m = target; + for (; h < SMP_MAX_CPUS && m; h++, m >>= 1) { + if ((m & 1) && (h != cur_hart)) { + ipi_data[h] = ipi; + asm volatile(" fence iorw,iorw"); + clint_ipi_send(h); + } + } + + if(target & (1 << cur_hart)) { + clint_ipi_send(cur_hart); + } + + return NO_ERROR; +} + +enum handler_return riscv_software_exception(void) { + clint_ipi_clear(riscv_csr_read(mhartid)); + asm volatile(" fence ir,ir"); + mp_ipi_t reason = ipi_data[riscv_current_hart()]; + ipi_data[riscv_current_hart()] = 0; + switch (reason) { + case MP_IPI_RESCHEDULE: + return INT_RESCHEDULE; + case MP_IPI_GENERIC: + break; + default: + TRACEF("unhandled ipi cause %#x, hartid %#lx\n", reason, riscv_current_hart()); + panic("stopping"); + break; + } + + return INT_NO_RESCHEDULE; + +} + +void arch_mp_init_percpu(void) { + riscv_csr_set(mie, RISCV_MIE_MSIE); +} diff --git a/arch/riscv/rules.mk b/arch/riscv/rules.mk index 208ece7b..28ec4259 100644 --- 
a/arch/riscv/rules.mk +++ b/arch/riscv/rules.mk @@ -8,10 +8,17 @@ MODULE_SRCS += $(LOCAL_DIR)/asm.S MODULE_SRCS += $(LOCAL_DIR)/clint.c MODULE_SRCS += $(LOCAL_DIR)/exceptions.c MODULE_SRCS += $(LOCAL_DIR)/thread.c +MODULE_SRCS += $(LOCAL_DIR)/mp.c -GLOBAL_DEFINES += SMP_MAX_CPUS=1 +SMP_MAX_CPUS ?= 1 + +GLOBAL_DEFINES += SMP_MAX_CPUS=$(SMP_MAX_CPUS) GLOBAL_DEFINES += PLATFORM_HAS_DYNAMIC_TIMER=1 +ifeq ($(WITH_SMP),1) +GLOBAL_DEFINES += WITH_SMP=1 +endif + SUBARCH ?= 32 WITH_LINKER_GC ?= 0 diff --git a/arch/riscv/start.S b/arch/riscv/start.S index ba153bd0..885b27e7 100644 --- a/arch/riscv/start.S +++ b/arch/riscv/start.S @@ -6,6 +6,8 @@ * https://opensource.org/licenses/MIT */ #include +#include +#include "config.h" .section ".text.boot" FUNCTION(_start) @@ -17,10 +19,21 @@ FUNCTION(_start) // if our hart isnt 0, trap the cpu csrr t0, mhartid - bnez t0, .Lsecondary_trap // set the default stack la sp, default_stack_top + // default stack locations for each hart: + // LOW ------------ HIGH + // [hart2][hart1][hart0] + li t1, ARCH_DEFAULT_STACK_SIZE + mul t1, t1, t0 /* t0 holds mhartid (read above); do not rely on a0 being set by a bootloader */ + sub sp, sp, t1 + + // everyone stores zero in _boot_status + la t5, _boot_status + sw zero, (t5) + + bnez t0, .Lsecondary_trap #if ARCH_RISCV_TWOSEGMENT // copy preinitialized data from flash to memory @@ -46,19 +59,32 @@ FUNCTION(_start) add t0, t0, 4 bne t0, t1, 0b + // Release any other harts into riscv_secondary_entry + fence w, w + add t0, zero, 1 + sw t0, (t5) + // call main jal lk_main // should never return here j . - // secondary cpus are trapped here (for now) .Lsecondary_trap: +#if WITH_SMP + // wait for _boot_status to be nonzero, then go into riscv_secondary_entry + lw t0, (t5) + beqz t0, .Lsecondary_trap + jal riscv_secondary_entry +#else wfi j . 
+#endif .bss .align 4 LOCAL_DATA(default_stack) - .skip 1024 + .skip ARCH_DEFAULT_STACK_SIZE * SMP_MAX_CPUS LOCAL_DATA(default_stack_top) +LOCAL_DATA(_boot_status) + .dword 0 /* an operand is required: a bare .dword reserves no storage for the flag */ diff --git a/platform/sifive/plic.c b/platform/sifive/plic.c index 11978a57..3333d171 100644 --- a/platform/sifive/plic.c +++ b/platform/sifive/plic.c @@ -21,12 +21,12 @@ // Driver for PLIC implementation in SiFive E and U boards -#define PLIC_PRIORITY(x) (PLIC_BASE + 4 * (x)) -#define PLIC_PENDING(x) (PLIC_BASE + 0x1000 + 4 * ((x) / 32)) -#define PLIC_ENABLE(x) (PLIC_BASE + 0x2000 + 4 * ((x) / 32)) -#define PLIC_THRESHOLD (PLIC_BASE + 0x200000) -#define PLIC_COMPLETE (PLIC_BASE + 0x200004) -#define PLIC_CLAIM PLIC_COMPLETE +#define PLIC_PRIORITY(irq) (PLIC_BASE + 4 * (irq)) +#define PLIC_PENDING(irq) (PLIC_BASE + 0x1000 + (4 * ((irq) / 32))) +#define PLIC_ENABLE(irq, hart) (PLIC_BASE + 0x2000 + (0x80 * PLIC_HART_IDX(hart)) + (4 * ((irq) / 32))) +#define PLIC_THRESHOLD(hart) (PLIC_BASE + 0x200000 + (0x1000 * PLIC_HART_IDX(hart))) +#define PLIC_COMPLETE(hart) (PLIC_BASE + 0x200004 + (0x1000 * PLIC_HART_IDX(hart))) +#define PLIC_CLAIM(hart) PLIC_COMPLETE(hart) static struct int_handlers { int_handler handler; @@ -36,24 +36,24 @@ static struct int_handlers { void plic_early_init(void) { // mask all irqs and set their priority to 1 for (int i = 1; i < SIFIVE_NUM_IRQS; i++) { - *REG32(PLIC_ENABLE(i)) &= ~(1 << (i % 32)); + *REG32(PLIC_ENABLE(i, riscv_current_hart())) &= ~(1 << (i % 32)); *REG32(PLIC_PRIORITY(i)) = 1; } // set global priority threshold to 0 - *REG32(PLIC_THRESHOLD) = 0; + *REG32(PLIC_THRESHOLD(riscv_current_hart())) = 0; } void plic_init(void) { } status_t mask_interrupt(unsigned int vector) { - *REG32(PLIC_ENABLE(vector)) &= ~(1 << (vector % 32)); + *REG32(PLIC_ENABLE(vector, riscv_current_hart())) &= ~(1 << (vector % 32)); return NO_ERROR; } status_t unmask_interrupt(unsigned int vector) { - *REG32(PLIC_ENABLE(vector)) |= (1 << (vector % 32)); + *REG32(PLIC_ENABLE(vector, 
riscv_current_hart())) |= (1 << (vector % 32)); return NO_ERROR; } @@ -68,7 +68,7 @@ void register_int_handler(unsigned int vector, int_handler handler, void *arg) { enum handler_return riscv_platform_irq(void) { // see what irq triggered it - uint32_t vector = *REG32(PLIC_CLAIM); + uint32_t vector = *REG32(PLIC_CLAIM(riscv_current_hart())); LTRACEF("vector %u\n", vector); if (unlikely(vector == 0)) { @@ -85,7 +85,7 @@ enum handler_return riscv_platform_irq(void) { } // ack the interrupt - *REG32(PLIC_COMPLETE) = vector; + *REG32(PLIC_COMPLETE(riscv_current_hart())) = vector; KEVLOG_IRQ_EXIT(vector); diff --git a/target/sifive-e/include/platform/sifive.h b/target/sifive-e/include/platform/sifive.h index e427944b..edaf65ce 100644 --- a/target/sifive-e/include/platform/sifive.h +++ b/target/sifive-e/include/platform/sifive.h @@ -25,3 +25,4 @@ #define GPIO_REG_IOF_EN 14 #define GPIO_REG_IOF_SEL 15 +#define PLIC_HART_IDX(hart) 0 diff --git a/target/sifive-unleashed/include/platform/sifive.h b/target/sifive-unleashed/include/platform/sifive.h index 8351cfec..d21429e7 100644 --- a/target/sifive-unleashed/include/platform/sifive.h +++ b/target/sifive-unleashed/include/platform/sifive.h @@ -20,3 +20,5 @@ #define PWM0_BASE 0x10020000 #define PWM1_BASE 0x10021000 #define GPIO_BASE 0x10060000 + +#define PLIC_HART_IDX(hart) ((hart) ? ((2 * (hart)) - 1) : 0) diff --git a/target/sifive-unleashed/rules.mk b/target/sifive-unleashed/rules.mk index 976df500..12aebb68 100644 --- a/target/sifive-unleashed/rules.mk +++ b/target/sifive-unleashed/rules.mk @@ -4,6 +4,9 @@ MODULE := $(LOCAL_DIR) PLATFORM := sifive VARIANT := sifive_u +WITH_SMP := 1 +SMP_MAX_CPUS := 5 + GLOBAL_DEFINES += SIFIVE_FREQ=500000000 # 500 MHz MEMBASE ?= 0x080000000