diff --git a/app/tests/fibo.c b/app/tests/fibo.c index ac6a395f..3c3320f2 100644 --- a/app/tests/fibo.c +++ b/app/tests/fibo.c @@ -41,12 +41,15 @@ static int fibo_thread(void *argv) if (fibo == 1) return 1; - t[0] = thread_create("fibo", &fibo_thread, (void *)(fibo - 1), DEFAULT_PRIORITY, DEFAULT_STACK_SIZE); + char name[32]; + snprintf(name, sizeof(name), "fibo %lu", fibo - 1); + t[0] = thread_create(name, &fibo_thread, (void *)(fibo - 1), DEFAULT_PRIORITY, DEFAULT_STACK_SIZE); if (!t[0]) { printf("error creating thread for fibo %d\n", fibo-1); return 0; } - t[1] = thread_create("fibo", &fibo_thread, (void *)(fibo - 2), DEFAULT_PRIORITY, DEFAULT_STACK_SIZE); + snprintf(name, sizeof(name), "fibo %lu", fibo - 2); + t[1] = thread_create(name, &fibo_thread, (void *)(fibo - 2), DEFAULT_PRIORITY, DEFAULT_STACK_SIZE); if (!t[1]) { printf("error creating thread for fibo %d\n", fibo-2); thread_resume(t[0]); @@ -89,4 +92,5 @@ int fibo(int argc, const cmd_args *argv) return NO_ERROR; } +// vim: set noexpandtab: diff --git a/app/tests/include/app/tests.h b/app/tests/include/app/tests.h index 69a0bd63..19bb8408 100644 --- a/app/tests/include/app/tests.h +++ b/app/tests/include/app/tests.h @@ -32,6 +32,7 @@ void clock_tests(void); void float_tests(void); void benchmarks(void); int fibo(int argc, const cmd_args *argv); +int spinner(int argc, const cmd_args *argv); #endif diff --git a/app/tests/tests.c b/app/tests/tests.c index 275b968f..3ef56f61 100644 --- a/app/tests/tests.c +++ b/app/tests/tests.c @@ -38,6 +38,7 @@ STATIC_COMMAND("float_tests", "floating point test", (console_cmd)&float_tests) #endif STATIC_COMMAND("bench", "miscellaneous benchmarks", (console_cmd)&benchmarks) STATIC_COMMAND("fibo", "threaded fibonacci", (console_cmd)&fibo) +STATIC_COMMAND("spinner", "create a spinning thread", (console_cmd)&spinner) STATIC_COMMAND_END(tests); #endif diff --git a/app/tests/thread_tests.c b/app/tests/thread_tests.c index bc34c1a8..b065172d 100644 --- a/app/tests/thread_tests.c +++ b/app/tests/thread_tests.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -130,7 +131,7 @@ static int semaphore_test(void) static int mutex_thread(void *arg) { int i; - const int iterations = 50000; + const int iterations = 1000000; static volatile int shared = 0; @@ -405,9 +406,11 @@ static int atomic_tester(void *arg) int add = (intptr_t)arg; int i; - TRACEF("add %d\n", add); + const int iter = 10000000; - for (i=0; i < 1000000; i++) { + TRACEF("add %d, %d iterations\n", add, iter); + + for (i=0; i < iter; i++) { atomic_add(&atomic, add); } @@ -455,6 +458,7 @@ static int preempt_tester(void *arg) printf("exiting ts %lld\n", current_time_hires()); atomic_add(&preempt_count, -1); +#undef COUNT return 0; } @@ -571,12 +575,53 @@ static void join_test(void) printf("thread_join returns err %d, retval %d (should be 0 and 55)\n", err, ret); } +static void spinlock_test(void) +{ + spin_lock_saved_state_t state; + spin_lock_t lock; + + spin_lock_init(&lock); + + // verify basic functionality (single core) + printf("testing spinlock:\n"); + ASSERT(!spin_lock_held(&lock)); + ASSERT(!arch_ints_disabled()); + spin_lock_irqsave(&lock, state); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&lock)); + spin_unlock_irqrestore(&lock, state); + ASSERT(!spin_lock_held(&lock)); + ASSERT(!arch_ints_disabled()); + printf("seems to work\n"); + +#define COUNT (1024*1024) + uint32_t c = arch_cycle_count(); + for (uint i = 0; i < COUNT; i++) { + spin_lock(&lock); + spin_unlock(&lock); + } + c = 
arch_cycle_count() - c; + + printf("%u cycles to acquire/release lock %u times (%u cycles per)\n", c, COUNT, c / COUNT); + + c = arch_cycle_count(); + for (uint i = 0; i < COUNT; i++) { + spin_lock_irqsave(&lock, state); + spin_unlock_irqrestore(&lock, state); + } + c = arch_cycle_count() - c; + + printf("%u cycles to acquire/release lock w/irqsave %u times (%u cycles per)\n", c, COUNT, c / COUNT); +#undef COUNT +} + int thread_tests(void) { mutex_test(); semaphore_test(); event_test(); + spinlock_test(); atomic_test(); thread_sleep(200); @@ -589,4 +634,27 @@ int thread_tests(void) return 0; } +static int spinner_thread(void *arg) +{ + for (;;) + ; + + return 0; +} + +int spinner(int argc, const cmd_args *argv) +{ + if (argc < 2) { + printf("not enough args\n"); + printf("usage: %s \n", argv[0].str); + return -1; + } + + thread_t *t = thread_create("spinner", spinner_thread, NULL, argv[1].u, DEFAULT_STACK_SIZE); + if (t) + thread_resume(t); + + return 0; +} + /* vim: set ts=4 sw=4 noexpandtab: */ diff --git a/arch/arm/arm-m/arch.c b/arch/arm/arm-m/arch.c index 47266e34..41f6873e 100644 --- a/arch/arm/arm-m/arch.c +++ b/arch/arm/arm-m/arch.c @@ -122,8 +122,6 @@ void _arm_cm_set_irqpri(uint32_t pri) void arm_cm_irq_entry(void) { - inc_critical_section(); - THREAD_STATS_INC(interrupts); KEVLOG_IRQ_ENTER(__get_IPSR()); } @@ -134,7 +132,6 @@ void arm_cm_irq_exit(bool reschedule) arm_cm_trigger_preempt(); KEVLOG_IRQ_EXIT(__get_IPSR()); - dec_critical_section(); } void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3) diff --git a/arch/arm/arm-m/exceptions.c b/arch/arm/arm-m/exceptions.c index c66faa8d..ab566606 100644 --- a/arch/arm/arm-m/exceptions.c +++ b/arch/arm/arm-m/exceptions.c @@ -43,7 +43,6 @@ static void dump_frame(const struct arm_cm_exception_frame *frame) static void hardfault(struct arm_cm_exception_frame *frame) { - inc_critical_section(); printf("hardfault: "); dump_frame(frame); @@ -54,7 +53,6 @@ static void hardfault(struct arm_cm_exception_frame *frame) static void usagefault(struct arm_cm_exception_frame *frame) { - inc_critical_section(); printf("usagefault: "); dump_frame(frame); @@ -63,7 +61,6 @@ static void usagefault(struct arm_cm_exception_frame *frame) static void busfault(struct arm_cm_exception_frame *frame) { - inc_critical_section(); printf("busfault: "); dump_frame(frame); @@ -74,7 +71,6 @@ static void busfault(struct arm_cm_exception_frame *frame) void _nmi(void) { - inc_critical_section(); printf("nmi\n"); platform_halt(HALT_ACTION_HALT, HALT_REASON_SW_PANIC); } @@ -92,7 +88,6 @@ __NAKED void _hardfault(void) void _memmanage(void) { - inc_critical_section(); printf("memmanage\n"); platform_halt(HALT_ACTION_HALT, HALT_REASON_SW_PANIC); } @@ -122,7 +117,6 @@ void _usagefault(void) /* systick handler */ void __WEAK _systick(void) { - inc_critical_section(); printf("systick\n"); platform_halt(HALT_ACTION_HALT, HALT_REASON_SW_PANIC); } diff --git a/arch/arm/arm-m/systick/systick.c b/arch/arm/arm-m/systick/systick.c index 03f7292f..e9f0765f 100644 --- a/arch/arm/arm-m/systick/systick.c +++ b/arch/arm/arm-m/systick/systick.c @@ -90,8 +90,6 @@ status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg DEBUG_ASSERT(tick_rate != 0 && tick_rate_mhz != 0); - enter_critical_section(); - cb = callback; cb_args = arg; @@ -99,8 +97,6 @@ status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg tick_interval_us = interval * 1000; arm_cm_systick_set_periodic(interval); - exit_critical_section(); - return 
NO_ERROR; } diff --git a/arch/arm/arm-m/thread.c b/arch/arm/arm-m/thread.c index 32285b6a..301a5e6b 100644 --- a/arch/arm/arm-m/thread.c +++ b/arch/arm/arm-m/thread.c @@ -57,8 +57,9 @@ static void initial_thread_func(void) dump_thread(_current_thread); #endif - /* exit the implicit critical section we're within */ - exit_critical_section(); + /* release the thread lock that was implicitly held across the reschedule */ + spin_unlock(&thread_lock); + arch_enable_ints(); ret = _current_thread->entry(_current_thread->arg); @@ -89,9 +90,6 @@ volatile struct arm_cm_exception_frame_long *preempt_frame; static void pendsv(struct arm_cm_exception_frame_long *frame) { arch_disable_ints(); - inc_critical_section(); - - ASSERT(critical_section_count == 1); LTRACEF("preempting thread %p (%s)\n", _current_thread, _current_thread->name); @@ -104,7 +102,6 @@ static void pendsv(struct arm_cm_exception_frame_long *frame) /* if we got here, there wasn't anything to switch to, so just fall through and exit */ preempt_frame = NULL; - dec_critical_section(); arch_enable_ints(); } @@ -190,13 +187,6 @@ void arch_context_switch(struct thread *oldthread, struct thread *newthread) { LTRACE_ENTRY; - if (newthread->arch.was_preempted) { - /* we're about to return directly to a thread that was preempted (in user space), - * so push its critical section count back down to zero - */ - critical_section_count = newthread->saved_critical_section_count = 0; - } - /* if preempt_frame is set, we are being preempted */ if (preempt_frame) { oldthread->arch.was_preempted = true; @@ -250,3 +240,12 @@ void arch_context_switch(struct thread *oldthread, struct thread *newthread) } +void arch_dump_thread(thread_t *t) +{ + if (t->state != THREAD_RUNNING) { + dprintf(INFO, "\tarch: "); + dprintf(INFO, "sp 0x%lx, was preempted %u\n", t->arch.sp, t->arch.was_preempted); + } +} + + diff --git a/arch/arm/arm/arch.c b/arch/arm/arm/arch.c index 13ef3316..697f91c2 100644 --- a/arch/arm/arm/arch.c +++ b/arch/arm/arm/arch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2014 Travis Geiselbrecht + * Copyright (c) 2008-2015 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -24,48 +24,197 @@ #include #include #include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include #include #include #include #define LOCAL_TRACE 0 +#if WITH_DEV_TIMER_ARM_CORTEX_A9 +#include +#endif +#if WITH_DEV_INTERRUPT_ARM_GIC +#include +#endif +#if WITH_DEV_CACHE_PL310 +#include +#endif + +/* initial and abort stacks */ +uint8_t abort_stack[ARCH_DEFAULT_STACK_SIZE * SMP_MAX_CPUS] __CPU_ALIGN; + +static void arm_basic_setup(void); +static void spinlock_test(void); +static void spinlock_test_secondary(void); + +#if WITH_SMP +/* smp boot lock */ +spin_lock_t arm_boot_cpu_lock = 1; +volatile int secondaries_to_init = 0; +#endif + void arch_early_init(void) { /* turn off the cache */ arch_disable_cache(UCACHE); +#if WITH_DEV_CACHE_PL310 + pl310_set_enable(false); +#endif - /* set the vector base to our exception vectors so we dont need to double map at 0 */ -#if ARM_ISA_ARMV7 - arm_write_vbar(KERNEL_BASE + KERNEL_LOAD_OFFSET); + arm_basic_setup(); + +#if WITH_SMP && ARM_CPU_CORTEX_A9 + /* enable snoop control */ + addr_t scu_base = arm_read_cbar(); + *REG32(scu_base) |= (1<<0); /* enable SCU */ #endif #if ARM_WITH_MMU - arm_mmu_init(); + arm_mmu_early_init(); platform_init_mmu_mappings(); #endif /* turn the 
cache back on */ +#if WITH_DEV_CACHE_PL310 + pl310_set_enable(true); +#endif arch_enable_cache(UCACHE); +} -#if ARM_WITH_VFP - /* enable cp10 and cp11 */ - uint32_t val = arm_read_cpacr(); - val |= (3<<22)|(3<<20); - arm_write_cpacr(val); +void arch_init(void) +{ +#if WITH_SMP + arch_mp_init_percpu(); - /* make sure the fpu starts off disabled */ - arm_fpu_set_enable(false); + LTRACEF("midr 0x%x\n", arm_read_midr()); + LTRACEF("sctlr 0x%x\n", arm_read_sctlr()); + LTRACEF("actlr 0x%x\n", arm_read_actlr()); +#if ARM_CPU_CORTEX_A9 + LTRACEF("cbar 0x%x\n", arm_read_cbar()); +#endif + LTRACEF("mpidr 0x%x\n", arm_read_mpidr()); + LTRACEF("ttbcr 0x%x\n", arm_read_ttbcr()); + LTRACEF("ttbr0 0x%x\n", arm_read_ttbr0()); + LTRACEF("dacr 0x%x\n", arm_read_dacr()); +#if ARM_CPU_CORTEX_A7 + LTRACEF("l2ctlr 0x%x\n", arm_read_l2ctlr()); + LTRACEF("l2ectlr 0x%x\n", arm_read_l2ectlr()); #endif -#if ENABLE_CYCLE_COUNTER -#if ARM_ISA_ARMV7 +#if ARM_CPU_CORTEX_A9 + addr_t scu_base = arm_read_cbar(); + uint32_t scu_config = *REG32(scu_base + 4); + secondaries_to_init = scu_config & 0x3; +#elif ARM_CPU_CORTEX_A7 + uint32_t l2ctlr = arm_read_l2ctlr(); + secondaries_to_init = (l2ctlr >> 24); +#else + secondaries_to_init = SMP_MAX_CPUS - 1; /* TODO: get count from somewhere else, or add cpus as they boot */ +#endif + + lk_init_secondary_cpus(secondaries_to_init); + + dprintf(SPEW, "releasing %d secondary cpu%c\n", secondaries_to_init, secondaries_to_init > 1 ? 's' : ' '); + + /* release the secondary cpus */ + spin_unlock(&arm_boot_cpu_lock); + + /* flush the release of the lock, since the secondary cpus are running without cache on */ + arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock)); +#endif + + //spinlock_test(); + + /* finish intializing the mmu */ + arm_mmu_init(); +} + +#if WITH_SMP +void arm_secondary_entry(uint asm_cpu_num) +{ + uint cpu = arch_curr_cpu_num(); + if (cpu != asm_cpu_num) + return; + + arm_basic_setup(); + + /* enable the local L1 cache */ + //arch_enable_cache(UCACHE); + + // XXX may not be safe, but just hard enable i and d cache here + // at the moment cannot rely on arch_enable_cache not dumping the L2 + uint32_t sctlr = arm_read_sctlr(); + sctlr |= (1<<12) | (1<<2); // enable i and dcache + arm_write_sctlr(sctlr); + + /* run early secondary cpu init routines up to the threading level */ + lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1); + + arch_mp_init_percpu(); + + LTRACEF("cpu num %d\n", cpu); + LTRACEF("sctlr 0x%x\n", arm_read_sctlr()); + LTRACEF("actlr 0x%x\n", arm_read_actlr()); + + /* we're done, tell the main cpu we're up */ + atomic_add(&secondaries_to_init, -1); + smp_mb(); + __asm__ volatile("sev"); + + lk_secondary_cpu_entry(); +} +#endif + +static void arm_basic_setup(void) +{ + uint32_t sctlr = arm_read_sctlr(); + + /* ARMV7 bits */ + sctlr &= ~(1<<10); /* swp disable */ + sctlr |= (1<<11); /* enable program flow prediction */ + sctlr &= ~(1<<14); /* random cache/tlb replacement */ + sctlr &= ~(1<<25); /* E bit set to 0 on exception */ + sctlr &= ~(1<<30); /* no thumb exceptions */ + + arm_write_sctlr(sctlr); + + uint32_t actlr = arm_read_actlr(); +#if ARM_CPU_CORTEX_A9 + actlr |= (1<<2); /* enable dcache prefetch */ +#if WITH_DEV_CACHE_PL310 + actlr |= (1<<7); /* L2 exclusive cache */ + actlr |= (1<<3); /* L2 write full line of zeroes */ + actlr |= (1<<1); /* L2 prefetch hint enable */ +#endif +#if WITH_SMP + /* enable smp mode, cache and tlb broadcast */ + actlr |= (1<<6) | (1<<0); +#endif 
+#endif // ARM_CPU_CORTEX_A9 +#if ARM_CPU_CORTEX_A7 +#if WITH_SMP + /* enable smp mode */ + actlr |= (1<<6); +#endif +#endif // ARM_CPU_CORTEX_A7 + + arm_write_actlr(actlr); + +#if ENABLE_CYCLE_COUNTER && ARM_ISA_ARMV7 /* enable the cycle count register */ uint32_t en; __asm__ volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (en)); @@ -77,11 +226,26 @@ void arch_early_init(void) en = (1<<31); __asm__ volatile("mcr p15, 0, %0, c9, c12, 1" :: "r" (en)); #endif -#endif -} -void arch_init(void) -{ +#if ARM_WITH_VFP + /* enable cp10 and cp11 */ + uint32_t val = arm_read_cpacr(); + val |= (3<<22)|(3<<20); + arm_write_cpacr(val); + + /* set enable bit in fpexc */ + __asm__ volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (val)); + val |= (1<<30); + __asm__ volatile("mcr p10, 7, %0, c8, c0, 0" :: "r" (val)); + + /* make sure the fpu starts off disabled */ + arm_fpu_set_enable(false); +#endif + + /* set the vector base to our exception vectors so we dont need to double map at 0 */ +#if ARM_ISA_ARMV7 + arm_write_vbar(KERNEL_BASE + KERNEL_LOAD_OFFSET); +#endif } void arch_quiesce(void) @@ -112,9 +276,15 @@ void arch_quiesce(void) /* virtual to physical translation */ status_t arm_vtop(addr_t va, addr_t *pa) { - arm_write_ats1cpr(va & 0xfffff000); + spin_lock_saved_state_t irqstate; + + arch_interrupt_save(&irqstate, SPIN_LOCK_FLAG_INTERRUPTS); + + arm_write_ats1cpr(va & ~(PAGE_SIZE-1)); uint32_t par = arm_read_par(); + arch_interrupt_restore(irqstate, SPIN_LOCK_FLAG_INTERRUPTS); + if (par & 1) return ERR_NOT_FOUND; @@ -131,7 +301,7 @@ void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3 LTRACEF("entry %p, args 0x%lx 0x%lx 0x%lx 0x%lx\n", entry, arg0, arg1, arg2, arg3); /* we are going to shut down the system, start by disabling interrupts */ - enter_critical_section(); + arch_disable_ints(); /* give target and platform a chance to put hardware into a suitable * state for chain loading. 
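Aside: the change just above (enter_critical_section() replaced by arch_disable_ints() in arch_chain_load) is the pattern repeated throughout this patch: the global critical-section counter goes away, and exclusion is expressed either as a plain interrupt disable or as a spinlock taken with interrupt state saved. A minimal sketch of the spinlock form, reusing only the spin_lock_irqsave()/spin_unlock_irqrestore() wrappers that spinlock_test() above already exercises; the header path and the function/variable names here are illustrative assumptions, not part of the patch:

    /* sketch: SMP-safe protection of shared state, assuming the kernel
     * spinlock wrappers used elsewhere in this patch are in scope */
    #include <kernel/spinlock.h>   /* assumed header location */

    static spin_lock_t stats_lock = SPIN_LOCK_INITIAL_VALUE;
    static unsigned int stats_counter;

    static void stats_increment(void)
    {
        spin_lock_saved_state_t state;

        /* disables IRQs, then spins for the lock */
        spin_lock_irqsave(&stats_lock, state);
        stats_counter++;
        spin_unlock_irqrestore(&stats_lock, state);
    }
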
@@ -172,6 +342,9 @@ void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3 LTRACEF("disabling instruction/data cache\n"); arch_disable_cache(UCACHE); +#if WITH_DEV_CACHE_PL310 + pl310_set_enable(false); +#endif LTRACEF("branching to physical address of loader\n"); @@ -183,4 +356,37 @@ void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3 #endif } +static spin_lock_t lock = 0; + +static void spinlock_test(void) +{ + TRACE_ENTRY; + + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); + + TRACEF("cpu0: i have the lock\n"); + spin(1000000); + TRACEF("cpu0: releasing it\n"); + + spin_unlock_irqrestore(&lock, state); + + spin(1000000); +} + +static void spinlock_test_secondary(void) +{ + TRACE_ENTRY; + + spin(500000); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); + + TRACEF("cpu1: i have the lock\n"); + spin(250000); + TRACEF("cpu1: releasing it\n"); + + spin_unlock_irqrestore(&lock, state); +} + /* vim: set ts=4 sw=4 noexpandtab: */ diff --git a/arch/arm/arm/asm.S b/arch/arm/arm/asm.S index bab4ee2d..9e4a34fe 100644 --- a/arch/arm/arm/asm.S +++ b/arch/arm/arm/asm.S @@ -71,6 +71,9 @@ strex_spot: FUNCTION(arm_save_mode_regs) mrs r1, cpsr + stmia r0, { r13, r14 }^ /* usr */ + add r0, #8 + cps #0x11 /* fiq */ str r13, [r0], #4 str r14, [r0], #4 diff --git a/arch/arm/arm/cache-ops.S b/arch/arm/arm/cache-ops.S index ce36db40..05715dc1 100644 --- a/arch/arm/arm/cache-ops.S +++ b/arch/arm/arm/cache-ops.S @@ -137,11 +137,6 @@ FUNCTION(arch_disable_cache) bic r0, #(1<<1) mcr p15, 0, r0, c1, c0, 1 // disable L2 dcache #endif -#if WITH_DEV_CACHE_PL310 - bl pl310_flush_invalidate - mov r0, #0 - bl pl310_set_enable -#endif .Licache_disable: tst r7, #ICACHE @@ -185,11 +180,6 @@ FUNCTION(arch_enable_cache) orr r0, #(1<<1) mcr p15, 0, r0, c1, c0, 1 // enable L2 dcache #endif -#if WITH_DEV_CACHE_PL310 - bl pl310_invalidate - mov r0, #1 - bl pl310_set_enable -#endif mrc p15, 0, r0, c1, c0, 0 // cr1 orr r0, #(1<<2) @@ -207,6 +197,7 @@ FUNCTION(arch_enable_cache) mcr p15, 0, r0, c1, c0, 0 // enable icache .Ldone_enable: + isb msr cpsr, r8 ldmfd sp!, {r4-r12, pc} diff --git a/arch/arm/arm/exceptions.S b/arch/arm/arm/exceptions.S index 40154995..7df629f2 100644 --- a/arch/arm/arm/exceptions.S +++ b/arch/arm/arm/exceptions.S @@ -176,6 +176,7 @@ FUNCTION(arm_undefined) restore +#ifndef WITH_LIB_SYSCALL FUNCTION(arm_syscall) saveall #0x13 /* r0 now holds pointer to iframe */ @@ -183,6 +184,7 @@ FUNCTION(arm_syscall) bl arm_syscall_handler restoreall +#endif FUNCTION(arm_prefetch_abort) saveall_offset #4, #0x17 @@ -214,12 +216,6 @@ FUNCTION(arm_irq) /* r0 now holds pointer to iframe */ - /* increment the global critical section count */ - LOADCONST(r2, critical_section_count) - ldr r1, [r2] - add r1, #1 - str r1, [r2] - /* track that we're inside an irq handler */ LOADCONST(r2, __arm_in_handler) mov r1, #1 @@ -237,12 +233,6 @@ FUNCTION(arm_irq) cmp r0, #0 blne thread_preempt - /* decrement the global critical section count */ - LOADCONST(r1, critical_section_count) - ldr r0, [r1] - sub r0, r0, #1 - str r0, [r1] - restore FUNCTION(arm_fiq) diff --git a/arch/arm/arm/faults.c b/arch/arm/arm/faults.c index d6397fa3..ad4cd56f 100644 --- a/arch/arm/arm/faults.c +++ b/arch/arm/arm/faults.c @@ -31,6 +31,7 @@ static void dump_mode_regs(uint32_t spsr) struct arm_mode_regs regs; arm_save_mode_regs(&regs); + dprintf(CRITICAL, "%c%s r13 0x%08x r14 0x%08x\n", ((spsr & MODE_MASK) == MODE_USR) ? 
'*' : ' ', "usr", regs.usr_r13, regs.usr_r14); dprintf(CRITICAL, "%c%s r13 0x%08x r14 0x%08x\n", ((spsr & MODE_MASK) == MODE_FIQ) ? '*' : ' ', "fiq", regs.fiq_r13, regs.fiq_r14); dprintf(CRITICAL, "%c%s r13 0x%08x r14 0x%08x\n", ((spsr & MODE_MASK) == MODE_IRQ) ? '*' : ' ', "irq", regs.irq_r13, regs.irq_r14); dprintf(CRITICAL, "%c%s r13 0x%08x r14 0x%08x\n", ((spsr & MODE_MASK) == MODE_SVC) ? '*' : ' ', "svc", regs.svc_r13, regs.svc_r14); @@ -67,6 +68,11 @@ static void dump_mode_regs(uint32_t spsr) static void dump_fault_frame(struct arm_fault_frame *frame) { + struct thread *current_thread = get_current_thread(); + + dprintf(CRITICAL, "current_thread %p, name %s\n", + current_thread, current_thread ? current_thread->name : ""); + dprintf(CRITICAL, "r0 0x%08x r1 0x%08x r2 0x%08x r3 0x%08x\n", frame->r[0], frame->r[1], frame->r[2], frame->r[3]); dprintf(CRITICAL, "r4 0x%08x r5 0x%08x r6 0x%08x r7 0x%08x\n", frame->r[4], frame->r[5], frame->r[6], frame->r[7]); dprintf(CRITICAL, "r8 0x%08x r9 0x%08x r10 0x%08x r11 0x%08x\n", frame->r[8], frame->r[9], frame->r[10], frame->r[11]); @@ -87,7 +93,6 @@ static void dump_iframe(struct arm_iframe *frame) static void exception_die(struct arm_fault_frame *frame, const char *msg) { - inc_critical_section(); dprintf(CRITICAL, msg); dump_fault_frame(frame); @@ -97,7 +102,6 @@ static void exception_die(struct arm_fault_frame *frame, const char *msg) static void exception_die_iframe(struct arm_iframe *frame, const char *msg) { - inc_critical_section(); dprintf(CRITICAL, msg); dump_iframe(frame); @@ -112,8 +116,6 @@ void arm_syscall_handler(struct arm_fault_frame *frame) void arm_undefined_handler(struct arm_iframe *frame) { - inc_critical_section(); - /* look at the undefined instruction, figure out if it's something we can handle */ bool in_thumb = frame->spsr & (1<<5); if (in_thumb) { @@ -157,7 +159,6 @@ void arm_undefined_handler(struct arm_iframe *frame) #if ARM_WITH_VFP fpu: arm_fpu_undefined_instruction(frame); - dec_critical_section(); #endif } @@ -168,7 +169,7 @@ void arm_data_abort_handler(struct arm_fault_frame *frame) uint32_t fault_status = (BIT(fsr, 10) ? (1<<4) : 0) | BITS(fsr, 3, 0); - dprintf(CRITICAL, "\n\ndata abort, "); + dprintf(CRITICAL, "\n\ncpu %u data abort, ", arch_curr_cpu_num()); bool write = !!BIT(fsr, 11); /* decode the fault status (from table B3-23) */ @@ -228,7 +229,7 @@ void arm_prefetch_abort_handler(struct arm_fault_frame *frame) uint32_t fault_status = (BIT(fsr, 10) ? 
(1<<4) : 0) | BITS(fsr, 3, 0); - dprintf(CRITICAL, "\n\nprefetch abort, "); + dprintf(CRITICAL, "\n\ncpu %u prefetch abort, ", arch_curr_cpu_num()); /* decode the fault status (from table B3-23) */ switch (fault_status) { diff --git a/arch/arm/arm/mmu.c b/arch/arm/arm/mmu.c index d8054da7..165f313f 100644 --- a/arch/arm/arm/mmu.c +++ b/arch/arm/arm/mmu.c @@ -55,6 +55,9 @@ static uint32_t mmu_flags_to_l1_arch_flags(uint flags) switch (flags & ARCH_MMU_FLAG_CACHE_MASK) { case ARCH_MMU_FLAG_CACHED: arch_flags |= MMU_MEMORY_L1_TYPE_NORMAL_WRITE_BACK_ALLOCATE; +#if WITH_SMP + arch_flags |= MMU_MEMORY_L1_SECTION_SHAREABLE; +#endif break; case ARCH_MMU_FLAG_UNCACHED: arch_flags |= MMU_MEMORY_L1_TYPE_STRONGLY_ORDERED; @@ -97,7 +100,13 @@ static uint32_t mmu_flags_to_l2_arch_flags(uint flags) uint32_t arch_flags = 0; switch (flags & ARCH_MMU_FLAG_CACHE_MASK) { case ARCH_MMU_FLAG_CACHED: +#if WITH_SMP + arch_flags |= MMU_MEMORY_L2_SHAREABLE; +#endif arch_flags |= MMU_MEMORY_L2_TYPE_NORMAL_WRITE_BACK_ALLOCATE; +#if WITH_SMP + arch_flags |= MMU_MEMORY_L2_SHAREABLE; +#endif break; case ARCH_MMU_FLAG_UNCACHED: arch_flags |= MMU_MEMORY_L2_TYPE_STRONGLY_ORDERED; @@ -151,14 +160,23 @@ static void arm_mmu_map_section(addr_t paddr, addr_t vaddr, uint flags) arm_kernel_translation_table[index] = (paddr & ~(MB-1)) | (MMU_MEMORY_DOMAIN_MEM << 5) | MMU_MEMORY_L1_DESCRIPTOR_SECTION | flags; } +static void arm_mmu_unmap_l1_entry(uint32_t index) +{ + DEBUG_ASSERT(index < countof(arm_kernel_translation_table)); + + arm_kernel_translation_table[index] = 0; + DSB; + arm_invalidate_tlb_mva_no_barrier((vaddr_t)index * SECTION_SIZE); +} + static void arm_mmu_unmap_section(addr_t vaddr) { DEBUG_ASSERT(IS_SECTION_ALIGNED(vaddr)); + arm_mmu_unmap_l1_entry(vaddr / SECTION_SIZE); +} - uint index = vaddr / SECTION_SIZE; - arm_kernel_translation_table[index] = 0; - - arm_invalidate_tlb_mva(vaddr); +void arm_mmu_early_init(void) +{ } void arm_mmu_init(void) @@ -180,6 +198,7 @@ void arm_mmu_init(void) } map++; } + arm_after_invalidate_tlb_barrier(); } void arch_disable_mmu(void) @@ -211,6 +230,8 @@ status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags) if (flags) { *flags = 0; + if (tt_entry & MMU_MEMORY_L1_SECTION_NON_SECURE) + *flags |= ARCH_MMU_FLAG_NS; switch (tt_entry & MMU_MEMORY_L1_TYPE_MASK) { case MMU_MEMORY_L1_TYPE_STRONGLY_ORDERED: *flags |= ARCH_MMU_FLAG_UNCACHED; @@ -256,6 +277,9 @@ status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags) if (flags) { *flags = 0; + /* NS flag is only present on L1 entry */ + if (tt_entry & MMU_MEMORY_L1_SECTION_NON_SECURE) + *flags |= ARCH_MMU_FLAG_NS; switch (l2_entry & MMU_MEMORY_L2_TYPE_MASK) { case MMU_MEMORY_L2_TYPE_STRONGLY_ORDERED: *flags |= ARCH_MMU_FLAG_UNCACHED; @@ -291,10 +315,138 @@ status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags) return NO_ERROR; } + +/* + * We allow up to 4 adjacent L1 entries to point within the same memory page + * allocated for L2 page tables. + * + * L1: | 0 | 1 | 2 | 3 | .... 
| N+0 | N+1 | N+2 | N+3 | + * L2: [ 0 | .....[ (N/4) | + */ +#define L1E_PER_PAGE 4 + +static status_t get_l2_table(uint32_t l1_index, paddr_t *ppa) +{ + status_t ret; + paddr_t pa; + uint32_t tt_entry; + + DEBUG_ASSERT(ppa); + + /* lookup an existing l2 pagetable */ + for(uint i = 0; i < L1E_PER_PAGE; i++) { + tt_entry = arm_kernel_translation_table[ROUNDDOWN(l1_index, L1E_PER_PAGE) + i]; + if ((tt_entry & MMU_MEMORY_L1_DESCRIPTOR_MASK) + == MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE) { + *ppa = (paddr_t)ROUNDDOWN(MMU_MEMORY_L1_PAGE_TABLE_ADDR(tt_entry), PAGE_SIZE) + + (PAGE_SIZE / L1E_PER_PAGE) * (l1_index & (L1E_PER_PAGE-1)); + return NO_ERROR; + } + } + + /* not found: allocate it */ + uint32_t *l2_va = pmm_alloc_kpage(); + if (!l2_va) + return ERR_NO_MEMORY; + + /* wipe it clean to set no access */ + memset(l2_va, 0, PAGE_SIZE); + + /* get physical address */ + ret = arm_vtop((vaddr_t)l2_va, &pa); + ASSERT(!ret); + ASSERT(paddr_to_kvaddr(pa)); + + DEBUG_ASSERT(IS_PAGE_ALIGNED((vaddr_t)l2_va)); + DEBUG_ASSERT(IS_PAGE_ALIGNED(pa)); + + *ppa = pa + (PAGE_SIZE / L1E_PER_PAGE) * (l1_index & (L1E_PER_PAGE-1)); + + LTRACEF("allocated pagetable at %p, pa 0x%lx, pa 0x%lx\n", l2_va, pa, *ppa); + return NO_ERROR; +} + + +vm_page_t *address_to_page(paddr_t addr); // move to common + +static void put_l2_table(uint32_t l1_index, paddr_t l2_pa) +{ + /* check if any l1 entry points to this l2 table */ + for (uint i = 0; i < L1E_PER_PAGE; i++) { + uint32_t tt_entry = arm_kernel_translation_table[ROUNDDOWN(l1_index, L1E_PER_PAGE) + i]; + if ((tt_entry & MMU_MEMORY_L1_DESCRIPTOR_MASK) + == MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE) { + return; + } + } + + /* we can free this l2 table */ + vm_page_t *page = address_to_page(l2_pa); + if (!page) + panic("bad page table paddr 0x%lx\n", l2_pa); + + LTRACEF("freeing pagetable at 0x%lx\n", l2_pa); + pmm_free_page(page); +} + +#if WITH_ARCH_MMU_PICK_SPOT + +static inline bool are_regions_compatible(uint new_region_flags, + uint adjacent_region_flags) +{ + /* + * Two regions are compatible if NS flag matches. 
+ */ + uint mask = ARCH_MMU_FLAG_NS; + + if ((new_region_flags & mask) == (adjacent_region_flags & mask)) + return true; + + return false; +} + + +vaddr_t arch_mmu_pick_spot(vaddr_t base, uint prev_region_flags, + vaddr_t end, uint next_region_flags, + vaddr_t align, size_t size, uint flags) +{ + LTRACEF("base = 0x%lx, end=0x%lx, align=%ld, size=%zd, flags=0x%x\n", + base, end, align, size, flags); + + vaddr_t spot; + + if (align >= SECTION_SIZE || + are_regions_compatible(flags, prev_region_flags)) { + spot = ALIGN(base, align); + } else { + spot = ALIGN(base, SECTION_SIZE); + } + + vaddr_t spot_end = spot + size - 1; + if (spot_end < spot || spot_end > end) + return end; /* wrapped around or it does not fit */ + + if ((spot_end / SECTION_SIZE) == (end / SECTION_SIZE)) { + if (!are_regions_compatible(flags, next_region_flags)) + return end; + } + + return spot; +} +#endif /* WITH_ARCH_MMU_PICK_SPOT */ + + int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags) { LTRACEF("vaddr 0x%lx paddr 0x%lx count %u flags 0x%x\n", vaddr, paddr, count, flags); +#if !WITH_ARCH_MMU_PICK_SPOT + if (flags & ARCH_MMU_FLAG_NS) { + /* WITH_ARCH_MMU_PICK_SPOT is required to support NS memory */ + panic("NS mem is not supported\n"); + } +#endif + /* paddr and vaddr must be aligned */ DEBUG_ASSERT(IS_PAGE_ALIGNED(vaddr)); DEBUG_ASSERT(IS_PAGE_ALIGNED(paddr)); @@ -332,33 +484,16 @@ int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags) PANIC_UNIMPLEMENTED; break; case MMU_MEMORY_L1_DESCRIPTOR_INVALID: { - /* alloc and put in a L2 page table */ - uint32_t *l2_table = pmm_alloc_kpage(); - if (!l2_table) { + paddr_t l2_pa = 0; + if (get_l2_table(l1_index, &l2_pa) != NO_ERROR) { TRACEF("failed to allocate pagetable\n"); goto done; } + tt_entry = l2_pa | MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE; + if (flags & ARCH_MMU_FLAG_NS) + tt_entry |= MMU_MEMORY_L1_PAGETABLE_NON_SECURE; - /* get physical address */ - paddr_t l2_pa = 0; - arm_vtop((vaddr_t)l2_table, &l2_pa); - - LTRACEF("allocated pagetable at %p, pa 0x%lx\n", l2_table, l2_pa); - - DEBUG_ASSERT(IS_PAGE_ALIGNED((vaddr_t)l2_table)); - DEBUG_ASSERT(IS_PAGE_ALIGNED(l2_pa)); - - /* zero the L2 table and add it to the L1 table */ - memset(l2_table, 0, PAGE_SIZE); - - /* put it in the adjacent 4 entries filling in 1K page tables at once */ - l1_index = ROUNDDOWN(l1_index, 4); - arm_kernel_translation_table[l1_index] = l2_pa | MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE; - arm_kernel_translation_table[l1_index + 1] = (l2_pa + 1024) | MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE; - arm_kernel_translation_table[l1_index + 2] = (l2_pa + 2048) | MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE; - arm_kernel_translation_table[l1_index + 3] = (l2_pa + 3072) | MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE; - tt_entry = arm_kernel_translation_table[l1_index]; - + arm_kernel_translation_table[l1_index] = tt_entry; /* fallthrough */ } case MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE: { @@ -373,14 +508,14 @@ int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags) uint arch_flags = mmu_flags_to_l2_arch_flags(flags) | MMU_MEMORY_L2_DESCRIPTOR_SMALL_PAGE; - /* add the entry */ uint l2_index = (vaddr % SECTION_SIZE) / PAGE_SIZE; - l2_table[l2_index] = paddr | arch_flags; - - count--; - mapped++; - vaddr += PAGE_SIZE; - paddr += PAGE_SIZE; + do { + l2_table[l2_index++] = paddr | arch_flags; + count--; + mapped++; + vaddr += PAGE_SIZE; + paddr += PAGE_SIZE; + } while (count && (l2_index != (SECTION_SIZE / PAGE_SIZE))); break; } default: @@ -390,6 +525,7 @@ int arch_mmu_map(vaddr_t vaddr, 
paddr_t paddr, uint count, uint flags) } done: + DSB; return mapped; } @@ -399,15 +535,21 @@ int arch_mmu_unmap(vaddr_t vaddr, uint count) if (!IS_PAGE_ALIGNED(vaddr)) return ERR_INVALID_ARGS; + LTRACEF("vaddr 0x%lx count %u\n", vaddr, count); + int unmapped = 0; while (count > 0) { uint l1_index = vaddr / SECTION_SIZE; uint32_t tt_entry = arm_kernel_translation_table[l1_index]; switch (tt_entry & MMU_MEMORY_L1_DESCRIPTOR_MASK) { - case MMU_MEMORY_L1_DESCRIPTOR_INVALID: + case MMU_MEMORY_L1_DESCRIPTOR_INVALID: { /* this top level page is not mapped, move on to the next one */ - goto next_page; + uint page_cnt = MIN((SECTION_SIZE - (vaddr % SECTION_SIZE)) / PAGE_SIZE, count); + vaddr += page_cnt * PAGE_SIZE; + count -= page_cnt; + break; + } case MMU_MEMORY_L1_DESCRIPTOR_SECTION: if (IS_SECTION_ALIGNED(vaddr) && count >= SECTION_SIZE / PAGE_SIZE) { /* we're asked to remove at least all of this section, so just zero it out */ @@ -417,25 +559,60 @@ int arch_mmu_unmap(vaddr_t vaddr, uint count) vaddr += SECTION_SIZE; count -= SECTION_SIZE / PAGE_SIZE; unmapped += SECTION_SIZE / PAGE_SIZE; - goto next; } else { // XXX handle unmapping just part of a section // will need to convert to a L2 table and then unmap the parts we are asked to PANIC_UNIMPLEMENTED; } break; + case MMU_MEMORY_L1_DESCRIPTOR_PAGE_TABLE: { + uint32_t *l2_table = paddr_to_kvaddr(MMU_MEMORY_L1_PAGE_TABLE_ADDR(tt_entry)); + uint page_idx = (vaddr % SECTION_SIZE) / PAGE_SIZE; + uint page_cnt = MIN((SECTION_SIZE / PAGE_SIZE) - page_idx, count); + + /* unmap page run */ + for (uint i = 0; i < page_cnt; i++) { + l2_table[page_idx++] = 0; + } + DSB; + + /* invalidate tlb */ + for (uint i = 0; i < page_cnt; i++) { + arm_invalidate_tlb_mva_no_barrier(vaddr); + vaddr += PAGE_SIZE; + } + count -= page_cnt; + unmapped += page_cnt; + + /* + * Check if all pages related to this l1 entry are deallocated. + * We only need to check pages that we did not clear above starting + * from page_idx and wrapped around SECTION. + */ + page_cnt = (SECTION_SIZE / PAGE_SIZE) - page_cnt; + while (page_cnt) { + if (page_idx == (SECTION_SIZE / PAGE_SIZE)) + page_idx = 0; + if (l2_table[page_idx++]) + break; + page_cnt--; + } + if (!page_cnt) { + /* we can kill l1 entry */ + arm_mmu_unmap_l1_entry(l1_index); + + /* try to free l2 page itself */ + put_l2_table(l1_index, MMU_MEMORY_L1_PAGE_TABLE_ADDR(tt_entry)); + } + break; + } + default: // XXX not implemented supersections or L2 tables PANIC_UNIMPLEMENTED; } - -next_page: - vaddr += PAGE_SIZE; - count--; -next: - ; } - + arm_after_invalidate_tlb_barrier(); return unmapped; } diff --git a/arch/arm/arm/mp.c b/arch/arm/arm/mp.c new file mode 100644 index 00000000..3e4fa95e --- /dev/null +++ b/arch/arm/arm/mp.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include + +#include +#include +#include +#include +#include + +#if WITH_DEV_INTERRUPT_ARM_GIC +#include +#elif PLATFORM_BCM2835 +/* bcm2835 has a weird custom interrupt controller for MP */ +extern void bcm2835_send_ipi(uint irq, uint cpu_mask); +#else +#error need other implementation of interrupt controller that can ipi +#endif + +#define LOCAL_TRACE 0 + +#define GIC_IPI_BASE (14) + +status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) +{ + LTRACEF("target 0x%x, ipi %u\n", target, ipi); + +#if WITH_DEV_INTERRUPT_ARM_GIC + uint gic_ipi_num = ipi + GIC_IPI_BASE; + + /* filter out targets outside of the range of cpus we care about */ + target &= ((1UL << SMP_MAX_CPUS) - 1); + if (target != 0) { + LTRACEF("target 0x%x, gic_ipi %u\n", target, gic_ipi_num); + u_int flags = 0; +#if WITH_LIB_SM + flags |= ARM_GIC_SGI_FLAG_NS; +#endif + arm_gic_sgi(gic_ipi_num, flags, target); + } +#elif PLATFORM_BCM2835 + /* filter out targets outside of the range of cpus we care about */ + target &= ((1UL << SMP_MAX_CPUS) - 1); + if (target != 0) { + bcm2835_send_ipi(ipi, target); + } +#endif + + return NO_ERROR; +} + +enum handler_return arm_ipi_generic_handler(void *arg) +{ + LTRACEF("cpu %u, arg %p\n", arch_curr_cpu_num(), arg); + + return INT_NO_RESCHEDULE; +} + +enum handler_return arm_ipi_reschedule_handler(void *arg) +{ + LTRACEF("cpu %u, arg %p\n", arch_curr_cpu_num(), arg); + + return mp_mbx_reschedule_irq(); +} + +void arch_mp_init_percpu(void) +{ +#if WITH_DEV_INTERRUPT_ARM_GIC + register_int_handler(MP_IPI_GENERIC + GIC_IPI_BASE, &arm_ipi_generic_handler, 0); + register_int_handler(MP_IPI_RESCHEDULE + GIC_IPI_BASE, &arm_ipi_reschedule_handler, 0); +#endif +} + diff --git a/arch/arm/arm/ops.S b/arch/arm/arm/ops.S index cae2baed..7e75a952 100644 --- a/arch/arm/arm/ops.S +++ b/arch/arm/arm/ops.S @@ -93,7 +93,7 @@ FUNCTION(_atomic_or) mov r0, r12 bx lr -FUNCTION(spin_trylock) +FUNCTION(arch_spin_trylock) mov r2, r0 mov r1, #1 ldrex r0, [r2] @@ -102,7 +102,7 @@ FUNCTION(spin_trylock) dmb bx lr -FUNCTION(spin_lock) +FUNCTION(arch_spin_lock) mov r1, #1 1: ldrex r2, [r0] @@ -114,7 +114,7 @@ FUNCTION(spin_lock) dmb bx lr -FUNCTION(spin_unlock) +FUNCTION(arch_spin_unlock) mov r1, #0 dmb str r1, [r0] diff --git a/arch/arm/arm/start.S b/arch/arm/arm/start.S index f791daa5..8a2419a2 100644 --- a/arch/arm/arm/start.S +++ b/arch/arm/arm/start.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Travis Geiselbrecht + * Copyright (c) 2008-2015 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -36,6 +36,9 @@ _start: b arm_reserved b arm_irq b arm_fiq +#if WITH_SMP + b arm_reset +#endif .weak platform_reset platform_reset: @@ -43,29 +46,42 @@ platform_reset: .globl arm_reset arm_reset: - /* do some cpu setup */ -#if ARM_WITH_CP15 + /* do some early cpu setup */ mrc p15, 0, r12, c1, c0, 0 - /* XXX this is currently for arm926, revist with armv6 cores */ - /* new thumb behavior, low exception vectors, i/d cache 
disable, mmu disabled */ - bic r12, #(1<<15| 1<<13 | 1<<12) - bic r12, #(1<<2 | 1<<1 | 1<<0) -#if ARM_ARCH_LEVEL < 6 - /* enable alignment faults on pre-ARMv6 hardware. On v6+, - * GCC is free to generate unaligned accesses. - */ - orr r12, #(1<<1) -#endif + /* i/d cache disable, mmu disabled */ + bic r12, #(1<<12) + bic r12, #(1<<2 | 1<<0) +#if WITH_KERNEL_VM + /* enable caches so atomics and spinlocks work */ + orr r12, r12, #(1<<12) + orr r12, r12, #(1<<2) +#endif // WITH_KERNEL_VM mcr p15, 0, r12, c1, c0, 0 -#endif + + /* calculate the physical offset from our eventual virtual location */ +.Lphys_offset: + ldr r4, =.Lphys_offset + adr r11, .Lphys_offset + sub r11, r11, r4 + +#if WITH_SMP + /* figure out our cpu number */ + mrc p15, 0, r12, c0, c0, 5 /* read MPIDR */ + + /* mask off the bottom bits to test cluster number:cpu number */ + ubfx r12, r12, #0, #SMP_CPU_ID_BITS + + /* if we're not cpu 0:0, fall into a trap and wait */ + teq r12, #0 + movne r0, r12 + bne arm_secondary_setup +#endif // WITH_SMP #if WITH_CPU_EARLY_INIT /* call platform/arch/etc specific init code */ bl __cpu_early_init -#endif +#endif // WITH_CPU_EARLY_INIT -#if WITH_KERNEL_VM -__relocate_start: #if WITH_NO_PHYS_RELOCATION /* assume that image is properly loaded in physical memory */ #else @@ -73,7 +89,7 @@ __relocate_start: adr r4, _start /* this emits sub r4, pc, #constant */ ldr r5, =(MEMBASE + KERNEL_LOAD_OFFSET) /* calculate the binary's physical load address */ subs r12, r4, r5 /* calculate the delta between where we're loaded and the proper spot */ - beq .Lsetup_mmu + beq .Lrelocate_done /* we need to relocate ourselves to the proper spot */ ldr r6, =__data_end @@ -89,25 +105,22 @@ __relocate_start: /* we're relocated, jump to the right address */ sub pc, r12 - nop -#endif + nop /* skipped in the add to pc */ -__mmu_start: + /* recalculate the physical offset */ + sub r11, r11, r12 + +.Lrelocate_done: +#endif // !WITH_NO_PHYS_RELOCATION + +#if WITH_KERNEL_VM .Lsetup_mmu: + /* set up the mmu according to mmu_initial_mappings */ - /* calculate our physical to virtual offset */ - mov r12, pc - ldr r5, =.Laddr1 -.Laddr1: - sub r12, r5 - - /* r12 now holds the offset from virtual to physical: - * virtual + r12 = physical */ - /* load the base of the translation table and clear the table */ ldr r4, =arm_kernel_translation_table - add r4, r12 + add r4, r4, r11 /* r4 = physical address of translation table */ mov r5, #0 @@ -122,13 +135,19 @@ __mmu_start: /* load the address of the mmu_initial_mappings table and start processing */ ldr r5, =mmu_initial_mappings - add r5, r12 + add r5, r5, r11 /* r5 = physical address of mmu initial mapping table */ .Linitial_mapping_loop: ldmia r5!, { r6-r10 } /* r6 = phys, r7 = virt, r8 = size, r9 = flags, r10 = name */ + /* round size up to 1MB alignment */ + ubfx r10, r6, #0, #20 + add r8, r8, r10 + add r8, r8, #(1 << 20) + sub r8, r8, #1 + /* mask all the addresses and sizes to 1MB boundaries */ lsr r6, #20 /* r6 = physical address / 1MB */ lsr r7, #20 /* r7 = virtual address / 1MB */ @@ -148,11 +167,11 @@ __mmu_start: /* r10 = mmu entry flags */ 0: - orr r11, r10, r6, lsl #20 - /* r11 = phys addr | flags */ + orr r12, r10, r6, lsl #20 + /* r12 = phys addr | flags */ /* store into appropriate translation table entry */ - str r11, [r4, r7, lsl #2] + str r12, [r4, r7, lsl #2] /* loop until we're done */ add r6, #1 @@ -164,81 +183,49 @@ __mmu_start: .Linitial_mapping_done: - /* set up the mmu */ +#if MMU_WITH_TRAMPOLINE + /* move arm_kernel_translation_table address to r8 and + * 
set cacheable attributes on translation walk + */ + orr r8, r4, #MMU_TTBRx_FLAGS - /* Invalidate TLB */ - mov r12, #0 - mcr p15, 0, r12, c8, c7, 0 - isb + /* Prepare tt_trampoline page table */ + /* Calculate pagetable physical addresses */ + ldr r4, =tt_trampoline /* r4 = tt_trampoline vaddr */ + add r4, r4, r11 /* r4 = tt_trampoline paddr */ - /* Write 0 to TTBCR */ - mcr p15, 0, r12, c2, c0, 2 - isb - - /* set cacheable attributes on translation walk */ - /* (SMP extensions) non-shareable, inner write-back write-allocate */ - orr r4, #(1<<6 | 0<<1) - /* outer write-back write-allocate */ - orr r4, #(1<<3) - - /* Write ttbr with phys addr of the translation table */ - mcr p15, 0, r4, c2, c0, 0 - isb - - /* Write DACR */ - mov r12, #0x1 - mcr p15, 0, r12, c3, c0, 0 - isb - - /* Read SCTLR */ - mrc p15, 0, r12, c1, c0, 0 - - /* Disable TRE/AFE */ - bic r12, #(1<<29 | 1<<28) - - /* Turn on the MMU */ - orr r12, #0x1 - - /* Write back SCTLR */ - mcr p15, 0, r12, c1, c0, 0 - isb - - /* Jump to virtual code address */ - ldr pc, =1f + /* Zero tt_trampoline translation tables */ + mov r6, #0 + mov r7, #0 1: + str r7, [r4, r6, lsl#2] + add r6, #1 + cmp r6, #0x1000 + blt 1b - /* Invalidate TLB */ - mov r12, #0 - mcr p15, 0, r12, c8, c7, 0 - isb + /* Setup 1M section mapping at + * phys -> phys and + * virt -> phys + */ + lsr r6, pc, #20 /* r6 = paddr index */ + ldr r7, =MMU_KERNEL_L1_PTE_FLAGS + add r7, r7, r6, lsl #20 /* r7 = pt entry */ -#else - /* see if we need to relocate */ - mov r4, pc - sub r4, r4, #(.Laddr - _start) -.Laddr: - ldr r5, =_start - cmp r4, r5 - beq .Lstack_setup + str r7, [r4, r6, lsl #2] /* tt_trampoline[paddr index] = pt entry */ - /* we need to relocate ourselves to the proper spot */ - ldr r6, =__data_end + rsb r6, r11, r6, lsl #20 /* r6 = vaddr */ + str r7, [r4, r6, lsr #(20 - 2)] /* tt_trampoline[vaddr index] = pt entry */ +#endif // MMU_WITH_TRAMPOLINE -.Lrelocate_loop: - ldr r7, [r4], #4 - str r7, [r5], #4 - cmp r5, r6 - bne .Lrelocate_loop - - /* we're relocated, jump to the right address */ - ldr r4, =.Lstack_setup - bx r4 -#endif + /* set up the mmu */ + bl .Lmmu_setup +#endif // WITH_KERNEL_VM /* at this point we're running at our final location in virtual memory (if enabled) */ .Lstack_setup: /* set up the stack for irq, fiq, abort, undefined, system/user, and lastly supervisor mode */ - ldr r12, =abort_stack_top + ldr r12, =abort_stack + add r12, #ARCH_DEFAULT_STACK_SIZE cpsid i,#0x12 /* irq */ mov sp, r12 @@ -287,18 +274,162 @@ __mmu_start: bl lk_main b . 
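For reference, the MMU_WITH_TRAMPOLINE path prepared above maps the 1MB section the boot code is currently executing from twice in the throwaway tt_trampoline table: once at its physical address, so the instruction stream survives the instant the MMU is switched on, and once at its final virtual address; the .Lmmu_setup routine added below then switches TTBR0 to the real kernel translation table once running at virtual addresses. A C rendering of what that assembly builds, as a sketch only; the function and parameter names are illustrative, not part of the patch:

    #include <stdint.h>
    #include <string.h>

    #define MB (1u << 20)

    /* tt: the 4096-entry L1 trampoline table, pc_paddr: physical address the
     * boot code is executing at, phys_offset: r11 (physical minus virtual
     * load address), l1_pte_flags: MMU_KERNEL_L1_PTE_FLAGS */
    static void build_trampoline(uint32_t tt[4096], uint32_t pc_paddr,
                                 uint32_t phys_offset, uint32_t l1_pte_flags)
    {
        memset(tt, 0, 4096 * sizeof(uint32_t));              /* zero the table */

        uint32_t paddr_index = pc_paddr / MB;                /* 1MB section we run in */
        uint32_t pte = (paddr_index * MB) | l1_pte_flags;    /* section descriptor */

        tt[paddr_index] = pte;                               /* phys -> phys identity map */
        tt[(paddr_index * MB - phys_offset) / MB] = pte;     /* virt -> same phys section */
    }
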
+#if WITH_KERNEL_VM + /* per cpu mmu setup, shared between primary and secondary cpus + args: + r4 == translation table physical + r8 == final translation table physical (if using trampoline) + */ +.Lmmu_setup: + /* Invalidate TLB */ + mov r12, #0 + mcr p15, 0, r12, c8, c7, 0 + isb + + /* Write 0 to TTBCR */ + mcr p15, 0, r12, c2, c0, 2 + isb + + /* set cacheable attributes on translation walk */ + /* inner write-back write-allocate */ + orr r12, r4, #(1<<6 | 0<<1) + /* outer write-back write-allocate */ + orr r12, #(1<<3) +#if WITH_SMP + /* (SMP extensions) shareable, outer shareable */ + orr r12, #(1<<1 | 0<<5) +#endif + + /* Write ttbr with phys addr of the translation table */ + mcr p15, 0, r12, c2, c0, 0 + isb + + /* Write DACR */ + mov r12, #0x1 + mcr p15, 0, r12, c3, c0, 0 + isb + + /* Read SCTLR into r12 */ + mrc p15, 0, r12, c1, c0, 0 + + /* Disable TRE/AFE */ + bic r12, #(1<<29 | 1<<28) + + /* Turn on the MMU */ + orr r12, #0x1 + + /* Write back SCTLR */ + mcr p15, 0, r12, c1, c0, 0 + isb + + /* Jump to virtual code address */ + ldr pc, =1f +1: + +#if MMU_WITH_TRAMPOLINE + /* Switch to main page table */ + mcr p15, 0, r8, c2, c0, 0 + isb +#endif + + /* Invalidate TLB */ + mov r12, #0 + mcr p15, 0, r12, c8, c7, 0 + isb + + /* assume lr was in physical memory, adjust it before returning */ + sub lr, r11 + bx lr +#endif + +#if WITH_SMP + /* secondary cpu entry point */ + /* r0 holds cpu number */ + /* r11 hold phys offset */ +FUNCTION(arm_secondary_setup) + /* all other cpus, trap and wait to be released */ +1: + wfe + ldr r12, =arm_boot_cpu_lock + add r12, r12, r11 + ldr r12, [r12] + cmp r12, #0 + bne 1b + + and r1, r0, #0xff + cmp r1, #(1 << SMP_CPU_CLUSTER_SHIFT) + bge unsupported_cpu_trap + bic r0, r0, #0xff + orr r0, r1, r0, LSR #(8 - SMP_CPU_CLUSTER_SHIFT) + + cmp r0, #SMP_MAX_CPUS + bge unsupported_cpu_trap + mov r5, r0 /* save cpu num */ + + /* set up the stack for irq, fiq, abort, undefined, system/user, and lastly supervisor mode */ + ldr r1, =abort_stack + mov r2, #ARCH_DEFAULT_STACK_SIZE + add r0, #1 + mul r2, r2, r0 + add r1, r2 + + cpsid i,#0x12 /* irq */ + mov sp, r1 + + cpsid i,#0x11 /* fiq */ + mov sp, r1 + + cpsid i,#0x17 /* abort */ + mov sp, r1 + + cpsid i,#0x1b /* undefined */ + mov sp, r1 + + cpsid i,#0x1f /* system */ + mov sp, r1 + + cpsid i,#0x13 /* supervisor */ + mov sp, r1 + +#if WITH_KERNEL_VM + /* load the physical base of the translation table and clear the table */ + ldr r4, =arm_kernel_translation_table + add r4, r4, r11 + +#if MMU_WITH_TRAMPOLINE + /* move arm_kernel_translation_table address to r8 and + * set cacheable attributes on translation walk + */ + orr r8, r4, #MMU_TTBRx_FLAGS + + /* Prepare tt_trampoline page table */ + /* Calculate pagetable physical addresses */ + ldr r4, =tt_trampoline /* r4 = tt_trampoline vaddr */ + add r4, r4, r11 /* r4 = tt_trampoline paddr */ +#endif + + /* set up the mmu on this cpu and switch to virtual memory */ + bl .Lmmu_setup +#endif + + /* stay in supervisor and call into arm arch code to continue setup */ + mov r0, r5 + bl arm_secondary_entry + + /* cpus above the number we claim to support get trapped here */ +unsupported_cpu_trap: + wfe + b unsupported_cpu_trap +#endif + .ltorg -.bss -.align 3 - /* the abort stack is for unrecoverable errors. - * also note the initial working stack is set to here. 
- * when the threading system starts up it'll switch to a new - * dynamically allocated stack, so we don't need it for very long - */ -LOCAL_DATA(abort_stack) - .skip 4096 -LOCAL_DATA(abort_stack_top) +#if WITH_KERNEL_VM && MMU_WITH_TRAMPOLINE +.section ".bss.prebss.translation_table" +.align 14 +DATA(tt_trampoline) + .skip 16384 +#endif .data .align 2 diff --git a/arch/arm/arm/thread.c b/arch/arm/arm/thread.c index ec377742..3e605b48 100644 --- a/arch/arm/arm/thread.c +++ b/arch/arm/arm/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Travis Geiselbrecht + * Copyright (c) 2008-2014 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -49,8 +50,9 @@ static void initial_thread_func(void) // dprintf("initial_thread_func: thread %p calling %p with arg %p\n", current_thread, current_thread->entry, current_thread->arg); // dump_thread(current_thread); - /* exit the implicit critical section we're within */ - exit_critical_section(); + /* release the thread lock that was implicitly held across the reschedule */ + spin_unlock(&thread_lock); + arch_enable_ints(); thread_t *ct = get_current_thread(); ret = ct->entry(ct->arg); @@ -85,7 +87,7 @@ void arch_thread_initialize(thread_t *t) void arch_context_switch(thread_t *oldthread, thread_t *newthread) { -// dprintf("arch_context_switch: old %p (%s), new %p (%s)\n", oldthread, oldthread->name, newthread, newthread->name); +// TRACEF("arch_context_switch: cpu %u old %p (%s), new %p (%s)\n", arch_curr_cpu_num(), oldthread, oldthread->name, newthread, newthread->name); #if ARM_WITH_VFP arm_fpu_thread_swap(oldthread, newthread); #endif @@ -94,3 +96,11 @@ void arch_context_switch(thread_t *oldthread, thread_t *newthread) } +void arch_dump_thread(thread_t *t) +{ + if (t->state != THREAD_RUNNING) { + dprintf(INFO, "\tarch: "); + dprintf(INFO, "sp 0x%lx\n", t->arch.sp); + } +} + diff --git a/arch/arm/include/arch/arch_ops.h b/arch/arm/include/arch/arch_ops.h index b7d181b5..aa2ea9b8 100644 --- a/arch/arm/include/arch/arch_ops.h +++ b/arch/arm/include/arch/arch_ops.h @@ -20,8 +20,7 @@ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef __ARCH_ARM_OPS_H -#define __ARCH_ARM_OPS_H +#pragma once #ifndef ASSEMBLY @@ -196,7 +195,7 @@ static inline int atomic_cmpxchg(volatile int *ptr, int oldval, int newval) "ldrex %[old], [%[ptr]]\n" "mov %[test], #0\n" "teq %[old], %[oldval]\n" -#if ARM_ISA_ARMV7M +#if (ARM_ISA_ARMV7M || __thumb__) "bne 0f\n" "strex %[test], %[newval], [%[ptr]]\n" "0:\n" @@ -233,6 +232,19 @@ static inline uint32_t arch_cycle_count(void) #endif } +#if WITH_SMP && ARM_ISA_ARMV7 +static inline uint arch_curr_cpu_num(void) +{ + uint32_t mpidr = arm_read_mpidr(); + return ((mpidr & ((1U << SMP_CPU_ID_BITS) - 1)) >> 8 << SMP_CPU_CLUSTER_SHIFT) | (mpidr & 0xff); +} +#else +static inline uint arch_curr_cpu_num(void) +{ + return 0; +} +#endif + /* defined in kernel/thread.h */ #if !ARM_ISA_ARMV7M @@ -305,63 +317,5 @@ static inline uint32_t arch_cycle_count(void) { return _arch_cycle_count(); } #define smp_rmb() CF #endif -typedef unsigned long spin_lock_t; -void spin_lock(spin_lock_t *lock); /* interrupts should already be disabled */ -int spin_trylock(spin_lock_t *lock); /* Returns 0 on success, non-0 on failure */ -void spin_unlock(spin_lock_t *lock); - -typedef ulong spin_lock_saved_state_t; -typedef ulong spin_lock_save_flags_t; - -enum { - /* Possible future flags: - * SPIN_LOCK_FLAG_PMR_MASK = 0x000000ff, - * SPIN_LOCK_FLAG_PREEMPTION = 0x10000000, - * SPIN_LOCK_FLAG_SET_PMR = 0x20000000, - */ - - /* ARM specific flags */ - SPIN_LOCK_FLAG_IRQ = 0x40000000, - SPIN_LOCK_FLAG_FIQ = 0x80000000, /* Do not use unless IRQs are already disabled */ - SPIN_LOCK_FLAG_IRQ_FIQ = SPIN_LOCK_FLAG_IRQ | SPIN_LOCK_FLAG_FIQ, - - /* Generic flags */ - SPIN_LOCK_FLAG_INTERRUPTS = SPIN_LOCK_FLAG_IRQ, -}; - -enum { - /* private */ - SPIN_LOCK_STATE_RESTORE_IRQ = 1, - SPIN_LOCK_STATE_RESTORE_FIQ = 2, -}; - -static inline void -spin_lock_save(spin_lock_t *lock, spin_lock_saved_state_t *statep, spin_lock_save_flags_t flags) -{ - spin_lock_saved_state_t state = 0; - if ((flags & SPIN_LOCK_FLAG_IRQ) && !arch_ints_disabled()) { - state |= SPIN_LOCK_STATE_RESTORE_IRQ; - arch_disable_ints(); - } - if ((flags & SPIN_LOCK_FLAG_FIQ) && !arch_fiqs_disabled()) { - state |= SPIN_LOCK_STATE_RESTORE_FIQ; - arch_disable_fiqs(); - } - *statep = state; - spin_lock(lock); -} - -static inline void -spin_unlock_restore(spin_lock_t *lock, spin_lock_saved_state_t old_state, spin_lock_save_flags_t flags) -{ - spin_unlock(lock); - if ((flags & SPIN_LOCK_FLAG_FIQ) && (old_state & SPIN_LOCK_STATE_RESTORE_FIQ)) - arch_enable_fiqs(); - if ((flags & SPIN_LOCK_FLAG_IRQ) && (old_state & SPIN_LOCK_STATE_RESTORE_IRQ)) - arch_enable_ints(); -} - #endif // ASSEMBLY -#endif - diff --git a/arch/arm/include/arch/arm.h b/arch/arm/include/arch/arm.h index 38d137ef..47c7824d 100644 --- a/arch/arm/include/arch/arm.h +++ b/arch/arm/include/arch/arm.h @@ -47,6 +47,7 @@ __BEGIN_CDECLS #else #error unhandled arm isa #endif +#define NOP __asm__ volatile("nop"); void arm_context_switch(vaddr_t *old_sp, vaddr_t new_sp); @@ -98,6 +99,7 @@ struct arm_fault_frame { #define MODE_SYS 0x1f struct arm_mode_regs { + uint32_t usr_r13, usr_r14; uint32_t fiq_r13, fiq_r14; uint32_t irq_r13, irq_r14; uint32_t svc_r13, svc_r14; @@ -115,6 +117,12 @@ static inline __ALWAYS_INLINE uint32_t arm_read_##reg(void) { \ return val; \ } \ \ +static inline __ALWAYS_INLINE uint32_t arm_read_##reg##_relaxed(void) { \ + uint32_t val; \ + __asm__("mrc " #cp ", " #op1 ", %0, " #c1 "," #c2 "," #op2 : "=r" (val)); \ + return val; \ +} \ +\ static inline __ALWAYS_INLINE void arm_write_##reg(uint32_t val) 
{ \ __asm__ volatile("mcr " #cp ", " #op1 ", %0, " #c1 "," #c2 "," #op2 :: "r" (val)); \ ISB; \ @@ -156,6 +164,7 @@ GEN_CP15_REG_FUNCS(tpidrprw, 0, c13, c0, 4); GEN_CP15_REG_FUNCS(midr, 0, c0, c0, 0); GEN_CP15_REG_FUNCS(mpidr, 0, c0, c0, 5); GEN_CP15_REG_FUNCS(vbar, 0, c12, c0, 0); +GEN_CP15_REG_FUNCS(cbar, 4, c15, c0, 0); GEN_CP15_REG_FUNCS(ats1cpr, 0, c7, c8, 0); GEN_CP15_REG_FUNCS(ats1cpw, 0, c7, c8, 1); @@ -167,6 +176,11 @@ GEN_CP15_REG_FUNCS(ats12nsour, 0, c7, c8, 6); GEN_CP15_REG_FUNCS(ats12nsouw, 0, c7, c8, 7); GEN_CP15_REG_FUNCS(par, 0, c7, c4, 0); +/* Branch predictor invalidate */ +GEN_CP15_REG_FUNCS(bpiall, 0, c7, c5, 6); +GEN_CP15_REG_FUNCS(bpimva, 0, c7, c5, 7); +GEN_CP15_REG_FUNCS(bpiallis, 0, c7, c1, 6); + /* tlb registers */ GEN_CP15_REG_FUNCS(tlbiallis, 0, c8, c3, 0); GEN_CP15_REG_FUNCS(tlbimvais, 0, c8, c3, 1); @@ -183,6 +197,9 @@ GEN_CP15_REG_FUNCS(tlbimva, 0, c8, c7, 1); GEN_CP15_REG_FUNCS(tlbiasid, 0, c8, c7, 2); GEN_CP15_REG_FUNCS(tlbimvaa, 0, c8, c7, 3); +GEN_CP15_REG_FUNCS(l2ctlr, 1, c9, c0, 2); +GEN_CP15_REG_FUNCS(l2ectlr, 1, c9, c0, 3); + /* debug registers */ GEN_CP14_REG_FUNCS(dbddidr, 0, c0, c0, 0); GEN_CP14_REG_FUNCS(dbgdrar, 0, c1, c0, 0); diff --git a/arch/arm/include/arch/arm/mmu.h b/arch/arm/include/arch/arm/mmu.h index 4057f3a7..b32d267e 100644 --- a/arch/arm/include/arch/arm/mmu.h +++ b/arch/arm/include/arch/arm/mmu.h @@ -51,7 +51,9 @@ #define MMU_MEMORY_L1_TYPE_NORMAL_WRITE_THROUGH ((0x0 << 12) | (0x2 << 2)) #define MMU_MEMORY_L1_TYPE_NORMAL_WRITE_BACK_NO_ALLOCATE ((0x0 << 12) | (0x3 << 2)) #define MMU_MEMORY_L1_TYPE_NORMAL_WRITE_BACK_ALLOCATE ((0x1 << 12) | (0x3 << 2)) -#define MMU_MEMORY_L1_TYPE_MASK ((0x3 << 12) | (0x3 << 2)) +#define MMU_MEMORY_L1_TYPE_MASK ((0x7 << 12) | (0x3 << 2)) + +#define MMU_MEMORY_L1_TYPE_INNER_WRITE_BACK_ALLOCATE ((0x4 << 12) | (0x1 << 2)) /* C, B and TEX[2:0] encodings without TEX remap (for second level descriptors) */ /* TEX | CB */ @@ -62,7 +64,7 @@ #define MMU_MEMORY_L2_TYPE_NORMAL_WRITE_THROUGH ((0x0 << 6) | (0x2 << 2)) #define MMU_MEMORY_L2_TYPE_NORMAL_WRITE_BACK_NO_ALLOCATE ((0x0 << 6) | (0x3 << 2)) #define MMU_MEMORY_L2_TYPE_NORMAL_WRITE_BACK_ALLOCATE ((0x1 << 6) | (0x3 << 2)) -#define MMU_MEMORY_L2_TYPE_MASK ((0x3 << 6) | (0x3 << 2)) +#define MMU_MEMORY_L2_TYPE_MASK ((0x7 << 6) | (0x3 << 2)) #define MMU_MEMORY_DOMAIN_MEM (0) @@ -109,6 +111,13 @@ #define MMU_MEMORY_L1_SECTION_NON_GLOBAL (1 << 17) #define MMU_MEMORY_L1_SECTION_XN (1 << 4) +#define MMU_MEMORY_L1_CB_SHIFT 2 +#define MMU_MEMORY_L1_TEX_SHIFT 12 + +#define MMU_MEMORY_SET_L1_INNER(val) (((val) & 0x3) << MMU_MEMORY_L1_CB_SHIFT) +#define MMU_MEMORY_SET_L1_OUTER(val) (((val) & 0x3) << MMU_MEMORY_L1_TEX_SHIFT) +#define MMU_MEMORY_SET_L1_CACHEABLE_MEM (0x4 << MMU_MEMORY_L1_TEX_SHIFT) + #define MMU_MEMORY_L2_SHAREABLE (1 << 10) #define MMU_MEMORY_L2_NON_GLOBAL (1 << 11) @@ -134,6 +143,8 @@ /* IRGN[1:0] is encoded as: IRGN[0] in TTBRx[6], and IRGN[1] in TTBRx[0] */ #define MMU_MEMORY_TTBR_IRGN(x) ((((x) & 0x1) << 6) | \ ((((x) >> 1) & 0x1) << 0)) +#define MMU_MEMORY_TTBR_S (1 << 1) +#define MMU_MEMORY_TTBR_NOS (1 << 5) /* Default configuration for main kernel page table: * - section mappings for memory @@ -142,18 +153,31 @@ /* Enable cached page table walks: * inner/outer (IRGN/RGN): write-back + write-allocate + * (select inner sharable on smp) */ +#if WITH_SMP +#define MMU_TTBRx_SHARABLE_FLAGS (MMU_MEMORY_TTBR_S | MMU_MEMORY_TTBR_NOS) +#else +#define MMU_TTBRx_SHARABLE_FLAGS (0) +#endif #define MMU_TTBRx_FLAGS \ 
(MMU_MEMORY_TTBR_RGN(MMU_MEMORY_WRITE_BACK_ALLOCATE) |\ - MMU_MEMORY_TTBR_IRGN(MMU_MEMORY_WRITE_BACK_ALLOCATE)) + MMU_MEMORY_TTBR_IRGN(MMU_MEMORY_WRITE_BACK_ALLOCATE) | \ + MMU_TTBRx_SHARABLE_FLAGS) /* Section mapping, TEX[2:0]=001, CB=11, S=1, AP[2:0]=001 */ +#if WITH_SMP +#define MMU_KERNEL_L1_PTE_FLAGS \ + (MMU_MEMORY_L1_DESCRIPTOR_SECTION | \ + MMU_MEMORY_L1_TYPE_NORMAL_WRITE_BACK_ALLOCATE | \ + MMU_MEMORY_L1_AP_P_RW_U_NA | \ + MMU_MEMORY_L1_SECTION_SHAREABLE) +#else #define MMU_KERNEL_L1_PTE_FLAGS \ (MMU_MEMORY_L1_DESCRIPTOR_SECTION | \ MMU_MEMORY_L1_TYPE_NORMAL_WRITE_BACK_ALLOCATE | \ MMU_MEMORY_L1_AP_P_RW_U_NA) -/* XXX add with smp to above */ -// MMU_MEMORY_L1_SECTION_SHAREABLE | +#endif #define MMU_INITIAL_MAP_STRONGLY_ORDERED \ (MMU_MEMORY_L1_DESCRIPTOR_SECTION | \ @@ -176,48 +200,77 @@ __BEGIN_CDECLS +void arm_mmu_early_init(void); void arm_mmu_init(void); status_t arm_vtop(addr_t va, addr_t *pa); /* tlb routines */ -static inline void arm_invalidate_tlb_global(void) { - CF; + +static inline void arm_after_invalidate_tlb_barrier(void) { +#if WITH_SMP + arm_write_bpiallis(0); +#else + arm_write_bpiall(0); +#endif + DSB; + ISB; +} + +static inline void arm_invalidate_tlb_global_no_barrier(void) { #if WITH_SMP arm_write_tlbiallis(0); #else arm_write_tlbiall(0); #endif - DSB; } -static inline void arm_invalidate_tlb_mva(vaddr_t va) { - CF; +static inline void arm_invalidate_tlb_global(void) { + DSB; + arm_invalidate_tlb_global_no_barrier(); + arm_after_invalidate_tlb_barrier(); +} + +static inline void arm_invalidate_tlb_mva_no_barrier(vaddr_t va) { #if WITH_SMP arm_write_tlbimvaais(va & 0xfffff000); #else arm_write_tlbimvaa(va & 0xfffff000); #endif - DSB; } -static inline void arm_invalidate_tlb_asid(uint8_t asid) { - CF; +static inline void arm_invalidate_tlb_mva(vaddr_t va) { + DSB; + arm_invalidate_tlb_mva_no_barrier(va); + arm_after_invalidate_tlb_barrier(); +} + + +static inline void arm_invalidate_tlb_asid_no_barrier(uint8_t asid) { #if WITH_SMP arm_write_tlbiasidis(asid); #else arm_write_tlbiasid(asid); #endif - DSB; } -static inline void arm_invalidate_tlb_mva_asid(vaddr_t va, uint8_t asid) { - CF; +static inline void arm_invalidate_tlb_asid(uint8_t asid) { + DSB; + arm_invalidate_tlb_asid_no_barrier(asid); + arm_after_invalidate_tlb_barrier(); +} + +static inline void arm_invalidate_tlb_mva_asid_no_barrier(vaddr_t va, uint8_t asid) { #if WITH_SMP arm_write_tlbimvais((va & 0xfffff000) | asid); #else arm_write_tlbimva((va & 0xfffff000) | asid); #endif +} + +static inline void arm_invalidate_tlb_mva_asid(vaddr_t va, uint8_t asid) { DSB; + arm_invalidate_tlb_mva_asid_no_barrier(va, asid); + arm_after_invalidate_tlb_barrier(); } __END_CDECLS diff --git a/arch/arm/include/arch/defines.h b/arch/arm/include/arch/defines.h index 899afc32..bf33b97d 100644 --- a/arch/arm/include/arch/defines.h +++ b/arch/arm/include/arch/defines.h @@ -34,12 +34,16 @@ #define CACHE_LINE 32 #elif ARM_CPU_ARM1136 #define CACHE_LINE 32 +#elif ARM_CPU_CORTEX_A7 +#define CACHE_LINE 64 /* XXX L1 icache is 32 bytes */ #elif ARM_CPU_CORTEX_A8 #define CACHE_LINE 64 #elif ARM_CPU_CORTEX_A9 #define CACHE_LINE 32 #elif ARM_CPU_CORTEX_M3 || ARM_CPU_CORTEX_M4 #define CACHE_LINE 32 /* doesn't actually matter */ +#elif ARM_CPU_CORTEX_A15 +#define CACHE_LINE 64 #else #error unknown cpu #endif diff --git a/arch/arm/include/arch/spinlock.h b/arch/arm/include/arch/spinlock.h new file mode 100644 index 00000000..b232024d --- /dev/null +++ b/arch/arm/include/arch/spinlock.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2014 
Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#pragma once + +#include +#include + +#define SPIN_LOCK_INITIAL_VALUE (0) + +typedef unsigned long spin_lock_t; + +typedef unsigned long spin_lock_saved_state_t; +typedef unsigned long spin_lock_save_flags_t; + +static inline void arch_spin_lock_init(spin_lock_t *lock) +{ + *lock = SPIN_LOCK_INITIAL_VALUE; +} + +static inline bool arch_spin_lock_held(spin_lock_t *lock) +{ + return *lock != 0; +} + +#if WITH_SMP + +void arch_spin_lock(spin_lock_t *lock); +int arch_spin_trylock(spin_lock_t *lock); +void arch_spin_unlock(spin_lock_t *lock); + +#else + +static inline void arch_spin_lock(spin_lock_t *lock) +{ + *lock = 1; +} + +static inline int arch_spin_trylock(spin_lock_t *lock) +{ + return 0; +} + +static inline void arch_spin_unlock(spin_lock_t *lock) +{ + *lock = 0; +} + +#endif + + /* ARM specific flags */ +#define SPIN_LOCK_FLAG_IRQ 0x40000000 +#define SPIN_LOCK_FLAG_FIQ 0x80000000 /* Do not use unless IRQs are already disabled */ +#define SPIN_LOCK_FLAG_IRQ_FIQ (SPIN_LOCK_FLAG_IRQ | SPIN_LOCK_FLAG_FIQ) + + /* default arm flag is to just disable plain irqs */ +#define ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS SPIN_LOCK_FLAG_IRQ + +enum { + /* private */ + SPIN_LOCK_STATE_RESTORE_IRQ = 1, + SPIN_LOCK_STATE_RESTORE_FIQ = 2, +}; + +static inline void +arch_interrupt_save(spin_lock_saved_state_t *statep, spin_lock_save_flags_t flags) +{ + spin_lock_saved_state_t state = 0; + if ((flags & SPIN_LOCK_FLAG_IRQ) && !arch_ints_disabled()) { + state |= SPIN_LOCK_STATE_RESTORE_IRQ; + arch_disable_ints(); + } + if ((flags & SPIN_LOCK_FLAG_FIQ) && !arch_fiqs_disabled()) { + state |= SPIN_LOCK_STATE_RESTORE_FIQ; + arch_disable_fiqs(); + } + *statep = state; +} + +static inline void +arch_interrupt_restore(spin_lock_saved_state_t old_state, spin_lock_save_flags_t flags) +{ + if ((flags & SPIN_LOCK_FLAG_FIQ) && (old_state & SPIN_LOCK_STATE_RESTORE_FIQ)) + arch_enable_fiqs(); + if ((flags & SPIN_LOCK_FLAG_IRQ) && (old_state & SPIN_LOCK_STATE_RESTORE_IRQ)) + arch_enable_ints(); +} + + diff --git a/arch/arm/rules.mk b/arch/arm/rules.mk index 2cda3059..7ccc954a 100644 --- a/arch/arm/rules.mk +++ b/arch/arm/rules.mk @@ -20,7 +20,6 @@ GLOBAL_DEFINES += \ ARM_ISA_ARMv7M=1 \ ARM_WITH_THUMB=1 \ ARM_WITH_THUMB2=1 -GLOBAL_COMPILEFLAGS += -mcpu=$(ARM_CPU) HANDLED_CORE := true ENABLE_THUMB := true SUBARCH := arm-m @@ -32,7 +31,6 @@ GLOBAL_DEFINES += \ ARM_ISA_ARMv7M=1 \ ARM_WITH_THUMB=1 \ ARM_WITH_THUMB2=1 
-GLOBAL_COMPILEFLAGS += -mcpu=$(ARM_CPU) HANDLED_CORE := true ENABLE_THUMB := true SUBARCH := arm-m @@ -47,11 +45,40 @@ GLOBAL_DEFINES += \ ARM_WITH_THUMB2=1 \ ARM_WITH_VFP=1 \ __FPU_PRESENT=1 -GLOBAL_COMPILEFLAGS += -mcpu=cortex-m4 -mfloat-abi=softfp HANDLED_CORE := true ENABLE_THUMB := true SUBARCH := arm-m endif +ifeq ($(ARM_CPU),cortex-a7) +GLOBAL_DEFINES += \ + ARM_WITH_CP15=1 \ + ARM_WITH_MMU=1 \ + ARM_ISA_ARMv7=1 \ + ARM_ISA_ARMv7A=1 \ + ARM_WITH_VFP=1 \ + ARM_WITH_NEON=1 \ + ARM_WITH_THUMB=1 \ + ARM_WITH_THUMB2=1 \ + ARM_WITH_CACHE=1 +HANDLED_CORE := true +endif +ifeq ($(ARM_CPU),cortex-a15) +GLOBAL_DEFINES += \ + ARM_WITH_CP15=1 \ + ARM_WITH_MMU=1 \ + ARM_ISA_ARMv7=1 \ + ARM_ISA_ARMv7A=1 \ + ARM_WITH_THUMB=1 \ + ARM_WITH_THUMB2=1 \ + ARM_WITH_CACHE=1 \ + ARM_WITH_L2=1 +ifneq ($(ARM_WITHOUT_VFP_NEON),true) +GLOBAL_DEFINES += \ + ARM_WITH_VFP=1 \ + ARM_WITH_NEON=1 +endif +HANDLED_CORE := true +endif ifeq ($(ARM_CPU),cortex-a8) GLOBAL_DEFINES += \ ARM_WITH_CP15=1 \ @@ -64,9 +91,7 @@ GLOBAL_DEFINES += \ ARM_WITH_THUMB2=1 \ ARM_WITH_CACHE=1 \ ARM_WITH_L2=1 -GLOBAL_COMPILEFLAGS += -mcpu=$(ARM_CPU) HANDLED_CORE := true -GLOBAL_COMPILEFLAGS += -mfpu=neon -mfloat-abi=softfp endif ifeq ($(ARM_CPU),cortex-a9) GLOBAL_DEFINES += \ @@ -77,7 +102,6 @@ GLOBAL_DEFINES += \ ARM_WITH_THUMB=1 \ ARM_WITH_THUMB2=1 \ ARM_WITH_CACHE=1 -GLOBAL_COMPILEFLAGS += -mcpu=$(ARM_CPU) HANDLED_CORE := true endif ifeq ($(ARM_CPU),cortex-a9-neon) @@ -92,11 +116,7 @@ GLOBAL_DEFINES += \ ARM_WITH_THUMB=1 \ ARM_WITH_THUMB2=1 \ ARM_WITH_CACHE=1 -GLOBAL_COMPILEFLAGS += -mcpu=cortex-a9 HANDLED_CORE := true -# XXX cannot enable neon right now because compiler generates -# neon code for 64bit integer ops -GLOBAL_COMPILEFLAGS += -mfpu=vfpv3 -mfloat-abi=softfp endif ifeq ($(ARM_CPU),arm1136j-s) GLOBAL_DEFINES += \ @@ -106,7 +126,6 @@ GLOBAL_DEFINES += \ ARM_WITH_THUMB=1 \ ARM_WITH_CACHE=1 \ ARM_CPU_ARM1136=1 -GLOBAL_COMPILEFLAGS += -mcpu=$(ARM_CPU) HANDLED_CORE := true endif ifeq ($(ARM_CPU),arm1176jzf-s) @@ -118,7 +137,6 @@ GLOBAL_DEFINES += \ ARM_WITH_THUMB=1 \ ARM_WITH_CACHE=1 \ ARM_CPU_ARM1136=1 -GLOBAL_COMPILEFLAGS += -mcpu=$(ARM_CPU) HANDLED_CORE := true endif @@ -176,6 +194,31 @@ KERNEL_LOAD_OFFSET ?= 0 GLOBAL_DEFINES += \ KERNEL_BASE=$(KERNEL_BASE) \ KERNEL_LOAD_OFFSET=$(KERNEL_LOAD_OFFSET) + +# if its requested we build with SMP, arm generically supports 4 cpus +ifeq ($(WITH_SMP),1) +SMP_MAX_CPUS ?= 4 +SMP_CPU_CLUSTER_SHIFT ?= 8 +SMP_CPU_ID_BITS ?= 24 + +GLOBAL_DEFINES += \ + WITH_SMP=1 \ + SMP_MAX_CPUS=$(SMP_MAX_CPUS) \ + SMP_CPU_CLUSTER_SHIFT=$(SMP_CPU_CLUSTER_SHIFT) \ + SMP_CPU_ID_BITS=$(SMP_CPU_ID_BITS) + +MODULE_SRCS += \ + $(LOCAL_DIR)/arm/mp.c +else +GLOBAL_DEFINES += \ + SMP_MAX_CPUS=1 +endif + +ifeq (true,$(call TOBOOL,$(WITH_NS_MAPPING))) +GLOBAL_DEFINES += \ + WITH_ARCH_MMU_PICK_SPOT=1 +endif + endif ifeq ($(SUBARCH),arm-m) MODULE_SRCS += \ @@ -192,49 +235,20 @@ GLOBAL_INCLUDES += \ # we're building for small binaries GLOBAL_DEFINES += \ ARM_ONLY_THUMB=1 \ - ARCH_DEFAULT_STACK_SIZE=1024 + ARCH_DEFAULT_STACK_SIZE=1024 \ + SMP_MAX_CPUS=1 ARCH_OPTFLAGS := -Os WITH_LINKER_GC ?= 1 endif -# try to find the toolchain -ifndef TOOLCHAIN_PREFIX -TOOLCHAIN_PREFIX := arm-eabi- -FOUNDTOOL=$(shell which $(TOOLCHAIN_PREFIX)gcc) -ifeq ($(FOUNDTOOL),) -TOOLCHAIN_PREFIX := arm-elf- -FOUNDTOOL=$(shell which $(TOOLCHAIN_PREFIX)gcc) -ifeq ($(FOUNDTOOL),) -TOOLCHAIN_PREFIX := arm-none-eabi- -FOUNDTOOL=$(shell which $(TOOLCHAIN_PREFIX)gcc) -ifeq ($(FOUNDTOOL),) -TOOLCHAIN_PREFIX := arm-linux-gnueabi- -FOUNDTOOL=$(shell which 
$(TOOLCHAIN_PREFIX)gcc) - -# Set no stack protection if we found our gnueabi toolchain. We don't -# need it. -# -# Stack protection is default in this toolchain and we get such errors -# final linking stage: -# -# undefined reference to `__stack_chk_guard' -# undefined reference to `__stack_chk_fail' -# undefined reference to `__stack_chk_guard' -# -ifneq (,$(findstring arm-linux-gnueabi-,$(FOUNDTOOL))) - GLOBAL_COMPILEFLAGS += -fno-stack-protector -endif - -endif -endif -endif -ifeq ($(FOUNDTOOL),) -$(error cannot find toolchain, please set TOOLCHAIN_PREFIX or add it to your path) -endif -endif +# try to find toolchain +include $(LOCAL_DIR)/toolchain.mk +TOOLCHAIN_PREFIX := $(ARCH_$(ARCH)_TOOLCHAIN_PREFIX) $(info TOOLCHAIN_PREFIX = $(TOOLCHAIN_PREFIX)) +ARCH_COMPILEFLAGS += $(ARCH_$(ARCH)_COMPILEFLAGS) + GLOBAL_COMPILEFLAGS += $(THUMBINTERWORK) # make sure some bits were set up @@ -249,7 +263,10 @@ ifeq ($(MEMVARS_SET),0) $(error missing MEMBASE or MEMSIZE variable, please set in target rules.mk) endif -LIBGCC := $(shell $(TOOLCHAIN_PREFIX)gcc $(GLOBAL_COMPILEFLAGS) $(THUMBCFLAGS) -print-libgcc-file-name) +LIBGCC := $(shell $(TOOLCHAIN_PREFIX)gcc $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(THUMBCFLAGS) -print-libgcc-file-name) +$(info LIBGCC = $(LIBGCC)) + +$(info GLOBAL_COMPILEFLAGS = $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(THUMBCFLAGS)) # potentially generated files that should be cleaned out with clean make rule GENERATED += \ diff --git a/arch/arm/toolchain.mk b/arch/arm/toolchain.mk new file mode 100644 index 00000000..5a4c8819 --- /dev/null +++ b/arch/arm/toolchain.mk @@ -0,0 +1,80 @@ +ifndef ARCH_arm_TOOLCHAIN_INCLUDED +ARCH_arm_TOOLCHAIN_INCLUDED := 1 + +# try to find the toolchain +ifndef ARCH_arm_TOOLCHAIN_PREFIX +ARCH_arm_TOOLCHAIN_PREFIX := arm-eabi- +FOUNDTOOL=$(shell which $(ARCH_arm_TOOLCHAIN_PREFIX)gcc) +ifeq ($(FOUNDTOOL),) +ARCH_arm_TOOLCHAIN_PREFIX := arm-elf- +FOUNDTOOL=$(shell which $(ARCH_arm_TOOLCHAIN_PREFIX)gcc) +ifeq ($(FOUNDTOOL),) +ARCH_arm_TOOLCHAIN_PREFIX := arm-none-eabi- +FOUNDTOOL=$(shell which $(ARCH_arm_TOOLCHAIN_PREFIX)gcc) +ifeq ($(FOUNDTOOL),) +ARCH_arm_TOOLCHAIN_PREFIX := arm-linux-gnueabi- +FOUNDTOOL=$(shell which $(ARCH_arm_TOOLCHAIN_PREFIX)gcc) + +# Set no stack protection if we found our gnueabi toolchain. We don't +# need it. 
+# +# Stack protection is default in this toolchain and we get such errors +# final linking stage: +# +# undefined reference to `__stack_chk_guard' +# undefined reference to `__stack_chk_fail' +# undefined reference to `__stack_chk_guard' +# +ifneq (,$(findstring arm-linux-gnueabi-,$(FOUNDTOOL))) + ARCH_arm_COMPILEFLAGS += -fno-stack-protector +endif + +endif +endif +endif +ifeq ($(FOUNDTOOL),) +$(error cannot find toolchain, please set ARCH_arm_TOOLCHAIN_PREFIX or add it to your path) +endif +endif + + +ifeq ($(ARM_CPU),cortex-m3) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +endif +ifeq ($(ARM_CPU),cortex-m4) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +endif +ifeq ($(ARM_CPU),cortex-m4f) +ARCH_arm_COMPILEFLAGS += -mcpu=cortex-m4 -mfloat-abi=softfp +endif +ifeq ($(ARM_CPU),cortex-a7) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +ARCH_arm_COMPILEFLAGS += -mfpu=vfpv3 -mfloat-abi=softfp +endif +ifeq ($(ARM_CPU),cortex-a8) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +ARCH_arm_COMPILEFLAGS += -mfpu=neon -mfloat-abi=softfp +endif +ifeq ($(ARM_CPU),cortex-a9) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +endif +ifeq ($(ARM_CPU),cortex-a9-neon) +ARCH_arm_COMPILEFLAGS += -mcpu=cortex-a9 +# XXX cannot enable neon right now because compiler generates +# neon code for 64bit integer ops +ARCH_arm_COMPILEFLAGS += -mfpu=vfpv3 -mfloat-abi=softfp +endif +ifeq ($(ARM_CPU),cortex-a15) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +ifneq ($(ARM_WITHOUT_VFP_NEON),true) +ARCH_arm_COMPILEFLAGS += -mfpu=vfpv3 -mfloat-abi=softfp +endif +endif +ifeq ($(ARM_CPU),arm1136j-s) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +endif +ifeq ($(ARM_CPU),arm1176jzf-s) +ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU) +endif + +endif diff --git a/arch/arm64/arch.c b/arch/arm64/arch.c index e66ecbba..d4758b13 100644 --- a/arch/arm64/arch.c +++ b/arch/arm64/arch.c @@ -24,9 +24,23 @@ #include #include #include +#include +#include +#include +#include +#include #include +#include -void arch_early_init(void) +#define LOCAL_TRACE 0 + +#if WITH_SMP +/* smp boot lock */ +static spin_lock_t arm_boot_cpu_lock = 1; +static volatile int secondaries_to_init = 0; +#endif + +static void arm64_cpu_early_init(void) { /* set the vector base */ ARM64_WRITE_SYSREG(VBAR_EL1, (uint64_t)&arm64_exception_base); @@ -38,8 +52,31 @@ void arch_early_init(void) } } +void arch_early_init(void) +{ + arm64_cpu_early_init(); + platform_init_mmu_mappings(); +} + void arch_init(void) { + arch_mp_init_percpu(); + +#if WITH_SMP + LTRACEF("midr_el1 0x%llx\n", ARM64_READ_SYSREG(midr_el1)); + + secondaries_to_init = SMP_MAX_CPUS - 1; /* TODO: get count from somewhere else, or add cpus as they boot */ + + lk_init_secondary_cpus(secondaries_to_init); + + LTRACEF("releasing %d secondary cpus\n", secondaries_to_init); + + /* release the secondary cpus */ + spin_unlock(&arm_boot_cpu_lock); + + /* flush the release of the lock, since the secondary cpus are running without cache on */ + arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock)); +#endif } void arch_quiesce(void) @@ -56,4 +93,30 @@ void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3 PANIC_UNIMPLEMENTED; } +#if WITH_SMP +void arm64_secondary_entry(ulong asm_cpu_num) +{ + uint cpu = arch_curr_cpu_num(); + if (cpu != asm_cpu_num) + return; + + arm64_cpu_early_init(); + + spin_lock(&arm_boot_cpu_lock); + spin_unlock(&arm_boot_cpu_lock); + + /* run early secondary cpu init routines up to the threading level */ + lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, 
                          LK_INIT_LEVEL_THREADING - 1);
+
+    arch_mp_init_percpu();
+
+    LTRACEF("cpu num %d\n", cpu);
+
+    /* we're done, tell the main cpu we're up */
+    atomic_add(&secondaries_to_init, -1);
+    __asm__ volatile("sev");
+
+    lk_secondary_cpu_entry();
+}
+#endif
diff --git a/arch/arm64/asm.S b/arch/arm64/asm.S
index 617547d3..d60ca848 100644
--- a/arch/arm64/asm.S
+++ b/arch/arm64/asm.S
@@ -21,14 +21,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include
-
-.macro push ra, rb
-stp \ra, \rb, [sp,#-16]!
-.endm
-
-.macro pop ra, rb
-ldp \ra, \rb, [sp], #16
-.endm
+#include

 /* void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp); */
 FUNCTION(arm64_context_switch)
diff --git a/arch/arm64/cache-ops.S b/arch/arm64/cache-ops.S
new file mode 100644
index 00000000..f5fa49d1
--- /dev/null
+++ b/arch/arm64/cache-ops.S
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+.text
+
+.macro cache_range_op, cache op
+    add     x2, x0, x1                  // calculate the end address
+    bic     x3, x0, #(CACHE_LINE-1)     // align the start with a cache line
+.Lcache_range_op_loop\@:
+    \cache  \op, x3
+    add     x3, x3, #CACHE_LINE
+    cmp     x3, x2
+    blo     .Lcache_range_op_loop\@
+    dsb     sy
+.endm
+
+    /* void arch_clean_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_clean_cache_range)
+    cache_range_op dc cvac              // clean cache to PoC by MVA
+    ret
+
+    /* void arch_clean_invalidate_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_clean_invalidate_cache_range)
+    cache_range_op dc civac             // clean & invalidate dcache to PoC by MVA
+    ret
+
+    /* void arch_invalidate_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_invalidate_cache_range)
+    cache_range_op dc ivac              // invalidate dcache to PoC by MVA
+    ret
+
+    /* void arch_sync_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_sync_cache_range)
+    cache_range_op dc cvau              // clean dcache to PoU by MVA
+    cache_range_op ic ivau              // invalidate icache to PoU by MVA
+    ret
diff --git a/arch/arm64/exceptions.S b/arch/arm64/exceptions.S
index 693d6571..38e3f3ff 100644
--- a/arch/arm64/exceptions.S
+++ b/arch/arm64/exceptions.S
@@ -21,18 +21,11 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include
+#include

 .section .text.boot.vectab
 .align 12

-.macro push ra, rb
-stp \ra, \rb, [sp,#-16]!
-.endm - -.macro pop ra, rb -ldp \ra, \rb, [sp], #16 -.endm - #define lr x30 #define regsave_long_offset 0xf0 #define regsave_short_offset 0x90 @@ -127,6 +120,17 @@ add sp, sp, #32 b . .endm +.macro irq_exception + regsave_short + mov x0, sp + bl platform_irq + cbz x0, .Lirq_exception_no_preempt\@ + bl thread_preempt +1: +.Lirq_exception_no_preempt\@: + b arm64_exc_shared_restore_short +.endm + FUNCTION(arm64_exception_base) /* exceptions from current EL, using SP0 */ @@ -155,10 +159,7 @@ LOCAL_FUNCTION(arm64_sync_exc_current_el_SPx) .org 0x280 LOCAL_FUNCTION(arm64_irq_current_el_SPx) - regsave_short - mov x0, sp - bl platform_irq - b arm64_exc_shared_restore_short + irq_exception .org 0x300 LOCAL_FUNCTION(arm64_fiq_current_el_SPx) @@ -191,15 +192,21 @@ LOCAL_FUNCTION(arm64_err_exc_lower_el_64) /* exceptions from lower EL, running arm32 */ .org 0x600 LOCAL_FUNCTION(arm64_sync_exc_lower_el_32) - invalid_exception 0x30 + regsave_long + mov x0, sp + bl arm64_sync_exception + b arm64_exc_shared_restore_long .org 0x680 LOCAL_FUNCTION(arm64_irq_lower_el_32) - invalid_exception 0x31 + irq_exception .org 0x700 LOCAL_FUNCTION(arm64_fiq_lower_el_32) - invalid_exception 0x32 + regsave_short + mov x0, sp + bl platform_fiq + b arm64_exc_shared_restore_short .org 0x780 LOCAL_FUNCTION(arm64_err_exc_lower_el_32) diff --git a/arch/arm64/exceptions_c.c b/arch/arm64/exceptions_c.c index a83ca7cb..dfb0edd9 100644 --- a/arch/arm64/exceptions_c.c +++ b/arch/arm64/exceptions_c.c @@ -43,14 +43,22 @@ static void dump_iframe(const struct arm64_iframe_long *iframe) void arm64_sync_exception(struct arm64_iframe_long *iframe) { - printf("sync_exception\n"); - dump_iframe(iframe); - uint32_t esr = ARM64_READ_SYSREG(esr_el1); uint32_t ec = esr >> 26; uint32_t il = (esr >> 25) & 0x1; uint32_t iss = esr & ((1<<24) - 1); +#ifdef WITH_LIB_SYSCALL + if (ec == 0x15 || ec == 0x11) { // syscall 64/32 + void arm64_syscall(struct arm64_iframe_long *iframe); + arm64_syscall(iframe); + return; + } +#endif + + printf("sync_exception\n"); + dump_iframe(iframe); + printf("ESR 0x%x: ec 0x%x, il 0x%x, iss 0x%x\n", esr, ec, il, iss); if (ec == 0x15) { // syscall diff --git a/arch/arm64/include/arch/arch_ops.h b/arch/arm64/include/arch/arch_ops.h index 2a12d7be..b1cc6aca 100644 --- a/arch/arm64/include/arch/arch_ops.h +++ b/arch/arm64/include/arch/arch_ops.h @@ -36,26 +36,62 @@ static inline void arch_enable_ints(void) { CF; - __asm__ volatile("msr daifclr, #3" ::: "memory"); + __asm__ volatile("msr daifclr, #2" ::: "memory"); } static inline void arch_disable_ints(void) { - __asm__ volatile("msr daifset, #3" ::: "memory"); + __asm__ volatile("msr daifset, #2" ::: "memory"); CF; } -// XXX static inline bool arch_ints_disabled(void) { unsigned int state; - __asm__ volatile("mrs %0, cpsr" : "=r"(state)); + __asm__ volatile("mrs %0, daif" : "=r"(state)); state &= (1<<7); return !!state; } +static inline void arch_enable_fiqs(void) +{ + CF; + __asm__ volatile("msr daifclr, #1" ::: "memory"); +} + +static inline void arch_disable_fiqs(void) +{ + __asm__ volatile("msr daifset, #1" ::: "memory"); + CF; +} + +// XXX +static inline bool arch_fiqs_disabled(void) +{ + unsigned int state; + + __asm__ volatile("mrs %0, daif" : "=r"(state)); + state &= (1<<6); + + return !!state; +} + +#define mb() __asm__ volatile("dsb sy" : : : "memory") +#define rmb() __asm__ volatile("dsb ld" : : : "memory") +#define wmb() __asm__ volatile("dsb st" : : : "memory") + +#ifdef WITH_SMP +#define smp_mb() __asm__ volatile("dmb ish" : : : "memory") +#define smp_rmb() 
__asm__ volatile("dmb ishld" : : : "memory") +#define smp_wmb() __asm__ volatile("dmb ishst" : : : "memory") +#else +#define smp_mb() CF +#define smp_wmb() CF +#define smp_rmb() CF +#endif + static inline int atomic_add(volatile int *ptr, int val) { #if USE_GCC_ATOMICS @@ -152,6 +188,11 @@ static inline int atomic_swap(volatile int *ptr, int val) static inline int atomic_cmpxchg(volatile int *ptr, int oldval, int newval) { +#if USE_GCC_ATOMICS + __atomic_compare_exchange_n(ptr, &oldval, newval, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return oldval; +#else int old; int test; @@ -174,6 +215,7 @@ static inline int atomic_cmpxchg(volatile int *ptr, int oldval, int newval) } while (test != 0); return old; +#endif } static inline uint32_t arch_cycle_count(void) @@ -208,5 +250,11 @@ static inline void set_current_thread(struct thread *t) ARM64_WRITE_SYSREG(tpidr_el1, (uint64_t)t); } +static inline uint arch_curr_cpu_num(void) +{ + uint64_t mpidr = ARM64_READ_SYSREG(mpidr_el1); + return ((mpidr & ((1U << SMP_CPU_ID_BITS) - 1)) >> 8 << SMP_CPU_CLUSTER_SHIFT) | (mpidr & 0xff); +} + #endif // ASSEMBLY diff --git a/arch/arm64/include/arch/arm64.h b/arch/arm64/include/arch/arm64.h index 997d1c26..ba9f731c 100644 --- a/arch/arm64/include/arch/arm64.h +++ b/arch/arm64/include/arch/arm64.h @@ -28,19 +28,22 @@ __BEGIN_CDECLS -#define DSB __asm__ volatile("dsb" ::: "memory") +#define DSB __asm__ volatile("dsb sy" ::: "memory") #define ISB __asm__ volatile("isb" ::: "memory") +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + #define ARM64_READ_SYSREG(reg) \ ({ \ uint64_t _val; \ - __asm__ volatile("mrs %0," #reg : "=r" (_val)); \ + __asm__ volatile("mrs %0," TOSTRING(reg) : "=r" (_val)); \ _val; \ }) #define ARM64_WRITE_SYSREG(reg, val) \ ({ \ - __asm__ volatile("msr " #reg ", %0" :: "r" (val)); \ + __asm__ volatile("msr " TOSTRING(reg) ", %0" :: "r" (val)); \ ISB; \ }) diff --git a/arch/arm64/include/arch/arm64/mmu.h b/arch/arm64/include/arch/arm64/mmu.h new file mode 100644 index 00000000..8681564f --- /dev/null +++ b/arch/arm64/include/arch/arm64/mmu.h @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __ARCH_ARM64_MMU_H +#define __ARCH_ARM64_MMU_H + +#include + +#define IFTE(c,t,e) (!!(c) * (t) | !(c) * (e)) +#define NBITS01(n) IFTE(n, 1, 0) +#define NBITS02(n) IFTE((n) >> 1, 1 + NBITS01((n) >> 1), NBITS01(n)) +#define NBITS04(n) IFTE((n) >> 2, 2 + NBITS02((n) >> 2), NBITS02(n)) +#define NBITS08(n) IFTE((n) >> 4, 4 + NBITS04((n) >> 4), NBITS04(n)) +#define NBITS16(n) IFTE((n) >> 8, 8 + NBITS08((n) >> 8), NBITS08(n)) +#define NBITS32(n) IFTE((n) >> 16, 16 + NBITS16((n) >> 16), NBITS16(n)) +#define NBITS(n) IFTE((n) >> 32, 32 + NBITS32((n) >> 32), NBITS32(n)) + +#ifndef MMU_KERNEL_SIZE_SHIFT +#define KERNEL_ASPACE_BITS (NBITS(0xffffffffffffffff-KERNEL_ASPACE_BASE)) +#define KERNEL_BASE_BITS (NBITS(0xffffffffffffffff-KERNEL_BASE)) +#if KERNEL_BASE_BITS > KERNEL_ASPACE_BITS +#define KERNEL_ASPACE_BITS KERNEL_BASE_BITS /* KERNEL_BASE should not be below KERNEL_ASPACE_BASE */ +#endif + +#if KERNEL_ASPACE_BITS < 25 +#define MMU_KERNEL_SIZE_SHIFT (25) +#else +#define MMU_KERNEL_SIZE_SHIFT (KERNEL_ASPACE_BITS) +#endif +#endif + +#ifndef MMU_USER_SIZE_SHIFT +#define MMU_USER_SIZE_SHIFT 48 +#endif + +#ifndef MMU_IDENT_SIZE_SHIFT +#define MMU_IDENT_SIZE_SHIFT 42 /* Max size supported by block mappings */ +#endif + +#define MMU_KERNEL_PAGE_SIZE_SHIFT (PAGE_SIZE_SHIFT) +#define MMU_USER_PAGE_SIZE_SHIFT (USER_PAGE_SIZE_SHIFT) + +#if MMU_IDENT_SIZE_SHIFT < 25 +#error MMU_IDENT_SIZE_SHIFT too small +#elif MMU_IDENT_SIZE_SHIFT <= 29 /* Use 2MB block mappings (4K page size) */ +#define MMU_IDENT_PAGE_SIZE_SHIFT (SHIFT_4K) +#elif MMU_IDENT_SIZE_SHIFT <= 30 /* Use 512MB block mappings (64K page size) */ +#define MMU_IDENT_PAGE_SIZE_SHIFT (SHIFT_64K) +#elif MMU_IDENT_SIZE_SHIFT <= 39 /* Use 1GB block mappings (4K page size) */ +#define MMU_IDENT_PAGE_SIZE_SHIFT (SHIFT_4K) +#elif MMU_IDENT_SIZE_SHIFT <= 42 /* Use 512MB block mappings (64K page size) */ +#define MMU_IDENT_PAGE_SIZE_SHIFT (SHIFT_64K) +#else +#error MMU_IDENT_SIZE_SHIFT too large +#endif + +/* + * TCR TGx values + * + * Page size: 4K 16K 64K + * TG0: 0 2 1 + * TG1: 2 1 3 + */ + +#define MMU_TG0(page_size_shift) ((((page_size_shift == 14) & 1) << 1) | \ + ((page_size_shift == 16) & 1)) + +#define MMU_TG1(page_size_shift) ((((page_size_shift == 12) & 1) << 1) | \ + ((page_size_shift == 14) & 1) | \ + ((page_size_shift == 16) & 1) | \ + (((page_size_shift == 16) & 1) << 1)) + +#define MMU_LX_X(page_shift, level) ((4 - (level)) * ((page_shift) - 3) + 3) + +#if MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 0) +#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 0) +#elif MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 1) +#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 1) +#elif MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 2) +#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 2) +#elif MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 3) +#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 3) +#else +#error User address space size must be larger than page size +#endif +#define MMU_USER_PAGE_TABLE_ENTRIES_TOP (0x1 << (MMU_USER_SIZE_SHIFT - MMU_USER_TOP_SHIFT)) + +#if MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 0) +#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 0) +#elif MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 1) +#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 1) +#elif MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 2) +#define MMU_KERNEL_TOP_SHIFT 
MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 2) +#elif MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 3) +#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 3) +#else +#error Kernel address space size must be larger than page size +#endif +#define MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP (0x1 << (MMU_KERNEL_SIZE_SHIFT - MMU_KERNEL_TOP_SHIFT)) + +#if MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 0) +#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 0) +#elif MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 1) +#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 1) +#elif MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 2) +#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 2) +#elif MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 3) +#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 3) +#else +#error Ident address space size must be larger than page size +#endif +#define MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT (MMU_IDENT_SIZE_SHIFT - MMU_IDENT_TOP_SHIFT) +#define MMU_PAGE_TABLE_ENTRIES_IDENT (0x1 << MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT) + +#define MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT (30) + +#ifndef ASSEMBLY +#define BM(base, count, val) (((val) & ((1UL << (count)) - 1)) << (base)) +#else +#define BM(base, count, val) (((val) & ((0x1 << (count)) - 1)) << (base)) +#endif + +#define MMU_SH_NON_SHAREABLE (0) +#define MMU_SH_OUTER_SHAREABLE (2) +#define MMU_SH_INNER_SHAREABLE (3) + +#define MMU_RGN_NON_CACHEABLE (0) +#define MMU_RGN_WRITE_BACK_ALLOCATE (1) +#define MMU_RGN_WRITE_THROUGH_NO_ALLOCATE (2) +#define MMU_RGN_WRITE_BACK_NO_ALLOCATE (3) + +#define MMU_TCR_TBI1 BM(38, 1, 1) +#define MMU_TCR_TBI0 BM(37, 1, 1) +#define MMU_TCR_AS BM(36, 1, 1) +#define MMU_TCR_IPS(size) BM(32, 3, (size)) +#define MMU_TCR_TG1(granule_size) BM(30, 2, (granule_size)) +#define MMU_TCR_SH1(shareability_flags) BM(28, 2, (shareability_flags)) +#define MMU_TCR_ORGN1(cache_flags) BM(26, 2, (cache_flags)) +#define MMU_TCR_IRGN1(cache_flags) BM(24, 2, (cache_flags)) +#define MMU_TCR_EPD1 BM(23, 1, 1) +#define MMU_TCR_A1 BM(22, 1, 1) +#define MMU_TCR_T1SZ(size) BM(16, 6, (size)) +#define MMU_TCR_TG0(granule_size) BM(14, 2, (granule_size)) +#define MMU_TCR_SH0(shareability_flags) BM(12, 2, (shareability_flags)) +#define MMU_TCR_ORGN0(cache_flags) BM(10, 2, (cache_flags)) +#define MMU_TCR_IRGN0(cache_flags) BM( 8, 2, (cache_flags)) +#define MMU_TCR_EPD0 BM( 7, 1, 1) +#define MMU_TCR_T0SZ(size) BM( 0, 6, (size)) + +#define MMU_MAIR_ATTR(index, attr) BM(index * 8, 8, (attr)) + + +/* L0/L1/L2/L3 descriptor types */ +#define MMU_PTE_DESCRIPTOR_INVALID BM(0, 2, 0) +#define MMU_PTE_DESCRIPTOR_MASK BM(0, 2, 3) + +/* L0/L1/L2 descriptor types */ +#define MMU_PTE_L012_DESCRIPTOR_BLOCK BM(0, 2, 1) +#define MMU_PTE_L012_DESCRIPTOR_TABLE BM(0, 2, 3) + +/* L3 descriptor types */ +#define MMU_PTE_L3_DESCRIPTOR_PAGE BM(0, 2, 3) + +/* Output address mask */ +#define MMU_PTE_OUTPUT_ADDR_MASK BM(12, 36, 0xfffffffff) + +/* Table attrs */ +#define MMU_PTE_ATTR_NS_TABLE BM(63, 1, 1) +#define MMU_PTE_ATTR_AP_TABLE_NO_WRITE BM(62, 1, 1) +#define MMU_PTE_ATTR_AP_TABLE_NO_EL0 BM(61, 1, 1) +#define MMU_PTE_ATTR_UXN_TABLE BM(60, 1, 1) +#define MMU_PTE_ATTR_PXN_TABLE BM(59, 1, 1) + +/* Block/Page attrs */ +#define MMU_PTE_ATTR_RES_SOFTWARE BM(55, 4, 0xf) +#define MMU_PTE_ATTR_UXN BM(54, 1, 1) +#define MMU_PTE_ATTR_PXN BM(53, 1, 1) +#define MMU_PTE_ATTR_CONTIGUOUS BM(52, 1, 1) + +#define MMU_PTE_ATTR_NON_GLOBAL BM(11, 1, 1) +#define MMU_PTE_ATTR_AF 
BM(10, 1, 1) + +#define MMU_PTE_ATTR_SH_NON_SHAREABLE BM(8, 2, 0) +#define MMU_PTE_ATTR_SH_OUTER_SHAREABLE BM(8, 2, 2) +#define MMU_PTE_ATTR_SH_INNER_SHAREABLE BM(8, 2, 3) + +#define MMU_PTE_ATTR_AP_P_RW_U_NA BM(6, 2, 0) +#define MMU_PTE_ATTR_AP_P_RW_U_RW BM(6, 2, 1) +#define MMU_PTE_ATTR_AP_P_RO_U_NA BM(6, 2, 2) +#define MMU_PTE_ATTR_AP_P_RO_U_RO BM(6, 2, 3) +#define MMU_PTE_ATTR_AP_MASK BM(6, 2, 3) + +#define MMU_PTE_ATTR_NON_SECURE BM(5, 1, 1) + +#define MMU_PTE_ATTR_ATTR_INDEX(attrindex) BM(2, 3, attrindex) +#define MMU_PTE_ATTR_ATTR_INDEX_MASK MMU_PTE_ATTR_ATTR_INDEX(7) + +/* Default configuration for main kernel page table: + * - do cached translation walks + */ + +/* Device-nGnRnE memory */ +#define MMU_MAIR_ATTR0 MMU_MAIR_ATTR(0, 0x00) +#define MMU_PTE_ATTR_STRONGLY_ORDERED MMU_PTE_ATTR_ATTR_INDEX(0) + +/* Device-nGnRE memory */ +#define MMU_MAIR_ATTR1 MMU_MAIR_ATTR(1, 0x04) +#define MMU_PTE_ATTR_DEVICE MMU_PTE_ATTR_ATTR_INDEX(1) + +/* Normal Memory, Outer Write-back non-transient Read/Write allocate, + * Inner Write-back non-transient Read/Write allocate + */ +#define MMU_MAIR_ATTR2 MMU_MAIR_ATTR(2, 0xff) +#define MMU_PTE_ATTR_NORMAL_MEMORY MMU_PTE_ATTR_ATTR_INDEX(2) + +#define MMU_MAIR_ATTR3 (0) +#define MMU_MAIR_ATTR4 (0) +#define MMU_MAIR_ATTR5 (0) +#define MMU_MAIR_ATTR6 (0) +#define MMU_MAIR_ATTR7 (0) + +#define MMU_MAIR_VAL (MMU_MAIR_ATTR0 | MMU_MAIR_ATTR1 | \ + MMU_MAIR_ATTR2 | MMU_MAIR_ATTR3 | \ + MMU_MAIR_ATTR4 | MMU_MAIR_ATTR5 | \ + MMU_MAIR_ATTR6 | MMU_MAIR_ATTR7 ) + +#define MMU_TCR_IPS_DEFAULT MMU_TCR_IPS(2) /* TODO: read at runtime, or configure per platform */ + +/* Enable cached page table walks: + * inner/outer (IRGN/ORGN): write-back + write-allocate + */ +#define MMU_TCR_FLAGS1 (MMU_TCR_TG1(MMU_TG1(MMU_KERNEL_PAGE_SIZE_SHIFT)) | \ + MMU_TCR_SH1(MMU_SH_INNER_SHAREABLE) | \ + MMU_TCR_ORGN1(MMU_RGN_WRITE_BACK_ALLOCATE) | \ + MMU_TCR_IRGN1(MMU_RGN_WRITE_BACK_ALLOCATE) | \ + MMU_TCR_T1SZ(64 - MMU_KERNEL_SIZE_SHIFT)) +#define MMU_TCR_FLAGS0 (MMU_TCR_TG0(MMU_TG0(MMU_USER_PAGE_SIZE_SHIFT)) | \ + MMU_TCR_SH0(MMU_SH_INNER_SHAREABLE) | \ + MMU_TCR_ORGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \ + MMU_TCR_IRGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \ + MMU_TCR_T0SZ(64 - MMU_USER_SIZE_SHIFT)) +#define MMU_TCR_FLAGS0_IDENT \ + (MMU_TCR_TG0(MMU_TG0(MMU_IDENT_PAGE_SIZE_SHIFT)) | \ + MMU_TCR_SH0(MMU_SH_INNER_SHAREABLE) | \ + MMU_TCR_ORGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \ + MMU_TCR_IRGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \ + MMU_TCR_T0SZ(64 - MMU_IDENT_SIZE_SHIFT)) +#define MMU_TCR_FLAGS_IDENT (MMU_TCR_IPS_DEFAULT | MMU_TCR_FLAGS1 | MMU_TCR_FLAGS0_IDENT) +#define MMU_TCR_FLAGS_KERNEL (MMU_TCR_IPS_DEFAULT | MMU_TCR_FLAGS1 | MMU_TCR_FLAGS0 | MMU_TCR_EPD0) +#define MMU_TCR_FLAGS_USER (MMU_TCR_IPS_DEFAULT | MMU_TCR_FLAGS1 | MMU_TCR_FLAGS0) + + +#if MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 2) +#define MMU_PTE_IDENT_DESCRIPTOR MMU_PTE_L012_DESCRIPTOR_BLOCK +#else +#define MMU_PTE_IDENT_DESCRIPTOR MMU_PTE_L3_DESCRIPTOR_PAGE +#endif +#define MMU_PTE_IDENT_FLAGS \ + (MMU_PTE_IDENT_DESCRIPTOR | \ + MMU_PTE_ATTR_AF | \ + MMU_PTE_ATTR_SH_INNER_SHAREABLE | \ + MMU_PTE_ATTR_NORMAL_MEMORY | \ + MMU_PTE_ATTR_AP_P_RW_U_NA) + +#define MMU_PTE_KERNEL_FLAGS \ + (MMU_PTE_ATTR_AF | \ + MMU_PTE_ATTR_SH_INNER_SHAREABLE | \ + MMU_PTE_ATTR_NORMAL_MEMORY | \ + MMU_PTE_ATTR_AP_P_RW_U_NA) + +#define MMU_INITIAL_MAP_STRONGLY_ORDERED \ + (MMU_PTE_ATTR_AF | \ + MMU_PTE_ATTR_STRONGLY_ORDERED | \ + MMU_PTE_ATTR_AP_P_RW_U_NA) + +#define MMU_INITIAL_MAP_DEVICE \ + (MMU_PTE_ATTR_AF | \ + MMU_PTE_ATTR_DEVICE | \ + 
MMU_PTE_ATTR_AP_P_RW_U_NA) + +#ifndef ASSEMBLY + +#include +#include +#include +#include + +typedef uint64_t pte_t; + +__BEGIN_CDECLS + +#define ARM64_TLBI_NOADDR(op) \ +({ \ + __asm__ volatile("tlbi " #op::); \ + ISB; \ +}) + +#define ARM64_TLBI(op, val) \ +({ \ + __asm__ volatile("tlbi " #op ", %0" :: "r" (val)); \ + ISB; \ +}) + +#define MMU_ARM64_GLOBAL_ASID (~0U) +int arm64_mmu_map(vaddr_t vaddr, paddr_t paddr, size_t size, pte_t attrs, + vaddr_t vaddr_base, uint top_size_shift, + uint top_index_shift, uint page_size_shift, + pte_t *top_page_table, uint asid); +int arm64_mmu_unmap(vaddr_t vaddr, size_t size, + vaddr_t vaddr_base, uint top_size_shift, + uint top_index_shift, uint page_size_shift, + pte_t *top_page_table, uint asid); + +__END_CDECLS +#endif /* ASSEMBLY */ + +#endif diff --git a/arch/arm64/include/arch/asm_macros.h b/arch/arm64/include/arch/asm_macros.h new file mode 100644 index 00000000..a07b783e --- /dev/null +++ b/arch/arm64/include/arch/asm_macros.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#pragma once + +.macro push ra, rb +stp \ra, \rb, [sp,#-16]! 
+.endm + +.macro pop ra, rb +ldp \ra, \rb, [sp], #16 +.endm + +.macro tbzmask, reg, mask, label, shift=0 +.if \shift >= 64 + .error "tbzmask: unsupported mask, \mask" +.elseif \mask == 1 << \shift + tbz \reg, #\shift, \label +.else + tbzmask \reg, \mask, \label, "(\shift + 1)" +.endif +.endm + +.macro tbnzmask, reg, mask, label, shift=0 +.if \shift >= 64 + .error "tbnzmask: unsupported mask, \mask" +.elseif \mask == 1 << \shift + tbnz \reg, #\shift, \label +.else + tbnzmask \reg, \mask, \label, "(\shift + 1)" +.endif +.endm + +.macro calloc_bootmem_aligned, new_ptr, new_ptr_end, tmp, size_shift, phys_offset=0 +.if \size_shift < 4 + .error "calloc_bootmem_aligned: Unsupported size_shift, \size_shift" +.endif + + /* load boot_alloc_end */ + adrp \tmp, boot_alloc_end + ldr \new_ptr, [\tmp, #:lo12:boot_alloc_end] + + /* align to page */ +.if \size_shift > 12 + add \new_ptr, \new_ptr, #(1 << \size_shift) + sub \new_ptr, \new_ptr, #1 +.else + add \new_ptr, \new_ptr, #(1 << \size_shift) - 1 +.endif + and \new_ptr, \new_ptr, #~((1 << \size_shift) - 1) + + /* add one page and store boot_alloc_end */ + add \new_ptr_end, \new_ptr, #(1 << \size_shift) + str \new_ptr_end, [\tmp, #:lo12:boot_alloc_end] + +.if \phys_offset != 0 + /* clear page */ + sub \new_ptr, \new_ptr, \phys_offset + sub \new_ptr_end, \new_ptr_end, \phys_offset +.endif + + /* clear page */ + mov \tmp, \new_ptr +.Lcalloc_bootmem_aligned_clear_loop\@: + stp xzr, xzr, [\tmp], #16 + cmp \tmp, \new_ptr_end + b.lo .Lcalloc_bootmem_aligned_clear_loop\@ +.endm diff --git a/arch/arm64/include/arch/defines.h b/arch/arm64/include/arch/defines.h index 34eb68c3..42ad4240 100644 --- a/arch/arm64/include/arch/defines.h +++ b/arch/arm64/include/arch/defines.h @@ -22,8 +22,22 @@ */ #pragma once +#define SHIFT_4K (12) +#define SHIFT_16K (14) +#define SHIFT_64K (16) + /* arm specific stuff */ -#define PAGE_SIZE 4096 +#ifdef ARM64_LARGE_PAGESIZE_64K +#define PAGE_SIZE_SHIFT (SHIFT_64K) +#elif ARM64_LARGE_PAGESIZE_16K +#define PAGE_SIZE_SHIFT (SHIFT_16K) +#else +#define PAGE_SIZE_SHIFT (SHIFT_4K) +#endif +#define USER_PAGE_SIZE_SHIFT SHIFT_4K + +#define PAGE_SIZE (1UL << PAGE_SIZE_SHIFT) +#define USER_PAGE_SIZE (1UL << USER_PAGE_SIZE_SHIFT) #define CACHE_LINE 32 diff --git a/arch/arm64/include/arch/spinlock.h b/arch/arm64/include/arch/spinlock.h new file mode 100644 index 00000000..f063cee0 --- /dev/null +++ b/arch/arm64/include/arch/spinlock.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#pragma once + +#include +#include + +#define SPIN_LOCK_INITIAL_VALUE (0) + +typedef unsigned long spin_lock_t; + +typedef unsigned int spin_lock_saved_state_t; +typedef unsigned int spin_lock_save_flags_t; + +#if WITH_SMP +void arch_spin_lock(spin_lock_t *lock); +int arch_spin_trylock(spin_lock_t *lock); +void arch_spin_unlock(spin_lock_t *lock); +#else +static inline void arch_spin_lock(spin_lock_t *lock) +{ + *lock = 1; +} + +static inline int arch_spin_trylock(spin_lock_t *lock) +{ + return 0; +} + +static inline void arch_spin_unlock(spin_lock_t *lock) +{ + *lock = 0; +} +#endif + +static inline void arch_spin_lock_init(spin_lock_t *lock) +{ + *lock = SPIN_LOCK_INITIAL_VALUE; +} + +static inline bool arch_spin_lock_held(spin_lock_t *lock) +{ + return *lock != 0; +} + +enum { + /* Possible future flags: + * SPIN_LOCK_FLAG_PMR_MASK = 0x000000ff, + * SPIN_LOCK_FLAG_PREEMPTION = 0x10000000, + * SPIN_LOCK_FLAG_SET_PMR = 0x20000000, + */ + + /* ARM specific flags */ + SPIN_LOCK_FLAG_IRQ = 0x40000000, + SPIN_LOCK_FLAG_FIQ = 0x80000000, /* Do not use unless IRQs are already disabled */ + SPIN_LOCK_FLAG_IRQ_FIQ = SPIN_LOCK_FLAG_IRQ | SPIN_LOCK_FLAG_FIQ, + + /* Generic flags */ + SPIN_LOCK_FLAG_INTERRUPTS = SPIN_LOCK_FLAG_IRQ, +}; + + /* default arm flag is to just disable plain irqs */ +#define ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS SPIN_LOCK_FLAG_INTERRUPTS + +enum { + /* private */ + SPIN_LOCK_STATE_RESTORE_IRQ = 1, + SPIN_LOCK_STATE_RESTORE_FIQ = 2, +}; + +static inline void +arch_interrupt_save(spin_lock_saved_state_t *statep, spin_lock_save_flags_t flags) +{ + spin_lock_saved_state_t state = 0; + if ((flags & SPIN_LOCK_FLAG_IRQ) && !arch_ints_disabled()) { + state |= SPIN_LOCK_STATE_RESTORE_IRQ; + arch_disable_ints(); + } + if ((flags & SPIN_LOCK_FLAG_FIQ) && !arch_fiqs_disabled()) { + state |= SPIN_LOCK_STATE_RESTORE_FIQ; + arch_disable_fiqs(); + } + *statep = state; +} + +static inline void +arch_interrupt_restore(spin_lock_saved_state_t old_state, spin_lock_save_flags_t flags) +{ + if ((flags & SPIN_LOCK_FLAG_FIQ) && (old_state & SPIN_LOCK_STATE_RESTORE_FIQ)) + arch_enable_fiqs(); + if ((flags & SPIN_LOCK_FLAG_IRQ) && (old_state & SPIN_LOCK_STATE_RESTORE_IRQ)) + arch_enable_ints(); +} + + + diff --git a/arch/arm64/mmu.c b/arch/arm64/mmu.c new file mode 100644 index 00000000..02cb1f47 --- /dev/null +++ b/arch/arm64/mmu.c @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define LOCAL_TRACE 0
+
+STATIC_ASSERT(((long)KERNEL_BASE >> MMU_KERNEL_SIZE_SHIFT) == -1);
+STATIC_ASSERT(((long)KERNEL_ASPACE_BASE >> MMU_KERNEL_SIZE_SHIFT) == -1);
+STATIC_ASSERT(MMU_KERNEL_SIZE_SHIFT <= 48);
+STATIC_ASSERT(MMU_KERNEL_SIZE_SHIFT >= 25);
+
+/* the main translation table */
+pte_t arm64_kernel_translation_table[MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP] __ALIGNED(MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP * 8) __SECTION(".bss.prebss.translation_table");
+
+/* convert user level mmu flags to flags that go in L1 descriptors */
+static pte_t mmu_flags_to_pte_attr(uint flags)
+{
+    pte_t attr = MMU_PTE_ATTR_AF;
+
+    switch (flags & ARCH_MMU_FLAG_CACHE_MASK) {
+        case ARCH_MMU_FLAG_CACHED:
+            attr |= MMU_PTE_ATTR_NORMAL_MEMORY | MMU_PTE_ATTR_SH_INNER_SHAREABLE;
+            break;
+        case ARCH_MMU_FLAG_UNCACHED:
+            attr |= MMU_PTE_ATTR_STRONGLY_ORDERED;
+            break;
+        case ARCH_MMU_FLAG_UNCACHED_DEVICE:
+            attr |= MMU_PTE_ATTR_DEVICE;
+            break;
+        default:
+            /* invalid user-supplied flag */
+            DEBUG_ASSERT(0);
+            return ERR_INVALID_ARGS;
+    }
+
+    switch (flags & (ARCH_MMU_FLAG_PERM_USER | ARCH_MMU_FLAG_PERM_RO)) {
+        case 0:
+            attr |= MMU_PTE_ATTR_AP_P_RW_U_NA;
+            break;
+        case ARCH_MMU_FLAG_PERM_RO:
+            attr |= MMU_PTE_ATTR_AP_P_RO_U_NA;
+            break;
+        case ARCH_MMU_FLAG_PERM_USER:
+            attr |= MMU_PTE_ATTR_AP_P_RW_U_RW;
+            break;
+        case ARCH_MMU_FLAG_PERM_USER | ARCH_MMU_FLAG_PERM_RO:
+            attr |= MMU_PTE_ATTR_AP_P_RO_U_RO;
+            break;
+    }
+
+    if (flags & ARCH_MMU_FLAG_NS) {
+        attr |= MMU_PTE_ATTR_NON_SECURE;
+    }
+
+    return attr;
+}
+
+status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags)
+{
+    uint index;
+    uint index_shift;
+    pte_t pte;
+    pte_t pte_addr;
+    uint descriptor_type;
+    pte_t *page_table;
+    vaddr_t kernel_base = ~0UL << MMU_KERNEL_SIZE_SHIFT;
+    vaddr_t vaddr_rem;
+
+    if (vaddr < kernel_base) {
+        TRACEF("vaddr 0x%lx < base 0x%lx\n", vaddr, kernel_base);
+        return ERR_INVALID_ARGS;
+    }
+
+    index_shift = MMU_KERNEL_TOP_SHIFT;
+    page_table = arm64_kernel_translation_table;
+
+    vaddr_rem = vaddr - kernel_base;
+    index = vaddr_rem >> index_shift;
+    ASSERT(index < MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP);
+
+    while (true) {
+        index = vaddr_rem >> index_shift;
+        vaddr_rem -= (vaddr_t)index << index_shift;
+        pte = page_table[index];
+        descriptor_type = pte & MMU_PTE_DESCRIPTOR_MASK;
+        pte_addr = pte & MMU_PTE_OUTPUT_ADDR_MASK;
+
+        LTRACEF("va 0x%lx, index %d, index_shift %d, rem 0x%lx, pte 0x%llx\n",
+                vaddr, index, index_shift, vaddr_rem, pte);
+
+        if (descriptor_type == MMU_PTE_DESCRIPTOR_INVALID)
+            return ERR_NOT_FOUND;
+
+        if (descriptor_type == ((index_shift > MMU_KERNEL_PAGE_SIZE_SHIFT) ?
+ MMU_PTE_L012_DESCRIPTOR_BLOCK : + MMU_PTE_L3_DESCRIPTOR_PAGE)) { + break; + } + + if (index_shift <= MMU_KERNEL_PAGE_SIZE_SHIFT || + descriptor_type != MMU_PTE_L012_DESCRIPTOR_TABLE) { + PANIC_UNIMPLEMENTED; + } + + page_table = paddr_to_kvaddr(pte_addr); + index_shift -= MMU_KERNEL_PAGE_SIZE_SHIFT - 3; + } + + if (paddr) + *paddr = pte_addr + vaddr_rem; + if (flags) { + *flags = 0; + if (pte & MMU_PTE_ATTR_NON_SECURE) + *flags |= ARCH_MMU_FLAG_NS; + switch (pte & MMU_PTE_ATTR_ATTR_INDEX_MASK) { + case MMU_PTE_ATTR_STRONGLY_ORDERED: + *flags |= ARCH_MMU_FLAG_UNCACHED; + break; + case MMU_PTE_ATTR_DEVICE: + *flags |= ARCH_MMU_FLAG_UNCACHED_DEVICE; + break; + case MMU_PTE_ATTR_NORMAL_MEMORY: + break; + default: + PANIC_UNIMPLEMENTED; + } + switch (pte & MMU_PTE_ATTR_AP_MASK) { + case MMU_PTE_ATTR_AP_P_RW_U_NA: + break; + case MMU_PTE_ATTR_AP_P_RW_U_RW: + *flags |= ARCH_MMU_FLAG_PERM_USER; + break; + case MMU_PTE_ATTR_AP_P_RO_U_NA: + *flags |= ARCH_MMU_FLAG_PERM_RO; + break; + case MMU_PTE_ATTR_AP_P_RO_U_RO: + *flags |= ARCH_MMU_FLAG_PERM_USER | ARCH_MMU_FLAG_PERM_RO; + break; + } + } + LTRACEF("va 0x%lx, paddr 0x%lx, flags 0x%x\n", + vaddr, paddr ? *paddr : ~0UL, flags ? *flags : ~0U); + return 0; +} + +static int alloc_page_table(paddr_t *paddrp, uint page_size_shift) +{ + int ret; + int count; + size_t size = 1U << page_size_shift; + void *vaddr; + + if (size >= PAGE_SIZE) { + count = size / PAGE_SIZE; + ret = pmm_alloc_contiguous(count, page_size_shift, paddrp, NULL); + if (ret != count) + return ERR_NO_MEMORY; + } else { + vaddr = heap_alloc(size, size); + if (!vaddr) + return ERR_NO_MEMORY; + ret = arch_mmu_query((vaddr_t)vaddr, paddrp, NULL); + if (ret) { + heap_free(vaddr); + return ret; + } + } + return 0; +} + +static void free_page_table(void *vaddr, paddr_t paddr, uint page_size_shift) +{ + vm_page_t *address_to_page(paddr_t addr); /* TODO: remove */ + + size_t size = 1U << page_size_shift; + vm_page_t *page; + + if (size >= PAGE_SIZE) { + page = address_to_page(paddr); + if (!page) + panic("bad page table paddr 0x%lx\n", paddr); + pmm_free_page(page); + } else { + heap_free(vaddr); + } +} + +static pte_t *arm64_mmu_get_page_table(vaddr_t index, uint page_size_shift, pte_t *page_table) +{ + pte_t pte; + paddr_t paddr; + void *vaddr; + int ret; + + pte = page_table[index]; + switch (pte & MMU_PTE_DESCRIPTOR_MASK) { + case MMU_PTE_DESCRIPTOR_INVALID: + ret = alloc_page_table(&paddr, page_size_shift); + if (ret) { + TRACEF("failed to allocate page table\n"); + return NULL; + } + vaddr = paddr_to_kvaddr(paddr); + LTRACEF("allocated page table, vaddr %p, paddr 0x%lx\n", vaddr, paddr); + memset(vaddr, MMU_PTE_DESCRIPTOR_INVALID, 1U << page_size_shift); + __asm__ volatile("dmb ishst" ::: "memory"); + pte = paddr | MMU_PTE_L012_DESCRIPTOR_TABLE; + page_table[index] = pte; + LTRACEF("pte %p[0x%lx] = 0x%llx\n", page_table, index, pte); + return vaddr; + + case MMU_PTE_L012_DESCRIPTOR_TABLE: + paddr = pte & MMU_PTE_OUTPUT_ADDR_MASK; + LTRACEF("found page table 0x%lx\n", paddr); + return paddr_to_kvaddr(paddr); + + case MMU_PTE_L012_DESCRIPTOR_BLOCK: + return NULL; + + default: + PANIC_UNIMPLEMENTED; + } +} + +static bool page_table_is_clear(pte_t *page_table, uint page_size_shift) +{ + int i; + int count = 1U << (page_size_shift - 3); + pte_t pte; + + for (i = 0; i < count; i++) { + pte = page_table[i]; + if (pte != MMU_PTE_DESCRIPTOR_INVALID) { + LTRACEF("page_table at %p still in use, index %d is 0x%llx\n", + page_table, i, pte); + return false; + } + } + + LTRACEF("page table at %p is 
clear\n", page_table); + return true; +} + +static void arm64_mmu_unmap_pt(vaddr_t vaddr, vaddr_t vaddr_rel, + size_t size, + uint index_shift, uint page_size_shift, + pte_t *page_table, uint asid) +{ + pte_t *next_page_table; + vaddr_t index; + size_t chunk_size; + vaddr_t vaddr_rem; + vaddr_t block_size; + vaddr_t block_mask; + pte_t pte; + paddr_t page_table_paddr; + + LTRACEF("vaddr 0x%lx, vaddr_rel 0x%lx, size 0x%lx, index shift %d, page_size_shift %d, page_table %p\n", + vaddr, vaddr_rel, size, index_shift, page_size_shift, page_table); + + while (size) { + block_size = 1UL << index_shift; + block_mask = block_size - 1; + vaddr_rem = vaddr_rel & block_mask; + chunk_size = MIN(size, block_size - vaddr_rem); + index = vaddr_rel >> index_shift; + + pte = page_table[index]; + + if (index_shift > page_size_shift && + (pte & MMU_PTE_DESCRIPTOR_MASK) == MMU_PTE_L012_DESCRIPTOR_TABLE) { + page_table_paddr = pte & MMU_PTE_OUTPUT_ADDR_MASK; + next_page_table = paddr_to_kvaddr(page_table_paddr); + arm64_mmu_unmap_pt(vaddr, vaddr_rem, chunk_size, + index_shift - (page_size_shift - 3), + page_size_shift, + next_page_table, asid); + if (chunk_size == block_size || + page_table_is_clear(next_page_table, page_size_shift)) { + LTRACEF("pte %p[0x%lx] = 0 (was page table)\n", page_table, index); + page_table[index] = MMU_PTE_DESCRIPTOR_INVALID; + __asm__ volatile("dmb ishst" ::: "memory"); + free_page_table(next_page_table, page_table_paddr, page_size_shift); + } + } else if (pte) { + LTRACEF("pte %p[0x%lx] = 0\n", page_table, index); + page_table[index] = MMU_PTE_DESCRIPTOR_INVALID; + CF; + if (asid == MMU_ARM64_GLOBAL_ASID) + ARM64_TLBI(vaae1is, vaddr >> 12); + else + ARM64_TLBI(vae1is, vaddr >> 12 | (vaddr_t)asid << 48); + } else { + LTRACEF("pte %p[0x%lx] already clear\n", page_table, index); + } + vaddr += chunk_size; + vaddr_rel += chunk_size; + size -= chunk_size; + } +} + +static int arm64_mmu_map_pt(vaddr_t vaddr_in, vaddr_t vaddr_rel_in, + paddr_t paddr_in, + size_t size_in, pte_t attrs, + uint index_shift, uint page_size_shift, + pte_t *page_table, uint asid) +{ + int ret; + pte_t *next_page_table; + vaddr_t index; + vaddr_t vaddr = vaddr_in; + vaddr_t vaddr_rel = vaddr_rel_in; + paddr_t paddr = paddr_in; + size_t size = size_in; + size_t chunk_size; + vaddr_t vaddr_rem; + vaddr_t block_size; + vaddr_t block_mask; + pte_t pte; + + LTRACEF("vaddr 0x%lx, vaddr_rel 0x%lx, paddr 0x%lx, size 0x%lx, attrs 0x%llx, index shift %d, page_size_shift %d, page_table %p\n", + vaddr, vaddr_rel, paddr, size, attrs, + index_shift, page_size_shift, page_table); + + if ((vaddr_rel | paddr | size) & ((1UL << page_size_shift) - 1)) { + TRACEF("not page aligned\n"); + return ERR_INVALID_ARGS; + } + + while (size) { + block_size = 1UL << index_shift; + block_mask = block_size - 1; + vaddr_rem = vaddr_rel & block_mask; + chunk_size = MIN(size, block_size - vaddr_rem); + index = vaddr_rel >> index_shift; + + if (((vaddr_rel | paddr) & block_mask) || + (chunk_size != block_size) || + (index_shift > MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT)) { + next_page_table = arm64_mmu_get_page_table(index, page_size_shift, + page_table); + if (!next_page_table) + goto err; + + ret = arm64_mmu_map_pt(vaddr, vaddr_rem, paddr, chunk_size, attrs, + index_shift - (page_size_shift - 3), + page_size_shift, next_page_table, asid); + if (ret) + goto err; + } else { + pte = page_table[index]; + if (pte) { + TRACEF("page table entry already in use, index 0x%lx, 0x%llx\n", + index, pte); + goto err; + } + + pte = paddr | attrs; + if (index_shift > 
page_size_shift) + pte |= MMU_PTE_L012_DESCRIPTOR_BLOCK; + else + pte |= MMU_PTE_L3_DESCRIPTOR_PAGE; + + LTRACEF("pte %p[0x%lx] = 0x%llx\n", page_table, index, pte); + page_table[index] = pte; + } + vaddr += chunk_size; + vaddr_rel += chunk_size; + paddr += chunk_size; + size -= chunk_size; + } + + return 0; + +err: + arm64_mmu_unmap_pt(vaddr_in, vaddr_rel_in, size_in - size, + index_shift, page_size_shift, page_table, asid); + DSB; + return ERR_GENERIC; +} + +int arm64_mmu_map(vaddr_t vaddr, paddr_t paddr, size_t size, pte_t attrs, + vaddr_t vaddr_base, uint top_size_shift, + uint top_index_shift, uint page_size_shift, + pte_t *top_page_table, uint asid) +{ + int ret; + vaddr_t vaddr_rel = vaddr - vaddr_base; + vaddr_t vaddr_rel_max = 1UL << top_size_shift; + + LTRACEF("vaddr 0x%lx, paddr 0x%lx, size 0x%lx, attrs 0x%llx, asid 0x%x\n", + vaddr, paddr, size, attrs, asid); + + if (vaddr_rel > vaddr_rel_max - size || size > vaddr_rel_max) { + TRACEF("vaddr 0x%lx, size 0x%lx out of range vaddr 0x%lx, size 0x%lx\n", + vaddr, size, vaddr_base, vaddr_rel_max); + return ERR_INVALID_ARGS; + } + + if (!top_page_table) { + TRACEF("page table is NULL\n"); + return ERR_INVALID_ARGS; + } + + ret = arm64_mmu_map_pt(vaddr, vaddr_rel, paddr, size, attrs, + top_index_shift, page_size_shift, top_page_table, asid); + DSB; + return ret; +} + +int arm64_mmu_unmap(vaddr_t vaddr, size_t size, + vaddr_t vaddr_base, uint top_size_shift, + uint top_index_shift, uint page_size_shift, + pte_t *top_page_table, uint asid) +{ + vaddr_t vaddr_rel = vaddr - vaddr_base; + vaddr_t vaddr_rel_max = 1UL << top_size_shift; + + LTRACEF("vaddr 0x%lx, size 0x%lx, asid 0x%x\n", vaddr, size, asid); + + if (vaddr_rel > vaddr_rel_max - size || size > vaddr_rel_max) { + TRACEF("vaddr 0x%lx, size 0x%lx out of range vaddr 0x%lx, size 0x%lx\n", + vaddr, size, vaddr_base, vaddr_rel_max); + return ERR_INVALID_ARGS; + } + + if (!top_page_table) { + TRACEF("page table is NULL\n"); + return ERR_INVALID_ARGS; + } + + arm64_mmu_unmap_pt(vaddr, vaddr_rel, size, + top_index_shift, page_size_shift, top_page_table, asid); + DSB; + return 0; +} + +int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags) +{ + return arm64_mmu_map(vaddr, paddr, count * PAGE_SIZE, + mmu_flags_to_pte_attr(flags), + ~0UL << MMU_KERNEL_SIZE_SHIFT, MMU_KERNEL_SIZE_SHIFT, + MMU_KERNEL_TOP_SHIFT, MMU_KERNEL_PAGE_SIZE_SHIFT, + arm64_kernel_translation_table, MMU_ARM64_GLOBAL_ASID); +} + +int arch_mmu_unmap(vaddr_t vaddr, uint count) +{ + return arm64_mmu_unmap(vaddr, count * PAGE_SIZE, + ~0UL << MMU_KERNEL_SIZE_SHIFT, MMU_KERNEL_SIZE_SHIFT, + MMU_KERNEL_TOP_SHIFT, MMU_KERNEL_PAGE_SIZE_SHIFT, + arm64_kernel_translation_table, + MMU_ARM64_GLOBAL_ASID); +} diff --git a/arch/arm64/mp.c b/arch/arm64/mp.c new file mode 100644 index 00000000..0760ed32 --- /dev/null +++ b/arch/arm64/mp.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include + +#include +#include +#include +#include +#include + +#if WITH_DEV_INTERRUPT_ARM_GIC +#include +#else +#error need other implementation of interrupt controller that can ipi +#endif + +#define LOCAL_TRACE 0 + +#define GIC_IPI_BASE (14) + +status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi) +{ + LTRACEF("target 0x%x, ipi %u\n", target, ipi); + +#if WITH_DEV_INTERRUPT_ARM_GIC + uint gic_ipi_num = ipi + GIC_IPI_BASE; + + /* filter out targets outside of the range of cpus we care about */ + target &= ((1UL << SMP_MAX_CPUS) - 1); + if (target != 0) { + LTRACEF("target 0x%x, gic_ipi %u\n", target, gic_ipi_num); + arm_gic_sgi(gic_ipi_num, ARM_GIC_SGI_FLAG_NS, target); + } +#endif + + return NO_ERROR; +} + +enum handler_return arm_ipi_generic_handler(void *arg) +{ + LTRACEF("cpu %u, arg %p\n", arch_curr_cpu_num(), arg); + + return INT_NO_RESCHEDULE; +} + +enum handler_return arm_ipi_reschedule_handler(void *arg) +{ + LTRACEF("cpu %u, arg %p\n", arch_curr_cpu_num(), arg); + + return mp_mbx_reschedule_irq(); +} + +void arch_mp_init_percpu(void) +{ + register_int_handler(MP_IPI_GENERIC + GIC_IPI_BASE, &arm_ipi_generic_handler, 0); + register_int_handler(MP_IPI_RESCHEDULE + GIC_IPI_BASE, &arm_ipi_reschedule_handler, 0); + + //unmask_interrupt(MP_IPI_GENERIC); + //unmask_interrupt(MP_IPI_RESCHEDULE); +} + diff --git a/arch/arm64/rules.mk b/arch/arm64/rules.mk index b413eea8..1faf7b2a 100644 --- a/arch/arm64/rules.mk +++ b/arch/arm64/rules.mk @@ -4,7 +4,8 @@ MODULE := $(LOCAL_DIR) GLOBAL_DEFINES += \ ARM64_CPU_$(ARM_CPU)=1 \ - ARM_ISA_ARMV8=1 + ARM_ISA_ARMV8=1 \ + IS_64BIT=1 GLOBAL_INCLUDES += \ $(LOCAL_DIR)/include @@ -15,32 +16,77 @@ MODULE_SRCS += \ $(LOCAL_DIR)/exceptions.S \ $(LOCAL_DIR)/exceptions_c.c \ $(LOCAL_DIR)/thread.c \ + $(LOCAL_DIR)/spinlock.S \ $(LOCAL_DIR)/start.S \ + $(LOCAL_DIR)/cache-ops.S \ # $(LOCAL_DIR)/arm/start.S \ - $(LOCAL_DIR)/arm/cache-ops.S \ $(LOCAL_DIR)/arm/cache.c \ $(LOCAL_DIR)/arm/ops.S \ $(LOCAL_DIR)/arm/faults.c \ - $(LOCAL_DIR)/arm/mmu.c \ $(LOCAL_DIR)/arm/dcc.S GLOBAL_DEFINES += \ - ARCH_DEFAULT_STACK_SIZE=8192 + ARCH_DEFAULT_STACK_SIZE=4096 + +# if its requested we build with SMP, arm generically supports 4 cpus +ifeq ($(WITH_SMP),1) +SMP_MAX_CPUS ?= 4 +SMP_CPU_CLUSTER_SHIFT ?= 8 +SMP_CPU_ID_BITS ?= 24 # Ignore aff3 bits for now since they are not next to aff2 + +GLOBAL_DEFINES += \ + WITH_SMP=1 \ + SMP_MAX_CPUS=$(SMP_MAX_CPUS) \ + SMP_CPU_CLUSTER_SHIFT=$(SMP_CPU_CLUSTER_SHIFT) \ + SMP_CPU_ID_BITS=$(SMP_CPU_ID_BITS) + +MODULE_SRCS += \ + $(LOCAL_DIR)/mp.c +else +GLOBAL_DEFINES += \ + SMP_MAX_CPUS=1 +endif ARCH_OPTFLAGS := -O2 -# try to find the toolchain -ifndef TOOLCHAIN_PREFIX -TOOLCHAIN_PREFIX := aarch64-elf- +# we have a mmu and want the vmm/pmm +WITH_KERNEL_VM ?= 1 + +ifeq ($(WITH_KERNEL_VM),1) + +MODULE_SRCS += \ + $(LOCAL_DIR)/mmu.c + +KERNEL_ASPACE_BASE ?= 0xffff000000000000 +KERNEL_ASPACE_SIZE ?= 0x0001000000000000 + +GLOBAL_DEFINES += \ + KERNEL_ASPACE_BASE=$(KERNEL_ASPACE_BASE) \ + KERNEL_ASPACE_SIZE=$(KERNEL_ASPACE_SIZE) + +KERNEL_BASE ?= 
0xffff000000000000 +KERNEL_LOAD_OFFSET ?= 0 + +GLOBAL_DEFINES += \ + KERNEL_BASE=$(KERNEL_BASE) \ + KERNEL_LOAD_OFFSET=$(KERNEL_LOAD_OFFSET) + +else + +KERNEL_BASE ?= $(MEMBASE) +KERNEL_LOAD_OFFSET ?= 0 + endif -FOUNDTOOL=$(shell which $(TOOLCHAIN_PREFIX)gcc) -ifeq ($(FOUNDTOOL),) -$(error cannot find toolchain, please set TOOLCHAIN_PREFIX or add it to your path) -endif + +# try to find the toolchain +include $(LOCAL_DIR)/toolchain.mk +TOOLCHAIN_PREFIX := $(ARCH_$(ARCH)_TOOLCHAIN_PREFIX) $(info TOOLCHAIN_PREFIX = $(TOOLCHAIN_PREFIX)) +ARCH_COMPILEFLAGS += $(ARCH_$(ARCH)_COMPILEFLAGS) + # make sure some bits were set up MEMVARS_SET := 0 ifneq ($(MEMBASE),) @@ -63,6 +109,6 @@ GENERATED += \ $(BUILDDIR)/system-onesegment.ld: $(LOCAL_DIR)/system-onesegment.ld $(wildcard arch/*.ld) @echo generating $@ @$(MKDIR) - $(NOECHO)sed "s/%MEMBASE%/$(MEMBASE)/;s/%MEMSIZE%/$(MEMSIZE)/" < $< > $@ + $(NOECHO)sed "s/%MEMBASE%/$(MEMBASE)/;s/%MEMSIZE%/$(MEMSIZE)/;s/%KERNEL_BASE%/$(KERNEL_BASE)/;s/%KERNEL_LOAD_OFFSET%/$(KERNEL_LOAD_OFFSET)/" < $< > $@ include make/module.mk diff --git a/arch/arm64/spinlock.S b/arch/arm64/spinlock.S new file mode 100644 index 00000000..ef5b3d13 --- /dev/null +++ b/arch/arm64/spinlock.S @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
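Looking back at arch/arm64/mp.c above: with GIC_IPI_BASE at 14, the kernel IPIs land on the last two software-generated interrupts (SGIs are vectors 0-15 on the GIC). A short usage sketch, assuming the MP_IPI_* values come from kernel/mp.h:

    /* ask cpu 1 and cpu 2 to reschedule: this becomes SGI (MP_IPI_RESCHEDULE + 14)
       delivered only to the cpus named in the mask */
    mp_cpu_mask_t targets = (1u << 1) | (1u << 2);
    arch_mp_send_ipi(targets, MP_IPI_RESCHEDULE);
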
+ */ +#include + +.text + +FUNCTION(arch_spin_trylock) + mov x2, x0 + mov x1, #1 + ldaxr x0, [x2] + cbnz x0, 1f + stxr w0, x1, [x2] +1: + ret + +FUNCTION(arch_spin_lock) + mov x1, #1 + sevl +1: + wfe + ldaxr x2, [x0] + cbnz x2, 1b + stxr w2, x1, [x0] + cbnz w2, 1b + ret + +FUNCTION(arch_spin_unlock) + stlr xzr, [x0] + ret diff --git a/arch/arm64/start.S b/arch/arm64/start.S index 250a7246..9f76a48e 100644 --- a/arch/arm64/start.S +++ b/arch/arm64/start.S @@ -1,33 +1,342 @@ #include +#include +#include +#include + +/* + * Register use: + * x0-x3 Arguments + * x9-x15 Scratch + * x19-x28 Globals + */ +tmp .req x9 +tmp2 .req x10 +index .req x11 +index_shift .req x12 +page_table .req x13 +new_page_table .req x14 +phys_offset .req x15 + +cpuid .req x19 +page_table0 .req x20 +page_table1 .req x21 +mmu_initial_mapping .req x22 +vaddr .req x23 +paddr .req x24 +size .req x25 +attr .req x26 .section .text.boot FUNCTION(_start) - ldr x0, =__stack_end - mov sp, x0 +#if WITH_KERNEL_VM + /* enable caches so atomics and spinlocks work */ + mrs tmp, sctlr_el1 + orr tmp, tmp, #(1<<12) /* Enable icache */ + orr tmp, tmp, #(1<<2) /* Enable dcache/ucache */ + bic tmp, tmp, #(1<<3) /* Disable Stack Alignment Check */ /* TODO: don't use unaligned stacks */ + msr sctlr_el1, tmp + + /* set up the mmu according to mmu_initial_mappings */ + + /* load the base of the translation table and clear the table */ + adrp page_table1, arm64_kernel_translation_table + add page_table1, page_table1, #:lo12:arm64_kernel_translation_table + + /* Prepare tt_trampoline page table */ + /* Calculate pagetable physical addresses */ + adrp page_table0, tt_trampoline + add page_table0, page_table0, #:lo12:tt_trampoline + +#if WITH_SMP + mrs cpuid, mpidr_el1 + ubfx cpuid, cpuid, #0, #SMP_CPU_ID_BITS + cbnz cpuid, .Lmmu_enable_secondary +#endif + + mov tmp, #0 + + /* walk through all the entries in the translation table, setting them up */ +.Lclear_top_page_table_loop: + str xzr, [page_table1, tmp, lsl #3] + add tmp, tmp, #1 + cmp tmp, #MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP + bne .Lclear_top_page_table_loop + + /* load the address of the mmu_initial_mappings table and start processing */ + adrp mmu_initial_mapping, mmu_initial_mappings + add mmu_initial_mapping, mmu_initial_mapping, #:lo12:mmu_initial_mappings + +.Linitial_mapping_loop: + ldp paddr, vaddr, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_PHYS_OFFSET] + ldp size, tmp, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_SIZE_OFFSET] + + tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_DYNAMIC, .Lnot_dynamic + adr paddr, _start + mov size, x0 + str paddr, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_PHYS_OFFSET] + str size, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_SIZE_OFFSET] + +.Lnot_dynamic: + /* if size == 0, end of list */ + cbz size, .Linitial_mapping_done + + /* set up the flags */ + ldr attr, =MMU_PTE_KERNEL_FLAGS + tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_UNCACHED, .Lnot_uncached + ldr attr, =MMU_INITIAL_MAP_STRONGLY_ORDERED + b .Lmem_type_done +.Lnot_uncached: + tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_DEVICE, .Lmem_type_done + ldr attr, =MMU_INITIAL_MAP_DEVICE +.Lmem_type_done: + + /* Check that paddr, vaddr and size are page aligned */ + orr tmp, vaddr, paddr + orr tmp, tmp, size + tst tmp, #(1 << MMU_KERNEL_PAGE_SIZE_SHIFT) - 1 + bne . /* Error: not page aligned */ + + /* Clear top bits of virtual address (should be all set) */ + eor vaddr, vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT) + + /* Check that top bits were all set */ + tst vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT) + bne . 
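The AArch64 spin lock primitives above get their ordering guarantees from the acquire/release instructions (ldaxr/stlr). A rough C model of their behaviour, using GCC atomic builtins; this is a sketch of the semantics, not the implementation:

    /* trylock: returns 0 on success, non-zero if the lock was already held
       (or the exclusive store failed) */
    static inline int model_spin_trylock(volatile unsigned long *lock)
    {
        unsigned long expected = 0;
        int ok = __atomic_compare_exchange_n(lock, &expected, 1, 0,
                                             __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
        return ok ? 0 : 1;
    }

    static inline void model_spin_unlock(volatile unsigned long *lock)
    {
        __atomic_store_n(lock, 0, __ATOMIC_RELEASE);   /* stlr xzr, [x0] */
    }
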
/* Error: vaddr out of range */ + +.Lmap_range_top_loop: + /* Select top level page table */ + mov page_table, page_table1 + mov index_shift, #MMU_KERNEL_TOP_SHIFT + + lsr index, vaddr, index_shift + +.Lmap_range_one_table_loop: + /* Check if current level allow block descriptors */ + cmp index_shift, #MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT + b.hi .Lmap_range_need_page_table + + /* Check if paddr and vaddr alignment allows a block descriptor */ + orr tmp2, vaddr, paddr + lsr tmp, tmp2, index_shift + lsl tmp, tmp, index_shift + cmp tmp, tmp2 + b.ne .Lmap_range_need_page_table + + /* Check if size is large enough for a block mapping */ + lsr tmp, size, index_shift + cbz tmp, .Lmap_range_need_page_table + + /* Select descriptor type, page for level 3, block for level 0-2 */ + orr tmp, attr, #MMU_PTE_L3_DESCRIPTOR_PAGE + cmp index_shift, MMU_KERNEL_PAGE_SIZE_SHIFT + beq .Lmap_range_l3 + orr tmp, attr, #MMU_PTE_L012_DESCRIPTOR_BLOCK +.Lmap_range_l3: + + /* Write page table entry */ + orr tmp, tmp, paddr + str tmp, [page_table, index, lsl #3] + + /* Move to next page table entry */ + mov tmp, #1 + lsl tmp, tmp, index_shift + add vaddr, vaddr, tmp + add paddr, paddr, tmp + subs size, size, tmp + /* TODO: add local loop if next entry is in the same page table */ + b.ne .Lmap_range_top_loop /* size != 0 */ + + /* Move to next mmu_initial_mappings entry */ + add mmu_initial_mapping, mmu_initial_mapping, __MMU_INITIAL_MAPPING_SIZE + b .Linitial_mapping_loop + +.Lmap_range_need_page_table: + /* Check if page table entry is unused */ + ldr new_page_table, [page_table, index, lsl #3] + cbnz new_page_table, .Lmap_range_has_page_table + + /* Calculate phys offset (needed for memory allocation) */ +.Lphys_offset: + adr phys_offset, .Lphys_offset /* phys */ + ldr tmp, =.Lphys_offset /* virt */ + sub phys_offset, tmp, phys_offset + + /* Allocate new page table */ + calloc_bootmem_aligned new_page_table, tmp, tmp2, MMU_KERNEL_PAGE_SIZE_SHIFT, phys_offset + + /* Write page table entry (with allocated page table) */ + orr new_page_table, new_page_table, #MMU_PTE_L012_DESCRIPTOR_TABLE + str new_page_table, [page_table, index, lsl #3] + +.Lmap_range_has_page_table: + /* Check descriptor type */ + and tmp, new_page_table, #MMU_PTE_DESCRIPTOR_MASK + cmp tmp, #MMU_PTE_L012_DESCRIPTOR_TABLE + b.ne . 
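The mapping loop above decides, per translation level, whether the current chunk can be covered by a single block descriptor or needs to descend into a lower-level table. In C the test amounts to the following sketch, assuming the MMU_* constants from the arm64 headers:

    static bool can_use_block_descriptor(uint index_shift, uint64_t vaddr,
                                         uint64_t paddr, uint64_t size_left)
    {
        if (index_shift > MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT)
            return false;                      /* this level only allows table entries */

        uint64_t block_size = 1ULL << index_shift;
        if ((vaddr | paddr) & (block_size - 1))
            return false;                      /* vaddr/paddr not aligned to the block size */

        return size_left >= block_size;        /* enough of the region left to fill a block */
    }
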
/* Error: entry already in use (as a block entry) */ + + /* switch to next page table level */ + bic page_table, new_page_table, #MMU_PTE_DESCRIPTOR_MASK + mov tmp, #~0 + lsl tmp, tmp, index_shift + bic tmp, vaddr, tmp + sub index_shift, index_shift, #(MMU_KERNEL_PAGE_SIZE_SHIFT - 3) + lsr index, tmp, index_shift + + b .Lmap_range_one_table_loop + +.Linitial_mapping_done: + + /* Prepare tt_trampoline page table */ + + /* Zero tt_trampoline translation tables */ + mov tmp, #0 +.Lclear_tt_trampoline: + str xzr, [page_table0, tmp, lsl#3] + add tmp, tmp, #1 + cmp tmp, #MMU_PAGE_TABLE_ENTRIES_IDENT + blt .Lclear_tt_trampoline + + /* Setup mapping at phys -> phys */ + adr tmp, .Lmmu_on_pc + lsr tmp, tmp, #MMU_IDENT_TOP_SHIFT /* tmp = paddr index */ + ldr tmp2, =MMU_PTE_IDENT_FLAGS + add tmp2, tmp2, tmp, lsl #MMU_IDENT_TOP_SHIFT /* tmp2 = pt entry */ + + str tmp2, [page_table0, tmp, lsl #3] /* tt_trampoline[paddr index] = pt entry */ + +#if WITH_SMP + adr tmp, page_tables_not_ready + str wzr, [tmp] + b .Lpage_tables_ready + +.Lmmu_enable_secondary: + adr tmp, page_tables_not_ready +.Lpage_tables_not_ready: + ldr tmp2, [tmp] + cbnz tmp2, .Lpage_tables_not_ready +.Lpage_tables_ready: +#endif + + /* set up the mmu */ + + /* Invalidate TLB */ + tlbi vmalle1is + isb + dsb sy + + /* Initialize Memory Attribute Indirection Register */ + ldr tmp, =MMU_MAIR_VAL + msr mair_el1, tmp + + /* Initialize TCR_EL1 */ + /* set cacheable attributes on translation walk */ + /* (SMP extensions) non-shareable, inner write-back write-allocate */ + ldr tmp, =MMU_TCR_FLAGS_IDENT + msr tcr_el1, tmp + + isb + + /* Write ttbr with phys addr of the translation table */ + msr ttbr0_el1, page_table0 + msr ttbr1_el1, page_table1 + isb + + /* Read SCTLR */ + mrs tmp, sctlr_el1 + + /* Turn on the MMU */ + orr tmp, tmp, #0x1 + + /* Write back SCTLR */ + msr sctlr_el1, tmp +.Lmmu_on_pc: + isb + + /* Jump to virtual code address */ + ldr tmp, =.Lmmu_on_vaddr + br tmp + +.Lmmu_on_vaddr: + + /* Disable trampoline page-table in ttbr0 */ + ldr tmp, =MMU_TCR_FLAGS_KERNEL + msr tcr_el1, tmp + isb + + + /* Invalidate TLB */ + tlbi vmalle1 + isb + +#if WITH_SMP + cbnz cpuid, .Lsecondary_boot +#endif +#endif /* WITH_KERNEL_VM */ + + ldr tmp, =__stack_end + mov sp, tmp /* clear bss */ .L__do_bss: /* clear out the bss */ /* NOTE: relies on __bss_start and __bss_end being 8 byte aligned */ - ldr x0, =__bss_start - ldr x1, =__bss_end - mov x2, #0 - sub x1, x1, x0 - cbz x1, .L__bss_loop_done + ldr tmp, =__bss_start + ldr tmp2, =__bss_end + sub tmp2, tmp2, tmp + cbz tmp2, .L__bss_loop_done .L__bss_loop: - sub x1, x1, #8 - str x2, [x0], #8 - cbnz x1, .L__bss_loop + sub tmp2, tmp2, #8 + str xzr, [tmp], #8 + cbnz tmp2, .L__bss_loop .L__bss_loop_done: bl lk_main b . 
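The .Lsecondary_boot path that follows folds the MPIDR affinity fields into a small linear cpu number and carves each secondary a slice of the boot stack. Roughly, in C (aff2 and above are ignored here, matching the SMP_CPU_ID_BITS comment in rules.mk):

    extern char __stack_end[];

    /* linear cpu number: aff0 indexes within the cluster, aff1 selects the cluster;
       aff0 must be < (1 << SMP_CPU_CLUSTER_SHIFT) and the result < SMP_MAX_CPUS */
    uint aff0 = mpidr & 0xff;
    uint aff1 = (mpidr >> 8) & 0xff;
    uint cpu  = aff0 | (aff1 << SMP_CPU_CLUSTER_SHIFT);

    /* each secondary gets its own slice of the boot stack below __stack_end */
    uintptr_t sp = (uintptr_t)__stack_end - cpu * ARCH_DEFAULT_STACK_SIZE;
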
+#if WITH_SMP +.Lsecondary_boot: + and tmp, cpuid, #0xff + cmp tmp, #(1 << SMP_CPU_CLUSTER_SHIFT) + bge .Lunsupported_cpu_trap + bic cpuid, cpuid, #0xff + orr cpuid, tmp, cpuid, LSR #(8 - SMP_CPU_CLUSTER_SHIFT) + + cmp cpuid, #SMP_MAX_CPUS + bge .Lunsupported_cpu_trap + + /* Set up the stack */ + ldr tmp, =__stack_end + mov tmp2, #ARCH_DEFAULT_STACK_SIZE + mul tmp2, tmp2, cpuid + sub sp, tmp, tmp2 + + mov x0, cpuid + bl arm64_secondary_entry + +.Lunsupported_cpu_trap: + wfe + b .Lunsupported_cpu_trap +#endif + .ltorg +#if WITH_SMP +.data +DATA(page_tables_not_ready) + .long 1 +DATA(secondary_cpu_allocated_stack) + .quad 0 +#endif + .section .bss.prebss.stack .align 4 DATA(__stack) - .skip 0x2000 + .skip ARCH_DEFAULT_STACK_SIZE * SMP_MAX_CPUS DATA(__stack_end) +#if WITH_KERNEL_VM +.section ".bss.prebss.translation_table" +.align 3 + MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT +DATA(tt_trampoline) + .skip 8 * MMU_PAGE_TABLE_ENTRIES_IDENT +#endif diff --git a/arch/arm64/system-onesegment.ld b/arch/arm64/system-onesegment.ld index 719474c5..8677be7c 100644 --- a/arch/arm64/system-onesegment.ld +++ b/arch/arm64/system-onesegment.ld @@ -4,10 +4,11 @@ OUTPUT_ARCH(aarch64) ENTRY(_start) SECTIONS { - . = %MEMBASE%; + . = %KERNEL_BASE% + %KERNEL_LOAD_OFFSET%; /* text/read-only data */ - .text : { + /* set the load address to physical MEMBASE */ + .text : AT(%MEMBASE% + %KERNEL_LOAD_OFFSET%) { KEEP(*(.text.boot)) KEEP(*(.text.boot.vectab)) *(.text* .sram.text.glue_7* .gnu.linkonce.t.*) @@ -93,7 +94,7 @@ INCLUDE "arch/shared_data_sections.ld" . = ALIGN(8); _end = .; - . = %MEMBASE% + %MEMSIZE%; + . = %KERNEL_BASE% + %MEMSIZE%; _end_of_ram = .; /* Strip unnecessary stuff */ diff --git a/arch/arm64/thread.c b/arch/arm64/thread.c index edecae3d..5d21b278 100644 --- a/arch/arm64/thread.c +++ b/arch/arm64/thread.c @@ -57,8 +57,9 @@ static void initial_thread_func(void) LTRACEF("initial_thread_func: thread %p calling %p with arg %p\n", current_thread, current_thread->entry, current_thread->arg); - /* exit the implicit critical section we're within */ - exit_critical_section(); + /* release the thread lock that was implicitly held across the reschedule */ + spin_unlock(&thread_lock); + arch_enable_ints(); ret = current_thread->entry(current_thread->arg); @@ -92,3 +93,10 @@ void arch_context_switch(thread_t *oldthread, thread_t *newthread) arm64_context_switch(&oldthread->arch.sp, newthread->arch.sp); } +void arch_dump_thread(thread_t *t) +{ + if (t->state != THREAD_RUNNING) { + dprintf(INFO, "\tarch: "); + dprintf(INFO, "sp 0x%lx\n", t->arch.sp); + } +} diff --git a/arch/arm64/toolchain.mk b/arch/arm64/toolchain.mk new file mode 100644 index 00000000..4fd8c644 --- /dev/null +++ b/arch/arm64/toolchain.mk @@ -0,0 +1,18 @@ +ifndef ARCH_arm64_TOOLCHAIN_INCLUDED +ARCH_arm64_TOOLCHAIN_INCLUDED := 1 + +ifndef ARCH_arm64_TOOLCHAIN_PREFIX +ARCH_arm64_TOOLCHAIN_PREFIX := aarch64-elf- +FOUNDTOOL=$(shell which $(ARCH_arm64_TOOLCHAIN_PREFIX)gcc) +ifeq ($(FOUNDTOOL),) +ARCH_arm64_TOOLCHAIN_PREFIX := aarch64-linux-android- +FOUNDTOOL=$(shell which $(ARCH_arm64_TOOLCHAIN_PREFIX)gcc) +ifeq ($(FOUNDTOOL),) +$(error cannot find toolchain, please set ARCH_arm64_TOOLCHAIN_PREFIX or add it to your path) +endif +endif +endif + +ARCH_arm64_COMPILEFLAGS := -mgeneral-regs-only -DWITH_NO_FP=1 + +endif diff --git a/arch/microblaze/exceptions.c b/arch/microblaze/exceptions.c index 807b5aca..857662ee 100644 --- a/arch/microblaze/exceptions.c +++ b/arch/microblaze/exceptions.c @@ -31,11 +31,7 @@ enum handler_return platform_irq_handler(void); void 
microblaze_irq(void) { - inc_critical_section(); - if (platform_irq_handler() == INT_RESCHEDULE) thread_preempt(); - - dec_critical_section(); } diff --git a/arch/microblaze/include/arch/arch_ops.h b/arch/microblaze/include/arch/arch_ops.h index 7a54bdaa..7d5fd942 100644 --- a/arch/microblaze/include/arch/arch_ops.h +++ b/arch/microblaze/include/arch/arch_ops.h @@ -56,54 +56,37 @@ static inline void arch_disable_ints(void) CF; } +static inline bool arch_ints_disabled(void) +{ + uint32_t state; + + __asm__ volatile( + "mfs %0, rmsr;" + : "=r" (state)); + + return !(state & (1<<1)); +} + static inline int atomic_add(volatile int *ptr, int val) { - return __atomic_fetch_add(ptr, val, __ATOMIC_RELAXED); + return __atomic_fetch_add(ptr, val, __ATOMIC_RELAXED); } static inline int atomic_or(volatile int *ptr, int val) { - return __atomic_fetch_or(ptr, val, __ATOMIC_RELAXED); + return __atomic_fetch_or(ptr, val, __ATOMIC_RELAXED); } static inline int atomic_and(volatile int *ptr, int val) { - return __atomic_fetch_and(ptr, val, __ATOMIC_RELAXED); + return __atomic_fetch_and(ptr, val, __ATOMIC_RELAXED); } static inline int atomic_swap(volatile int *ptr, int val) { - return __atomic_exchange_n(ptr, val, __ATOMIC_RELAXED); + return __atomic_exchange_n(ptr, val, __ATOMIC_RELAXED); } -#if 0 -static inline int atomic_cmpxchg(volatile int *ptr, int oldval, int newval) -{ - int old; - int test; - - do { - __asm__ volatile( - "ldrex %[old], [%[ptr]]\n" - "mov %[test], #0\n" - "teq %[old], %[oldval]\n" -#if ARM_ISA_ARMV7M - "bne 0f\n" - "strex %[test], %[newval], [%[ptr]]\n" - "0:\n" -#else - "strexeq %[test], %[newval], [%[ptr]]\n" -#endif - : [old]"=&r" (old), [test]"=&r" (test) - : [ptr]"r" (ptr), [oldval]"Ir" (oldval), [newval]"r" (newval) - : "cc"); - - } while (test != 0); - - return old; -} -#endif - /* use a global pointer to store the current_thread */ extern struct thread *_current_thread; @@ -119,3 +102,9 @@ static inline void set_current_thread(struct thread *t) static inline uint32_t arch_cycle_count(void) { return 0; } +static inline uint arch_curr_cpu_num(void) +{ + return 0; +} + + diff --git a/arch/microblaze/include/arch/spinlock.h b/arch/microblaze/include/arch/spinlock.h new file mode 100644 index 00000000..5c50c5b9 --- /dev/null +++ b/arch/microblaze/include/arch/spinlock.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
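The uniprocessor spin lock header that follows only makes sense together with the interrupt save/restore hooks it defines; the generic wrapper (which lives in the common spinlock header, not part of this diff) is expected to combine them roughly as in this sketch:

    /* sketch of the generic irqsave wrappers built on the arch hooks */
    static inline void spin_lock_irqsave_sketch(spin_lock_t *lock,
                                                spin_lock_saved_state_t *state)
    {
        arch_interrupt_save(state, ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS);
        arch_spin_lock(lock);            /* a plain store on single-cpu ports */
    }

    static inline void spin_unlock_irqrestore_sketch(spin_lock_t *lock,
                                                     spin_lock_saved_state_t state)
    {
        arch_spin_unlock(lock);
        arch_interrupt_restore(state, ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS);
    }
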
+ */ +#pragma once + +#include +#include + +#if WITH_SMP +#error microblaze does not support SMP +#endif + +#define SPIN_LOCK_INITIAL_VALUE (0) + +typedef unsigned int spin_lock_t; + +typedef unsigned int spin_lock_saved_state_t; +typedef unsigned int spin_lock_save_flags_t; + +static inline void arch_spin_lock(spin_lock_t *lock) +{ + *lock = 1; +} + +static inline int arch_spin_trylock(spin_lock_t *lock) +{ + return 0; +} + +static inline void arch_spin_unlock(spin_lock_t *lock) +{ + *lock = 0; +} + +static inline void arch_spin_lock_init(spin_lock_t *lock) +{ + *lock = SPIN_LOCK_INITIAL_VALUE; +} + +static inline bool arch_spin_lock_held(spin_lock_t *lock) +{ + return *lock != 0; +} + + /* default arm flag is to just disable plain irqs */ +#define ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS 0 + +enum { + /* private */ + SPIN_LOCK_STATE_RESTORE_IRQ = 1, +}; + +static inline void +arch_interrupt_save(spin_lock_saved_state_t *statep, spin_lock_save_flags_t flags) +{ + spin_lock_saved_state_t state = 0; + if (!arch_ints_disabled()) { + state |= SPIN_LOCK_STATE_RESTORE_IRQ; + arch_disable_ints(); + } + *statep = state; +} + +static inline void +arch_interrupt_restore(spin_lock_saved_state_t old_state, spin_lock_save_flags_t flags) +{ + if (old_state & SPIN_LOCK_STATE_RESTORE_IRQ) + arch_enable_ints(); +} + + + + diff --git a/arch/microblaze/rules.mk b/arch/microblaze/rules.mk index 48a80211..7c3ebe21 100644 --- a/arch/microblaze/rules.mk +++ b/arch/microblaze/rules.mk @@ -19,6 +19,9 @@ MODULE_SRCS += \ $(LOCAL_DIR)/faults.c \ $(LOCAL_DIR)/descriptor.c +GLOBAL_DEFINES += \ + SMP_MAX_CPUS=1 + # set the default toolchain to microblaze elf and set a #define ifndef TOOLCHAIN_PREFIX TOOLCHAIN_PREFIX := microblaze-elf- diff --git a/arch/microblaze/thread.c b/arch/microblaze/thread.c index 22fd0d2b..2237971b 100644 --- a/arch/microblaze/thread.c +++ b/arch/microblaze/thread.c @@ -42,8 +42,9 @@ static void initial_thread_func(void) dump_thread(ct); #endif - /* exit the implicit critical section we're within */ - exit_critical_section(); + /* release the thread lock that was implicitly held across the reschedule */ + spin_unlock(&thread_lock); + arch_enable_ints(); int ret = ct->entry(ct->arg); @@ -82,3 +83,11 @@ void arch_context_switch(thread_t *oldthread, thread_t *newthread) microblaze_context_switch(&oldthread->arch.cs_frame, &newthread->arch.cs_frame); } +void arch_dump_thread(thread_t *t) +{ + if (t->state != THREAD_RUNNING) { + dprintf(INFO, "\tarch: "); + dprintf(INFO, "sp 0x%x\n", t->arch.cs_frame.r1); + } +} + diff --git a/arch/x86-64/thread.c b/arch/x86-64/thread.c index dea6fbdf..15ae7cb4 100644 --- a/arch/x86-64/thread.c +++ b/arch/x86-64/thread.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -45,8 +46,9 @@ static void initial_thread_func(void) { int ret; - /* exit the implicit critical section we're within */ - exit_critical_section(); + /* release the thread lock that was implicitly held across the reschedule */ + spin_unlock(&thread_lock); + arch_enable_ints(); ret = _current_thread->entry(_current_thread->arg); @@ -126,3 +128,5 @@ void arch_context_switch(thread_t *oldthread, thread_t *newthread) : "g" (newthread->arch.rsp) ); } + +/* vim: noexpandtab */ diff --git a/arch/x86/crt0.S b/arch/x86/crt0.S index 4550b8ab..26cc8436 100644 --- a/arch/x86/crt0.S +++ b/arch/x86/crt0.S @@ -173,8 +173,6 @@ interrupt_common: movl %esp, %eax /* store pointer to iframe, using same method */ pushl %eax - incl critical_section_count - call platform_irq cmpl $0,%eax @@ -182,8 
+180,6 @@ interrupt_common: call thread_preempt 0: - decl critical_section_count - popl %eax /* drop pointer to iframe */ popl %eax /* restore task_esp, stack switch can occur here if task_esp is modified */ movl %eax, %esp diff --git a/arch/x86/faults.c b/arch/x86/faults.c index afdc0bb9..32c732f7 100644 --- a/arch/x86/faults.c +++ b/arch/x86/faults.c @@ -46,7 +46,6 @@ static void dump_fault_frame(struct x86_iframe *frame) static void exception_die(struct x86_iframe *frame, const char *msg) { - inc_critical_section(); dprintf(CRITICAL, msg); dump_fault_frame(frame); diff --git a/arch/x86/include/arch/arch_ops.h b/arch/x86/include/arch/arch_ops.h index f0816cdb..f8c5f169 100644 --- a/arch/x86/include/arch/arch_ops.h +++ b/arch/x86/include/arch/arch_ops.h @@ -37,13 +37,13 @@ static inline void arch_enable_ints(void) __asm__ volatile("sti"); } -static inline inline void arch_disable_ints(void) +static inline void arch_disable_ints(void) { __asm__ volatile("cli"); CF; } -static inline inline bool arch_ints_disabled(void) +static inline bool arch_ints_disabled(void) { unsigned int state; @@ -53,7 +53,7 @@ static inline inline bool arch_ints_disabled(void) : "=a" (state) :: "memory"); - return !!(state & (1<<9)); + return !(state & (1<<9)); } int _atomic_and(volatile int *ptr, int val); @@ -110,6 +110,11 @@ static inline void set_current_thread(struct thread *t) _current_thread = t; } +static inline uint arch_curr_cpu_num(void) +{ + return 0; +} + #endif // !ASSEMBLY #endif diff --git a/arch/x86/include/arch/spinlock.h b/arch/x86/include/arch/spinlock.h new file mode 100644 index 00000000..42d35a7a --- /dev/null +++ b/arch/x86/include/arch/spinlock.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
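IF is bit 9 of EFLAGS and is set when interrupts are enabled, which is why the arch_ints_disabled() predicate above had to be inverted. The x86.h helpers added below make the usual save/disable/restore sequence explicit; a sketch of an open-coded interrupt-off region using them:

    /* this is essentially what arch_interrupt_save/restore below boil down to on x86 */
    uint32_t flags = x86_save_eflags();   /* remember IF along with the rest of EFLAGS */
    arch_disable_ints();
    /* ... touch per-cpu or device state ... */
    x86_restore_eflags(flags);            /* re-enables interrupts only if they were on */
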
+ */ +#pragma once + +#include +#include +#include + +#define SPIN_LOCK_INITIAL_VALUE (0) + +typedef unsigned long spin_lock_t; + +typedef uint32_t spin_lock_saved_state_t; +typedef uint spin_lock_save_flags_t; + +/* simple implementation of spinlocks for no smp support */ +static inline void arch_spin_lock_init(spin_lock_t *lock) +{ + *lock = SPIN_LOCK_INITIAL_VALUE; +} + +static inline bool arch_spin_lock_held(spin_lock_t *lock) +{ + return *lock != 0; +} + +static inline void arch_spin_lock(spin_lock_t *lock) +{ + *lock = 1; +} + +static inline int arch_spin_trylock(spin_lock_t *lock) +{ + return 0; +} + +static inline void arch_spin_unlock(spin_lock_t *lock) +{ + *lock = 0; +} + +/* flags are unused on x86 */ +#define ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS 0 + +static inline void +arch_interrupt_save(spin_lock_saved_state_t *statep, spin_lock_save_flags_t flags) +{ + *statep = x86_save_eflags(); + arch_disable_ints(); +} + +static inline void +arch_interrupt_restore(spin_lock_saved_state_t old_state, spin_lock_save_flags_t flags) +{ + x86_restore_eflags(old_state); +} + + diff --git a/arch/x86/include/arch/x86.h b/arch/x86/include/arch/x86.h index 0caa0560..02493532 100644 --- a/arch/x86/include/arch/x86.h +++ b/arch/x86/include/arch/x86.h @@ -118,6 +118,28 @@ static inline uint32_t x86_get_cr2(void) return rv; } +static inline uint32_t x86_save_eflags(void) +{ + unsigned int state; + + __asm__ volatile( + "pushfl;" + "popl %0" + : "=rm" (state) + :: "memory"); + + return state; +} + +static inline void x86_restore_eflags(uint32_t eflags) +{ + __asm__ volatile( + "pushl %0;" + "popfl" + :: "g" (eflags) + : "memory", "cc"); +} + #define rdtsc(low,high) \ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) diff --git a/arch/x86/rules.mk b/arch/x86/rules.mk index 8eb4dccf..eee65490 100644 --- a/arch/x86/rules.mk +++ b/arch/x86/rules.mk @@ -22,6 +22,10 @@ ifndef TOOLCHAIN_PREFIX TOOLCHAIN_PREFIX := i386-elf- endif +# for the moment, SMP is not supported on x86 +GLOBAL_DEFINES += \ + SMP_MAX_CPUS=1 + LIBGCC := $(shell $(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -print-libgcc-file-name) #$(info LIBGCC = $(LIBGCC)) diff --git a/arch/x86/thread.c b/arch/x86/thread.c index 190ef9f5..fff6d348 100644 --- a/arch/x86/thread.c +++ b/arch/x86/thread.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -50,11 +51,9 @@ static void initial_thread_func(void) { int ret; -// dprintf("initial_thread_func: thread %p calling %p with arg %p\n", _current_thread, _current_thread->entry, _current_thread->arg); -// dump_thread(_current_thread); - - /* exit the implicit critical section we're within */ - exit_critical_section(); + /* release the thread lock that was implicitly held across the reschedule */ + spin_unlock(&thread_lock); + arch_enable_ints(); ret = _current_thread->entry(_current_thread->arg); @@ -89,6 +88,14 @@ void arch_thread_initialize(thread_t *t) t->arch.esp = (vaddr_t)frame; } +void arch_dump_thread(thread_t *t) +{ + if (t->state != THREAD_RUNNING) { + dprintf(INFO, "\tarch: "); + dprintf(INFO, "sp 0x%lx\n", t->arch.esp); + } +} + void arch_context_switch(thread_t *oldthread, thread_t *newthread) { //dprintf(DEBUG, "arch_context_switch: old %p (%s), new %p (%s)\n", oldthread, oldthread->name, newthread, newthread->name); diff --git a/dev/cache/pl310/pl310.c b/dev/cache/pl310/pl310.c index 5e1b7f4e..13c59163 100644 --- a/dev/cache/pl310/pl310.c +++ b/dev/cache/pl310/pl310.c @@ -104,6 +104,10 @@ static void pl310_init(uint level) PL310_REG(REG1_TAG_RAM_CONTROL) = 
PL310_TAG_RAM_LATENCY; PL310_REG(REG1_DATA_RAM_CONTROL) = PL310_DATA_RAM_LATENCY; + /* configure */ + /* early BRESP enable, instruction/data prefetch, exclusive cache, full line of zero */ + PL310_REG(REG1_AUX_CONTROL) |= (1<<30)|(1<<29)|(1<<28)|(1<<12)|(1<<0); + /* flush all the ways */ PL310_REG(REG7_INV_WAY) = 0xffff; } @@ -120,6 +124,7 @@ status_t pl310_set_enable(bool enable) if (enable) { if ((PL310_REG(REG1_CONTROL) & 1) == 0) { /* if disabled */ + pl310_invalidate(); PL310_REG(REG1_CONTROL) = 1; } } else { diff --git a/dev/interrupt/arm_gic/arm_gic.c b/dev/interrupt/arm_gic/arm_gic.c index e49a404b..0069d1fa 100644 --- a/dev/interrupt/arm_gic/arm_gic.c +++ b/dev/interrupt/arm_gic/arm_gic.c @@ -20,6 +20,8 @@ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include #include #include #include @@ -27,9 +29,9 @@ #include #include #include +#include #include #include -#include #include #include #if WITH_LIB_SM @@ -60,11 +62,17 @@ static bool arm_gic_interrupt_change_allowed(int irq) TRACEF("change to interrupt %d ignored after booting ns\n", irq); return false; } + +static void suspend_resume_fiq(bool resume_gicc, bool resume_gicd); #else static bool arm_gic_interrupt_change_allowed(int irq) { return true; } + +static void suspend_resume_fiq(bool resume_gicc, bool resume_gicd) +{ +} #endif @@ -73,10 +81,22 @@ struct int_handler_struct { void *arg; }; -static struct int_handler_struct int_handler_table[MAX_INT]; +static struct int_handler_struct int_handler_table_per_cpu[GIC_MAX_PER_CPU_INT][SMP_MAX_CPUS]; +static struct int_handler_struct int_handler_table_shared[MAX_INT-GIC_MAX_PER_CPU_INT]; + +static struct int_handler_struct *get_int_handler(unsigned int vector, uint cpu) +{ + if (vector < GIC_MAX_PER_CPU_INT) + return &int_handler_table_per_cpu[vector][cpu]; + else + return &int_handler_table_shared[vector - GIC_MAX_PER_CPU_INT]; +} void register_int_handler(unsigned int vector, int_handler handler, void *arg) { + struct int_handler_struct *h; + uint cpu = arch_curr_cpu_num(); + spin_lock_saved_state_t state; if (vector >= MAX_INT) @@ -85,8 +105,9 @@ void register_int_handler(unsigned int vector, int_handler handler, void *arg) spin_lock_save(&gicd_lock, &state, GICD_LOCK_FLAGS); if (arm_gic_interrupt_change_allowed(vector)) { - int_handler_table[vector].handler = handler; - int_handler_table[vector].arg = arg; + h = get_int_handler(vector, cpu); + h->handler = handler; + h->arg = arg; } spin_unlock_restore(&gicd_lock, state, GICD_LOCK_FLAGS); @@ -130,6 +151,19 @@ void register_int_handler(unsigned int vector, int_handler handler, void *arg) #define GICD_CPENDSGIR(n) (GICD_OFFSET + 0xf10 + (n) * 4) #define GICD_SPENDSGIR(n) (GICD_OFFSET + 0xf20 + (n) * 4) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define GIC_REG_COUNT(bit_per_reg) DIV_ROUND_UP(MAX_INT, (bit_per_reg)) +#define DEFINE_GIC_SHADOW_REG(name, bit_per_reg, init_val, init_from) \ + uint32_t (name)[GIC_REG_COUNT(bit_per_reg)] = { \ + [(init_from / bit_per_reg) ... 
\ + (GIC_REG_COUNT(bit_per_reg) - 1)] = (init_val) \ + } + +#if WITH_LIB_SM +static DEFINE_GIC_SHADOW_REG(gicd_igroupr, 32, ~0U, 0); +#endif +static DEFINE_GIC_SHADOW_REG(gicd_itargetsr, 4, 0x01010101, 32); + static void gic_set_enable(uint vector, bool enable) { int reg = vector / 32; @@ -141,17 +175,49 @@ static void gic_set_enable(uint vector, bool enable) GICREG(0, GICD_ICENABLER(reg)) = mask; } -void arm_gic_init_secondary_cpu(void) +static void arm_gic_init_percpu(uint level) { #if WITH_LIB_SM GICREG(0, GICC_CTLR) = 0xb; // enable GIC0 and select fiq mode for secure - GICREG(0, GICD_IGROUPR(0)) = ~0UL; /* GICD_IGROUPR0 is banked */ + GICREG(0, GICD_IGROUPR(0)) = ~0U; /* GICD_IGROUPR0 is banked */ #else GICREG(0, GICC_CTLR) = 1; // enable GIC0 #endif GICREG(0, GICC_PMR) = 0xFF; // unmask interrupts at all priority levels } +LK_INIT_HOOK_FLAGS(arm_gic_init_percpu, + arm_gic_init_percpu, + LK_INIT_LEVEL_PLATFORM_EARLY, LK_INIT_FLAG_SECONDARY_CPUS); + +static void arm_gic_suspend_cpu(uint level) +{ + suspend_resume_fiq(false, false); +} + +LK_INIT_HOOK_FLAGS(arm_gic_suspend_cpu, arm_gic_suspend_cpu, + LK_INIT_LEVEL_PLATFORM, LK_INIT_FLAG_CPU_SUSPEND); + +static void arm_gic_resume_cpu(uint level) +{ + spin_lock_saved_state_t state; + bool resume_gicd = false; + + spin_lock_save(&gicd_lock, &state, GICD_LOCK_FLAGS); + if (!(GICREG(0, GICD_CTLR) & 1)) { + dprintf(SPEW, "%s: distibutor is off, calling arm_gic_init instead\n", __func__); + arm_gic_init(); + resume_gicd = true; + } else { + arm_gic_init_percpu(0); + } + spin_unlock_restore(&gicd_lock, state, GICD_LOCK_FLAGS); + suspend_resume_fiq(true, resume_gicd); +} + +LK_INIT_HOOK_FLAGS(arm_gic_resume_cpu, arm_gic_resume_cpu, + LK_INIT_LEVEL_PLATFORM, LK_INIT_FLAG_CPU_RESUME); + static int arm_gic_max_cpu(void) { return (GICREG(0, GICD_TYPER) >> 5) & 0x7; @@ -169,21 +235,24 @@ void arm_gic_init(void) if (arm_gic_max_cpu() > 0) { /* Set external interrupts to target cpu 0 */ for (i = 32; i < MAX_INT; i += 4) { - GICREG(0, GICD_ITARGETSR(i / 4)) = 0x01010101; + GICREG(0, GICD_ITARGETSR(i / 4)) = gicd_itargetsr[i / 4]; } } GICREG(0, GICD_CTLR) = 1; // enable GIC0 #if WITH_LIB_SM + GICREG(0, GICD_CTLR) = 3; // enable GIC0 ns interrupts /* * Iterate through all IRQs and set them to non-secure * mode. This will allow the non-secure side to handle * all the interrupts we don't explicitly claim. 
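DEFINE_GIC_SHADOW_REG above relies on GCC's designated range initializers. For a concrete feel, with an illustrative MAX_INT of 160 the ITARGETSR shadow (4 interrupts per 32-bit register, SPIs starting at interrupt 32) expands to:

    /* 160 / 4 = 40 registers; SPIs start at register 32 / 4 = 8;
       each 8-bit field is 0x01, i.e. every SPI initially routed to cpu 0 */
    uint32_t gicd_itargetsr[40] = {
        [8 ... 39] = 0x01010101,
    };
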
*/ - for (i = 32; i < MAX_INT; i += 32) - GICREG(0, GICD_IGROUPR(i / 32)) = ~0UL; + for (i = 32; i < MAX_INT; i += 32) { + u_int reg = i / 32; + GICREG(0, GICD_IGROUPR(reg)) = gicd_igroupr[reg]; + } #endif - arm_gic_init_secondary_cpu(); + arm_gic_init_percpu(0); } static status_t arm_gic_set_secure_locked(u_int irq, bool secure) @@ -196,9 +265,9 @@ static status_t arm_gic_set_secure_locked(u_int irq, bool secure) return ERR_INVALID_ARGS; if (secure) - GICREG(0, GICD_IGROUPR(reg)) &= ~mask; + GICREG(0, GICD_IGROUPR(reg)) = (gicd_igroupr[reg] &= ~mask); else - GICREG(0, GICD_IGROUPR(reg)) |= mask; + GICREG(0, GICD_IGROUPR(reg)) = (gicd_igroupr[reg] |= mask); LTRACEF("irq %d, secure %d, GICD_IGROUP%d = %x\n", irq, secure, reg, GICREG(0, GICD_IGROUPR(reg))); #endif @@ -216,8 +285,8 @@ static status_t arm_gic_set_target_locked(u_int irq, u_int cpu_mask, u_int enabl enable_mask = (enable_mask << shift) & cpu_mask; old_val = GICREG(0, GICD_ITARGETSR(reg)); - new_val = (old_val & ~cpu_mask) | enable_mask; - GICREG(0, GICD_ITARGETSR(reg)) = new_val; + new_val = (gicd_itargetsr[reg] & ~cpu_mask) | enable_mask; + GICREG(0, GICD_ITARGETSR(reg)) = gicd_itargetsr[reg] = new_val; LTRACEF("irq %i, GICD_ITARGETSR%d %x => %x (got %x)\n", irq, reg, old_val, new_val, GICREG(0, GICD_ITARGETSR(reg))); @@ -292,7 +361,8 @@ static enum handler_return __platform_irq(struct arm_iframe *frame) { // get the current vector - unsigned int vector = GICREG(0, GICC_IAR) & 0x3ff; + uint32_t iar = GICREG(0, GICC_IAR); + unsigned int vector = iar & 0x3ff; if (vector >= 0x3fe) { // spurious @@ -302,18 +372,22 @@ enum handler_return __platform_irq(struct arm_iframe *frame) THREAD_STATS_INC(interrupts); KEVLOG_IRQ_ENTER(vector); -// printf("platform_irq: spsr 0x%x, pc 0x%x, currthread %p, vector %d\n", frame->spsr, frame->pc, current_thread, vector); + uint cpu = arch_curr_cpu_num(); + +// printf("platform_irq: iar 0x%x cpu %u spsr 0x%x, pc 0x%x, currthread %p, vector %d\n", +// iar, cpu, frame->spsr, frame->pc, get_current_thread(), vector); // deliver the interrupt enum handler_return ret; ret = INT_NO_RESCHEDULE; - if (int_handler_table[vector].handler) - ret = int_handler_table[vector].handler(int_handler_table[vector].arg); + struct int_handler_struct *handler = get_int_handler(vector, cpu); + if (handler->handler) + ret = handler->handler(handler->arg); - GICREG(0, GICC_EOIR) = vector; + GICREG(0, GICC_EOIR) = iar; -// printf("platform_irq: exit %d\n", ret); +// printf("platform_irq: cpu %u exit %d\n", cpu, ret); KEVLOG_IRQ_EXIT(vector); @@ -325,9 +399,11 @@ enum handler_return platform_irq(struct arm_iframe *frame) #if WITH_LIB_SM uint32_t ahppir = GICREG(0, GICC_AHPPIR); uint32_t pending_irq = ahppir & 0x3ff; + struct int_handler_struct *h; + uint cpu = arch_curr_cpu_num(); LTRACEF("ahppir %d\n", ahppir); - if (pending_irq < MAX_INT && int_handler_table[pending_irq].handler) { + if (pending_irq < MAX_INT && get_int_handler(pending_irq, cpu)->handler) { enum handler_return ret = 0; uint32_t irq; uint8_t old_priority; @@ -348,8 +424,8 @@ enum handler_return platform_irq(struct arm_iframe *frame) spin_unlock_restore(&gicd_lock, state, GICD_LOCK_FLAGS); LTRACEF("irq %d\n", irq); - if (irq < MAX_INT && int_handler_table[irq].handler) - ret = int_handler_table[irq].handler(int_handler_table[irq].arg); + if (irq < MAX_INT && (h = get_int_handler(pending_irq, cpu))->handler) + ret = h->handler(h->arg); else TRACEF("unexpected irq %d != %d may get lost\n", irq, pending_irq); GICREG(0, GICC_AEOIR) = irq; @@ -364,7 +440,7 @@ enum 
handler_return platform_irq(struct arm_iframe *frame) void platform_fiq(struct arm_iframe *frame) { #if WITH_LIB_SM - sm_handle_irq(); + sm_handle_fiq(); #else PANIC_UNIMPLEMENTED; #endif @@ -375,12 +451,13 @@ static status_t arm_gic_get_next_irq_locked(u_int min_irq, bool per_cpu) { u_int irq; u_int max_irq = per_cpu ? GIC_MAX_PER_CPU_INT : MAX_INT; + uint cpu = arch_curr_cpu_num(); if (!per_cpu && min_irq < GIC_MAX_PER_CPU_INT) min_irq = GIC_MAX_PER_CPU_INT; for (irq = min_irq; irq < max_irq; irq++) - if (int_handler_table[irq].handler) + if (get_int_handler(irq, cpu)->handler) return irq; return SM_ERR_END_OF_INPUT; @@ -395,12 +472,27 @@ long smc_intc_get_next_irq(smc32_args_t *args) arm_gic_non_secure_interrupts_frozen = true; ret = arm_gic_get_next_irq_locked(args->params[0], args->params[1]); + LTRACEF("min_irq %d, per_cpu %d, ret %d\n", + args->params[0], args->params[1], ret); spin_unlock_restore(&gicd_lock, state, GICD_LOCK_FLAGS); return ret; } +static u_long enabled_fiq_mask[BITMAP_NUM_WORDS(MAX_INT)]; + +static void bitmap_update_locked(u_long *bitmap, u_int bit, bool set) +{ + u_long mask = 1UL << BITMAP_BIT_IN_WORD(bit); + + bitmap += BITMAP_WORD(bit); + if (set) + *bitmap |= mask; + else + *bitmap &= ~mask; +} + long smc_intc_request_fiq(smc32_args_t *args) { u_int fiq = args->params[0]; @@ -415,6 +507,7 @@ long smc_intc_request_fiq(smc32_args_t *args) arm_gic_set_priority_locked(fiq, 0); gic_set_enable(fiq, enable); + bitmap_update_locked(enabled_fiq_mask, fiq, enable); dprintf(SPEW, "%s: fiq %d, enable %d done\n", __func__, fiq, enable); @@ -423,21 +516,52 @@ long smc_intc_request_fiq(smc32_args_t *args) return NO_ERROR; } -static uint32_t read_mpidr(void) +static u_int current_fiq[8] = { 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff }; + +static bool update_fiq_targets(u_int cpu, bool enable, u_int triggered_fiq, bool resume_gicd) { - int mpidr; - __asm__ volatile("mrc p15, 0, %0, c0, c0, 5" - : "=r" (mpidr) - ); - return mpidr; + u_int i, j; + u_long mask; + u_int fiq; + bool smp = arm_gic_max_cpu() > 0; + bool ret = false; + + spin_lock(&gicd_lock); /* IRQs and FIQs are already masked */ + for (i = 0; i < BITMAP_NUM_WORDS(MAX_INT); i++) { + mask = enabled_fiq_mask[i]; + while (mask) { + j = _ffz(~mask); + mask &= ~(1UL << j); + fiq = i * BITMAP_BITS_PER_WORD + j; + if (fiq == triggered_fiq) + ret = true; + LTRACEF("cpu %d, irq %i, enable %d\n", cpu, fiq, enable); + if (smp) + arm_gic_set_target_locked(fiq, 1U << cpu, enable ? 
~0 : 0); + if (!smp || resume_gicd) + gic_set_enable(fiq, enable); + } + } + spin_unlock(&gicd_lock); + return ret; } -static u_int current_fiq[8] = { 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff, 0x3ff }; +static void suspend_resume_fiq(bool resume_gicc, bool resume_gicd) +{ + u_int cpu = arch_curr_cpu_num(); + + ASSERT(cpu < 8); + + update_fiq_targets(cpu, resume_gicc, ~0, resume_gicd); +} status_t sm_intc_fiq_enter(void) { - u_int cpu = read_mpidr() & 7; + u_int cpu = arch_curr_cpu_num(); u_int irq = GICREG(0, GICC_IAR) & 0x3ff; + bool fiq_enabled; + + ASSERT(cpu < 8); LTRACEF("cpu %d, irq %i\n", cpu, irq); @@ -446,20 +570,19 @@ status_t sm_intc_fiq_enter(void) return ERR_NO_MSG; } - if (arm_gic_max_cpu() > 0) { - spin_lock(&gicd_lock); /* IRQs and FIQs are already masked */ - arm_gic_set_target_locked(irq, 1U << cpu, 0); - spin_unlock(&gicd_lock); - } else { - /* target register has no effect on uniprocessor systems */ - gic_set_enable(irq, 0); - } + fiq_enabled = update_fiq_targets(cpu, false, irq, false); GICREG(0, GICC_EOIR) = irq; if (current_fiq[cpu] != 0x3ff) { dprintf(INFO, "more than one fiq active: cpu %d, old %d, new %d\n", cpu, current_fiq[cpu], irq); return ERR_ALREADY_STARTED; } + + if (!fiq_enabled) { + dprintf(INFO, "got disabled fiq: cpu %d, new %d\n", cpu, irq); + return ERR_NOT_READY; + } + current_fiq[cpu] = irq; return 0; @@ -467,19 +590,16 @@ status_t sm_intc_fiq_enter(void) void sm_intc_fiq_exit(void) { - u_int cpu = read_mpidr() & 7; + u_int cpu = arch_curr_cpu_num(); + + ASSERT(cpu < 8); + LTRACEF("cpu %d, irq %i\n", cpu, current_fiq[cpu]); if (current_fiq[cpu] == 0x3ff) { dprintf(INFO, "%s: no fiq active, cpu %d\n", __func__, cpu); return; } - if (arm_gic_max_cpu() > 0) { - spin_lock(&gicd_lock); /* IRQs and FIQs are already masked */ - arm_gic_set_target_locked(current_fiq[cpu], 1U << cpu, ~0); - spin_unlock(&gicd_lock); - } else { - gic_set_enable(current_fiq[cpu], 1); - } + update_fiq_targets(cpu, true, current_fiq[cpu], false); current_fiq[cpu] = 0x3ff; } #endif diff --git a/dev/interrupt/arm_gic/include/dev/interrupt/arm_gic.h b/dev/interrupt/arm_gic/include/dev/interrupt/arm_gic.h index 7b38e4b6..ee0fe614 100644 --- a/dev/interrupt/arm_gic/include/dev/interrupt/arm_gic.h +++ b/dev/interrupt/arm_gic/include/dev/interrupt/arm_gic.h @@ -26,7 +26,6 @@ #include void arm_gic_init(void); -void arm_gic_init_secondary_cpu(void); enum { /* Ignore cpu_mask and forward interrupt to all CPUs other than the current cpu */ diff --git a/dev/timer/arm_cortex_a9/arm_cortex_a9_timer.c b/dev/timer/arm_cortex_a9/arm_cortex_a9_timer.c index b7491560..ce7077b9 100644 --- a/dev/timer/arm_cortex_a9/arm_cortex_a9_timer.c +++ b/dev/timer/arm_cortex_a9/arm_cortex_a9_timer.c @@ -30,9 +30,11 @@ #include #include #include +#include #include #include #include +#include /* driver for cortex-a9's private timer */ #define LOCAL_TRACE 0 @@ -69,6 +71,7 @@ static platform_timer_callback t_callback; static addr_t scu_control_base; +static spin_lock_t lock = SPIN_LOCK_INITIAL_VALUE; static lk_time_t periodic_interval; static lk_time_t oneshot_interval; @@ -77,6 +80,8 @@ static struct fp_32_64 timer_freq_msec_conversion; static struct fp_32_64 timer_freq_usec_conversion_inverse; static struct fp_32_64 timer_freq_msec_conversion_inverse; +static void arm_cortex_a9_timer_init_percpu(uint level); + uint64_t get_global_val(void) { uint32_t lo, hi; @@ -118,7 +123,8 @@ status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg if (unlikely(ticks > 0xffffffff)) ticks = 
0xffffffff; - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); t_callback = callback; @@ -130,7 +136,7 @@ status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg TIMREG(TIMER_LOAD) = ticks; TIMREG(TIMER_CONTROL) = (1<<2) | (1<<1) | (1<<0); // irq enable, autoreload, enable - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -145,7 +151,8 @@ status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg if (unlikely(ticks > 0xffffffff)) ticks = 0xffffffff; - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); t_callback = callback; oneshot_interval = interval; @@ -156,7 +163,7 @@ status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg TIMREG(TIMER_LOAD) = ticks; TIMREG(TIMER_CONTROL) = (1<<2) | (1<<0) | (1<<0); // irq enable, oneshot, enable - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -185,6 +192,19 @@ void arm_cortex_a9_timer_init(addr_t _scu_control_base, uint32_t freq) { scu_control_base = _scu_control_base; + arm_cortex_a9_timer_init_percpu(0); + + /* save the timer frequency for later calculations */ + timer_freq = freq; + + /* precompute the conversion factor for global time to real time */ + fp_32_64_div_32_32(&timer_freq_msec_conversion, timer_freq, 1000); + fp_32_64_div_32_32(&timer_freq_usec_conversion_inverse, 1000000, timer_freq); + fp_32_64_div_32_32(&timer_freq_msec_conversion_inverse, 1000, timer_freq); +} + +static void arm_cortex_a9_timer_init_percpu(uint level) +{ /* disable timer */ TIMREG(TIMER_CONTROL) = 0; @@ -194,16 +214,14 @@ void arm_cortex_a9_timer_init(addr_t _scu_control_base, uint32_t freq) /* ack any irqs that may be pending */ TIMREG(TIMER_ISR) = 1; - /* save the timer frequency for later calculations */ - timer_freq = freq; - - /* precompute the conversion factor for global time to real time */ - fp_32_64_div_32_32(&timer_freq_msec_conversion, timer_freq, 1000); - fp_32_64_div_32_32(&timer_freq_usec_conversion_inverse, 1000000, timer_freq); - fp_32_64_div_32_32(&timer_freq_msec_conversion_inverse, 1000, timer_freq); - + /* register the platform tick on each cpu */ register_int_handler(CPU_PRIV_TIMER_INT, &platform_tick, NULL); unmask_interrupt(CPU_PRIV_TIMER_INT); } +/* secondary cpu initialize the timer just before the kernel starts with interrupts enabled */ +LK_INIT_HOOK_FLAGS(arm_cortex_a9_timer_init_percpu, + arm_cortex_a9_timer_init_percpu, + LK_INIT_LEVEL_THREADING - 1, LK_INIT_FLAG_SECONDARY_CPUS); + /* vim: set ts=4 sw=4 expandtab: */ diff --git a/dev/timer/arm_generic/arm_generic_timer.c b/dev/timer/arm_generic/arm_generic_timer.c index 83d5542c..6193be3b 100644 --- a/dev/timer/arm_generic/arm_generic_timer.c +++ b/dev/timer/arm_generic/arm_generic_timer.c @@ -21,7 +21,9 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
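To make the precomputed conversion factors above concrete, take an illustrative private-timer frequency of 24 MHz (the real value is platform-defined, a fraction of the CPU clock); they work out to roughly:

    timer_freq_msec_conversion          = 24000000 / 1000     = 24000 ticks per ms
    timer_freq_usec_conversion_inverse  = 1000000  / 24000000 ~ 0.0417 us per tick
    timer_freq_msec_conversion_inverse  = 1000     / 24000000 ~ 0.0000417 ms per tick
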
*/ +#include #include +#include #include #include #include @@ -33,7 +35,98 @@ #include +#if ARCH_ARM64 + +/* CNTFRQ AArch64 register */ +#define TIMER_REG_CNTFRQ cntfrq_el0 + +/* CNTP AArch64 registers */ +#define TIMER_REG_CNTP_CTL cntp_ctl_el0 +#define TIMER_REG_CNTP_CVAL cntp_cval_el0 +#define TIMER_REG_CNTP_TVAL cntp_tval_el0 +#define TIMER_REG_CNTPCT cntpct_el0 + +/* CNTPS AArch64 registers */ +#define TIMER_REG_CNTPS_CTL cntps_ctl_el1 +#define TIMER_REG_CNTPS_CVAL cntps_cval_el1 +#define TIMER_REG_CNTPS_TVAL cntps_tval_el1 +#define TIMER_REG_CNTPSCT cntpct_el0 + +/* CNTV AArch64 registers */ +#define TIMER_REG_CNTV_CTL cntv_ctl_el0 +#define TIMER_REG_CNTV_CVAL cntv_cval_el0 +#define TIMER_REG_CNTV_TVAL cntv_tval_el0 +#define TIMER_REG_CNTVCT cntvct_el0 + +#define READ_TIMER_REG32(reg) ARM64_READ_SYSREG(reg) +#define READ_TIMER_REG64(reg) ARM64_READ_SYSREG(reg) +#define WRITE_TIMER_REG32(reg, val) ARM64_WRITE_SYSREG(reg, val) +#define WRITE_TIMER_REG64(reg, val) ARM64_WRITE_SYSREG(reg, val) + +#else + +/* CNTFRQ AArch32 register */ +#define TIMER_REG_CNTFRQ "c0, 0" + +/* CNTP AArch32 registers */ +#define TIMER_REG_CNTP_CTL "c2, 1" +#define TIMER_REG_CNTP_CVAL "2" +#define TIMER_REG_CNTP_TVAL "c2, 0" +#define TIMER_REG_CNTPCT "0" + +/* CNTPS AArch32 registers are banked and accessed though CNTP */ +#define CNTPS CNTP + +/* CNTV AArch32 registers */ +#define TIMER_REG_CNTV_CTL "c3, 1" +#define TIMER_REG_CNTV_CVAL "3" +#define TIMER_REG_CNTV_TVAL "c3, 0" +#define TIMER_REG_CNTVCT "1" + +#define READ_TIMER_REG32(reg) \ +({ \ + uint32_t _val; \ + __asm__ volatile("mrc p15, 0, %0, c14, " reg : "=r" (_val)); \ + _val; \ +}) + +#define READ_TIMER_REG64(reg) \ +({ \ + uint64_t _val; \ + __asm__ volatile("mrrc p15, " reg ", %0, %H0, c14" : "=r" (_val)); \ + _val; \ +}) + +#define WRITE_TIMER_REG32(reg, val) \ +({ \ + __asm__ volatile("mcr p15, 0, %0, c14, " reg :: "r" (val)); \ + ISB; \ +}) + +#define WRITE_TIMER_REG64(reg, val) \ +({ \ + __asm__ volatile("mcrr p15, " reg ", %0, %H0, c14" :: "r" (val)); \ + ISB; \ +}) + +#endif + +#ifndef TIMER_ARM_GENERIC_SELECTED +#define TIMER_ARM_GENERIC_SELECTED CNTP +#endif + +#define COMBINE3(a,b,c) a ## b ## c +#define XCOMBINE3(a,b,c) COMBINE3(a, b, c) + +#define SELECTED_TIMER_REG(reg) XCOMBINE3(TIMER_REG_, TIMER_ARM_GENERIC_SELECTED, reg) +#define TIMER_REG_CTL SELECTED_TIMER_REG(_CTL) +#define TIMER_REG_CVAL SELECTED_TIMER_REG(_CVAL) +#define TIMER_REG_TVAL SELECTED_TIMER_REG(_TVAL) +#define TIMER_REG_CT SELECTED_TIMER_REG(CT) + + static platform_timer_callback t_callback; +static int timer_irq; struct fp_32_64 cntpct_per_ms; struct fp_32_64 ms_per_cntpct; @@ -58,7 +151,7 @@ static uint32_t read_cntfrq(void) { uint32_t cntfrq; - __asm__ volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (cntfrq)); + cntfrq = READ_TIMER_REG32(TIMER_REG_CNTFRQ); LTRACEF("cntfrq: 0x%08x, %u\n", cntfrq, cntfrq); return cntfrq; } @@ -67,33 +160,33 @@ static uint32_t read_cntp_ctl(void) { uint32_t cntp_ctl; - __asm__ volatile("mrc p15, 0, %0, c14, c2, 1" : "=r" (cntp_ctl)); + cntp_ctl = READ_TIMER_REG32(TIMER_REG_CTL); return cntp_ctl; } static void write_cntp_ctl(uint32_t cntp_ctl) { - LTRACEF_LEVEL(3, "cntp_ctl: 0x%x\n", cntp_ctl); - __asm__ volatile("mcr p15, 0, %0, c14, c2, 1" :: "r" (cntp_ctl)); + LTRACEF_LEVEL(3, "cntp_ctl: 0x%x %x\n", cntp_ctl, read_cntp_ctl()); + WRITE_TIMER_REG32(TIMER_REG_CTL, cntp_ctl); } static void write_cntp_cval(uint64_t cntp_cval) { LTRACEF_LEVEL(3, "cntp_cval: 0x%016llx, %llu\n", cntp_cval, cntp_cval); - __asm__ volatile("mcrr p15, 2, %0, %H0, 
c14" :: "r" (cntp_cval)); + WRITE_TIMER_REG64(TIMER_REG_CVAL, cntp_cval); } static void write_cntp_tval(int32_t cntp_tval) { LTRACEF_LEVEL(3, "cntp_tval: 0x%08x, %d\n", cntp_tval, cntp_tval); - __asm__ volatile("mcr p15, 0, %0, c14, c2, 0" :: "r" (cntp_tval)); + WRITE_TIMER_REG32(TIMER_REG_TVAL, cntp_tval); } static uint64_t read_cntpct(void) { uint64_t cntpct; - __asm__ volatile("mrrc p15, 0, %0, %H0, c14" : "=r" (cntpct)); + cntpct = READ_TIMER_REG64(TIMER_REG_CT); LTRACEF_LEVEL(3, "cntpct: 0x%016llx, %llu\n", cntpct, cntpct); return cntpct; } @@ -120,6 +213,7 @@ status_t platform_set_oneshot_timer(platform_timer_callback callback, void *arg, else write_cntp_cval(read_cntpct() + cntpct_interval); write_cntp_ctl(1); + return 0; } @@ -138,10 +232,6 @@ lk_time_t current_time(void) return cntpct_to_lk_time(read_cntpct()); } -void arm_generic_timer_init_secondary_cpu(void) -{ -} - static uint32_t abs_int32(int32_t a) { return (a > 0) ? a : -a; @@ -231,13 +321,19 @@ static void arm_generic_timer_init_conversion_factors(uint32_t cntfrq) LTRACEF("us_per_cntpct: %08x.%08x%08x\n", us_per_cntpct.l0, us_per_cntpct.l32, us_per_cntpct.l64); } -void arm_generic_timer_init(int irq) +void arm_generic_timer_init(int irq, uint32_t freq_override) { - uint32_t cntfrq = read_cntfrq(); + uint32_t cntfrq; - if (!cntfrq) { - TRACEF("Failed to initialize timer, frequency is 0\n"); - return; + if (freq_override == 0) { + cntfrq = read_cntfrq(); + + if (!cntfrq) { + TRACEF("Failed to initialize timer, frequency is 0\n"); + return; + } + } else { + cntfrq = freq_override; } #if LOCAL_TRACE @@ -252,7 +348,33 @@ void arm_generic_timer_init(int irq) arm_generic_timer_init_conversion_factors(cntfrq); test_time_conversions(cntfrq); + LTRACEF("register irq %d on cpu %d\n", irq, arch_curr_cpu_num()); register_int_handler(irq, &platform_tick, NULL); unmask_interrupt(irq); + + timer_irq = irq; } +static void arm_generic_timer_init_secondary_cpu(uint level) +{ + LTRACEF("register irq %d on cpu %d\n", timer_irq, arch_curr_cpu_num()); + register_int_handler(timer_irq, &platform_tick, NULL); + unmask_interrupt(timer_irq); +} + +/* secondary cpu initialize the timer just before the kernel starts with interrupts enabled */ +LK_INIT_HOOK_FLAGS(arm_generic_timer_init_secondary_cpu, + arm_generic_timer_init_secondary_cpu, + LK_INIT_LEVEL_THREADING - 1, LK_INIT_FLAG_SECONDARY_CPUS); + +static void arm_generic_timer_resume_cpu(uint level) +{ + /* Always trigger a timer interrupt on each cpu for now */ + write_cntp_tval(0); + write_cntp_ctl(1); +} + +LK_INIT_HOOK_FLAGS(arm_generic_timer_resume_cpu, arm_generic_timer_resume_cpu, + LK_INIT_LEVEL_PLATFORM, LK_INIT_FLAG_CPU_RESUME); + +/* vim: set noexpandtab: */ diff --git a/dev/timer/arm_generic/include/dev/timer/arm_generic.h b/dev/timer/arm_generic/include/dev/timer/arm_generic.h index f7f95ced..368d43c3 100644 --- a/dev/timer/arm_generic/include/dev/timer/arm_generic.h +++ b/dev/timer/arm_generic/include/dev/timer/arm_generic.h @@ -25,8 +25,8 @@ #include -void arm_generic_timer_init(int irq); -void arm_generic_timer_init_secondary_cpu(void); +/* if freq_override != 0, use that as the operating frequency instead of CNTFRQ register */ +void arm_generic_timer_init(int irq, uint32_t freq_override); #endif diff --git a/engine.mk b/engine.mk index 690a906e..a511eb19 100644 --- a/engine.mk +++ b/engine.mk @@ -49,7 +49,7 @@ OUTBIN := $(BUILDDIR)/lk.bin OUTELF := $(BUILDDIR)/lk.elf CONFIGHEADER := $(BUILDDIR)/config.h -GLOBAL_INCLUDES := $(BUILDDIR) $(LKROOT)/include $(addsuffix 
/include,$(LKINC)) +GLOBAL_INCLUDES := $(BUILDDIR) $(addsuffix /include,$(LKINC)) GLOBAL_OPTFLAGS ?= $(ARCH_OPTFLAGS) GLOBAL_COMPILEFLAGS := -g -fno-builtin -finline -include $(CONFIGHEADER) GLOBAL_COMPILEFLAGS += -W -Wall -Wno-multichar -Wno-unused-parameter -Wno-unused-function -Wno-unused-label @@ -60,7 +60,13 @@ GLOBAL_CPPFLAGS := -fno-exceptions -fno-rtti -fno-threadsafe-statics GLOBAL_ASMFLAGS := -DASSEMBLY GLOBAL_LDFLAGS := -GLOBAL_LDFLAGS += -L $(LKROOT) +GLOBAL_LDFLAGS += $(addprefix -L,$(LKINC)) + +# Architecture specific compile flags +ARCH_COMPILEFLAGS := +ARCH_CFLAGS := +ARCH_CPPFLAGS := +ARCH_ASMFLAGS := # top level rule all:: $(OUTBIN) $(OUTELF).lst $(OUTELF).debug.lst $(OUTELF).sym $(OUTELF).sym.sorted $(OUTELF).size @@ -93,6 +99,9 @@ ALLMODULES := # add any external module dependencies MODULES := $(EXTERNAL_MODULES) +# any .mk specified here will be included before build.mk +EXTRA_BUILDRULES := + # any rules you put here will also be built by the system before considered being complete EXTRA_BUILDDEPS := @@ -179,6 +188,7 @@ OBJCOPY := $(TOOLCHAIN_PREFIX)objcopy CPPFILT := $(TOOLCHAIN_PREFIX)c++filt SIZE := $(TOOLCHAIN_PREFIX)size NM := $(TOOLCHAIN_PREFIX)nm +STRIP := $(TOOLCHAIN_PREFIX)strip # try to have the compiler output colorized error messages if available export GCC_COLORS ?= 1 diff --git a/include/alloca.h b/include/alloca.h new file mode 100644 index 00000000..ce617fed --- /dev/null +++ b/include/alloca.h @@ -0,0 +1,6 @@ +#if !defined(__ALLOCA_H) +#define __ALLOCA_H + +#define alloca(size) __builtin_alloca (size) + +#endif /* !__ALLOCA_H */ diff --git a/include/arch/mmu.h b/include/arch/mmu.h index 3180436f..dc6386b3 100644 --- a/include/arch/mmu.h +++ b/include/arch/mmu.h @@ -37,10 +37,14 @@ __BEGIN_CDECLS #define ARCH_MMU_FLAG_PERM_RO (1<<3) #define ARCH_MMU_FLAG_PERM_NO_EXECUTE (1<<4) #define ARCH_MMU_FLAG_NS (1<<5) /* NON-SECURE */ +#define ARCH_MMU_FLAG_INVALID (1<<7) /* indicates that flags are not specified */ int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags); int arch_mmu_unmap(vaddr_t vaddr, uint count); status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags); +vaddr_t arch_mmu_pick_spot(vaddr_t base, uint prev_region_arch_mmu_flags, + vaddr_t end, uint next_region_arch_mmu_flags, + vaddr_t align, size_t size, uint arch_mmu_flags); void arch_disable_mmu(void); diff --git a/include/arch/mp.h b/include/arch/mp.h new file mode 100644 index 00000000..2c3b2c56 --- /dev/null +++ b/include/arch/mp.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
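arch_mmu_pick_spot, declared above, gives the architecture a say in where the VM places a region relative to its neighbours' mapping flags (useful when cached and uncached mappings must not share a large translation entry). For an architecture with no such constraint, a minimal sketch would just honour the requested alignment (ROUNDUP assumed from the usual lk macros):

    vaddr_t arch_mmu_pick_spot(vaddr_t base, uint prev_region_arch_mmu_flags,
                               vaddr_t end, uint next_region_arch_mmu_flags,
                               vaddr_t align, size_t size, uint arch_mmu_flags)
    {
        /* no architectural placement rules: just align up within [base, end) */
        return ROUNDUP(base, align);
    }
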
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#pragma once + +#include +#include + +/* send inter processor interrupt, if supported */ +status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi); + +void arch_mp_init_percpu(void); diff --git a/include/arch/ops.h b/include/arch/ops.h index d0e4f2ee..f0977519 100644 --- a/include/arch/ops.h +++ b/include/arch/ops.h @@ -45,6 +45,11 @@ static int atomic_or(volatile int *ptr, int val); static uint32_t arch_cycle_count(void); +static uint arch_curr_cpu_num(void); + +/* Use to align structures on cache lines to avoid cpu aliasing. */ +#define __CPU_ALIGN __ALIGNED(CACHE_LINE) + #endif // !ASSEMBLY #define ICACHE 1 #define DCACHE 2 diff --git a/include/bits.h b/include/bits.h index b430b7b5..a2ea571a 100644 --- a/include/bits.h +++ b/include/bits.h @@ -28,30 +28,34 @@ #define clz(x) __builtin_clz(x) #define ctz(x) __builtin_ctz(x) -#define BIT(x, bit) ((x) & (1 << (bit))) +#define BIT(x, bit) ((x) & (1UL << (bit))) #define BIT_SHIFT(x, bit) (((x) >> (bit)) & 1) -#define BITS(x, high, low) ((x) & (((1<<((high)+1))-1) & ~((1<<(low))-1))) -#define BITS_SHIFT(x, high, low) (((x) >> (low)) & ((1<<((high)-(low)+1))-1)) -#define BIT_SET(x, bit) (((x) & (1 << (bit))) ? 1 : 0) +#define BITS(x, high, low) ((x) & (((1UL<<((high)+1))-1) & ~((1UL<<(low))-1))) +#define BITS_SHIFT(x, high, low) (((x) >> (low)) & ((1UL<<((high)-(low)+1))-1)) +#define BIT_SET(x, bit) (((x) & (1UL << (bit))) ? 1 : 0) #define BITMAP_BITS_PER_WORD (sizeof(unsigned long) * 8) #define BITMAP_NUM_WORDS(x) (((x) + BITMAP_BITS_PER_WORD - 1) / BITMAP_BITS_PER_WORD) #define BITMAP_WORD(x) ((x) / BITMAP_BITS_PER_WORD) #define BITMAP_BIT_IN_WORD(x) ((x) & (BITMAP_BITS_PER_WORD - 1)) +#define BITMAP_BITS_PER_INT (sizeof(unsigned int) * 8) +#define BITMAP_BIT_IN_INT(x) ((x) & (BITMAP_BITS_PER_INT - 1)) +#define BITMAP_INT(x) ((x) / BITMAP_BITS_PER_INT) + #define BIT_MASK(x) (((x) >= sizeof(unsigned long) * 8) ? (0UL-1) : ((1UL << (x)) - 1)) static inline int bitmap_set(unsigned long *bitmap, int bit) { - unsigned long mask = 1 << BITMAP_BIT_IN_WORD(bit); - return atomic_or((int*)&bitmap[BITMAP_WORD(bit)], mask) & mask ? 1 : 0; + unsigned long mask = 1 << BITMAP_BIT_IN_INT(bit); + return atomic_or(&((int*)bitmap)[BITMAP_INT(bit)], mask) & mask ? 1 : 0; } static inline int bitmap_clear(unsigned long *bitmap, int bit) { - unsigned long mask = 1 << BITMAP_BIT_IN_WORD(bit); + unsigned long mask = 1 << BITMAP_BIT_IN_INT(bit); - return atomic_and((int*)&bitmap[BITMAP_WORD(bit)], ~mask) & mask ? 1:0; + return atomic_and(&((int*)bitmap)[BITMAP_INT(bit)], ~mask) & mask ? 
1:0; } static inline int bitmap_test(unsigned long *bitmap, int bit) diff --git a/include/debug.h b/include/debug.h index e347fe5a..27b9b909 100644 --- a/include/debug.h +++ b/include/debug.h @@ -27,8 +27,7 @@ #include #include #include - -__BEGIN_CDECLS +#include #if !defined(LK_DEBUGLEVEL) #define LK_DEBUGLEVEL 0 @@ -40,11 +39,20 @@ __BEGIN_CDECLS #define INFO 1 #define SPEW 2 +typedef struct __print_callback print_callback_t; +struct __print_callback { + struct list_node entry; + void (*print)(print_callback_t *cb, const char *str, size_t len); +}; + +__BEGIN_CDECLS + #if !DISABLE_DEBUG_OUTPUT /* input/output */ -#define _dputc(c) platform_dputc(c) +void _dputc(char c); int _dputs(const char *str); +int _dwrite(const char *ptr, size_t len); int _dprintf(const char *fmt, ...) __PRINTFLIKE(1, 2); int _dvprintf(const char *fmt, va_list ap); @@ -57,6 +65,7 @@ void hexdump8(const void *ptr, size_t len); /* input/output */ static inline void _dputc(char c) { } static inline int _dputs(const char *str) { return 0; } +static inline int _dwrite(const char *ptr, size_t len) { return 0; } static inline int __PRINTFLIKE(1, 2) _dprintf(const char *fmt, ...) { return 0; } static inline int _dvprintf(const char *fmt, va_list ap) { return 0; } @@ -66,8 +75,13 @@ static inline void hexdump8(const void *ptr, size_t len) { } #endif /* DISABLE_DEBUG_OUTPUT */ +/* register callback to receive debug prints */ +void register_print_callback(print_callback_t *cb); +void unregister_print_callback(print_callback_t *cb); + #define dputc(level, str) do { if ((level) <= LK_DEBUGLEVEL) { _dputc(str); } } while (0) #define dputs(level, str) do { if ((level) <= LK_DEBUGLEVEL) { _dputs(str); } } while (0) +#define dwrite(level, ptr, len) do { if ((level) <= LK_DEBUGLEVEL) { _dwrite(ptr, len); } } while(0) #define dprintf(level, x...) do { if ((level) <= LK_DEBUGLEVEL) { _dprintf(x); } } while (0) #define dvprintf(level, x...) 
do { if ((level) <= LK_DEBUGLEVEL) { _dvprintf(x); } } while (0) diff --git a/include/endian.h b/include/endian.h index bff71dbf..90da5eab 100644 --- a/include/endian.h +++ b/include/endian.h @@ -69,6 +69,10 @@ #define ntohl(n) BE32(n) #define htonl(h) BE32(h) +/* 64-bit network byte swap stuff */ +#define htobe64(h) BE64(h) +#define be64toh(b) BE64(b) + // some memory access macros #if __POWERPC__ #include diff --git a/include/err.h b/include/err.h index a24459b2..f315dd39 100644 --- a/include/err.h +++ b/include/err.h @@ -65,6 +65,10 @@ #define ERR_OUT_OF_RANGE (-37) #define ERR_NOT_CONFIGURED (-38) #define ERR_NOT_MOUNTED (-39) +#define ERR_FAULT (-40) +#define ERR_NO_RESOURCES (-41) +#define ERR_BAD_HANDLE (-42) +#define ERR_ACCESS_DENIED (-43) #define ERR_USER_BASE (-16384) diff --git a/include/inttypes.h b/include/inttypes.h index f6681e38..4e542d4a 100644 --- a/include/inttypes.h +++ b/include/inttypes.h @@ -23,6 +23,9 @@ #ifndef __INTTYPES_H #define __INTTYPES_H +#define PRIu32 "u" +#define PRIx32 "x" + #include #endif diff --git a/include/kernel/mp.h b/include/kernel/mp.h new file mode 100644 index 00000000..90df39e1 --- /dev/null +++ b/include/kernel/mp.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#pragma once + +#include +#include +#include +#include + +void mp_init(void); + +typedef uint32_t mp_cpu_mask_t; + +#define MP_CPU_ALL_BUT_LOCAL (UINT32_MAX) + +/* by default, mp_mbx_reschedule does not signal to cpus that are running realtime + * threads. Override this behavior. 
+ */ +#define MP_RESCHEDULE_FLAG_REALTIME (0x1) + +void mp_reschedule(mp_cpu_mask_t target, uint flags); +void mp_set_curr_cpu_active(bool active); + +typedef enum { + MP_IPI_GENERIC, + MP_IPI_RESCHEDULE, +} mp_ipi_t; + +/* called from arch code during reschedule irq */ +enum handler_return mp_mbx_reschedule_irq(void); + +/* global mp state to track what the cpus are up to */ +struct mp_state { + volatile mp_cpu_mask_t active_cpus; + + /* only safely accessible with thread lock held */ + mp_cpu_mask_t idle_cpus; + mp_cpu_mask_t realtime_cpus; +}; + +extern struct mp_state mp; + +/* must be called with the thread lock held */ +static inline void mp_set_cpu_idle(uint cpu) +{ + mp.idle_cpus |= 1UL << cpu; +} + +static inline void mp_set_cpu_busy(uint cpu) +{ + mp.idle_cpus &= ~(1UL << cpu); +} + +static inline mp_cpu_mask_t mp_get_idle_mask(void) +{ + return mp.idle_cpus; +} + +static inline void mp_set_cpu_realtime(uint cpu) +{ + mp.realtime_cpus |= 1UL << cpu; +} + +static inline void mp_set_cpu_non_realtime(uint cpu) +{ + mp.realtime_cpus &= ~(1UL << cpu); +} + +static inline mp_cpu_mask_t mp_get_realtime_mask(void) +{ + return mp.realtime_cpus; +} + + diff --git a/include/kernel/spinlock.h b/include/kernel/spinlock.h new file mode 100644 index 00000000..b9bf246f --- /dev/null +++ b/include/kernel/spinlock.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#pragma once + +#include +#include + +__BEGIN_CDECLS + +/* interrupts should already be disabled */ +static inline void spin_lock(spin_lock_t *lock) +{ + arch_spin_lock(lock); +} + + /* Returns 0 on success, non-0 on failure */ +static inline int spin_trylock(spin_lock_t *lock) +{ + return arch_spin_trylock(lock); +} + +/* interrupts should already be disabled */ +static inline void spin_unlock(spin_lock_t *lock) +{ + arch_spin_unlock(lock); +} + +static inline void spin_lock_init(spin_lock_t *lock) +{ + arch_spin_lock_init(lock); +} + +static inline bool spin_lock_held(spin_lock_t *lock) +{ + return arch_spin_lock_held(lock); +} + +/* spin lock irq save flags: */ + +/* Possible future flags: + * SPIN_LOCK_FLAG_PMR_MASK = 0x000000ff + * SPIN_LOCK_FLAG_PREEMPTION = 0x00000100 + * SPIN_LOCK_FLAG_SET_PMR = 0x00000200 + */ + +/* Generic flags */ +#define SPIN_LOCK_FLAG_INTERRUPTS ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS + +/* same as spin lock, but save disable and save interrupt state first */ +static inline void spin_lock_save( + spin_lock_t *lock, + spin_lock_saved_state_t *statep, + spin_lock_save_flags_t flags) +{ + arch_interrupt_save(statep, flags); + spin_lock(lock); +} + +/* restore interrupt state before unlocking */ +static inline void spin_unlock_restore( + spin_lock_t *lock, + spin_lock_saved_state_t old_state, + spin_lock_save_flags_t flags) +{ + spin_unlock(lock); + arch_interrupt_restore(old_state, flags); +} + +/* hand(ier) routines */ +#define spin_lock_irqsave(lock, statep) spin_lock_save(lock, &(statep), SPIN_LOCK_FLAG_INTERRUPTS) +#define spin_unlock_irqrestore(lock, statep) spin_unlock_restore(lock, statep, SPIN_LOCK_FLAG_INTERRUPTS) + +__END_CDECLS diff --git a/include/kernel/thread.h b/include/kernel/thread.h index 4b981b23..e84f9af9 100644 --- a/include/kernel/thread.h +++ b/include/kernel/thread.h @@ -30,6 +30,7 @@ #include #include #include +#include #include enum thread_state { @@ -45,6 +46,9 @@ typedef int (*thread_start_routine)(void *arg); /* thread local storage */ enum thread_tls_list { +#ifdef WITH_LIB_UTHREAD + TLS_ENTRY_UTHREAD, +#endif MAX_TLS_ENTRY }; @@ -52,6 +56,7 @@ enum thread_tls_list { #define THREAD_FLAG_FREE_STACK 0x2 #define THREAD_FLAG_FREE_STRUCT 0x4 #define THREAD_FLAG_REAL_TIME 0x8 +#define THREAD_FLAG_IDLE 0x10 #define THREAD_MAGIC 'thrd' @@ -63,9 +68,10 @@ typedef struct thread { struct list_node queue_node; int priority; enum thread_state state; - int saved_critical_section_count; int remaining_quantum; unsigned int flags; + int curr_cpu; + int pinned_cpu; /* only run on pinned_cpu if >= 0 */ /* if blocked, a pointer to the wait queue */ struct wait_queue *blocking_wait_queue; @@ -113,6 +119,8 @@ typedef struct thread { void thread_init_early(void); void thread_init(void); void thread_become_idle(void) __NO_RETURN; +void thread_secondary_cpu_init_early(void); +void thread_secondary_cpu_entry(void) __NO_RETURN; void thread_set_name(const char *name); void thread_set_priority(int priority); thread_t *thread_create(const char *name, thread_start_routine entry, void *arg, int priority, size_t stack_size); @@ -126,6 +134,7 @@ status_t thread_detach_and_resume(thread_t *t); status_t thread_set_real_time(thread_t *t); void dump_thread(thread_t *t); +void arch_dump_thread(thread_t *t); void dump_all_threads(void); /* scheduler routines */ @@ -134,6 +143,10 @@ void thread_preempt(void); /* get preempted (inserted into head of run queue) */ void thread_block(void); /* block on something and reschedule */ void thread_unblock(thread_t *t, bool 
resched); /* go back in the run queue */ +#ifdef WITH_LIB_UTHREAD +void uthread_context_switch(thread_t *oldthread, thread_t *newthread); +#endif + /* called on every timer tick for the scheduler to do quantum expiration */ enum handler_return thread_timer_tick(void); @@ -141,36 +154,11 @@ enum handler_return thread_timer_tick(void); thread_t *get_current_thread(void); void set_current_thread(thread_t *); -/* critical sections */ -extern int critical_section_count; +/* scheduler lock */ +extern spin_lock_t thread_lock; -static inline __ALWAYS_INLINE void enter_critical_section(void) -{ - CF; - if (critical_section_count == 0) - arch_disable_ints(); - critical_section_count++; - CF; -} - -static inline __ALWAYS_INLINE void exit_critical_section(void) -{ - CF; - critical_section_count--; - if (critical_section_count == 0) - arch_enable_ints(); - CF; -} - -static inline __ALWAYS_INLINE bool in_critical_section(void) -{ - CF; - return critical_section_count > 0; -} - -/* only used by interrupt glue */ -static inline void inc_critical_section(void) { critical_section_count++; } -static inline void dec_critical_section(void) { critical_section_count--; } +#define THREAD_LOCK(state) spin_lock_saved_state_t state; spin_lock_irqsave(&thread_lock, state) +#define THREAD_UNLOCK(state) spin_unlock_irqrestore(&thread_lock, state) /* thread local storage */ static inline __ALWAYS_INLINE uintptr_t tls_get(uint entry) @@ -195,18 +183,22 @@ static inline __ALWAYS_INLINE uintptr_t tls_set(uint entry, uintptr_t val) struct thread_stats { lk_bigtime_t idle_time; lk_bigtime_t last_idle_timestamp; - int reschedules; - int context_switches; - int preempts; - int yields; - int interrupts; /* platform code increment this */ - int timer_ints; /* timer code increment this */ - int timers; /* timer code increment this */ + ulong reschedules; + ulong context_switches; + ulong preempts; + ulong yields; + ulong interrupts; /* platform code increment this */ + ulong timer_ints; /* timer code increment this */ + ulong timers; /* timer code increment this */ + +#if WITH_SMP + ulong reschedule_ipis; +#endif }; -extern struct thread_stats thread_stats; +extern struct thread_stats thread_stats[SMP_MAX_CPUS]; -#define THREAD_STATS_INC(name) do { thread_stats.name++; } while(0) +#define THREAD_STATS_INC(name) do { thread_stats[arch_curr_cpu_num()].name++; } while(0) #else @@ -216,3 +208,4 @@ extern struct thread_stats thread_stats; #endif +/* vim: set ts=4 sw=4 noexpandtab: */ diff --git a/include/kernel/vm.h b/include/kernel/vm.h index 5c1aa2c1..cce6f5e7 100644 --- a/include/kernel/vm.h +++ b/include/kernel/vm.h @@ -22,10 +22,26 @@ */ #pragma once +/* some assembly #defines, need to match the structure below */ +#if IS_64BIT +#define __MMU_INITIAL_MAPPING_PHYS_OFFSET 0 +#define __MMU_INITIAL_MAPPING_VIRT_OFFSET 8 +#define __MMU_INITIAL_MAPPING_SIZE_OFFSET 16 +#define __MMU_INITIAL_MAPPING_FLAGS_OFFSET 24 +#define __MMU_INITIAL_MAPPING_SIZE 40 +#else +#define __MMU_INITIAL_MAPPING_PHYS_OFFSET 0 +#define __MMU_INITIAL_MAPPING_VIRT_OFFSET 4 +#define __MMU_INITIAL_MAPPING_SIZE_OFFSET 8 +#define __MMU_INITIAL_MAPPING_FLAGS_OFFSET 12 +#define __MMU_INITIAL_MAPPING_SIZE 20 +#endif + /* flags for initial mapping struct */ #define MMU_INITIAL_MAPPING_TEMPORARY (0x1) #define MMU_INITIAL_MAPPING_FLAG_UNCACHED (0x2) #define MMU_INITIAL_MAPPING_FLAG_DEVICE (0x4) +#define MMU_INITIAL_MAPPING_FLAG_DYNAMIC (0x8) /* entry has to be patched up by platform_reset */ #ifndef ASSEMBLY @@ -50,6 +66,13 @@ struct mmu_initial_mapping { const char 
*name; }; +/* Assert that the assembly macros above match this struct. */ +STATIC_ASSERT(__offsetof(struct mmu_initial_mapping, phys) == __MMU_INITIAL_MAPPING_PHYS_OFFSET); +STATIC_ASSERT(__offsetof(struct mmu_initial_mapping, virt) == __MMU_INITIAL_MAPPING_VIRT_OFFSET); +STATIC_ASSERT(__offsetof(struct mmu_initial_mapping, size) == __MMU_INITIAL_MAPPING_SIZE_OFFSET); +STATIC_ASSERT(__offsetof(struct mmu_initial_mapping, flags) == __MMU_INITIAL_MAPPING_FLAGS_OFFSET); +STATIC_ASSERT(sizeof(struct mmu_initial_mapping) == __MMU_INITIAL_MAPPING_SIZE); + /* Platform or target must fill out one of these to set up the initial memory map * for kernel and enough IO space to boot. */ @@ -77,7 +100,7 @@ STATIC_ASSERT(KERNEL_ASPACE_BASE + (KERNEL_ASPACE_SIZE - 1) > KERNEL_ASPACE_BASE static inline bool is_kernel_address(vaddr_t va) { - return (va >= KERNEL_ASPACE_BASE && va <= (KERNEL_ASPACE_BASE + KERNEL_ASPACE_SIZE)); + return (va >= KERNEL_ASPACE_BASE && va <= (KERNEL_ASPACE_BASE + KERNEL_ASPACE_SIZE - 1)); } /* physical allocator */ @@ -183,7 +206,7 @@ status_t vmm_reserve_space(vmm_aspace_t *aspace, const char *name, size_t size, /* allocate a region of virtual space that maps a physical piece of address space. the physical pages that back this are not allocated from the pmm. */ -status_t vmm_alloc_physical(vmm_aspace_t *aspace, const char *name, size_t size, void **ptr, paddr_t paddr, uint vmm_flags, uint arch_mmu_flags) +status_t vmm_alloc_physical(vmm_aspace_t *aspace, const char *name, size_t size, void **ptr, uint8_t align_log2, paddr_t paddr, uint vmm_flags, uint arch_mmu_flags) __NONNULL((1)); /* allocate a region of memory backed by newly allocated contiguous physical memory */ diff --git a/include/kernel/wait.h b/include/kernel/wait.h index 8d23d69c..7f2c156c 100644 --- a/include/kernel/wait.h +++ b/include/kernel/wait.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2012 Travis Geiselbrecht + * Copyright (c) 2008-2014 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files diff --git a/include/lib/cbuf.h b/include/lib/cbuf.h index 45e116dd..c8e77c03 100644 --- a/include/lib/cbuf.h +++ b/include/lib/cbuf.h @@ -25,6 +25,7 @@ #include #include +#include #include typedef struct cbuf { @@ -33,6 +34,7 @@ typedef struct cbuf { uint len_pow2; char *buf; event_t event; + spin_lock_t lock; } cbuf_t; /** diff --git a/include/lk/init.h b/include/lk/init.h index fd1b1ae2..d5744b38 100644 --- a/include/lk/init.h +++ b/include/lk/init.h @@ -7,8 +7,6 @@ * LK's init system */ -int lk_init_level(uint level); - typedef void (*lk_init_hook)(uint level); enum lk_init_level { @@ -29,25 +27,50 @@ enum lk_init_level { LK_INIT_LEVEL_LAST = UINT_MAX, }; +enum lk_init_flags { + LK_INIT_FLAG_PRIMARY_CPU = 0x1, + LK_INIT_FLAG_SECONDARY_CPUS = 0x2, + LK_INIT_FLAG_ALL_CPUS = LK_INIT_FLAG_PRIMARY_CPU | LK_INIT_FLAG_SECONDARY_CPUS, + LK_INIT_FLAG_CPU_SUSPEND = 0x4, + LK_INIT_FLAG_CPU_RESUME = 0x8, +}; + +void lk_init_level(enum lk_init_flags flags, uint start_level, uint stop_level); + +static inline void lk_primary_cpu_init_level(uint start_level, uint stop_level) { + lk_init_level(LK_INIT_FLAG_PRIMARY_CPU, start_level, stop_level); +} + +static inline void lk_init_level_all(enum lk_init_flags flags) { + lk_init_level(flags, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_LAST); +} + struct lk_init_struct { uint level; + uint flags; lk_init_hook hook; const char *name; }; #ifdef ARCH_X86_64 -#define LK_INIT_HOOK(_name, _hook, _level) \ 
+#define LK_INIT_HOOK_FLAGS(_name, _hook, _level, _flags) \ const struct lk_init_struct _init_struct_##_name __ALIGNED(8) __SECTION(".lk_init") = { \ .level = _level, \ + .flags = _flags, \ .hook = _hook, \ .name = #_name, \ }; #else -#define LK_INIT_HOOK(_name, _hook, _level) \ +#define LK_INIT_HOOK_FLAGS(_name, _hook, _level, _flags) \ const struct lk_init_struct _init_struct_##_name __SECTION(".lk_init") = { \ .level = _level, \ + .flags = _flags, \ .hook = _hook, \ .name = #_name, \ }; #endif + +#define LK_INIT_HOOK(_name, _hook, _level) \ + LK_INIT_HOOK_FLAGS(_name, _hook, _level, LK_INIT_FLAG_PRIMARY_CPU) + // vim: set ts=4 sw=4 expandtab: diff --git a/include/lk/main.h b/include/lk/main.h new file mode 100644 index 00000000..bcf00428 --- /dev/null +++ b/include/lk/main.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include + + +void lk_main(ulong arg0, ulong arg1, ulong arg2, ulong arg3) __NO_RETURN __EXTERNALLY_VISIBLE; +void lk_secondary_cpu_entry(void); +void lk_init_secondary_cpus(uint secondary_cpu_count); diff --git a/include/stdlib.h b/include/stdlib.h index 540febb9..46eeedab 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -49,7 +49,12 @@ unsigned long long atoull(const char *num); #define STACKBUF_DMA_ALIGN(var, size) \ uint8_t __##var[(size) + CACHE_LINE]; uint8_t *var = (uint8_t *)(ROUNDUP((addr_t)__##var, CACHE_LINE)) +void abort(void) __attribute__((noreturn)); void qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *)); +void *bsearch(const void *key, const void *base, size_t num_elems, size_t size, + int (*compare)(const void *, const void *)); +unsigned long int strtoul(const char *nptr, char **endptr, int base); +char *getenv(const char *name); #endif diff --git a/include/strings.h b/include/strings.h new file mode 100644 index 00000000..324dd8c6 --- /dev/null +++ b/include/strings.h @@ -0,0 +1,6 @@ +#if !defined(__STRINGS_H) +#define __STRINGS_H + +int strcasecmp(const char *s1, const char *s2); + +#endif /* !__STRINGS_H */ diff --git a/include/sys/types.h b/include/sys/types.h index e65e5d05..d1769eb5 100644 --- a/include/sys/types.h +++ b/include/sys/types.h @@ -46,14 +46,14 @@ typedef uintptr_t paddr_t; typedef int kobj_id; -typedef unsigned long lk_time_t; +typedef uint32_t lk_time_t; typedef unsigned long long lk_bigtime_t; -#define INFINITE_TIME ULONG_MAX +#define INFINITE_TIME UINT32_MAX -#define TIME_GTE(a, b) ((long)((a) - (b)) >= 0) -#define TIME_LTE(a, b) ((long)((a) - (b)) <= 0) -#define TIME_GT(a, b) ((long)((a) - (b)) > 0) -#define TIME_LT(a, b) ((long)((a) - (b)) < 0) +#define TIME_GTE(a, b) ((int32_t)((a) - (b)) >= 0) +#define TIME_LTE(a, b) ((int32_t)((a) - (b)) <= 0) +#define TIME_GT(a, b) ((int32_t)((a) - (b)) > 0) +#define TIME_LT(a, b) ((int32_t)((a) - (b)) < 0) enum handler_return { INT_NO_RESCHEDULE = 0, diff --git a/kernel/debug.c b/kernel/debug.c index c28402ae..d66cbaa0 100644 --- a/kernel/debug.c +++ b/kernel/debug.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -73,43 +74,74 @@ static int cmd_threads(int argc, const cmd_args *argv) #if THREAD_STATS static int cmd_threadstats(int argc, const cmd_args *argv) { - printf("thread stats:\n"); - printf("\ttotal idle time: %lld\n", thread_stats.idle_time); - printf("\ttotal busy time: %lld\n", current_time_hires() - thread_stats.idle_time); - printf("\treschedules: %d\n", thread_stats.reschedules); - printf("\tcontext_switches: %d\n", thread_stats.context_switches); - printf("\tpreempts: %d\n", thread_stats.preempts); - printf("\tyields: 
%d\n", thread_stats.yields); - printf("\tinterrupts: %d\n", thread_stats.interrupts); - printf("\ttimer interrupts: %d\n", thread_stats.timer_ints); - printf("\ttimers: %d\n", thread_stats.timers); + for (uint i = 0; i < SMP_MAX_CPUS; i++) { + if (!(mp.active_cpus & (1 << i))) + continue; + + printf("thread stats (cpu %d):\n", i); + printf("\ttotal idle time: %lld\n", thread_stats[i].idle_time); + printf("\ttotal busy time: %lld\n", current_time_hires() - thread_stats[i].idle_time); + printf("\treschedules: %lu\n", thread_stats[i].reschedules); +#if WITH_SMP + printf("\treschedule_ipis: %lu\n", thread_stats[i].reschedule_ipis); +#endif + printf("\tcontext_switches: %lu\n", thread_stats[i].context_switches); + printf("\tpreempts: %lu\n", thread_stats[i].preempts); + printf("\tyields: %lu\n", thread_stats[i].yields); + printf("\tinterrupts: %lu\n", thread_stats[i].interrupts); + printf("\ttimer interrupts: %lu\n", thread_stats[i].timer_ints); + printf("\ttimers: %lu\n", thread_stats[i].timers); + } return 0; } static enum handler_return threadload(struct timer *t, lk_time_t now, void *arg) { - static struct thread_stats old_stats; - static lk_bigtime_t last_idle_time; + static struct thread_stats old_stats[SMP_MAX_CPUS]; + static lk_bigtime_t last_idle_time[SMP_MAX_CPUS]; - lk_bigtime_t idle_time = thread_stats.idle_time; - if (get_current_thread()->priority == IDLE_PRIORITY) { - idle_time += current_time_hires() - thread_stats.last_idle_timestamp; + for (uint i = 0; i < SMP_MAX_CPUS; i++) { + /* dont display time for inactiv cpus */ + if (!(mp.active_cpus & (1 << i))) + continue; + + lk_bigtime_t idle_time = thread_stats[i].idle_time; + + /* if the cpu is currently idle, add the time since it went idle up until now to the idle counter */ + bool is_idle = !!(mp.idle_cpus & (1 << i)); + if (is_idle) { + idle_time += current_time_hires() - thread_stats[i].last_idle_timestamp; + } + + lk_bigtime_t delta_time = idle_time - last_idle_time[i]; + lk_bigtime_t busy_time = 1000000ULL - (delta_time > 1000000ULL ? 1000000ULL : delta_time); + uint busypercent = (busy_time * 10000) / (1000000); + + printf("cpu %u LOAD: " + "%u.%02u%%, " + "cs %lu, " + "pmpts %lu, " +#if WITH_SMP + "rs_ipis %lu, " +#endif + "ints %lu, " + "tmr ints %lu, " + "tmrs %lu\n", + i, + busypercent / 100, busypercent % 100, + thread_stats[i].context_switches - old_stats[i].context_switches, + thread_stats[i].preempts - old_stats[i].preempts, +#if WITH_SMP + thread_stats[i].reschedule_ipis - old_stats[i].reschedule_ipis, +#endif + thread_stats[i].interrupts - old_stats[i].interrupts, + thread_stats[i].timer_ints - old_stats[i].timer_ints, + thread_stats[i].timers - old_stats[i].timers); + + old_stats[i] = thread_stats[i]; + last_idle_time[i] = idle_time; } - lk_bigtime_t delta_time = idle_time - last_idle_time; - lk_bigtime_t busy_time = 1000000ULL - (delta_time > 1000000ULL ? 
1000000ULL : delta_time); - - uint busypercent = (busy_time * 10000) / (1000000); - -// printf("idle_time %lld, busytime %lld\n", idle_time - last_idle_time, busy_time); - printf("LOAD: %d.%02d%%, cs %d, ints %d, timer ints %d, timers %d\n", busypercent / 100, busypercent % 100, - thread_stats.context_switches - old_stats.context_switches, - thread_stats.interrupts - old_stats.interrupts, - thread_stats.timer_ints - old_stats.timer_ints, - thread_stats.timers - old_stats.timers); - - old_stats = thread_stats; - last_idle_time = idle_time; return INT_NO_RESCHEDULE; } @@ -119,8 +151,6 @@ static int cmd_threadload(int argc, const cmd_args *argv) static bool showthreadload = false; static timer_t tltimer; - enter_critical_section(); - if (showthreadload == false) { // start the display timer_initialize(&tltimer); @@ -131,8 +161,6 @@ static int cmd_threadload(int argc, const cmd_args *argv) showthreadload = false; } - exit_critical_section(); - return 0; } @@ -160,7 +188,7 @@ void kernel_evlog_add(uintptr_t id, uintptr_t arg0, uintptr_t arg1) uint index = evlog_bump_head(&kernel_evlog); kernel_evlog.items[index] = (uintptr_t)current_time_hires(); - kernel_evlog.items[index+1] = id; + kernel_evlog.items[index+1] = (arch_curr_cpu_num() << 16) | id; kernel_evlog.items[index+2] = arg0; kernel_evlog.items[index+3] = arg1; } @@ -170,24 +198,24 @@ void kernel_evlog_add(uintptr_t id, uintptr_t arg0, uintptr_t arg1) static void kevdump_cb(const uintptr_t *i) { - switch (i[1]) { + switch (i[1] & 0xffff) { case KERNEL_EVLOG_CONTEXT_SWITCH: - printf("%lu: context switch from %p to %p\n", i[0], (void *)i[2], (void *)i[3]); + printf("%lu.%lu: context switch from %p to %p\n", i[0], i[1] >> 16, (void *)i[2], (void *)i[3]); break; case KERNEL_EVLOG_PREEMPT: - printf("%lu: preempt on thread %p\n", i[0], (void *)i[2]); + printf("%lu.%lu: preempt on thread %p\n", i[0], i[1] >> 16, (void *)i[2]); break; case KERNEL_EVLOG_TIMER_TICK: - printf("%lu: timer tick\n", i[0]); + printf("%lu.%lu: timer tick\n", i[0], i[1] >> 16); break; case KERNEL_EVLOG_TIMER_CALL: - printf("%lu: timer call %p, arg %p\n", i[0], (void *)i[2], (void *)i[3]); + printf("%lu.%lu: timer call %p, arg %p\n", i[0], i[1] >> 16, (void *)i[2], (void *)i[3]); break; case KERNEL_EVLOG_IRQ_ENTER: - printf("%lu: irq entry %u\n", i[0], (uint)i[2]); + printf("%lu.%lu: irq entry %u\n", i[0], i[1] >> 16, (uint)i[2]); break; case KERNEL_EVLOG_IRQ_EXIT: - printf("%lu: irq exit %u\n", i[0], (uint)i[2]); + printf("%lu.%lu: irq exit %u\n", i[0], i[1] >> 16, (uint)i[2]); break; default: printf("%lu: unknown id 0x%x 0x%x 0x%x\n", i[0], i[1], (uint)i[2], (uint)i[3]); @@ -213,4 +241,4 @@ static int cmd_kevlog(int argc, const cmd_args *argv) #endif // WITH_KERNEL_EVLOG - +// vim: set noexpandtab: diff --git a/kernel/event.c b/kernel/event.c index 62f5b78b..b289b75f 100644 --- a/kernel/event.c +++ b/kernel/event.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Travis Geiselbrecht + * Copyright (c) 2008-2014 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -40,10 +40,11 @@ * @{ */ +#include #include #include #include -#include +#include /** * @brief Initialize an event object @@ -70,14 +71,14 @@ void event_destroy(event_t *e) { DEBUG_ASSERT(e->magic == EVENT_MAGIC); - enter_critical_section(); + THREAD_LOCK(state); e->magic = 0; e->signalled = false; e->flags = 0; wait_queue_destroy(&e->wait, true); - exit_critical_section(); + THREAD_UNLOCK(state); } /** @@ 
-101,7 +102,7 @@ status_t event_wait_timeout(event_t *e, lk_time_t timeout) DEBUG_ASSERT(e->magic == EVENT_MAGIC); - enter_critical_section(); + THREAD_LOCK(state); if (e->signalled) { /* signalled, we're going to fall through */ @@ -112,12 +113,9 @@ status_t event_wait_timeout(event_t *e, lk_time_t timeout) } else { /* unsignalled, block here */ ret = wait_queue_block(&e->wait, timeout); - if (ret < 0) - goto err; } -err: - exit_critical_section(); + THREAD_UNLOCK(state); return ret; } @@ -143,7 +141,7 @@ status_t event_signal(event_t *e, bool reschedule) { DEBUG_ASSERT(e->magic == EVENT_MAGIC); - enter_critical_section(); + THREAD_LOCK(state); if (!e->signalled) { if (e->flags & EVENT_FLAG_AUTOUNSIGNAL) { @@ -163,7 +161,7 @@ status_t event_signal(event_t *e, bool reschedule) } } - exit_critical_section(); + THREAD_UNLOCK(state); return NO_ERROR; } diff --git a/kernel/init.c b/kernel/init.c index 2a990832..557266c8 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -22,15 +22,20 @@ */ #include #include +#include #include #include -#include +#include void kernel_init(void) { // if enabled, configure the kernel's event log kernel_evlog_init(); + // initialize the mp subsystem + dprintf(SPEW, "initializing mp\n"); + mp_init(); + // initialize the threading system dprintf(SPEW, "initializing threads\n"); thread_init(); diff --git a/kernel/mp.c b/kernel/mp.c new file mode 100644 index 00000000..814050b8 --- /dev/null +++ b/kernel/mp.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#define LOCAL_TRACE 0 + +/* a global state structure, aligned on cpu cache line to minimize aliasing */ +struct mp_state mp __CPU_ALIGN; + +void mp_init(void) +{ +} + +void mp_reschedule(mp_cpu_mask_t target, uint flags) +{ +#if WITH_SMP + uint local_cpu = arch_curr_cpu_num(); + + LTRACEF("local %d, target 0x%x\n", local_cpu, target); + + /* mask out cpus that are not active and the local cpu */ + target &= mp.active_cpus; + + /* mask out cpus that are currently running realtime code */ + if ((flags & MP_RESCHEDULE_FLAG_REALTIME) == 0) { + target &= ~mp.realtime_cpus; + } + target &= ~(1U << local_cpu); + + LTRACEF("local %d, post mask target now 0x%x\n", local_cpu, target); + + arch_mp_send_ipi(target, MP_IPI_RESCHEDULE); +#endif +} + +void mp_set_curr_cpu_active(bool active) +{ + atomic_or((volatile int *)&mp.active_cpus, 1U << arch_curr_cpu_num()); +} + +#if WITH_SMP +enum handler_return mp_mbx_reschedule_irq(void) +{ + uint cpu = arch_curr_cpu_num(); + + LTRACEF("cpu %u\n", cpu); + + THREAD_STATS_INC(reschedule_ipis); + + return (mp.active_cpus & (1U << cpu)) ? INT_RESCHEDULE : INT_NO_RESCHEDULE; +} +#endif + +// vim: set noexpandtab: + diff --git a/kernel/mutex.c b/kernel/mutex.c index b0b7ea04..69b7f5df 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -60,11 +60,11 @@ void mutex_destroy(mutex_t *m) get_current_thread(), get_current_thread()->name, m, m->holder, m->holder->name); #endif - enter_critical_section(); + THREAD_LOCK(state); m->magic = 0; m->count = 0; wait_queue_destroy(&m->wait, true); - exit_critical_section(); + THREAD_UNLOCK(state); } /** @@ -87,7 +87,7 @@ status_t mutex_acquire_timeout(mutex_t *m, lk_time_t timeout) get_current_thread(), get_current_thread()->name, m); #endif - enter_critical_section(); + THREAD_LOCK(state); status_t ret = NO_ERROR; if (unlikely(++m->count > 1)) { @@ -112,7 +112,7 @@ status_t mutex_acquire_timeout(mutex_t *m, lk_time_t timeout) m->holder = get_current_thread(); err: - exit_critical_section(); + THREAD_UNLOCK(state); return ret; } @@ -130,7 +130,7 @@ status_t mutex_release(mutex_t *m) } #endif - enter_critical_section(); + THREAD_LOCK(state); m->holder = 0; @@ -139,7 +139,7 @@ status_t mutex_release(mutex_t *m) wait_queue_wake_one(&m->wait, true, NO_ERROR); } - exit_critical_section(); + THREAD_UNLOCK(state); return NO_ERROR; } diff --git a/kernel/rules.mk b/kernel/rules.mk index 1dd4ca36..aab917b3 100644 --- a/kernel/rules.mk +++ b/kernel/rules.mk @@ -15,6 +15,7 @@ MODULE_SRCS := \ $(LOCAL_DIR)/thread.c \ $(LOCAL_DIR)/timer.c \ $(LOCAL_DIR)/semaphore.c \ + $(LOCAL_DIR)/mp.c ifeq ($(WITH_KERNEL_VM),1) MODULE_DEPS += kernel/vm diff --git a/kernel/semaphore.c b/kernel/semaphore.c index a1395fae..219360a4 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -26,17 +26,17 @@ void sem_init(semaphore_t *sem, unsigned int value) void sem_destroy(semaphore_t *sem) { - enter_critical_section(); + THREAD_LOCK(state); sem->count = 0; wait_queue_destroy(&sem->wait, true); - exit_critical_section(); + THREAD_UNLOCK(state); } int sem_post(semaphore_t *sem, bool resched) { int ret = 0; - enter_critical_section(); + THREAD_LOCK(state); /* * If the count is or was negative then a thread is waiting for a resource, otherwise @@ -45,7 +45,7 @@ int sem_post(semaphore_t *sem, bool resched) if (unlikely(++sem->count <= 0)) ret = wait_queue_wake_one(&sem->wait, resched, NO_ERROR); - exit_critical_section(); + THREAD_UNLOCK(state); return ret; } @@ -53,7 +53,7 @@ int 
sem_post(semaphore_t *sem, bool resched) status_t sem_wait(semaphore_t *sem) { status_t ret = NO_ERROR; - enter_critical_section(); + THREAD_LOCK(state); /* * If there are no resources available then we need to @@ -62,28 +62,28 @@ status_t sem_wait(semaphore_t *sem) if (unlikely(--sem->count < 0)) ret = wait_queue_block(&sem->wait, INFINITE_TIME); - exit_critical_section(); + THREAD_UNLOCK(state); return ret; } status_t sem_trywait(semaphore_t *sem) { status_t ret = NO_ERROR; - enter_critical_section(); + THREAD_LOCK(state); if (unlikely(sem->count <= 0)) ret = ERR_NOT_READY; else sem->count--; - exit_critical_section(); + THREAD_UNLOCK(state); return ret; } status_t sem_timedwait(semaphore_t *sem, lk_time_t timeout) { status_t ret = NO_ERROR; - enter_critical_section(); + THREAD_LOCK(state); if (unlikely(--sem->count < 0)) { ret = wait_queue_block(&sem->wait, timeout); @@ -94,7 +94,7 @@ status_t sem_timedwait(semaphore_t *sem, lk_time_t timeout) } } - exit_critical_section(); + THREAD_UNLOCK(state); return ret; } diff --git a/kernel/thread.c b/kernel/thread.c index 721ae19a..180980bb 100644 --- a/kernel/thread.c +++ b/kernel/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009 Travis Geiselbrecht + * Copyright (c) 2008-2014 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -35,11 +35,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -49,24 +51,24 @@ #endif #if THREAD_STATS -struct thread_stats thread_stats; +struct thread_stats thread_stats[SMP_MAX_CPUS]; #endif /* global thread list */ static struct list_node thread_list; -/* the global critical section count */ -int critical_section_count; +/* master thread spinlock */ +spin_lock_t thread_lock = SPIN_LOCK_INITIAL_VALUE; /* the run queue */ static struct list_node run_queue[NUM_PRIORITIES]; static uint32_t run_queue_bitmap; -/* the bootstrap thread (statically allocated) */ -static thread_t bootstrap_thread; +/* make sure the bitmap is large enough to cover our number of priorities */ +STATIC_ASSERT(NUM_PRIORITIES <= sizeof(run_queue_bitmap) * 8); -/* the idle thread */ -static thread_t *idle_thread; +/* the idle thread(s) (statically allocated) */ +static thread_t idle_threads[SMP_MAX_CPUS]; /* local routines */ static void thread_resched(void); @@ -74,7 +76,7 @@ static void idle_thread_routine(void) __NO_RETURN; #if PLATFORM_HAS_DYNAMIC_TIMER /* preemption timer */ -static timer_t preempt_timer; +static timer_t preempt_timer[SMP_MAX_CPUS]; #endif /* run queue manipulation */ @@ -84,7 +86,8 @@ static void insert_in_run_queue_head(thread_t *t) ASSERT(t->magic == THREAD_MAGIC); ASSERT(t->state == THREAD_READY); ASSERT(!list_in_list(&t->queue_node)); - ASSERT(in_critical_section()); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif list_add_head(&run_queue[t->priority], &t->queue_node); @@ -97,7 +100,8 @@ static void insert_in_run_queue_tail(thread_t *t) ASSERT(t->magic == THREAD_MAGIC); ASSERT(t->state == THREAD_READY); ASSERT(!list_in_list(&t->queue_node)); - ASSERT(in_critical_section()); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif list_add_tail(&run_queue[t->priority], &t->queue_node); @@ -108,6 +112,7 @@ static void init_thread_struct(thread_t *t, const char *name) { memset(t, 0, sizeof(thread_t)); t->magic = THREAD_MAGIC; + t->pinned_cpu = -1; strlcpy(t->name, name, sizeof(t->name)); } 
@@ -154,10 +159,10 @@ thread_t *thread_create_etc(thread_t *t, const char *name, thread_start_routine t->entry = entry; t->arg = arg; t->priority = priority; - t->saved_critical_section_count = 1; /* we always start inside a critical section */ t->state = THREAD_SUSPENDED; t->blocking_wait_queue = NULL; t->wait_queue_block_ret = NO_ERROR; + t->curr_cpu = -1; t->retcode = 0; wait_queue_init(&t->retcode_wait_queue); @@ -188,9 +193,9 @@ thread_t *thread_create_etc(thread_t *t, const char *name, thread_start_routine arch_thread_initialize(t); /* add it to the global thread list */ - enter_critical_section(); + THREAD_LOCK(state); list_add_head(&thread_list, &t->thread_list_node); - exit_critical_section(); + THREAD_UNLOCK(state); return t; } @@ -216,22 +221,32 @@ status_t thread_set_real_time(thread_t *t) ASSERT(t->magic == THREAD_MAGIC); #endif - enter_critical_section(); + THREAD_LOCK(state); #if PLATFORM_HAS_DYNAMIC_TIMER if (t == get_current_thread()) { /* if we're currently running, cancel the preemption timer. */ - timer_cancel(&preempt_timer); + timer_cancel(&preempt_timer[arch_curr_cpu_num()]); } #endif t->flags |= THREAD_FLAG_REAL_TIME; - exit_critical_section(); + THREAD_UNLOCK(state); return NO_ERROR; } -static bool thread_is_real_time(thread_t *t) +static bool thread_is_realtime(thread_t *t) { - return !!(t->flags & THREAD_FLAG_REAL_TIME); + return (t->flags & THREAD_FLAG_REAL_TIME) && t->priority > DEFAULT_PRIORITY; +} + +static bool thread_is_idle(thread_t *t) +{ + return !!(t->flags & THREAD_FLAG_IDLE); +} + +static bool thread_is_real_time_or_idle(thread_t *t) +{ + return !!(t->flags & (THREAD_FLAG_REAL_TIME | THREAD_FLAG_IDLE)); } /** @@ -251,13 +266,22 @@ status_t thread_resume(thread_t *t) ASSERT(t->state != THREAD_DEATH); #endif - enter_critical_section(); + bool resched = false; + bool ints_disabled = arch_ints_disabled(); + THREAD_LOCK(state); if (t->state == THREAD_SUSPENDED) { t->state = THREAD_READY; insert_in_run_queue_head(t); - thread_yield(); + if (!ints_disabled) /* HACK, don't resched into bootstrap thread before idle thread is set up */ + resched = true; } - exit_critical_section(); + + mp_reschedule(MP_CPU_ALL_BUT_LOCAL, 0); + + THREAD_UNLOCK(state); + + if (resched) + thread_yield(); return NO_ERROR; } @@ -277,11 +301,11 @@ status_t thread_join(thread_t *t, int *retcode, lk_time_t timeout) ASSERT(t->magic == THREAD_MAGIC); #endif - enter_critical_section(); + THREAD_LOCK(state); if (t->flags & THREAD_FLAG_DETACHED) { /* the thread is detached, go ahead and exit */ - exit_critical_section(); + THREAD_UNLOCK(state); return ERR_THREAD_DETACHED; } @@ -289,7 +313,7 @@ status_t thread_join(thread_t *t, int *retcode, lk_time_t timeout) if (t->state != THREAD_DEATH) { status_t err = wait_queue_block(&t->retcode_wait_queue, timeout); if (err < 0) { - exit_critical_section(); + THREAD_UNLOCK(state); return err; } } @@ -311,7 +335,7 @@ status_t thread_join(thread_t *t, int *retcode, lk_time_t timeout) /* clear the structure's magic */ t->magic = 0; - exit_critical_section(); + THREAD_UNLOCK(state); /* free its stack and the thread structure itself */ if (t->flags & THREAD_FLAG_FREE_STACK && t->stack) @@ -329,7 +353,7 @@ status_t thread_detach(thread_t *t) ASSERT(t->magic == THREAD_MAGIC); #endif - enter_critical_section(); + THREAD_LOCK(state); /* if another thread is blocked inside thread_join() on this thread, * wake them up with a specific return code */ @@ -338,11 +362,11 @@ status_t thread_detach(thread_t *t) /* if it's already dead, then just do what join would have
and exit */ if (t->state == THREAD_DEATH) { t->flags &= ~THREAD_FLAG_DETACHED; /* makes sure thread_join continues */ - exit_critical_section(); + THREAD_UNLOCK(state); return thread_join(t, NULL, 0); } else { t->flags |= THREAD_FLAG_DETACHED; - exit_critical_section(); + THREAD_UNLOCK(state); return NO_ERROR; } } @@ -361,11 +385,12 @@ void thread_exit(int retcode) #if THREAD_CHECKS ASSERT(current_thread->magic == THREAD_MAGIC); ASSERT(current_thread->state == THREAD_RUNNING); + ASSERT(!thread_is_idle(current_thread)); #endif // dprintf("thread_exit: current %p\n", current_thread); - enter_critical_section(); + THREAD_LOCK(state); /* enter the dead state */ current_thread->state = THREAD_DEATH; @@ -402,6 +427,34 @@ static void idle_thread_routine(void) arch_idle(); } +static thread_t *get_top_thread(int cpu) +{ + thread_t *newthread; + uint32_t local_run_queue_bitmap = run_queue_bitmap; + uint next_queue; + + while (local_run_queue_bitmap) { + /* find the first (remaining) queue with a thread in it */ + next_queue = HIGHEST_PRIORITY - __builtin_clz(local_run_queue_bitmap) + - (sizeof(run_queue_bitmap) * 8 - NUM_PRIORITIES); + + list_for_every_entry(&run_queue[next_queue], newthread, thread_t, queue_node) { + if (newthread->pinned_cpu < 0 || newthread->pinned_cpu == cpu) { + list_delete(&newthread->queue_node); + + if (list_is_empty(&run_queue[next_queue])) + run_queue_bitmap &= ~(1<state != THREAD_RUNNING); #endif THREAD_STATS_INC(reschedules); - oldthread = current_thread; - - // at the moment, can't deal with more than 32 priority levels - ASSERT(NUM_PRIORITIES <= 32); - - // should at least find the idle thread -#if THREAD_CHECKS - ASSERT(run_queue_bitmap != 0); -#endif - - int next_queue = HIGHEST_PRIORITY - __builtin_clz(run_queue_bitmap) - (32 - NUM_PRIORITIES); - //dprintf(SPEW, "bitmap 0x%x, next %d\n", run_queue_bitmap, next_queue); - - newthread = list_remove_head_type(&run_queue[next_queue], thread_t, queue_node); - - if (list_is_empty(&run_queue[next_queue])) - run_queue_bitmap &= ~(1<state = THREAD_RUNNING; + oldthread = current_thread; + if (newthread == oldthread) return; @@ -463,46 +499,74 @@ void thread_resched(void) newthread->remaining_quantum = 5; // XXX make this smarter } + /* mark the cpu ownership of the threads */ + oldthread->curr_cpu = -1; + newthread->curr_cpu = cpu; + + if (thread_is_idle(newthread)) { + mp_set_cpu_idle(cpu); + } else { + mp_set_cpu_busy(cpu); + } + + if (thread_is_realtime(newthread)) { + mp_set_cpu_realtime(cpu); + } else { + mp_set_cpu_non_realtime(cpu); + } + #if THREAD_STATS THREAD_STATS_INC(context_switches); - if (oldthread == idle_thread) { + if (thread_is_idle(oldthread)) { lk_bigtime_t now = current_time_hires(); - thread_stats.idle_time += now - thread_stats.last_idle_timestamp; + thread_stats[cpu].idle_time += now - thread_stats[cpu].last_idle_timestamp; } - if (newthread == idle_thread) { - thread_stats.last_idle_timestamp = current_time_hires(); + if (thread_is_idle(newthread)) { + thread_stats[cpu].last_idle_timestamp = current_time_hires(); } #endif KEVLOG_THREAD_SWITCH(oldthread, newthread); -#if THREAD_CHECKS - ASSERT(critical_section_count > 0); - ASSERT(newthread->saved_critical_section_count > 0); -#endif - #if PLATFORM_HAS_DYNAMIC_TIMER - if (thread_is_real_time(newthread)) { - if (!thread_is_real_time(oldthread)) { + if (thread_is_real_time_or_idle(newthread)) { + if (!thread_is_real_time_or_idle(oldthread)) { /* if we're switching from a non real time to a real time, cancel * the preemption timer. 
*/ - timer_cancel(&preempt_timer); +#ifdef DEBUG_THREAD_CONTEXT_SWITCH + dprintf(ALWAYS, "arch_context_switch: stop preempt, cpu %d, old %p (%s), new %p (%s)\n", + cpu, oldthread, oldthread->name, newthread, newthread->name); +#endif + timer_cancel(&preempt_timer[cpu]); } - } else if (thread_is_real_time(oldthread)) { + } else if (thread_is_real_time_or_idle(oldthread)) { /* if we're switching from a real time (or idle thread) to a regular one, * set up a periodic timer to run our preemption tick. */ - timer_set_periodic(&preempt_timer, 10, (timer_callback)thread_timer_tick, NULL); +#ifdef DEBUG_THREAD_CONTEXT_SWITCH + dprintf(ALWAYS, "arch_context_switch: start preempt, cpu %d, old %p (%s), new %p (%s)\n", + cpu, oldthread, oldthread->name, newthread, newthread->name); +#endif + timer_set_periodic(&preempt_timer[cpu], 10, (timer_callback)thread_timer_tick, NULL); } #endif /* set some optional target debug leds */ - target_set_debug_led(0, newthread != idle_thread); + target_set_debug_led(0, !thread_is_idle(&idle_threads[cpu])); /* do the switch */ - oldthread->saved_critical_section_count = critical_section_count; set_current_thread(newthread); - critical_section_count = newthread->saved_critical_section_count; + +#ifdef DEBUG_THREAD_CONTEXT_SWITCH + dprintf(ALWAYS, "arch_context_switch: cpu %d, old %p (%s, pri %d, flags 0x%x), new %p (%s, pri %d, flags 0x%x)\n", + cpu, oldthread, oldthread->name, oldthread->priority, + oldthread->flags, newthread, newthread->name, + newthread->priority, newthread->flags); +#endif + +#ifdef WITH_LIB_UTHREAD + uthread_context_switch(oldthread, newthread); +#endif arch_context_switch(oldthread, newthread); } @@ -524,17 +588,19 @@ void thread_yield(void) ASSERT(current_thread->state == THREAD_RUNNING); #endif - enter_critical_section(); + THREAD_LOCK(state); THREAD_STATS_INC(yields); /* we are yielding the cpu, so stick ourselves into the tail of the run queue and reschedule */ current_thread->state = THREAD_READY; current_thread->remaining_quantum = 0; - insert_in_run_queue_tail(current_thread); + if (likely(!thread_is_idle(current_thread))) { /* idle thread doesn't go in the run queue */ + insert_in_run_queue_tail(current_thread); + } thread_resched(); - exit_critical_section(); + THREAD_UNLOCK(state); } /** @@ -559,23 +625,28 @@ void thread_preempt(void) #if THREAD_CHECKS ASSERT(current_thread->magic == THREAD_MAGIC); ASSERT(current_thread->state == THREAD_RUNNING); - ASSERT(in_critical_section()); #endif #if THREAD_STATS - if (current_thread != idle_thread) + if (!thread_is_idle(current_thread)) THREAD_STATS_INC(preempts); /* only track when a meaningful preempt happens */ #endif KEVLOG_THREAD_PREEMPT(current_thread); + THREAD_LOCK(state); + /* we are being preempted, so we get to go back into the front of the run queue if we have quantum left */ current_thread->state = THREAD_READY; - if (current_thread->remaining_quantum > 0) - insert_in_run_queue_head(current_thread); - else - insert_in_run_queue_tail(current_thread); /* if we're out of quantum, go to the tail of the queue */ + if (likely(!thread_is_idle(current_thread))) { /* idle thread doesn't go in the run queue */ + if (current_thread->remaining_quantum > 0) + insert_in_run_queue_head(current_thread); + else + insert_in_run_queue_tail(current_thread); /* if we're out of quantum, go to the tail of the queue */ + } thread_resched(); + + THREAD_UNLOCK(state); } /** @@ -595,7 +666,8 @@ void thread_block(void) ASSERT(current_thread->magic == THREAD_MAGIC); ASSERT(current_thread->state == 
THREAD_BLOCKED); - ASSERT(in_critical_section()); + ASSERT(spin_lock_held(&thread_lock)); + ASSERT(!thread_is_idle(current_thread)); #endif /* we are blocking on something. the blocking code should have already stuck us on a queue */ @@ -607,11 +679,13 @@ void thread_unblock(thread_t *t, bool resched) #if THREAD_CHECKS ASSERT(t->magic == THREAD_MAGIC); ASSERT(t->state == THREAD_BLOCKED); - ASSERT(in_critical_section()); + ASSERT(spin_lock_held(&thread_lock)); + ASSERT(!thread_is_idle(t)); #endif t->state = THREAD_READY; insert_in_run_queue_head(t); + mp_reschedule(MP_CPU_ALL_BUT_LOCAL, 0); if (resched) thread_resched(); } @@ -620,7 +694,7 @@ enum handler_return thread_timer_tick(void) { thread_t *current_thread = get_current_thread(); - if (thread_is_real_time(current_thread)) + if (thread_is_real_time_or_idle(current_thread)) return INT_NO_RESCHEDULE; current_thread->remaining_quantum--; @@ -641,9 +715,13 @@ static enum handler_return thread_sleep_handler(timer_t *timer, lk_time_t now, v ASSERT(t->state == THREAD_SLEEPING); #endif + THREAD_LOCK(state); + t->state = THREAD_READY; insert_in_run_queue_head(t); + THREAD_UNLOCK(state); + return INT_RESCHEDULE; } @@ -666,15 +744,16 @@ void thread_sleep(lk_time_t delay) #if THREAD_CHECKS ASSERT(current_thread->magic == THREAD_MAGIC); ASSERT(current_thread->state == THREAD_RUNNING); + ASSERT(!thread_is_idle(current_thread)); #endif timer_initialize(&timer); - enter_critical_section(); + THREAD_LOCK(state); timer_set_oneshot(&timer, delay, thread_sleep_handler, (void *)current_thread); current_thread->state = THREAD_SLEEPING; thread_resched(); - exit_critical_section(); + THREAD_UNLOCK(state); } /** @@ -686,6 +765,8 @@ void thread_init_early(void) { int i; + DEBUG_ASSERT(arch_curr_cpu_num() == 0); + /* initialize the run queues */ for (i=0; i < NUM_PRIORITIES; i++) list_initialize(&run_queue[i]); @@ -694,14 +775,15 @@ void thread_init_early(void) list_initialize(&thread_list); /* create a thread to cover the current running state */ - thread_t *t = &bootstrap_thread; + thread_t *t = &idle_threads[0]; init_thread_struct(t, "bootstrap"); /* half construct this thread, since we're already running */ t->priority = HIGHEST_PRIORITY; t->state = THREAD_RUNNING; - t->saved_critical_section_count = 1; t->flags = THREAD_FLAG_DETACHED; + t->curr_cpu = 0; + t->pinned_cpu = 0; wait_queue_init(&t->retcode_wait_queue); list_add_head(&thread_list, &t->thread_list_node); set_current_thread(t); @@ -715,7 +797,9 @@ void thread_init_early(void) void thread_init(void) { #if PLATFORM_HAS_DYNAMIC_TIMER - timer_initialize(&preempt_timer); + for (uint i = 0; i < SMP_MAX_CPUS; i++) { + timer_initialize(&preempt_timer[i]); + } #endif } @@ -735,11 +819,21 @@ void thread_set_name(const char *name) */ void thread_set_priority(int priority) { - if (priority < LOWEST_PRIORITY) - priority = LOWEST_PRIORITY; + thread_t *current_thread = get_current_thread(); + + THREAD_LOCK(state); + + if (priority <= IDLE_PRIORITY) + priority = IDLE_PRIORITY + 1; if (priority > HIGHEST_PRIORITY) priority = HIGHEST_PRIORITY; - get_current_thread()->priority = priority; + current_thread->priority = priority; + + current_thread->state = THREAD_READY; + insert_in_run_queue_head(current_thread); + thread_resched(); + + THREAD_UNLOCK(state); } /** @@ -751,17 +845,71 @@ void thread_set_priority(int priority) */ void thread_become_idle(void) { - idle_thread = get_current_thread(); + DEBUG_ASSERT(arch_ints_disabled()); - thread_set_name("idle"); - thread_set_priority(IDLE_PRIORITY); + thread_t *t = 
get_current_thread(); - /* mark the idle thread as real time, to avoid running the preemption - * timer when it is scheduled. */ - thread_set_real_time(idle_thread); + char name[16]; + snprintf(name, sizeof(name), "idle %d", arch_curr_cpu_num()); + thread_set_name(name); - /* release the implicit boot critical section and yield to the scheduler */ - exit_critical_section(); + /* mark ourself as idle */ + t->priority = IDLE_PRIORITY; + t->flags |= THREAD_FLAG_IDLE; + t->pinned_cpu = arch_curr_cpu_num(); + + mp_set_curr_cpu_active(true); + mp_set_cpu_idle(arch_curr_cpu_num()); + + /* enable interrupts and start the scheduler */ + arch_enable_ints(); + thread_yield(); + + idle_thread_routine(); +} + +/* create an idle thread for the cpu we're on, and start scheduling */ + +void thread_secondary_cpu_init_early(void) +{ + DEBUG_ASSERT(arch_ints_disabled()); + + /* construct an idle thread to cover our cpu */ + uint cpu = arch_curr_cpu_num(); + thread_t *t = &idle_threads[cpu]; + + char name[16]; + snprintf(name, sizeof(name), "idle %d", cpu); + init_thread_struct(t, name); + t->pinned_cpu = cpu; + + /* half construct this thread, since we're already running */ + t->priority = HIGHEST_PRIORITY; + t->state = THREAD_RUNNING; + t->flags = THREAD_FLAG_DETACHED | THREAD_FLAG_IDLE; + t->curr_cpu = cpu; + t->pinned_cpu = cpu; + wait_queue_init(&t->retcode_wait_queue); + + THREAD_LOCK(state); + + list_add_head(&thread_list, &t->thread_list_node); + set_current_thread(t); + + THREAD_UNLOCK(state); +} + +void thread_secondary_cpu_entry(void) +{ + uint cpu = arch_curr_cpu_num(); + thread_t *t = get_current_thread(); + t->priority = IDLE_PRIORITY; + + mp_set_curr_cpu_active(true); + mp_set_cpu_idle(cpu); + + /* enable interrupts and start the scheduler on this cpu */ + arch_enable_ints(); thread_yield(); idle_thread_routine(); @@ -786,9 +934,8 @@ static const char *thread_state_to_str(enum thread_state state) void dump_thread(thread_t *t) { dprintf(INFO, "dump_thread: t %p (%s)\n", t, t->name); - dprintf(INFO, "\tstate %s, priority %d, remaining quantum %d, critical section %d\n", - thread_state_to_str(t->state), t->priority, t->remaining_quantum, - t->saved_critical_section_count); + dprintf(INFO, "\tstate %s, curr_cpu %d, pinned_cpu %d, priority %d, remaining quantum %d\n", + thread_state_to_str(t->state), t->curr_cpu, t->pinned_cpu, t->priority, t->remaining_quantum); dprintf(INFO, "\tstack %p, stack_size %zd\n", t->stack, t->stack_size); dprintf(INFO, "\tentry %p, arg %p, flags 0x%x\n", t->entry, t->arg, t->flags); dprintf(INFO, "\twait queue %p, wait queue ret %d\n", t->blocking_wait_queue, t->wait_queue_block_ret); @@ -798,6 +945,7 @@ void dump_thread(thread_t *t) dprintf(INFO, " 0x%lx", t->tls[i]); } dprintf(INFO, "\n"); + arch_dump_thread(t); } /** @@ -807,11 +955,11 @@ void dump_all_threads(void) { thread_t *t; - enter_critical_section(); + THREAD_LOCK(state); list_for_every_entry(&thread_list, t, thread_t, thread_list_node) { dump_thread(t); } - exit_critical_section(); + THREAD_UNLOCK(state); } /** @} */ @@ -834,10 +982,16 @@ static enum handler_return wait_queue_timeout_handler(timer_t *timer, lk_time_t ASSERT(thread->magic == THREAD_MAGIC); #endif - if (thread_unblock_from_wait_queue(thread, ERR_TIMED_OUT) >= NO_ERROR) - return INT_RESCHEDULE; + spin_lock(&thread_lock); - return INT_NO_RESCHEDULE; + enum handler_return ret = INT_NO_RESCHEDULE; + if (thread_unblock_from_wait_queue(thread, ERR_TIMED_OUT) >= NO_ERROR) { + ret = INT_RESCHEDULE; + } + + spin_unlock(&thread_lock); + + return ret; } 
/** @@ -867,7 +1021,8 @@ status_t wait_queue_block(wait_queue_t *wait, lk_time_t timeout) #if THREAD_CHECKS ASSERT(wait->magic == WAIT_QUEUE_MAGIC); ASSERT(current_thread->state == THREAD_RUNNING); - ASSERT(in_critical_section()); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif if (timeout == 0) @@ -885,7 +1040,7 @@ status_t wait_queue_block(wait_queue_t *wait, lk_time_t timeout) timer_set_oneshot(&timer, timeout, wait_queue_timeout_handler, (void *)current_thread); } - thread_block(); + thread_resched(); /* we don't really know if the timer fired or not, so it's better safe to try to cancel it */ if (timeout != INFINITE_TIME) { @@ -918,7 +1073,8 @@ int wait_queue_wake_one(wait_queue_t *wait, bool reschedule, status_t wait_queue #if THREAD_CHECKS ASSERT(wait->magic == WAIT_QUEUE_MAGIC); - ASSERT(in_critical_section()); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif t = list_remove_head_type(&wait->list, thread_t, queue_node); @@ -940,9 +1096,12 @@ int wait_queue_wake_one(wait_queue_t *wait, bool reschedule, status_t wait_queue insert_in_run_queue_head(current_thread); } insert_in_run_queue_head(t); - if (reschedule) + mp_reschedule(MP_CPU_ALL_BUT_LOCAL, 0); + if (reschedule) { thread_resched(); + } ret = 1; + } return ret; @@ -972,7 +1131,8 @@ int wait_queue_wake_all(wait_queue_t *wait, bool reschedule, status_t wait_queue #if THREAD_CHECKS ASSERT(wait->magic == WAIT_QUEUE_MAGIC); - ASSERT(in_critical_section()); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif if (reschedule && wait->count > 0) { @@ -1002,8 +1162,12 @@ int wait_queue_wake_all(wait_queue_t *wait, bool reschedule, status_t wait_queue ASSERT(wait->count == 0); #endif - if (reschedule && ret > 0) - thread_resched(); + if (ret > 0) { + mp_reschedule(MP_CPU_ALL_BUT_LOCAL, 0); + if (reschedule) { + thread_resched(); + } + } return ret; } @@ -1017,7 +1181,8 @@ void wait_queue_destroy(wait_queue_t *wait, bool reschedule) { #if THREAD_CHECKS ASSERT(wait->magic == WAIT_QUEUE_MAGIC); - ASSERT(in_critical_section()); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif wait_queue_wake_all(wait, reschedule, ERR_OBJECT_DESTROYED); wait->magic = 0; @@ -1038,8 +1203,9 @@ void wait_queue_destroy(wait_queue_t *wait, bool reschedule) status_t thread_unblock_from_wait_queue(thread_t *t, status_t wait_queue_error) { #if THREAD_CHECKS - ASSERT(in_critical_section()); ASSERT(t->magic == THREAD_MAGIC); + ASSERT(arch_ints_disabled()); + ASSERT(spin_lock_held(&thread_lock)); #endif if (t->state != THREAD_BLOCKED) @@ -1057,6 +1223,7 @@ status_t thread_unblock_from_wait_queue(thread_t *t, status_t wait_queue_error) t->state = THREAD_READY; t->wait_queue_block_ret = wait_queue_error; insert_in_run_queue_head(t); + mp_reschedule(MP_CPU_ALL_BUT_LOCAL, 0); return NO_ERROR; } diff --git a/kernel/timer.c b/kernel/timer.c index 0b708870..48ec7dad 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009 Travis Geiselbrecht + * Copyright (c) 2008-2014 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -41,12 +41,19 @@ #include #include #include +#include #include #include #define LOCAL_TRACE 0 -static struct list_node timer_queue; +spin_lock_t timer_lock; + +struct timer_state { + struct list_node timer_queue; +} __CPU_ALIGN; + +static struct timer_state timers[SMP_MAX_CPUS]; static enum 
handler_return timer_tick(void *arg, lk_time_t now); @@ -58,13 +65,15 @@ void timer_initialize(timer_t *timer) *timer = (timer_t)TIMER_INITIAL_VALUE(*timer); } -static void insert_timer_in_queue(timer_t *timer) +static void insert_timer_in_queue(uint cpu, timer_t *timer) { timer_t *entry; - LTRACEF("timer %p, scheduled %lu, periodic %lu\n", timer, timer->scheduled_time, timer->periodic_time); + DEBUG_ASSERT(arch_ints_disabled()); - list_for_every_entry(&timer_queue, entry, timer_t, node) { + LTRACEF("timer %p, cpu %u, scheduled %lu, periodic %lu\n", timer, cpu, timer->scheduled_time, timer->periodic_time); + + list_for_every_entry(&timers[cpu].timer_queue, entry, timer_t, node) { if (TIME_GT(entry->scheduled_time, timer->scheduled_time)) { list_add_before(&entry->node, &timer->node); return; @@ -72,14 +81,14 @@ static void insert_timer_in_queue(timer_t *timer) } /* walked off the end of the list */ - list_add_tail(&timer_queue, &timer->node); + list_add_tail(&timers[cpu].timer_queue, &timer->node); } static void timer_set(timer_t *timer, lk_time_t delay, lk_time_t period, timer_callback callback, void *arg) { lk_time_t now; - LTRACEF("timer %p, delay %lu, period %lu, callback %p, arg %p, now %lu\n", timer, delay, period, callback, arg, now); + LTRACEF("timer %p, delay %lu, period %lu, callback %p, arg %p\n", timer, delay, period, callback, arg); DEBUG_ASSERT(timer->magic == TIMER_MAGIC); @@ -95,19 +104,21 @@ static void timer_set(timer_t *timer, lk_time_t delay, lk_time_t period, timer_c LTRACEF("scheduled time %lu\n", timer->scheduled_time); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&timer_lock, state); - insert_timer_in_queue(timer); + uint cpu = arch_curr_cpu_num(); + insert_timer_in_queue(cpu, timer); #if PLATFORM_HAS_DYNAMIC_TIMER - if (list_peek_head_type(&timer_queue, timer_t, node) == timer) { + if (list_peek_head_type(&timers[cpu].timer_queue, timer_t, node) == timer) { /* we just modified the head of the timer queue */ LTRACEF("setting new timer for %u msecs\n", (uint)delay); platform_set_oneshot_timer(timer_tick, NULL, delay); } #endif - exit_critical_section(); + spin_unlock_irqrestore(&timer_lock, state); } /** @@ -159,10 +170,13 @@ void timer_cancel(timer_t *timer) { DEBUG_ASSERT(timer->magic == TIMER_MAGIC); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&timer_lock, state); #if PLATFORM_HAS_DYNAMIC_TIMER - timer_t *oldhead = list_peek_head_type(&timer_queue, timer_t, node); + uint cpu = arch_curr_cpu_num(); + + timer_t *oldhead = list_peek_head_type(&timers[cpu].timer_queue, timer_t, node); #endif if (list_in_list(&timer->node)) @@ -177,7 +191,7 @@ void timer_cancel(timer_t *timer) #if PLATFORM_HAS_DYNAMIC_TIMER /* see if we've just modified the head of the timer queue */ - timer_t *newhead = list_peek_head_type(&timer_queue, timer_t, node); + timer_t *newhead = list_peek_head_type(&timers[cpu].timer_queue, timer_t, node); if (newhead == NULL) { LTRACEF("clearing old hw timer, nothing in the queue\n"); platform_stop_timer(); @@ -195,7 +209,7 @@ void timer_cancel(timer_t *timer) } #endif - exit_critical_section(); + spin_unlock_irqrestore(&timer_lock, state); } /* called at interrupt time to process any pending timers */ @@ -204,14 +218,20 @@ static enum handler_return timer_tick(void *arg, lk_time_t now) timer_t *timer; enum handler_return ret = INT_NO_RESCHEDULE; + DEBUG_ASSERT(arch_ints_disabled()); + THREAD_STATS_INC(timer_ints); // KEVLOG_TIMER_TICK(); // enable only if necessary - LTRACEF("now 
%lu, sp %p\n", now, __GET_FRAME()); + uint cpu = arch_curr_cpu_num(); + + LTRACEF("cpu %u now %lu, sp %p\n", cpu, now, __GET_FRAME()); + + spin_lock(&timer_lock); for (;;) { /* see if there's an event to process */ - timer = list_peek_head_type(&timer_queue, timer_t, node); + timer = list_peek_head_type(&timers[cpu].timer_queue, timer_t, node); if (likely(timer == 0)) break; LTRACEF("next item on timer queue %p at %lu now %lu (%p, arg %p)\n", timer, timer->scheduled_time, now, timer->callback, timer->arg); @@ -223,6 +243,9 @@ static enum handler_return timer_tick(void *arg, lk_time_t now) DEBUG_ASSERT(timer && timer->magic == TIMER_MAGIC); list_delete(&timer->node); + /* we pulled it off the list, release the list lock to handle it */ + spin_unlock(&timer_lock); + LTRACEF("dequeued timer %p, scheduled %lu periodic %lu\n", timer, timer->scheduled_time, timer->periodic_time); THREAD_STATS_INC(timers); @@ -234,19 +257,22 @@ static enum handler_return timer_tick(void *arg, lk_time_t now) if (timer->callback(timer, now, timer->arg) == INT_RESCHEDULE) ret = INT_RESCHEDULE; + /* it may have been requeued or periodic, grab the lock so we can safely inspect it */ + spin_lock(&timer_lock); + /* if it was a periodic timer and it hasn't been requeued * by the callback put it back in the list */ if (periodic && !list_in_list(&timer->node) && timer->periodic_time > 0) { LTRACEF("periodic timer, period %u\n", (uint)timer->periodic_time); timer->scheduled_time = now + timer->periodic_time; - insert_timer_in_queue(timer); + insert_timer_in_queue(cpu, timer); } } #if PLATFORM_HAS_DYNAMIC_TIMER /* reset the timer to the next event */ - timer = list_peek_head_type(&timer_queue, timer_t, node); + timer = list_peek_head_type(&timers[cpu].timer_queue, timer_t, node); if (timer) { /* has to be the case or it would have fired already */ DEBUG_ASSERT(TIME_GT(timer->scheduled_time, now)); @@ -256,25 +282,33 @@ static enum handler_return timer_tick(void *arg, lk_time_t now) LTRACEF("setting new timer for %u msecs for event %p\n", (uint)delay, timer); platform_set_oneshot_timer(timer_tick, NULL, delay); } + + /* we're done manipulating the timer queue */ + spin_unlock(&timer_lock); #else + /* release the timer lock before calling the tick handler */ + spin_unlock(&timer_lock); + /* let the scheduler have a shot to do quantum expiration, etc */ /* in case of dynamic timer, the scheduler will set up a periodic timer */ if (thread_timer_tick() == INT_RESCHEDULE) ret = INT_RESCHEDULE; #endif - DEBUG_ASSERT(in_critical_section()); return ret; } void timer_init(void) { - list_initialize(&timer_queue); - + timer_lock = SPIN_LOCK_INITIAL_VALUE; + for (uint i = 0; i < SMP_MAX_CPUS; i++) { + list_initialize(&timers[i].timer_queue); + } #if !PLATFORM_HAS_DYNAMIC_TIMER /* register for a periodic timer tick */ platform_set_periodic_timer(timer_tick, NULL, 10); /* 10ms */ #endif } +/* vim: set noexpandtab */ diff --git a/kernel/vm/pmm.c b/kernel/vm/pmm.c index 1c2ced7e..e93bae58 100644 --- a/kernel/vm/pmm.c +++ b/kernel/vm/pmm.c @@ -294,8 +294,8 @@ uint pmm_alloc_contiguous(uint count, uint8_t alignment_log2, paddr_t *pa, struc retry: /* search while we're still within the arena and have a chance of finding a slot (start + count < end of arena) */ - while (start + count > start && - start + count <= a->size / PAGE_SIZE) { + while ((start < a->size / PAGE_SIZE) && + ((start + count) <= a->size / PAGE_SIZE)) { vm_page_t *p = &a->page_array[start]; for (uint i = 0; i < count; i++) { if (p->flags & VM_PAGE_FLAG_NONFREE) { diff --git 
a/kernel/vm/vm.c b/kernel/vm/vm.c index 1e7c04c7..6a223568 100644 --- a/kernel/vm/vm.c +++ b/kernel/vm/vm.c @@ -60,6 +60,8 @@ static void mark_pages_in_use(vaddr_t va, size_t len) /* alloate the range, throw the results away */ pmm_alloc_range(pa, 1, &list); + } else { + panic("Could not find pa for va 0x%lx\n", va); } } } diff --git a/kernel/vm/vmm.c b/kernel/vm/vmm.c index 8c7f15f5..7cffb260 100644 --- a/kernel/vm/vmm.c +++ b/kernel/vm/vmm.c @@ -26,11 +26,13 @@ #include #include #include +#include #include "vm_priv.h" #define LOCAL_TRACE 0 static struct list_node aspace_list = LIST_INITIAL_VALUE(aspace_list); +static mutex_t vmm_lock = MUTEX_INITIAL_VALUE(vmm_lock); vmm_aspace_t _kernel_aspace; @@ -161,7 +163,68 @@ static status_t add_region_to_aspace(vmm_aspace_t *aspace, vmm_region_t *r) return ERR_NO_MEMORY; } -static vaddr_t alloc_spot(vmm_aspace_t *aspace, size_t size, uint8_t align_pow2, struct list_node **before) +/* + * Try to pick the spot within specified gap + * + * Arch can override this to impose it's own restrictions. + */ +__WEAK vaddr_t arch_mmu_pick_spot(vaddr_t base, uint prev_region_arch_mmu_flags, + vaddr_t end, uint next_region_arch_mmu_flags, + vaddr_t align, size_t size, uint arch_mmu_flags) +{ + /* just align it by default */ + return ALIGN(base, align); +} + +/* + * Returns true if the caller has to stop search + */ +static inline bool check_gap(vmm_aspace_t *aspace, + vmm_region_t *prev, vmm_region_t *next, + vaddr_t *pva, vaddr_t align, size_t size, + uint arch_mmu_flags) +{ + vaddr_t gap_beg; /* first byte of a gap */ + vaddr_t gap_end; /* last byte of a gap */ + + DEBUG_ASSERT(pva); + + if (prev) + gap_beg = prev->base + prev->size; + else + gap_beg = aspace->base; + + if (next) { + if (gap_beg == next->base) + goto next_gap; /* no gap between regions */ + gap_end = next->base - 1; + } else { + if (gap_beg == (aspace->base + aspace->size)) + goto not_found; /* no gap at the end of address space. Stop search */ + gap_end = aspace->base + aspace->size - 1; + } + + *pva = arch_mmu_pick_spot(gap_beg, prev ? prev->flags : ARCH_MMU_FLAG_INVALID, + gap_end, next ? next->flags : ARCH_MMU_FLAG_INVALID, + align, size, arch_mmu_flags); + if (*pva < gap_beg) + goto not_found; /* address wrapped around */ + + if (*pva < gap_end && ((gap_end - *pva + 1) >= size)) { + /* we have enough room */ + return true; /* found spot, stop search */ + } + +next_gap: + return false; /* continue search */ + +not_found: + *pva = -1; + return true; /* not_found: stop search */ +} + +static vaddr_t alloc_spot(vmm_aspace_t *aspace, size_t size, uint8_t align_pow2, + uint arch_mmu_flags, struct list_node **before) { DEBUG_ASSERT(aspace); DEBUG_ASSERT(size > 0 && IS_PAGE_ALIGNED(size)); @@ -172,65 +235,30 @@ static vaddr_t alloc_spot(vmm_aspace_t *aspace, size_t size, uint8_t align_pow2, align_pow2 = PAGE_SIZE_SHIFT; vaddr_t align = 1UL << align_pow2; - /* start our search */ - vaddr_t spot = ALIGN(aspace->base, align); - if (!is_inside_aspace(aspace, spot)) { - /* the alignment is so big, we can't even allocate in this address space */ - return -1; - } + vaddr_t spot; + vmm_region_t *r = NULL; - vmm_region_t *r = list_peek_head_type(&aspace->region_list, vmm_region_t, node); - if (r) { - /* does it fit before the first element? */ - if (spot < r->base && r->base - spot >= size) { - if (before) - *before = &aspace->region_list; - return spot; - } - } else { - /* nothing is in the list, does it fit in the aspace? 
*/ - if (aspace->base + aspace->size - spot >= size) { - if (before) - *before = &aspace->region_list; - return spot; - } - } + /* try to pick spot at the beginning of address space */ + if (check_gap(aspace, NULL, + list_peek_head_type(&aspace->region_list, vmm_region_t, node), + &spot, align, size, arch_mmu_flags)) + goto done; /* search the middle of the list */ list_for_every_entry(&aspace->region_list, r, vmm_region_t, node) { - /* calculate the aligned spot after r */ - spot = ALIGN(r->base + r->size, align); - if (!is_inside_aspace(aspace, spot)) - break; - - /* get the next element in the list */ - vmm_region_t *next = list_next_type(&aspace->region_list, &r->node, vmm_region_t, node); - - if (next) { - /* see if the aligned spot is between current and next */ - if (spot >= next->base) - continue; - - /* see if it'll fit between the current item and the next */ - if (next->base - spot >= size) { - /* it'll fit here */ - if (before) - *before = &r->node; - return spot; - } - } else { - /* we're at the end of the list, will it fit between us and the end of the aspace? */ - if ((aspace->base + aspace->size) - spot >= size) { - /* it'll fit here */ - if (before) - *before = &r->node; - return spot; - } - } + if (check_gap(aspace, r, + list_next_type(&aspace->region_list, &r->node, vmm_region_t, node), + &spot, align, size, arch_mmu_flags)) + goto done; } /* couldn't find anything */ return -1; + +done: + if (before) + *before = r ? &r->node : &aspace->region_list; + return spot; } /* allocate a region structure and stick it in the address space */ @@ -254,7 +282,8 @@ static vmm_region_t *alloc_region(vmm_aspace_t *aspace, const char *name, size_t } else { /* allocate a virtual slot for it */ struct list_node *before = NULL; - vaddr = alloc_spot(aspace, size, align_pow2, &before); + + vaddr = alloc_spot(aspace, size, align_pow2, arch_mmu_flags, &before); LTRACEF("alloc_spot returns 0x%lx, before %p\n", vaddr, before); if (vaddr == (vaddr_t)-1) { @@ -298,20 +327,23 @@ status_t vmm_reserve_space(vmm_aspace_t *aspace, const char *name, size_t size, /* trim the size */ size = trim_to_aspace(aspace, vaddr, size); + mutex_acquire(&vmm_lock); + /* lookup how it's already mapped */ uint arch_mmu_flags = 0; arch_mmu_query(vaddr, NULL, &arch_mmu_flags); /* build a new region structure */ vmm_region_t *r = alloc_region(aspace, name, size, vaddr, 0, VMM_FLAG_VALLOC_SPECIFIC, VMM_REGION_FLAG_RESERVED, arch_mmu_flags); - if (!r) - return ERR_NO_MEMORY; - return NO_ERROR; + mutex_release(&vmm_lock); + return r ? NO_ERROR : ERR_NO_MEMORY; } -status_t vmm_alloc_physical(vmm_aspace_t *aspace, const char *name, size_t size, void **ptr, paddr_t paddr, uint vmm_flags, uint arch_mmu_flags) +status_t vmm_alloc_physical(vmm_aspace_t *aspace, const char *name, size_t size, void **ptr, uint8_t align_log2, paddr_t paddr, uint vmm_flags, uint arch_mmu_flags) { + status_t ret; + LTRACEF("aspace %p name '%s' size 0x%zx ptr %p paddr 0x%lx vmm_flags 0x%x arch_mmu_flags 0x%x\n", aspace, name, size, ptr ? 
*ptr : 0, paddr, vmm_flags, arch_mmu_flags); @@ -340,10 +372,14 @@ status_t vmm_alloc_physical(vmm_aspace_t *aspace, const char *name, size_t size, vaddr = (vaddr_t)*ptr; } + mutex_acquire(&vmm_lock); + /* allocate a region and put it in the aspace list */ - vmm_region_t *r = alloc_region(aspace, name, size, vaddr, 0, vmm_flags, VMM_REGION_FLAG_PHYSICAL, arch_mmu_flags); - if (!r) - return ERR_NO_MEMORY; + vmm_region_t *r = alloc_region(aspace, name, size, vaddr, align_log2, vmm_flags, VMM_REGION_FLAG_PHYSICAL, arch_mmu_flags); + if (!r) { + ret = ERR_NO_MEMORY; + goto err_alloc_region; + } /* return the vaddr if requested */ if (ptr) @@ -353,7 +389,11 @@ status_t vmm_alloc_physical(vmm_aspace_t *aspace, const char *name, size_t size, int err = arch_mmu_map(r->base, paddr, size / PAGE_SIZE, arch_mmu_flags); LTRACEF("arch_mmu_map returns %d\n", err); - return NO_ERROR; + ret = NO_ERROR; + +err_alloc_region: + mutex_release(&vmm_lock); + return ret; } status_t vmm_alloc_contiguous(vmm_aspace_t *aspace, const char *name, size_t size, void **ptr, uint8_t align_pow2, uint vmm_flags, uint arch_mmu_flags) @@ -396,6 +436,8 @@ status_t vmm_alloc_contiguous(vmm_aspace_t *aspace, const char *name, size_t siz goto err; } + mutex_acquire(&vmm_lock); + /* allocate a region and put it in the aspace list */ vmm_region_t *r = alloc_region(aspace, name, size, vaddr, align_pow2, vmm_flags, VMM_REGION_FLAG_PHYSICAL, arch_mmu_flags); if (!r) { @@ -416,9 +458,11 @@ status_t vmm_alloc_contiguous(vmm_aspace_t *aspace, const char *name, size_t siz list_add_tail(&r->page_list, &p->node); } + mutex_release(&vmm_lock); return NO_ERROR; err1: + mutex_release(&vmm_lock); pmm_free(&page_list); err: return err; @@ -466,6 +510,8 @@ status_t vmm_alloc(vmm_aspace_t *aspace, const char *name, size_t size, void **p goto err1; } + mutex_acquire(&vmm_lock); + /* allocate a region and put it in the aspace list */ vmm_region_t *r = alloc_region(aspace, name, size, vaddr, align_pow2, vmm_flags, VMM_REGION_FLAG_PHYSICAL, arch_mmu_flags); if (!r) { @@ -483,7 +529,7 @@ status_t vmm_alloc(vmm_aspace_t *aspace, const char *name, size_t size, void **p vaddr_t va = r->base; DEBUG_ASSERT(IS_PAGE_ALIGNED(va)); while ((p = list_remove_head_type(&page_list, vm_page_t, node))) { - DEBUG_ASSERT(va < r->base + r->size); + DEBUG_ASSERT(va <= r->base + r->size - 1); paddr_t pa = page_to_address(p); DEBUG_ASSERT(IS_PAGE_ALIGNED(pa)); @@ -496,9 +542,11 @@ status_t vmm_alloc(vmm_aspace_t *aspace, const char *name, size_t size, void **p va += PAGE_SIZE; } + mutex_release(&vmm_lock); return NO_ERROR; err1: + mutex_release(&vmm_lock); pmm_free(&page_list); err: return err; @@ -515,7 +563,7 @@ static vmm_region_t *vmm_find_region(const vmm_aspace_t *aspace, vaddr_t vaddr) /* search the region list */ list_for_every_entry(&aspace->region_list, r, vmm_region_t, node) { - if ((vaddr >= r->base) && (vaddr < r->base + r->size)) + if ((vaddr >= r->base) && (vaddr <= r->base + r->size - 1)) return r; } @@ -524,8 +572,11 @@ static vmm_region_t *vmm_find_region(const vmm_aspace_t *aspace, vaddr_t vaddr) status_t vmm_free_region(vmm_aspace_t *aspace, vaddr_t vaddr) { + mutex_acquire(&vmm_lock); + vmm_region_t *r = vmm_find_region (aspace, vaddr); if (!r) { + mutex_release(&vmm_lock); return ERR_NOT_FOUND; } @@ -535,6 +586,8 @@ status_t vmm_free_region(vmm_aspace_t *aspace, vaddr_t vaddr) /* unmap it */ arch_mmu_unmap(r->base, r->size / PAGE_SIZE); + mutex_release(&vmm_lock); + /* return physical pages if any */ pmm_free (&r->page_list); @@ -571,7 +624,7 @@ usage: 
printf("usage:\n"); printf("%s aspaces\n", argv[0].str); printf("%s alloc \n", argv[0].str); - printf("%s alloc_physical \n", argv[0].str); + printf("%s alloc_physical \n", argv[0].str); printf("%s alloc_contig \n", argv[0].str); return ERR_GENERIC; } @@ -591,7 +644,7 @@ usage: if (argc < 4) goto notenoughargs; void *ptr = (void *)0x99; - status_t err = vmm_alloc_physical(vmm_get_kernel_aspace(), "physical test", argv[3].u, &ptr, argv[2].u, 0, ARCH_MMU_FLAG_UNCACHED_DEVICE); + status_t err = vmm_alloc_physical(vmm_get_kernel_aspace(), "physical test", argv[3].u, &ptr, argv[4].u, argv[2].u, 0, ARCH_MMU_FLAG_UNCACHED_DEVICE); printf("vmm_alloc_physical returns %d, ptr %p\n", err, ptr); } else if (!strcmp(argv[1].str, "alloc_contig")) { if (argc < 4) goto notenoughargs; diff --git a/lib/cbuf/cbuf.c b/lib/cbuf/cbuf.c index 76030174..67855e12 100644 --- a/lib/cbuf/cbuf.c +++ b/lib/cbuf/cbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Travis Geiselbrecht + * Copyright (c) 2008-2014 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -50,6 +50,7 @@ void cbuf_initialize_etc(cbuf_t *cbuf, size_t len, void *buf) cbuf->len_pow2 = log2_uint(len); cbuf->buf = buf; event_init(&cbuf->event, false, 0); + spin_lock_init(&cbuf->lock); LTRACEF("len %zd, len_pow2 %u\n", len, cbuf->len_pow2); } @@ -74,7 +75,8 @@ size_t cbuf_write(cbuf_t *cbuf, const void *_buf, size_t len, bool canreschedule DEBUG_ASSERT(cbuf); DEBUG_ASSERT(len < valpow2(cbuf->len_pow2)); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&cbuf->lock, state); size_t write_len; size_t pos = 0; @@ -102,9 +104,13 @@ size_t cbuf_write(cbuf_t *cbuf, const void *_buf, size_t len, bool canreschedule } if (cbuf->head != cbuf->tail) - event_signal(&cbuf->event, canreschedule); + event_signal(&cbuf->event, false); - exit_critical_section(); + spin_unlock_irqrestore(&cbuf->lock, state); + + // XXX convert to only rescheduling if + if (canreschedule) + thread_preempt(); return pos; } @@ -115,11 +121,15 @@ size_t cbuf_read(cbuf_t *cbuf, void *_buf, size_t buflen, bool block) DEBUG_ASSERT(cbuf); - enter_critical_section(); - +retry: + // block on the cbuf outside of the lock, which may + // unblock us early and we'll have to double check below if (block) event_wait(&cbuf->event); + spin_lock_saved_state_t state; + spin_lock_irqsave(&cbuf->lock, state); + // see if there's data available size_t ret = 0; if (cbuf->tail != cbuf->head) { @@ -147,6 +157,7 @@ size_t cbuf_read(cbuf_t *cbuf, void *_buf, size_t buflen, bool block) } if (cbuf->tail == cbuf->head) { + DEBUG_ASSERT(pos > 0); // we've emptied the buffer, unsignal the event event_unsignal(&cbuf->event); } @@ -154,7 +165,11 @@ size_t cbuf_read(cbuf_t *cbuf, void *_buf, size_t buflen, bool block) ret = pos; } - exit_critical_section(); + spin_unlock_irqrestore(&cbuf->lock, state); + + // we apparently blocked but raced with another thread and found no data, retry + if (block && ret == 0) + goto retry; return ret; } @@ -189,7 +204,8 @@ size_t cbuf_write_char(cbuf_t *cbuf, char c, bool canreschedule) { DEBUG_ASSERT(cbuf); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&cbuf->lock, state); size_t ret = 0; if (cbuf_space_avail(cbuf) > 0) { @@ -202,7 +218,7 @@ size_t cbuf_write_char(cbuf_t *cbuf, char c, bool canreschedule) event_signal(&cbuf->event, canreschedule); } - exit_critical_section(); + spin_unlock_irqrestore(&cbuf->lock, state); 
return ret; } @@ -212,11 +228,13 @@ size_t cbuf_read_char(cbuf_t *cbuf, char *c, bool block) DEBUG_ASSERT(cbuf); DEBUG_ASSERT(c); - enter_critical_section(); - +retry: if (block) event_wait(&cbuf->event); + spin_lock_saved_state_t state; + spin_lock_irqsave(&cbuf->lock, state); + // see if there's data available size_t ret = 0; if (cbuf->tail != cbuf->head) { @@ -232,7 +250,10 @@ size_t cbuf_read_char(cbuf_t *cbuf, char *c, bool block) ret = 1; } - exit_critical_section(); + spin_unlock_irqrestore(&cbuf->lock, state); + + if (block && ret == 0) + goto retry; return ret; } diff --git a/lib/debug/debug.c b/lib/debug/debug.c index 81060ecf..4749bc34 100644 --- a/lib/debug/debug.c +++ b/lib/debug/debug.c @@ -33,6 +33,65 @@ #include #include +#if WITH_LIB_SM +#define PRINT_LOCK_FLAGS SPIN_LOCK_FLAG_IRQ_FIQ +#else +#define PRINT_LOCK_FLAGS SPIN_LOCK_FLAG_INTERRUPTS +#endif + +static spin_lock_t print_spin_lock = 0; +static struct list_node print_callbacks = LIST_INITIAL_VALUE(print_callbacks); +/* print lock must be held when invoking out, outs, outc */ +static void out_count(const char *str, size_t len) +{ + print_callback_t *cb; + size_t i; + + /* print to any registered loggers */ + list_for_every_entry(&print_callbacks, cb, print_callback_t, entry) { + if (cb->print) + cb->print(cb, str, len); + } + + /* write out the serial port */ + for (i = 0; i < len; i++) { + platform_dputc(str[i]); + } +} + +static void out_string(const char *str) +{ + out_count(str, strlen(str)); +} + +static void out_char(char c) +{ + out_count(&c, 1); +} + +static int input_char(char *c) +{ + return platform_dgetc(c, true); +} + +void register_print_callback(print_callback_t *cb) +{ + spin_lock_saved_state_t state; + + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); + list_add_head(&print_callbacks, &cb->entry); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); +} + +void unregister_print_callback(print_callback_t *cb) +{ + spin_lock_saved_state_t state; + + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); + list_delete(&cb->entry); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); +} + void spin(uint32_t usecs) { lk_bigtime_t start = current_time_hires(); @@ -69,7 +128,7 @@ static int __debug_stdio_fgetc(void *ctx) char c; int err; - err = platform_dgetc(&c, true); + err = input_char(&c); if (err < 0) return err; return (unsigned char)c; @@ -98,34 +157,55 @@ FILE __stdio_FILEs[3] = { #if !DISABLE_DEBUG_OUTPUT +void _dputc(char c) +{ + spin_lock_saved_state_t state; + + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); + out_char(c); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); +} + int _dputs(const char *str) { - while (*str != 0) { - _dputc(*str++); - } + spin_lock_saved_state_t state; + + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); + out_string(str); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); + + return 0; +} + +int _dwrite(const char *ptr, size_t len) +{ + spin_lock_saved_state_t state; + + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); + out_count(ptr, len); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); return 0; } static int _dprintf_output_func(const char *str, size_t len, void *state) { - size_t count = 0; - while (count < len && *str) { - _dputc(*str); - str++; - count++; - } + size_t n = strnlen(str, len); - return count; + out_count(str, n); + return n; } int _dprintf(const char *fmt, ...) 
{ + spin_lock_saved_state_t state; int err; - va_list ap; + va_start(ap, fmt); + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); err = _printf_engine(&_dprintf_output_func, NULL, fmt, ap); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); va_end(ap); return err; @@ -133,9 +213,12 @@ int _dprintf(const char *fmt, ...) int _dvprintf(const char *fmt, va_list ap) { + spin_lock_saved_state_t state; int err; + spin_lock_save(&print_spin_lock, &state, PRINT_LOCK_FLAGS); err = _printf_engine(&_dprintf_output_func, NULL, fmt, ap); + spin_unlock_restore(&print_spin_lock, state, PRINT_LOCK_FLAGS); return err; } diff --git a/lib/heap/heap.c b/lib/heap/heap.c index 7364cd1f..7e99bf6d 100644 --- a/lib/heap/heap.c +++ b/lib/heap/heap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009,2012,2014 Travis Geiselbrecht + * Copyright (c) 2008-2009,2012-2014 Travis Geiselbrecht * Copyright (c) 2009 Corey Tabaka * * Permission is hereby granted, free of charge, to any person obtaining @@ -32,6 +32,7 @@ #include #include #include +#include #include #define LOCAL_TRACE 0 @@ -87,6 +88,7 @@ struct heap { mutex_t lock; struct list_node free_list; struct list_node delayed_free_list; + spin_lock_t delayed_free_lock; }; // heap static vars @@ -124,12 +126,15 @@ static void heap_dump(void) list_for_every_entry(&theheap.free_list, chunk, struct free_heap_chunk, node) { dump_free_chunk(chunk); } + mutex_release(&theheap.lock); dprintf(INFO, "\tdelayed free list:\n"); + spin_lock_saved_state_t state; + spin_lock_irqsave(&theheap.delayed_free_lock, state); list_for_every_entry(&theheap.delayed_free_list, chunk, struct free_heap_chunk, node) { dump_free_chunk(chunk); } - mutex_release(&theheap.lock); + spin_unlock_irqrestore(&theheap.delayed_free_lock, state); } static void heap_test(void) @@ -269,13 +274,14 @@ static void heap_free_delayed_list(void) list_initialize(&list); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&theheap.delayed_free_lock, state); struct free_heap_chunk *chunk; while ((chunk = list_remove_head_type(&theheap.delayed_free_list, struct free_heap_chunk, node))) { list_add_head(&list, &chunk->node); } - exit_critical_section(); + spin_unlock_irqrestore(&theheap.delayed_free_lock, state); while ((chunk = list_remove_head_type(&list, struct free_heap_chunk, node))) { LTRACEF("freeing chunk %p\n", chunk); @@ -464,9 +470,10 @@ void heap_delayed_free(void *ptr) struct free_heap_chunk *chunk = heap_create_free_chunk(as->ptr, as->size, false); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&theheap.delayed_free_lock, state); list_add_head(&theheap.delayed_free_list, &chunk->node); - exit_critical_section(); + spin_unlock_irqrestore(&theheap.delayed_free_lock, state); } void heap_get_stats(struct heap_stats *ptr) @@ -507,8 +514,10 @@ static ssize_t heap_grow(size_t size) size = ROUNDUP(size, PAGE_SIZE); void *ptr = pmm_alloc_kpages(size / PAGE_SIZE, NULL); - if (!ptr) + if (!ptr) { + TRACEF("failed to grow kernel heap by 0x%zx bytes\n", size); return ERR_NO_MEMORY; + } LTRACEF("growing heap by 0x%zx bytes, new ptr %p\n", size, ptr); @@ -541,6 +550,7 @@ void heap_init(void) // initialize the delayed free list list_initialize(&theheap.delayed_free_list); + spin_lock_init(&theheap.delayed_free_lock); // set the heap range #if WITH_KERNEL_VM diff --git a/lib/libc/atoi.c b/lib/libc/atoi.c index f5610835..71dfd0f6 100644 --- a/lib/libc/atoi.c +++ b/lib/libc/atoi.c @@ -27,6 +27,7 @@ #include #include +#include #define 
LONG_IS_INT 1 @@ -120,4 +121,70 @@ unsigned long long atoull(const char *num) return value; } +unsigned long strtoul(const char *nptr, char **endptr, int base) { + int neg = 0; + unsigned long ret = 0; + if (base < 0 || base == 1 || base > 36) { + errno = EINVAL; + return 0; + } + + while (isspace(*nptr)) { + nptr++; + } + + if (*nptr == '+') { + nptr++; + } else if (*nptr == '-') { + neg = 1; + nptr++; + } + + if ((base == 0 || base == 16) && nptr[0] == '0' && nptr[1] == 'x') { + base = 16; + nptr += 2; + } else if (base == 0 && nptr[0] == '0') { + base = 8; + nptr++; + } else if (base == 0) { + base = 10; + } + + for (;;) { + char c = *nptr; + int v = -1; + unsigned long new_ret; + + if (c >= 'A' && c <= 'Z') { + v = c - 'A' + 10; + } else if (c >= 'a' && c <= 'z') { + v = c - 'a' + 10; + } else if (c >= '0' && c <= '9') { + v = c - '0'; + } + + if (v < 0 || v >= base) { + *endptr = (char *) nptr; + break; + } + + new_ret = ret * base; + if (new_ret / base != ret || + new_ret + v < new_ret || + ret == ULONG_MAX) { + ret = ULONG_MAX; + errno = ERANGE; + } else { + ret = new_ret + v; + } + + nptr++; + } + + if (neg && ret != ULONG_MAX) { + ret = -ret; + } + + return ret; +} diff --git a/lib/libc/bsearch.c b/lib/libc/bsearch.c new file mode 100644 index 00000000..53aafb87 --- /dev/null +++ b/lib/libc/bsearch.c @@ -0,0 +1,44 @@ +/* Copyright (c) 2014, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ + +#include + +void *bsearch(const void *key, const void *base, size_t num_elems, size_t size, + int (*compare)(const void *, const void *)) { + size_t low = 0, high = num_elems - 1; + + if (num_elems == 0) { + return NULL; + } + + for (;;) { + size_t mid = low + ((high - low) / 2); + const void *mid_elem = ((unsigned char*) base) + mid*size; + int r = compare(key, mid_elem); + + if (r < 0) { + if (mid == 0) { + return NULL; + } + high = mid - 1; + } else if (r > 0) { + low = mid + 1; + if (low < mid || low > high) { + return NULL; + } + } else { + return (void*) mid_elem; + } + } +} diff --git a/lib/libc/rules.mk b/lib/libc/rules.mk index 132fe5b5..a774683d 100644 --- a/lib/libc/rules.mk +++ b/lib/libc/rules.mk @@ -4,14 +4,17 @@ MODULE := $(LOCAL_DIR) MODULE_SRCS += \ $(LOCAL_DIR)/atoi.c \ + $(LOCAL_DIR)/bsearch.c \ $(LOCAL_DIR)/ctype.c \ $(LOCAL_DIR)/printf.c \ - $(LOCAL_DIR)/malloc.c \ $(LOCAL_DIR)/rand.c \ $(LOCAL_DIR)/stdio.c \ $(LOCAL_DIR)/qsort.c \ $(LOCAL_DIR)/eabi.c +ifneq ($(WITH_CUSTOM_MALLOC),true) +MODULE_SRCS += $(LOCAL_DIR)/malloc.c +endif include $(LOCAL_DIR)/string/rules.mk diff --git a/lib/minip/pktbuf.c b/lib/minip/pktbuf.c index 69bf9041..27e3df91 100644 --- a/lib/minip/pktbuf.c +++ b/lib/minip/pktbuf.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -45,6 +46,7 @@ static struct list_node pb_freelist = LIST_INITIAL_VALUE(pb_freelist); static struct list_node pb_buflist = LIST_INITIAL_VALUE(pb_buflist); static semaphore_t pb_sem = SEMAPHORE_INITIAL_VALUE(pb_sem, -1); +static spin_lock_t lock; static unsigned int cur_id = 0; @@ -90,6 +92,8 @@ static inline pktbuf_buf_t *pktbuf_get_buf(void) { } pktbuf_t *pktbuf_alloc(void) { + spin_lock_saved_state_t state; + pktbuf_t *p = NULL; pktbuf_buf_t *b = NULL; @@ -97,12 +101,12 @@ pktbuf_t *pktbuf_alloc(void) { * pointer but no buffer and would otherwise have to do sem / list bookkeeping on * cleanup */ sem_wait(&pb_sem); - enter_critical_section(); + spin_lock_irqsave(&lock, state); b = pktbuf_get_buf(); if (b) { p = list_remove_head_type(&pb_freelist, pktbuf_t, list); } - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); if (b->magic != PKTBUF_BUF_MAGIC) { panic("pktbuf id %u has corrupted buffer magic value\n" @@ -127,12 +131,13 @@ pktbuf_t *pktbuf_alloc(void) { } pktbuf_t *pktbuf_alloc_empty(void *buf, size_t dlen) { + spin_lock_saved_state_t state; pktbuf_t *p; sem_wait(&pb_sem); - enter_critical_section(); + spin_lock_irqsave(&lock, state); p = list_remove_head_type(&pb_freelist, pktbuf_t, list); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); if (!p) { return NULL; @@ -147,7 +152,8 @@ pktbuf_t *pktbuf_alloc_empty(void *buf, size_t dlen) { } int pktbuf_free(pktbuf_t *p, bool reschedule) { - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); list_add_tail(&pb_freelist, &(p->list)); if (p->managed && p->buffer) { pktbuf_buf_t *pkt = (pktbuf_buf_t *)p->buffer; @@ -158,7 +164,7 @@ int pktbuf_free(pktbuf_t *p, bool reschedule) { p->eof = false; p->managed = false; p->flags = 0; - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return sem_post(&pb_sem, reschedule); } diff --git a/make/build.mk b/make/build.mk index 5d6f4b4a..2259a471 100644 --- a/make/build.mk +++ b/make/build.mk @@ -4,6 +4,10 @@ GLOBAL_COMPILEFLAGS += -ffunction-sections -fdata-sections GLOBAL_LDFLAGS += --gc-sections endif +ifneq (,$(EXTRA_BUILDRULES)) +-include $(EXTRA_BUILDRULES) +endif + $(OUTBIN): $(OUTELF) @echo generating image: $@ 
$(NOECHO)$(SIZE) $< diff --git a/make/compile.mk b/make/compile.mk index 798f269f..c2c98788 100644 --- a/make/compile.mk +++ b/make/compile.mk @@ -40,33 +40,33 @@ $(MODULE_OBJS): MODULE_INCLUDES:=$(MODULE_INCLUDES) $(MODULE_COBJS): $(BUILDDIR)/%.o: %.c $(MODULE_SRCDEPS) @$(MKDIR) @echo compiling $< - $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CFLAGS) $(MODULE_CFLAGS) $(THUMBCFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ + $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CFLAGS) $(ARCH_CFLAGS) $(MODULE_CFLAGS) $(THUMBCFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ $(MODULE_CPPOBJS): $(BUILDDIR)/%.o: %.cpp $(MODULE_SRCDEPS) @$(MKDIR) @echo compiling $< - $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CPPFLAGS) $(MODULE_CPPFLAGS) $(THUMBCFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ + $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CPPFLAGS) $(ARCH_CPPFLAGS) $(MODULE_CPPFLAGS) $(THUMBCFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ $(MODULE_ASMOBJS): $(BUILDDIR)/%.o: %.S $(MODULE_SRCDEPS) @$(MKDIR) @echo compiling $< - $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_ASMFLAGS) $(MODULE_ASMFLAGS) $(THUMBCFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ + $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_ASMFLAGS) $(ARCH_ASMFLAGS) $(MODULE_ASMFLAGS) $(THUMBCFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ # overridden arm versions $(MODULE_ARM_COBJS): $(BUILDDIR)/%.o: %.c $(MODULE_SRCDEPS) @$(MKDIR) @echo compiling $< - $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CFLAGS) $(MODULE_CFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ + $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CFLAGS) $(ARCH_CFLAGS) $(MODULE_CFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ $(MODULE_ARM_CPPOBJS): $(BUILDDIR)/%.o: %.cpp $(MODULE_SRCDEPS) @$(MKDIR) @echo compiling $< - $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CPPFLAGS) $(MODULE_CPPFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ + $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_CPPFLAGS) $(ARCH_CPPFLAGS) $(MODULE_CPPFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ $(MODULE_ARM_ASMOBJS): $(BUILDDIR)/%.o: %.S $(MODULE_SRCDEPS) @$(MKDIR) @echo compiling $< - $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(MODULE_COMPILEFLAGS) $(GLOBAL_ASMFLAGS) $(MODULE_ASMFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ + $(NOECHO)$(CC) $(GLOBAL_OPTFLAGS) $(MODULE_OPTFLAGS) $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) 
$(MODULE_COMPILEFLAGS) $(GLOBAL_ASMFLAGS) $(ARCH_ASMFLAGS) $(MODULE_ASMFLAGS) $(GLOBAL_INCLUDES) $(MODULE_INCLUDES) -c $< -MD -MP -MT $@ -MF $(@:%o=%d) -o $@ # clear some variables we set here MODULE_CSRCS := diff --git a/make/macros.mk b/make/macros.mk index 5eebdca9..a45aca14 100644 --- a/make/macros.mk +++ b/make/macros.mk @@ -7,6 +7,9 @@ MKDIR = if [ ! -d $(dir $@) ]; then mkdir -p $(dir $@); fi # prepends the BUILD_DIR var to each item in the list TOBUILDDIR = $(addprefix $(BUILDDIR)/,$(1)) +# converts specified variable to boolean value +TOBOOL = $(if $(filter-out 0 false,$1),true,false) + COMMA := , SPACE := SPACE += @@ -32,11 +35,11 @@ define MAKECONFIGHEADER $(MKDIR); \ echo generating $1; \ rm -f $1.tmp; \ - LDEF=`echo $1 | tr '/\\.-' '_'`; \ + LDEF=`echo $1 | tr '/\\.-' '_' | sed "s/C++/CPP/g;s/c++/cpp/g"`; \ echo \#ifndef __$${LDEF}_H > $1.tmp; \ echo \#define __$${LDEF}_H >> $1.tmp; \ for d in `echo $($2) | tr '[:lower:]' '[:upper:]'`; do \ - echo "#define $$d" | sed "s/=/\ /g;s/-/_/g;s/\//_/g;s/\./_/g;s/\//_/g" >> $1.tmp; \ + echo "#define $$d" | sed "s/=/\ /g;s/-/_/g;s/\//_/g;s/\./_/g;s/\//_/g;s/C++/CPP/g" >> $1.tmp; \ done; \ echo \#endif >> $1.tmp; \ $(call TESTANDREPLACEFILE,$1.tmp,$1) diff --git a/makefile b/makefile index 4bb9befe..4efdcb93 100644 --- a/makefile +++ b/makefile @@ -9,7 +9,10 @@ BUILDROOT ?= . DEFAULT_PROJECT ?= TOOLCHAIN_PREFIX ?= +# check if LKROOT is already a part of LKINC list and add it only if it is not +ifneq ($(findstring $(LKROOT),$(LKINC)), $(LKROOT)) LKINC := $(LKROOT) $(LKINC) +endif export LKMAKEROOT export LKROOT diff --git a/platform/alterasoc/platform.c b/platform/alterasoc/platform.c index c694594a..b5832979 100644 --- a/platform/alterasoc/platform.c +++ b/platform/alterasoc/platform.c @@ -74,6 +74,8 @@ void platform_early_init(void) { uart_init_early(); + printf("stat 0x%x\n", *REG32(0xffd05000)); + /* initialize the interrupt controller */ arm_gic_init(); @@ -81,6 +83,9 @@ void platform_early_init(void) arm_cortex_a9_timer_init(CPUPRIV_BASE, TIMER_CLOCK_FREQ); pmm_add_arena(&sdram_arena); + + /* start the secondary cpu */ + *REG32(0xffd05010) = 0; } void platform_init(void) diff --git a/platform/alterasoc/rules.mk b/platform/alterasoc/rules.mk index db1a7973..1122ea4c 100644 --- a/platform/alterasoc/rules.mk +++ b/platform/alterasoc/rules.mk @@ -4,6 +4,7 @@ MODULE := $(LOCAL_DIR) ARCH := arm ARM_CPU := cortex-a9-neon +WITH_SMP := 1 MODULE_DEPS := \ lib/cbuf \ diff --git a/platform/alterasoc/uart.c b/platform/alterasoc/uart.c index 660a8f7e..cf131c45 100644 --- a/platform/alterasoc/uart.c +++ b/platform/alterasoc/uart.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,8 @@ static cbuf_t uart1_rx_buf; static inline uintptr_t uart_to_ptr(unsigned int n) { return (n == 0) ? UART0_BASE : UART1_BASE; } static inline cbuf_t *uart_to_rxbuf(unsigned int n) { return (n == 0) ? 
&uart0_rx_buf : &uart1_rx_buf; } +static spin_lock_t lock = SPIN_LOCK_INITIAL_VALUE; + static enum handler_return uart_irq(void *arg) { bool resched = false; @@ -114,34 +117,26 @@ int uart_putc(int port, char c) { uintptr_t base = uart_to_ptr(port); -#if 1 + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); + /* spin while fifo is full */ - while ((UARTREG(base, UART_USR) & (1<<1)) == 0) - ; -#else - /* spin while fifo is not empty */ - while ((UARTREG(base, UART_USR) & (1<<2)) == 0) - ; -#endif + while ((UARTREG(base, UART_USR) & (1<<1)) == 0) { + } UARTREG(base, UART_THR) = c; + spin_unlock_irqrestore(&lock, state); + return 1; } int uart_getc(int port, bool wait) { -#if 0 - uintptr_t base = uart_to_ptr(port); - - if ((UARTREG(base, UART_USR) & (1<<3))) - return UARTREG(base, UART_RBR); -#else cbuf_t *rxbuf = uart_to_rxbuf(port); char c; if (cbuf_read_char(rxbuf, &c, wait) == 1) return c; -#endif return -1; } diff --git a/platform/bcm2835/include/platform/bcm2835.h b/platform/bcm2835/include/platform/bcm2835.h new file mode 100644 index 00000000..b44a0d2a --- /dev/null +++ b/platform/bcm2835/include/platform/bcm2835.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#pragma once + +#define SDRAM_BASE 0 + +#define BCM_PERIPH_BASE_PHYS (0x3f000000U) +#define BCM_PERIPH_SIZE (0x01100000U) +#define BCM_PERIPH_BASE_VIRT (0xe0000000U) + +/* pointer to 'local' peripherals at 0x40000000 */ +#define BCM_LOCAL_PERIPH_BASE_VIRT (BCM_PERIPH_BASE_VIRT + 0x01000000) + +#define IC0_BASE (BCM_PERIPH_BASE_VIRT + 0x2000) +#define ST_BASE (BCM_PERIPH_BASE_VIRT + 0x3000) +#define MPHI_BASE (BCM_PERIPH_BASE_VIRT + 0x6000) +#define DMA_BASE (BCM_PERIPH_BASE_VIRT + 0x7000) +#define ARM_BASE (BCM_PERIPH_BASE_VIRT + 0xB000) +#define PM_BASE (BCM_PERIPH_BASE_VIRT + 0x100000) +#define PCM_CLOCK_BASE (BCM_PERIPH_BASE_VIRT + 0x101098) +#define RNG_BASE (BCM_PERIPH_BASE_VIRT + 0x104000) +#define GPIO_BASE (BCM_PERIPH_BASE_VIRT + 0x200000) +#define UART0_BASE (BCM_PERIPH_BASE_VIRT + 0x201000) +#define MMCI0_BASE (BCM_PERIPH_BASE_VIRT + 0x202000) +#define I2S_BASE (BCM_PERIPH_BASE_VIRT + 0x203000) +#define SPI0_BASE (BCM_PERIPH_BASE_VIRT + 0x204000) +#define BSC0_BASE (BCM_PERIPH_BASE_VIRT + 0x205000) +#define UART1_BASE (BCM_PERIPH_BASE_VIRT + 0x215000) +#define EMMC_BASE (BCM_PERIPH_BASE_VIRT + 0x300000) +#define SMI_BASE (BCM_PERIPH_BASE_VIRT + 0x600000) +#define BSC1_BASE (BCM_PERIPH_BASE_VIRT + 0x804000) +#define USB_BASE (BCM_PERIPH_BASE_VIRT + 0x980000) +#define MCORE_BASE (BCM_PERIPH_BASE_VIRT + 0x0000) + +#define ARMCTRL_BASE (ARM_BASE + 0x000) +#define ARMCTRL_INTC_BASE (ARM_BASE + 0x200) +#define ARMCTRL_TIMER0_1_BASE (ARM_BASE + 0x400) +#define ARMCTRL_0_SBM_BASE (ARM_BASE + 0x800) + +#define ARM_LOCAL_BASE (BCM_LOCAL_PERIPH_BASE_VIRT) + +/* interrupts */ +#define ARM_IRQ1_BASE 0 +#define INTERRUPT_TIMER0 (ARM_IRQ1_BASE + 0) +#define INTERRUPT_TIMER1 (ARM_IRQ1_BASE + 1) +#define INTERRUPT_TIMER2 (ARM_IRQ1_BASE + 2) +#define INTERRUPT_TIMER3 (ARM_IRQ1_BASE + 3) +#define INTERRUPT_CODEC0 (ARM_IRQ1_BASE + 4) +#define INTERRUPT_CODEC1 (ARM_IRQ1_BASE + 5) +#define INTERRUPT_CODEC2 (ARM_IRQ1_BASE + 6) +#define INTERRUPT_VC_JPEG (ARM_IRQ1_BASE + 7) +#define INTERRUPT_ISP (ARM_IRQ1_BASE + 8) +#define INTERRUPT_VC_USB (ARM_IRQ1_BASE + 9) +#define INTERRUPT_VC_3D (ARM_IRQ1_BASE + 10) +#define INTERRUPT_TRANSPOSER (ARM_IRQ1_BASE + 11) +#define INTERRUPT_MULTICORESYNC0 (ARM_IRQ1_BASE + 12) +#define INTERRUPT_MULTICORESYNC1 (ARM_IRQ1_BASE + 13) +#define INTERRUPT_MULTICORESYNC2 (ARM_IRQ1_BASE + 14) +#define INTERRUPT_MULTICORESYNC3 (ARM_IRQ1_BASE + 15) +#define INTERRUPT_DMA0 (ARM_IRQ1_BASE + 16) +#define INTERRUPT_DMA1 (ARM_IRQ1_BASE + 17) +#define INTERRUPT_VC_DMA2 (ARM_IRQ1_BASE + 18) +#define INTERRUPT_VC_DMA3 (ARM_IRQ1_BASE + 19) +#define INTERRUPT_DMA4 (ARM_IRQ1_BASE + 20) +#define INTERRUPT_DMA5 (ARM_IRQ1_BASE + 21) +#define INTERRUPT_DMA6 (ARM_IRQ1_BASE + 22) +#define INTERRUPT_DMA7 (ARM_IRQ1_BASE + 23) +#define INTERRUPT_DMA8 (ARM_IRQ1_BASE + 24) +#define INTERRUPT_DMA9 (ARM_IRQ1_BASE + 25) +#define INTERRUPT_DMA10 (ARM_IRQ1_BASE + 26) +#define INTERRUPT_DMA11 (ARM_IRQ1_BASE + 27) +#define INTERRUPT_DMA12 (ARM_IRQ1_BASE + 28) +#define INTERRUPT_AUX (ARM_IRQ1_BASE + 29) +#define INTERRUPT_ARM (ARM_IRQ1_BASE + 30) +#define INTERRUPT_VPUDMA (ARM_IRQ1_BASE + 31) + +#define ARM_IRQ2_BASE 32 +#define INTERRUPT_HOSTPORT (ARM_IRQ2_BASE + 0) +#define INTERRUPT_VIDEOSCALER (ARM_IRQ2_BASE + 1) +#define INTERRUPT_CCP2TX (ARM_IRQ2_BASE + 2) +#define INTERRUPT_SDC (ARM_IRQ2_BASE + 3) +#define INTERRUPT_DSI0 (ARM_IRQ2_BASE + 4) +#define INTERRUPT_AVE (ARM_IRQ2_BASE + 5) +#define INTERRUPT_CAM0 (ARM_IRQ2_BASE + 6) +#define INTERRUPT_CAM1 (ARM_IRQ2_BASE + 7) +#define INTERRUPT_HDMI0 
(ARM_IRQ2_BASE + 8) +#define INTERRUPT_HDMI1 (ARM_IRQ2_BASE + 9) +#define INTERRUPT_PIXELVALVE1 (ARM_IRQ2_BASE + 10) +#define INTERRUPT_I2CSPISLV (ARM_IRQ2_BASE + 11) +#define INTERRUPT_DSI1 (ARM_IRQ2_BASE + 12) +#define INTERRUPT_PWA0 (ARM_IRQ2_BASE + 13) +#define INTERRUPT_PWA1 (ARM_IRQ2_BASE + 14) +#define INTERRUPT_CPR (ARM_IRQ2_BASE + 15) +#define INTERRUPT_SMI (ARM_IRQ2_BASE + 16) +#define INTERRUPT_GPIO0 (ARM_IRQ2_BASE + 17) +#define INTERRUPT_GPIO1 (ARM_IRQ2_BASE + 18) +#define INTERRUPT_GPIO2 (ARM_IRQ2_BASE + 19) +#define INTERRUPT_GPIO3 (ARM_IRQ2_BASE + 20) +#define INTERRUPT_VC_I2C (ARM_IRQ2_BASE + 21) +#define INTERRUPT_VC_SPI (ARM_IRQ2_BASE + 22) +#define INTERRUPT_VC_I2SPCM (ARM_IRQ2_BASE + 23) +#define INTERRUPT_VC_SDIO (ARM_IRQ2_BASE + 24) +#define INTERRUPT_VC_UART (ARM_IRQ2_BASE + 25) +#define INTERRUPT_SLIMBUS (ARM_IRQ2_BASE + 26) +#define INTERRUPT_VEC (ARM_IRQ2_BASE + 27) +#define INTERRUPT_CPG (ARM_IRQ2_BASE + 28) +#define INTERRUPT_RNG (ARM_IRQ2_BASE + 29) +#define INTERRUPT_VC_ARASANSDIO (ARM_IRQ2_BASE + 30) +#define INTERRUPT_AVSPMON (ARM_IRQ2_BASE + 31) + +/* ARM interrupts, which are mostly mirrored from bank 1 and 2 */ +#define ARM_IRQ0_BASE 64 +#define INTERRUPT_ARM_TIMER (ARM_IRQ0_BASE + 0) +#define INTERRUPT_ARM_MAILBOX (ARM_IRQ0_BASE + 1) +#define INTERRUPT_ARM_DOORBELL_0 (ARM_IRQ0_BASE + 2) +#define INTERRUPT_ARM_DOORBELL_1 (ARM_IRQ0_BASE + 3) +#define INTERRUPT_VPU0_HALTED (ARM_IRQ0_BASE + 4) +#define INTERRUPT_VPU1_HALTED (ARM_IRQ0_BASE + 5) +#define INTERRUPT_ILLEGAL_TYPE0 (ARM_IRQ0_BASE + 6) +#define INTERRUPT_ILLEGAL_TYPE1 (ARM_IRQ0_BASE + 7) +#define INTERRUPT_PENDING1 (ARM_IRQ0_BASE + 8) +#define INTERRUPT_PENDING2 (ARM_IRQ0_BASE + 9) +#define INTERRUPT_JPEG (ARM_IRQ0_BASE + 10) +#define INTERRUPT_USB (ARM_IRQ0_BASE + 11) +#define INTERRUPT_3D (ARM_IRQ0_BASE + 12) +#define INTERRUPT_DMA2 (ARM_IRQ0_BASE + 13) +#define INTERRUPT_DMA3 (ARM_IRQ0_BASE + 14) +#define INTERRUPT_I2C (ARM_IRQ0_BASE + 15) +#define INTERRUPT_SPI (ARM_IRQ0_BASE + 16) +#define INTERRUPT_I2SPCM (ARM_IRQ0_BASE + 17) +#define INTERRUPT_SDIO (ARM_IRQ0_BASE + 18) +#define INTERRUPT_UART (ARM_IRQ0_BASE + 19) +#define INTERRUPT_ARASANSDIO (ARM_IRQ0_BASE + 20) + +#define ARM_IRQ_LOCAL_BASE 96 +#define INTERRUPT_ARM_LOCAL_CNTPSIRQ (ARM_IRQ_LOCAL_BASE + 0) +#define INTERRUPT_ARM_LOCAL_CNTPNSIRQ (ARM_IRQ_LOCAL_BASE + 1) +#define INTERRUPT_ARM_LOCAL_CNTHPIRQ (ARM_IRQ_LOCAL_BASE + 2) +#define INTERRUPT_ARM_LOCAL_CNTVIRQ (ARM_IRQ_LOCAL_BASE + 3) +#define INTERRUPT_ARM_LOCAL_MAILBOX0 (ARM_IRQ_LOCAL_BASE + 4) +#define INTERRUPT_ARM_LOCAL_MAILBOX1 (ARM_IRQ_LOCAL_BASE + 5) +#define INTERRUPT_ARM_LOCAL_MAILBOX2 (ARM_IRQ_LOCAL_BASE + 6) +#define INTERRUPT_ARM_LOCAL_MAILBOX3 (ARM_IRQ_LOCAL_BASE + 7) +#define INTERRUPT_ARM_LOCAL_GPU_FAST (ARM_IRQ_LOCAL_BASE + 8) +#define INTERRUPT_ARM_LOCAL_PMU_FAST (ARM_IRQ_LOCAL_BASE + 9) +#define INTERRUPT_ARM_LOCAL_ZERO (ARM_IRQ_LOCAL_BASE + 10) +#define INTERRUPT_ARM_LOCAL_TIMER (ARM_IRQ_LOCAL_BASE + 11) + +#define MAX_INT INTERRUPT_ARM_LOCAL_TIMER + + diff --git a/platform/bcm2835/include/platform/gic.h b/platform/bcm2835/include/platform/gic.h new file mode 100644 index 00000000..d1e7c8ca --- /dev/null +++ b/platform/bcm2835/include/platform/gic.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to 
use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#pragma once + +#include + +#define GICBASE(n) (CPUPRIV_BASE_PHYS) +#define GICC_OFFSET (0x0100) +#define GICD_OFFSET (0x1000) + + diff --git a/platform/bcm2835/intc.c b/platform/bcm2835/intc.c new file mode 100644 index 00000000..97688ab0 --- /dev/null +++ b/platform/bcm2835/intc.c @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LOCAL_TRACE 0 + +/* global interrupt controller */ +#define INTC_PEND0 (ARMCTRL_INTC_BASE + 0x0) +#define INTC_PEND1 (ARMCTRL_INTC_BASE + 0x4) +#define INTC_PEND2 (ARMCTRL_INTC_BASE + 0x8) +#define INTC_FAST (ARMCTRL_INTC_BASE + 0xc) +#define INTC_ENABLE1 (ARMCTRL_INTC_BASE + 0x10) +#define INTC_ENABLE2 (ARMCTRL_INTC_BASE + 0x14) +#define INTC_ENABLE3 (ARMCTRL_INTC_BASE + 0x18) +#define INTC_DISABLE1 (ARMCTRL_INTC_BASE + 0x1c) +#define INTC_DISABLE2 (ARMCTRL_INTC_BASE + 0x20) +#define INTC_DISABLE3 (ARMCTRL_INTC_BASE + 0x24) + +/* per-cpu local interrupt controller bits. + * each is repeated 4 times, one per cpu. 
+ */ +#define INTC_LOCAL_TIMER_INT_CONTROL0 (ARM_LOCAL_BASE + 0x40) +#define INTC_LOCAL_TIMER_INT_CONTROL1 (ARM_LOCAL_BASE + 0x44) +#define INTC_LOCAL_TIMER_INT_CONTROL2 (ARM_LOCAL_BASE + 0x48) +#define INTC_LOCAL_TIMER_INT_CONTROL3 (ARM_LOCAL_BASE + 0x4c) + +#define INTC_LOCAL_MAILBOX_INT_CONTROL0 (ARM_LOCAL_BASE + 0x50) +#define INTC_LOCAL_MAILBOX_INT_CONTROL1 (ARM_LOCAL_BASE + 0x54) +#define INTC_LOCAL_MAILBOX_INT_CONTROL2 (ARM_LOCAL_BASE + 0x58) +#define INTC_LOCAL_MAILBOX_INT_CONTROL3 (ARM_LOCAL_BASE + 0x5c) + +#define INTC_LOCAL_IRQ_PEND0 (ARM_LOCAL_BASE + 0x60) +#define INTC_LOCAL_IRQ_PEND1 (ARM_LOCAL_BASE + 0x64) +#define INTC_LOCAL_IRQ_PEND2 (ARM_LOCAL_BASE + 0x68) +#define INTC_LOCAL_IRQ_PEND3 (ARM_LOCAL_BASE + 0x6c) + +#define INTC_LOCAL_FIQ_PEND0 (ARM_LOCAL_BASE + 0x70) +#define INTC_LOCAL_FIQ_PEND1 (ARM_LOCAL_BASE + 0x74) +#define INTC_LOCAL_FIQ_PEND2 (ARM_LOCAL_BASE + 0x78) +#define INTC_LOCAL_FIQ_PEND3 (ARM_LOCAL_BASE + 0x7c) + +#define INTC_LOCAL_MAILBOX0_SET0 (ARM_LOCAL_BASE + 0x80) +#define INTC_LOCAL_MAILBOX0_SET1 (ARM_LOCAL_BASE + 0x90) +#define INTC_LOCAL_MAILBOX0_SET2 (ARM_LOCAL_BASE + 0xa0) +#define INTC_LOCAL_MAILBOX0_SET3 (ARM_LOCAL_BASE + 0xb0) + +#define INTC_LOCAL_MAILBOX0_CLR0 (ARM_LOCAL_BASE + 0xc0) +#define INTC_LOCAL_MAILBOX0_CLR1 (ARM_LOCAL_BASE + 0xd0) +#define INTC_LOCAL_MAILBOX0_CLR2 (ARM_LOCAL_BASE + 0xe0) +#define INTC_LOCAL_MAILBOX0_CLR3 (ARM_LOCAL_BASE + 0xf0) + +struct int_handler_struct { + int_handler handler; + void *arg; +}; + +static struct int_handler_struct int_handler_table[MAX_INT]; + +static spin_lock_t lock = SPIN_LOCK_INITIAL_VALUE; + +status_t mask_interrupt(unsigned int vector) +{ + LTRACEF("vector %u\n", vector); + + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); + + if (vector >= INTERRUPT_ARM_LOCAL_CNTPSIRQ && vector <= INTERRUPT_ARM_LOCAL_CNTVIRQ) { + // local timer interrupts, mask on all cpus + for (uint cpu = 0; cpu < 4; cpu++) { + uintptr_t reg = INTC_LOCAL_TIMER_INT_CONTROL0 + cpu * 4; + + *REG32(reg) &= (1 << (vector - INTERRUPT_ARM_LOCAL_CNTPSIRQ)); + } + } else if (/* vector >= ARM_IRQ1_BASE && */ vector < (ARM_IRQ0_BASE + 32)) { + uintptr_t reg; + if (vector >= ARM_IRQ0_BASE) + reg = INTC_DISABLE3; + else if (vector >= ARM_IRQ2_BASE) + reg = INTC_DISABLE2; + else + reg = INTC_DISABLE1; + + *REG32(reg) = 1 << (vector % 32); + } else { + PANIC_UNIMPLEMENTED; + } + + spin_unlock_irqrestore(&lock, state); + + return NO_ERROR; +} + +status_t unmask_interrupt(unsigned int vector) +{ + LTRACEF("vector %u\n", vector); + + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); + + if (vector >= INTERRUPT_ARM_LOCAL_CNTPSIRQ && vector <= INTERRUPT_ARM_LOCAL_CNTVIRQ) { + // local timer interrupts, unmask for all cpus + for (uint cpu = 0; cpu < 4; cpu++) { + uintptr_t reg = INTC_LOCAL_TIMER_INT_CONTROL0 + cpu * 4; + + *REG32(reg) |= (1 << (vector - INTERRUPT_ARM_LOCAL_CNTPSIRQ)); + } + } else if (/* vector >= ARM_IRQ1_BASE && */ vector < (ARM_IRQ0_BASE + 32)) { + uintptr_t reg; + if (vector >= ARM_IRQ0_BASE) + reg = INTC_ENABLE3; + else if (vector >= ARM_IRQ2_BASE) + reg = INTC_ENABLE2; + else + reg = INTC_ENABLE1; + + *REG32(reg) = 1 << (vector % 32); + } else { + PANIC_UNIMPLEMENTED; + } + + spin_unlock_irqrestore(&lock, state); + + return NO_ERROR; +} + +void register_int_handler(unsigned int vector, int_handler handler, void *arg) +{ + if (vector >= MAX_INT) + panic("register_int_handler: vector out of range %d\n", vector); + + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); + + 
int_handler_table[vector].handler = handler; + int_handler_table[vector].arg = arg; + + spin_unlock_irqrestore(&lock, state); +} + +enum handler_return platform_irq(struct arm_iframe *frame) +{ + uint vector; + uint cpu = arch_curr_cpu_num(); + + THREAD_STATS_INC(interrupts); + + // see what kind of irq it is + uint32_t pend = *REG32(INTC_LOCAL_IRQ_PEND0 + cpu * 4); + + pend &= ~(1 << (INTERRUPT_ARM_LOCAL_GPU_FAST % 32)); // mask out gpu interrupts + + if (pend != 0) { + // it's a local interrupt + LTRACEF("local pend 0x%x\n", pend); + vector = ARM_IRQ_LOCAL_BASE + ctz(pend); + goto decoded; + } + + // XXX disable for now, since all of the interesting irqs are mirrored into the other banks +#if 0 + // look in bank 0 (ARM interrupts) + pend = *REG32(INTC_PEND0); + LTRACEF("pend0 0x%x\n", pend); + pend &= ~((1<<8)|(1<<9)); // mask out bit 8 and 9 + if (pend != 0) { + // it's a bank 0 interrupt + vector = ARM_IRQ0_BASE + ctz(pend); + goto decoded; + } +#endif + + // look for VC interrupt bank 1 + pend = *REG32(INTC_PEND1); + LTRACEF("pend1 0x%x\n", pend); + if (pend != 0) { + // it's a bank 1 interrupt + vector = ARM_IRQ1_BASE + ctz(pend); + goto decoded; + } + + // look for VC interrupt bank 2 + pend = *REG32(INTC_PEND2); + LTRACEF("pend2 0x%x\n", pend); + if (pend != 0) { + // it's a bank 2 interrupt + vector = ARM_IRQ2_BASE + ctz(pend); + goto decoded; + } + + vector = 0xffffffff; + +decoded: + LTRACEF("cpu %u vector %u\n", cpu, vector); + + // dispatch the irq + enum handler_return ret = INT_NO_RESCHEDULE; + +#if WITH_SMP + if (vector == INTERRUPT_ARM_LOCAL_MAILBOX0) { + pend = *REG32(INTC_LOCAL_MAILBOX0_CLR0 + 0x10 * cpu); + LTRACEF("mailbox0 clr 0x%x\n", pend); + + // ack it + *REG32(INTC_LOCAL_MAILBOX0_CLR0 + 0x10 * cpu) = pend; + + if (pend & (1 << MP_IPI_GENERIC)) { + PANIC_UNIMPLEMENTED; + } + if (pend & (1 << MP_IPI_RESCHEDULE)) { + ret = mp_mbx_reschedule_irq(); + } + } else +#endif // WITH_SMP + if (vector == 0xffffffff) { + ret = INT_NO_RESCHEDULE; + } else if (int_handler_table[vector].handler) { + ret = int_handler_table[vector].handler(int_handler_table[vector].arg); + } else { + panic("irq %u fired on cpu %u but no handler set!\n", vector, cpu); + } + + return ret; +} + +enum handler_return platform_fiq(struct arm_iframe *frame) +{ + PANIC_UNIMPLEMENTED; +} + +void bcm2835_send_ipi(uint irq, uint cpu_mask) +{ + LTRACEF("irq %u, cpu_mask 0x%x\n", irq, cpu_mask); + + for (uint i = 0; i < 4; i++) { + if (cpu_mask & (1< +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void intc_init(void); +extern void arm_reset(void); + +/* initial memory mappings. 
parsed by start.S */ +struct mmu_initial_mapping mmu_initial_mappings[] = { + /* 1GB of sdram space */ + { .phys = SDRAM_BASE, + .virt = KERNEL_BASE, + .size = MEMSIZE, + .flags = 0, + .name = "memory" }, + + /* peripherals */ + { .phys = BCM_PERIPH_BASE_PHYS, + .virt = BCM_PERIPH_BASE_VIRT, + .size = BCM_PERIPH_SIZE, + .flags = MMU_INITIAL_MAPPING_FLAG_DEVICE, + .name = "bcm peripherals" }, + + /* identity map to let the boot code run */ + { .phys = SDRAM_BASE, + .virt = SDRAM_BASE, + .size = 16*1024*1024, + .flags = MMU_INITIAL_MAPPING_TEMPORARY }, + + /* null entry to terminate the list */ + { 0 } +}; + +static pmm_arena_t arena = { + .name = "sdram", + .base = SDRAM_BASE, + .size = MEMSIZE, + .flags = PMM_ARENA_FLAG_KMAP, +}; + +void platform_init_mmu_mappings(void) +{ +} + +void platform_early_init(void) +{ + uart_init_early(); + + intc_init(); + + arm_generic_timer_init(INTERRUPT_ARM_LOCAL_CNTPNSIRQ, 1000000); + + /* add the main memory arena */ + pmm_add_arena(&arena); + +#if WITH_SMP + /* start the other cpus */ + uintptr_t sec_entry = (uintptr_t)&arm_reset; + sec_entry -= (KERNEL_BASE - MEMBASE); + for (uint i = 1; i <= 3; i++) { + *REG32(ARM_LOCAL_BASE + 0x8c + 0x10 * i) = sec_entry; + } +#endif +} + +void platform_init(void) +{ + uart_init(); +} + +#define DEBUG_UART 0 + +void platform_dputc(char c) +{ + if (c == '\n') + uart_putc(DEBUG_UART, '\r'); + uart_putc(DEBUG_UART, c); +} + +int platform_dgetc(char *c, bool wait) +{ + int ret = uart_getc(DEBUG_UART, wait); + if (ret == -1) + return -1; + *c = ret; + return 0; +} + diff --git a/platform/bcm2835/rules.mk b/platform/bcm2835/rules.mk new file mode 100644 index 00000000..3a4d44d4 --- /dev/null +++ b/platform/bcm2835/rules.mk @@ -0,0 +1,45 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +MODULE := $(LOCAL_DIR) + +ARCH := arm +ARM_CPU := cortex-a7 +WITH_SMP := 1 +SMP_CPU_ID_BITS := 8 + +MODULE_DEPS := \ + dev/timer/arm_generic \ + lib/cbuf + +#lib/bio \ + lib/cbuf \ + lib/minip \ + dev/interrupt/arm_gic \ + dev/timer/arm_cortex_a9 + +GLOBAL_INCLUDES += \ + $(LOCAL_DIR)/include + +MODULE_SRCS += \ + $(LOCAL_DIR)/intc.c \ + $(LOCAL_DIR)/platform.c \ + $(LOCAL_DIR)/uart.c \ + +# default to no sdram unless the target calls it out +ZYNQ_SDRAM_SIZE ?= 0 + +MEMBASE := 0x00000000 +MEMSIZE ?= 0x10000000 # 256MB +KERNEL_LOAD_OFFSET := 0x00008000 # loaded 32KB into physical + +# put our kernel at 0x80000000 +KERNEL_BASE = 0x80000000 + +GLOBAL_DEFINES += \ + MEMBASE=$(MEMBASE) \ + MEMSIZE=$(MEMSIZE) + +LINKER_SCRIPT += \ + $(BUILDDIR)/system-onesegment.ld + +include make/module.mk diff --git a/platform/bcm2835/uart.c b/platform/bcm2835/uart.c new file mode 100644 index 00000000..5c3d5ab6 --- /dev/null +++ b/platform/bcm2835/uart.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* TODO: extract this into a generic PL011 driver */ + +/* PL011 implementation */ +#define UART_DR (0x00) +#define UART_RSR (0x04) +#define UART_TFR (0x18) +#define UART_ILPR (0x20) +#define UART_IBRD (0x24) +#define UART_FBRD (0x28) +#define UART_LCRH (0x2c) +#define UART_CR (0x30) +#define UART_IFLS (0x34) +#define UART_IMSC (0x38) +#define UART_TRIS (0x3c) +#define UART_TMIS (0x40) +#define UART_ICR (0x44) +#define UART_DMACR (0x48) + +#define UARTREG(base, reg) (*REG32((base) + (reg))) + +#define RXBUF_SIZE 16 +#define NUM_UART 1 + +static cbuf_t uart_rx_buf[NUM_UART]; + +static inline uintptr_t uart_to_ptr(unsigned int n) +{ + switch (n) { + default: + case 0: return UART0_BASE; + } +} + +static enum handler_return uart_irq(void *arg) +{ + bool resched = false; + uint port = (uint)arg; + uintptr_t base = uart_to_ptr(port); + + /* read interrupt status and mask */ + uint32_t isr = UARTREG(base, UART_TMIS); + + if (isr & ((1<<6) | (1<<4))) { // rtmis, rxmis + UARTREG(base, UART_ICR) = (1<<4); + cbuf_t *rxbuf = &uart_rx_buf[port]; + + /* while fifo is not empty, read chars out of it */ + while ((UARTREG(base, UART_TFR) & (1<<4)) == 0) { + char c = UARTREG(base, UART_DR); + cbuf_write_char(rxbuf, c, false); + + resched = true; + } + } + + return resched ? 
INT_RESCHEDULE : INT_NO_RESCHEDULE; +} + +void uart_init(void) +{ + for (size_t i = 0; i < NUM_UART; i++) { + // create circular buffer to hold received data + cbuf_initialize(&uart_rx_buf[i], RXBUF_SIZE); + + // assumes interrupts are contiguous + register_int_handler(INTERRUPT_VC_UART + i, &uart_irq, (void *)i); + + // clear all irqs + UARTREG(uart_to_ptr(i), UART_ICR) = 0x3ff; + + // set fifo trigger level + UARTREG(uart_to_ptr(i), UART_IFLS) = 0; // 1/8 rxfifo, 1/8 txfifo + + // enable rx interrupt + UARTREG(uart_to_ptr(i), UART_IMSC) = (1<<6)|(1<<4); // rtim, rxim + + // enable receive + UARTREG(uart_to_ptr(i), UART_CR) |= (1<<9); // rxen + + // enable interrupt + unmask_interrupt(INTERRUPT_VC_UART + i); + } +} + +void uart_init_early(void) +{ + for (size_t i = 0; i < NUM_UART; i++) { + UARTREG(uart_to_ptr(i), UART_CR) = (1<<8)|(1<<0); // tx_enable, uarten + } +} + +int uart_putc(int port, char c) +{ + uintptr_t base = uart_to_ptr(port); + + /* spin while fifo is full */ + while (UARTREG(base, UART_TFR) & (1<<5)) + ; + UARTREG(base, UART_DR) = c; + + return 1; +} + +int uart_getc(int port, bool wait) +{ + cbuf_t *rxbuf = &uart_rx_buf[port]; + + char c; + if (cbuf_read_char(rxbuf, &c, wait) == 1) + return c; + + return -1; +} + +void uart_flush_tx(int port) +{ +} + +void uart_flush_rx(int port) +{ +} + +void uart_init_port(int port, uint baud) +{ +} + + diff --git a/platform/foundation-emu/interrupts.c b/platform/foundation-emu/interrupts.c index de0a6952..f1fb345e 100644 --- a/platform/foundation-emu/interrupts.c +++ b/platform/foundation-emu/interrupts.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -39,18 +40,20 @@ struct int_handler_struct { }; static struct int_handler_struct int_handler_table[MAX_INT]; +static spin_lock_t lock; void register_int_handler(unsigned int vector, int_handler handler, void *arg) { if (vector >= MAX_INT) panic("register_int_handler: vector out of range %d\n", vector); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); int_handler_table[vector].handler = handler; int_handler_table[vector].arg = arg; - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); } #define GICCPUREG(reg) (*REG32(GIC_PROC_BASE + (reg))) @@ -156,11 +159,12 @@ status_t mask_interrupt(unsigned int vector) if (vector >= MAX_INT) return -1; - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); gic_set_enable(vector, false); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -170,11 +174,12 @@ status_t unmask_interrupt(unsigned int vector) if (vector >= MAX_INT) return -1; - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); gic_set_enable(vector, true); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -191,8 +196,6 @@ enum handler_return platform_irq(struct arm64_iframe_short *frame) return INT_NO_RESCHEDULE; } - inc_critical_section(); - THREAD_STATS_INC(interrupts); KEVLOG_IRQ_ENTER(vector); @@ -212,8 +215,6 @@ enum handler_return platform_irq(struct arm64_iframe_short *frame) if (ret != INT_NO_RESCHEDULE) thread_preempt(); - dec_critical_section(); - return ret; } diff --git a/platform/foundation-emu/timer.c b/platform/foundation-emu/timer.c index 2fc98a50..301f9431 100644 --- a/platform/foundation-emu/timer.c +++ b/platform/foundation-emu/timer.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ 
-37,6 +38,7 @@ #define LOCAL_TRACE 0 static platform_timer_callback t_callback; +static spin_lock_t lock; /* armv8 specified timer */ @@ -64,7 +66,8 @@ status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg { LTRACEF("callback %p, arg %p, interval %lu\n", callback, arg, interval); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); t_callback = callback; @@ -83,7 +86,7 @@ status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg unmask_interrupt(INT_PPI_NSPHYS_TIMER); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -92,7 +95,8 @@ status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg { LTRACEF("callback %p, arg %p, interval %lu\n", callback, arg, interval); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); t_callback = callback; @@ -121,7 +125,7 @@ status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg unmask_interrupt(INT_PPI_NSPHYS_TIMER); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } diff --git a/platform/microblaze/intc.c b/platform/microblaze/intc.c index 0510400f..f70b9a70 100644 --- a/platform/microblaze/intc.c +++ b/platform/microblaze/intc.c @@ -45,6 +45,8 @@ #define INTC_REG(reg) (*REG32(INTC_BASEADDR + (reg) * 4)) +static spin_lock_t lock; + struct int_handler_struct { int_handler handler; void *arg; @@ -59,12 +61,13 @@ void register_int_handler(unsigned int vector, int_handler handler, void *arg) if (vector >= MAX_INT) return; - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); int_handler_table[vector].handler = handler; int_handler_table[vector].arg = arg; - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); } status_t mask_interrupt(unsigned int vector) diff --git a/platform/pc/interrupts.c b/platform/pc/interrupts.c index 8bd284fb..240ef283 100644 --- a/platform/pc/interrupts.c +++ b/platform/pc/interrupts.c @@ -28,9 +28,12 @@ #include #include #include +#include #include "platform_p.h" #include +static spin_lock_t lock; + void x86_gpf_handler(struct x86_iframe *frame); void x86_invop_handler(struct x86_iframe *frame); void x86_unhandled_exception(struct x86_iframe *frame); @@ -161,11 +164,12 @@ status_t mask_interrupt(unsigned int vector) // dprintf(DEBUG, "%s: vector %d\n", __PRETTY_FUNCTION__, vector); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); enable(vector, false); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -190,11 +194,12 @@ status_t unmask_interrupt(unsigned int vector) // dprintf("%s: vector %d\n", __PRETTY_FUNCTION__, vector); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); enable(vector, true); - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); return NO_ERROR; } @@ -246,12 +251,14 @@ void register_int_handler(unsigned int vector, int_handler handler, void *arg) if (vector >= INT_VECTORS) panic("register_int_handler: vector out of range %d\n", vector); - enter_critical_section(); + spin_lock_saved_state_t state; + spin_lock_irqsave(&lock, state); int_handler_table[vector].arg = arg; int_handler_table[vector].handler = handler; - exit_critical_section(); + spin_unlock_irqrestore(&lock, state); } +/* vim: set noexpandtab: */ diff --git a/platform/pc/pci.c b/platform/pc/pci.c index 
49ad95cd..9547513a 100644 --- a/platform/pc/pci.c +++ b/platform/pc/pci.c @@ -25,10 +25,12 @@ #include #include #include +#include #include #include static int last_bus = 0; +static spin_lock_t lock; typedef struct { uint16_t size; @@ -64,87 +66,95 @@ int (*g_pci_set_irq_hw_int)(const pci_location_t *state, uint8_t int_pin, uint8_ int pci_find_pci_device(pci_location_t *state, uint16_t device_id, uint16_t vendor_id, uint16_t index) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_find_pci_device(state, device_id, vendor_id, index); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_find_pci_class_code(pci_location_t *state, uint32_t class_code, uint16_t index) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_find_pci_class_code(state, class_code, index); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_read_config_byte(const pci_location_t *state, uint32_t reg, uint8_t *value) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_read_config_byte(state, reg, value); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_read_config_half(const pci_location_t *state, uint32_t reg, uint16_t *value) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_read_config_half(state, reg, value); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_read_config_word(const pci_location_t *state, uint32_t reg, uint32_t *value) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_read_config_word(state, reg, value); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_write_config_byte(const pci_location_t *state, uint32_t reg, uint8_t value) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_write_config_byte(state, reg, value); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_write_config_half(const pci_location_t *state, uint32_t reg, uint16_t value) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_write_config_half(state, reg, value); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } int pci_write_config_word(const pci_location_t *state, uint32_t reg, uint32_t value) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_write_config_word(state, reg, value); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } @@ -152,29 +162,31 @@ int pci_write_config_word(const pci_location_t *state, uint32_t reg, uint32_t va int pci_get_irq_routing_options(irq_routing_entry *entries, uint16_t *count, uint16_t *pci_irqs) { - enter_critical_section(); - irq_routing_options_t options; options.size = sizeof(irq_routing_entry) * *count; options.selector = DATA_SELECTOR; options.offset = entries; + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); + int res = g_pci_get_irq_routing_options(&options, pci_irqs); - *count = options.size / sizeof(irq_routing_entry); + 
spin_unlock_irqrestore(&lock, irqstate); - exit_critical_section(); + *count = options.size / sizeof(irq_routing_entry); return res; } int pci_set_irq_hw_int(const pci_location_t *state, uint8_t int_pin, uint8_t irq) { - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); int res = g_pci_set_irq_hw_int(state, int_pin, irq); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); return res; } diff --git a/platform/pc/timer.c b/platform/pc/timer.c index 22ec13ca..fafc7689 100644 --- a/platform/pc/timer.c +++ b/platform/pc/timer.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,7 @@ static uint64_t next_trigger_time; static uint64_t next_trigger_delta; static uint64_t timer_delta_time; -static uint64_t timer_current_time; +static volatile uint64_t timer_current_time; static uint16_t divisor; @@ -49,16 +50,12 @@ static uint16_t divisor; status_t platform_set_periodic_timer(platform_timer_callback callback, void *arg, lk_time_t interval) { - enter_critical_section(); - t_callback = callback; callback_arg = arg; next_trigger_delta = (uint64_t) interval << 32; next_trigger_time = timer_current_time + next_trigger_delta; - exit_critical_section(); - return NO_ERROR; } @@ -66,9 +63,8 @@ lk_time_t current_time(void) { lk_time_t time; - enter_critical_section(); + // XXX slight race time = (lk_time_t) (timer_current_time >> 32); - exit_critical_section(); return time; } @@ -77,9 +73,8 @@ lk_bigtime_t current_time_hires(void) { lk_bigtime_t time; - enter_critical_section(); + // XXX slight race time = (lk_bigtime_t) ((timer_current_time >> 22) * 1000) >> 10; - exit_critical_section(); return time; } @@ -164,3 +159,4 @@ void platform_halt_timers(void) mask_interrupt(INT_PIT); } +/* vim: set noexpandtab */ diff --git a/platform/power.c b/platform/power.c index 0978e183..ecb74457 100644 --- a/platform/power.c +++ b/platform/power.c @@ -35,7 +35,7 @@ __WEAK void platform_halt(platform_halt_action suggested_action, platform_halt_reason reason) { dprintf(ALWAYS, "HALT: spinning forever... 
(reason = %d)\n", reason); - enter_critical_section(); + arch_disable_ints(); for(;;); } diff --git a/platform/vexpress-a9/include/platform/gic.h b/platform/vexpress-a9/include/platform/gic.h index 4cbf42b8..8270dd5c 100644 --- a/platform/vexpress-a9/include/platform/gic.h +++ b/platform/vexpress-a9/include/platform/gic.h @@ -24,7 +24,7 @@ #include -#define GICBASE(n) (CPUPRIV_BASE_VIRT) +#define GICBASE(n) (CPUPRIV_BASE_PHYS) #define GICC_OFFSET (0x0100) #define GICD_OFFSET (0x1000) diff --git a/platform/vexpress-a9/platform.c b/platform/vexpress-a9/platform.c index 72d45bd6..8494082f 100644 --- a/platform/vexpress-a9/platform.c +++ b/platform/vexpress-a9/platform.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,7 @@ struct mmu_initial_mapping mmu_initial_mappings[] = { /* cortex-a9 private memory area */ { .phys = CPUPRIV_BASE_PHYS, - .virt = CPUPRIV_BASE_VIRT, + .virt = CPUPRIV_BASE_PHYS, // XXX move back to CPUPRIV_BASE_VIRT .size = CPUPRIV_SIZE, .flags = MMU_INITIAL_MAPPING_FLAG_DEVICE, .name = "cpu_priv"}, @@ -96,7 +97,7 @@ void platform_early_init(void) arm_gic_init(); /* initialize the timer block */ - arm_cortex_a9_timer_init(CPUPRIV_BASE_VIRT, 100000000); + arm_cortex_a9_timer_init(CPUPRIV_BASE_PHYS, 100000000); uart_init_early(); diff --git a/platform/vexpress-a9/rules.mk b/platform/vexpress-a9/rules.mk index e5fe7eb1..5971d669 100644 --- a/platform/vexpress-a9/rules.mk +++ b/platform/vexpress-a9/rules.mk @@ -4,6 +4,7 @@ MODULE := $(LOCAL_DIR) ARCH := arm ARM_CPU := cortex-a9-neon +WITH_SMP ?= 1 GLOBAL_INCLUDES += \ $(LOCAL_DIR)/include diff --git a/platform/vexpress-a9/secondary_boot.S b/platform/vexpress-a9/secondary_boot.S new file mode 100644 index 00000000..fc85ea73 --- /dev/null +++ b/platform/vexpress-a9/secondary_boot.S @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2014 Travis Geiselbrecht + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +.section .text + +/* true reset vector, to catch non boot cpus and hold them until later */ +FUNCTION(platform_reset) + mrc p15, 0, r12, c0, c0, 5 /* read MPIDR */ + + /* mask off the bottom 12 bits to test cluster number:cpu number */ + ubfx r12, r12, #0, #12 + + /* if we're cpu 0:0, continue to the usual arm reset vector */ + cmp r12, #0 + beq arm_reset + + /* all other cpus, trap and wait to be released */ +1: + wfe + ldr r12, =boot_cpu_lock + ldr r12, [r12] + cmp r12, #0 + bne 1b + + b arm_secondary_reset + +.ltorg + diff --git a/platform/zynq/debug.c b/platform/zynq/debug.c index fe7db058..84244160 100644 --- a/platform/zynq/debug.c +++ b/platform/zynq/debug.c @@ -65,13 +65,13 @@ void platform_halt(platform_halt_action suggested_action, case HALT_ACTION_SHUTDOWN: case HALT_ACTION_HALT: printf("HALT: spinning forever... (reason = %d)\n", reason); - enter_critical_section(); + arch_disable_ints(); for(;;) arch_idle(); break; case HALT_ACTION_REBOOT: printf("REBOOT\n"); - enter_critical_section(); + arch_disable_ints(); for (;;) { zynq_slcr_unlock(); SLCR->PSS_RST_CTRL = 1; diff --git a/platform/zynq/gem.c b/platform/zynq/gem.c index 0351535f..6df7d96e 100644 --- a/platform/zynq/gem.c +++ b/platform/zynq/gem.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,8 @@ #define GEM_RX_BUF_SIZE 1536 #define GEM_TX_BUF_SIZE 1536 +static spin_lock_t lock = SPIN_LOCK_INITIAL_VALUE; + struct gem_desc { uint32_t addr; uint32_t ctrl; @@ -132,7 +135,8 @@ void queue_pkts_in_tx_tbl(void) { pktbuf_t *p; unsigned int cur_pos; - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); if (list_is_empty(&gem.tx_queue)) { goto exit; } @@ -168,7 +172,7 @@ void queue_pkts_in_tx_tbl(void) { gem.regs->net_ctrl |= NET_CTRL_START_TX; exit: - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); } int gem_send_raw_pkt(struct pktbuf *p) @@ -186,10 +190,11 @@ int gem_send_raw_pkt(struct pktbuf *p) // XXX handle multi part buffers arch_clean_cache_range((vaddr_t)p->data, p->dlen); - enter_critical_section(); + spin_lock_saved_state_t irqstate; + spin_lock_irqsave(&lock, irqstate); list_add_tail(&gem.tx_queue, &p->list); queue_pkts_in_tx_tbl(); - exit_critical_section(); + spin_unlock_irqrestore(&lock, irqstate); err: return ret; diff --git a/platform/zynq/include/platform/zynq.h b/platform/zynq/include/platform/zynq.h index 2bbfd4b6..bd434333 100644 --- a/platform/zynq/include/platform/zynq.h +++ b/platform/zynq/include/platform/zynq.h @@ -29,6 +29,7 @@ #define SDRAM_BASE (0x00100000) #define SDRAM_APERTURE_SIZE (0x3ff00000) #define SRAM_BASE (0x0) +#define SRAM_BASE_HIGH (0xfffc0000) #define SRAM_APERTURE_SIZE (0x00040000) #define SRAM_SIZE (0x00040000) diff --git a/platform/zynq/platform.c b/platform/zynq/platform.c index 2ebd1f3b..91743cfa 100644 --- a/platform/zynq/platform.c +++ b/platform/zynq/platform.c @@ -278,6 +278,12 @@ struct mmu_initial_mapping mmu_initial_mappings[] = { .flags = MMU_INITIAL_MAPPING_FLAG_DEVICE, .name = "hw-fc000000" }, + /* sram high aperture */ + { .phys = 0xfff00000, + .virt = 0xfff00000, + .size = 0x00100000, + .flags = MMU_INITIAL_MAPPING_FLAG_DEVICE }, + /* identity map to let the boot code run */ { .phys = SRAM_BASE, .virt = SRAM_BASE, diff --git a/platform/zynq/rules.mk b/platform/zynq/rules.mk index e1629a37..9829fdde 100644 --- a/platform/zynq/rules.mk +++ b/platform/zynq/rules.mk @@ -4,6 +4,8 @@ MODULE := $(LOCAL_DIR) ARCH := arm ARM_CPU := 
cortex-a9-neon +WITH_SMP ?= 1 +SMP_MAX_CPUS := 2 MODULE_DEPS := \ lib/bio \ @@ -13,6 +15,7 @@ MODULE_DEPS := \ dev/interrupt/arm_gic \ dev/timer/arm_cortex_a9 + GLOBAL_INCLUDES += \ $(LOCAL_DIR)/include diff --git a/platform/zynq/start.S b/platform/zynq/start.S index 942ed09b..c97c5241 100644 --- a/platform/zynq/start.S +++ b/platform/zynq/start.S @@ -41,14 +41,15 @@ FUNCTION(platform_reset) str r11, [r12] dsb +#if !WITH_SMP 0: /* stay trapped here forever */ wfe b 0b - - ldr pc, foo -foo: - .word 0xa +#else + /* pass on through the reset vector, where the arm arch code will trap the cpu */ + b arm_reset +#endif DATA(__cpu_trapped) .word 0 diff --git a/project/pc-x86-test.mk b/project/pc-x86-test.mk index a9d1ce6d..20a28dc5 100644 --- a/project/pc-x86-test.mk +++ b/project/pc-x86-test.mk @@ -5,6 +5,7 @@ LOCAL_DIR := $(GET_LOCAL_DIR) ARCH := x86 TARGET := pc-x86 MODULES += \ + lib/debugcommands \ lib/libm \ app/tests \ app/shell \ diff --git a/project/rpi2-test.mk b/project/rpi2-test.mk new file mode 100644 index 00000000..c71a7e7c --- /dev/null +++ b/project/rpi2-test.mk @@ -0,0 +1,11 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +TARGET := rpi2 + +MODULES += \ + app/shell \ + app/stringtests \ + app/tests \ + lib/cksum \ + lib/debugcommands \ + diff --git a/project/vexpress-a9-test.mk b/project/vexpress-a9-test.mk index 8ad23041..1bc72820 100644 --- a/project/vexpress-a9-test.mk +++ b/project/vexpress-a9-test.mk @@ -9,7 +9,10 @@ MODULES += \ lib/bytes \ lib/cksum \ lib/debugcommands \ + lib/evlog \ lib/libm WITH_LINKER_GC := 0 +GLOBAL_DEFINES += WITH_KERNEL_EVLOG=1 + diff --git a/target/rpi2/rules.mk b/target/rpi2/rules.mk new file mode 100644 index 00000000..41ffb2ed --- /dev/null +++ b/target/rpi2/rules.mk @@ -0,0 +1,9 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +GLOBAL_INCLUDES += \ + $(LOCAL_DIR)/include + +PLATFORM := bcm2835 + +#include make/module.mk + diff --git a/top/init.c b/top/init.c index 0fd8a747..ed5e0970 100644 --- a/top/init.c +++ b/top/init.c @@ -26,8 +26,10 @@ * a init hook that is called at increasing init levels as the system is * initialized. 
*/ +#include #include +#include #include #include #include @@ -38,13 +40,13 @@ extern const struct lk_init_struct __lk_init[]; extern const struct lk_init_struct __lk_init_end[]; -static uint last_init_level = 0; - -int lk_init_level(uint level) +void lk_init_level(enum lk_init_flags required_flag, uint start_level, uint stop_level) { - LTRACEF("level %#x, last_init_level %#x\n", level, last_init_level); + LTRACEF("flags %#x, start_level %#x, stop_level %#x\n", + required_flag, start_level, stop_level); - uint last_called_level = last_init_level; + ASSERT(start_level > 0); + uint last_called_level = start_level - 1; const struct lk_init_struct *last = NULL; for (;;) { /* search for the lowest uncalled hook to call */ @@ -53,13 +55,15 @@ int lk_init_level(uint level) const struct lk_init_struct *found = NULL; bool seen_last = false; for (const struct lk_init_struct *ptr = __lk_init; ptr != __lk_init_end; ptr++) { - LTRACEF("looking at %p (%s) level %#x, seen_last %d\n", ptr, ptr->name, ptr->level, seen_last); + LTRACEF("looking at %p (%s) level %#x, flags %#x, seen_last %d\n", ptr, ptr->name, ptr->level, ptr->flags, seen_last); if (ptr == last) seen_last = true; /* reject the easy ones */ - if (ptr->level > level) + if (!(ptr->flags & required_flag)) + continue; + if (ptr->level > stop_level) continue; if (ptr->level < last_called_level) continue; @@ -67,7 +71,7 @@ int lk_init_level(uint level) continue; /* keep the lowest one we haven't called yet */ - if (ptr->level > last_init_level && ptr->level > last_called_level) { + if (ptr->level >= start_level && ptr->level > last_called_level) { found = ptr; continue; } @@ -86,16 +90,13 @@ int lk_init_level(uint level) break; #if TRACE_INIT - printf("INIT: calling hook %p (%s) at level %#x\n", found->hook, found->name, found->level); + printf("INIT: cpu %d, calling hook %p (%s) at level %#x, flags %#x\n", + arch_curr_cpu_num(), found->hook, found->name, found->level, found->flags); #endif found->hook(found->level); last_called_level = found->level; last = found; } - - last_init_level = level; - - return 0; } #if 0 diff --git a/top/main.c b/top/main.c index ae52c9f2..a31964d3 100644 --- a/top/main.c +++ b/top/main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Travis Geiselbrecht + * Copyright (c) 2013-2015 Travis Geiselbrecht * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files @@ -33,8 +33,10 @@ #include #include #include +#include #include #include +#include /* saved boot arguments from whoever loaded the system */ ulong lk_boot_args[4]; @@ -44,6 +46,11 @@ extern void *__ctor_end; extern int __bss_start; extern int _end; +#if WITH_SMP +static thread_t *secondary_bootstrap_threads[SMP_MAX_CPUS - 1]; +static uint secondary_bootstrap_thread_count; +#endif + static int bootstrap2(void *arg); extern void kernel_init(void); @@ -64,11 +71,8 @@ static void call_constructors(void) } /* called from arch code */ -void lk_main(ulong arg0, ulong arg1, ulong arg2, ulong arg3) __NO_RETURN __EXTERNALLY_VISIBLE; void lk_main(ulong arg0, ulong arg1, ulong arg2, ulong arg3) { - inc_critical_section(); - // save the boot args lk_boot_args[0] = arg0; lk_boot_args[1] = arg1; @@ -79,18 +83,22 @@ void lk_main(ulong arg0, ulong arg1, ulong arg2, ulong arg3) thread_init_early(); // early arch stuff - lk_init_level(LK_INIT_LEVEL_ARCH_EARLY - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_ARCH_EARLY - 1); arch_early_init(); // do any super early platform initialization - 
lk_init_level(LK_INIT_LEVEL_PLATFORM_EARLY - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_ARCH_EARLY, LK_INIT_LEVEL_PLATFORM_EARLY - 1); platform_early_init(); // do any super early target initialization - lk_init_level(LK_INIT_LEVEL_TARGET_EARLY - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_PLATFORM_EARLY, LK_INIT_LEVEL_TARGET_EARLY - 1); target_early_init(); - dprintf(INFO, "welcome to lk\n\n"); +#if WITH_SMP + dprintf(INFO, "\nwelcome to lk/MP\n\n"); +#else + dprintf(INFO, "\nwelcome to lk\n\n"); +#endif dprintf(INFO, "boot args 0x%lx 0x%lx 0x%lx 0x%lx\n", lk_boot_args[0], lk_boot_args[1], lk_boot_args[2], lk_boot_args[3]); @@ -100,18 +108,19 @@ void lk_main(ulong arg0, ulong arg1, ulong arg2, ulong arg3) // bring up the kernel heap dprintf(SPEW, "initializing heap\n"); - lk_init_level(LK_INIT_LEVEL_HEAP - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_TARGET_EARLY, LK_INIT_LEVEL_HEAP - 1); heap_init(); // initialize the kernel - lk_init_level(LK_INIT_LEVEL_KERNEL - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_HEAP, LK_INIT_LEVEL_KERNEL - 1); kernel_init(); - lk_init_level(LK_INIT_LEVEL_THREADING - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_KERNEL, LK_INIT_LEVEL_THREADING - 1); // create a thread to complete system initialization dprintf(SPEW, "creating bootstrap completion thread\n"); thread_t *t = thread_create("bootstrap2", &bootstrap2, NULL, DEFAULT_PRIORITY, DEFAULT_STACK_SIZE); + t->pinned_cpu = 0; thread_detach(t); thread_resume(t); @@ -123,26 +132,71 @@ static int bootstrap2(void *arg) { dprintf(SPEW, "top of bootstrap2()\n"); - lk_init_level(LK_INIT_LEVEL_ARCH - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_THREADING, LK_INIT_LEVEL_ARCH - 1); arch_init(); // initialize the rest of the platform dprintf(SPEW, "initializing platform\n"); - lk_init_level(LK_INIT_LEVEL_PLATFORM - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_ARCH, LK_INIT_LEVEL_PLATFORM - 1); platform_init(); // initialize the target dprintf(SPEW, "initializing target\n"); - lk_init_level(LK_INIT_LEVEL_TARGET - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_PLATFORM, LK_INIT_LEVEL_TARGET - 1); target_init(); dprintf(SPEW, "calling apps_init()\n"); - lk_init_level(LK_INIT_LEVEL_APPS - 1); + lk_primary_cpu_init_level(LK_INIT_LEVEL_TARGET, LK_INIT_LEVEL_APPS - 1); apps_init(); - lk_init_level(LK_INIT_LEVEL_LAST); + lk_primary_cpu_init_level(LK_INIT_LEVEL_APPS, LK_INIT_LEVEL_LAST); return 0; } +#if WITH_SMP +void lk_secondary_cpu_entry(void) +{ + uint cpu = arch_curr_cpu_num(); + + if (cpu > secondary_bootstrap_thread_count) { + dprintf(CRITICAL, "Invalid secondary cpu num %d, SMP_MAX_CPUS %d, secondary_bootstrap_thread_count %d\n", + cpu, SMP_MAX_CPUS, secondary_bootstrap_thread_count); + return; + } + + thread_secondary_cpu_init_early(); + thread_resume(secondary_bootstrap_threads[cpu - 1]); + + dprintf(SPEW, "entering scheduler on cpu %d\n", cpu); + thread_secondary_cpu_entry(); +} + +static int secondary_cpu_bootstrap2(void *arg) +{ + /* secondary cpu initialize from threading level up. 
0 to threading was handled in arch */ + lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_THREADING, LK_INIT_LEVEL_LAST); + + return 0; +} + +void lk_init_secondary_cpus(uint secondary_cpu_count) +{ + if (secondary_cpu_count >= SMP_MAX_CPUS) { + dprintf(CRITICAL, "Invalid secondary_cpu_count %d, SMP_MAX_CPUS %d\n", + secondary_cpu_count, SMP_MAX_CPUS); + secondary_cpu_count = SMP_MAX_CPUS - 1; + } + for (uint i = 0; i < secondary_cpu_count; i++) { + dprintf(SPEW, "creating bootstrap completion thread for cpu %d\n", i + 1); + thread_t *t = thread_create("secondarybootstrap2", + &secondary_cpu_bootstrap2, NULL, + DEFAULT_PRIORITY, DEFAULT_STACK_SIZE); + t->pinned_cpu = i + 1; + thread_detach(t); + secondary_bootstrap_threads[i] = t; + } + secondary_bootstrap_thread_count = secondary_cpu_count; +} +#endif // vim: noexpandtab:
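For orientation, here is a minimal sketch of how a platform is expected to drive the new secondary-cpu entry points added in top/main.c above. It is illustrative only and not part of the commit; it assumes a hypothetical platform with three secondary cpus and a made-up release_secondary_cpus() helper standing in for the platform-specific release mechanism, while lk_init_secondary_cpus(), lk_secondary_cpu_entry() and the pinned "secondarybootstrap2" threads are the pieces introduced by this change.

/* illustrative sketch only -- not part of this change */

/* prototypes as introduced in top/main.c above (uint is LK's unsigned int typedef) */
extern void lk_init_secondary_cpus(uint secondary_cpu_count);
extern void lk_secondary_cpu_entry(void);

/* hypothetical platform-specific helper that releases the held secondary cpus */
extern void release_secondary_cpus(void);

/* boot cpu, e.g. from platform_early_init(): create the pinned
 * "secondarybootstrap2" threads first, then let the other cpus run */
void example_start_secondaries(void)
{
    lk_init_secondary_cpus(3);   /* 3 secondaries, i.e. 4 cpus total */
    release_secondary_cpus();    /* write the entry point / send the wake event, platform specific */
}

/* each secondary cpu, from its arch/platform startup path once per-cpu early
 * init is done: hands control to LK, which resumes that cpu's bootstrap
 * thread, runs the LK_INIT_FLAG_SECONDARY_CPUS hooks and enters the
 * scheduler, so it does not normally return */
void example_secondary_startup(void)
{
    lk_secondary_cpu_entry();
}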