From 7505c3087a66bbae121fb4a0bfb34c89816723de Mon Sep 17 00:00:00 2001
From: Travis Geiselbrecht
Date: Sun, 24 Jul 2022 19:56:24 -0700
Subject: [PATCH] [arch][x86][fpu] spiff up the fpu code to detect things more cleanly

Doesn't really change the functionality of the code except it'll try to
work with less fpu features present.
---
 arch/x86/arch.c                     |  11 ++-
 arch/x86/feature.c                  |  27 ++++++--
 arch/x86/fpu.c                      | 104 +++++++++++++++++-----------
 arch/x86/include/arch/fpu.h         |   3 +-
 arch/x86/include/arch/x86/feature.h |   5 ++
 arch/x86/rules.mk                   |   2 +-
 scripts/do-qemux86                  |   4 +-
 7 files changed, 104 insertions(+), 52 deletions(-)

diff --git a/arch/x86/arch.c b/arch/x86/arch.c
index 0bf6cc1e..f7717bc4 100644
--- a/arch/x86/arch.c
+++ b/arch/x86/arch.c
@@ -49,16 +49,21 @@ void arch_early_init(void) {
     set_global_desc(TSS_SELECTOR, &system_tss, sizeof(system_tss), 1, 0, 0, SEG_TYPE_TSS, 0, 0);
     x86_ltr(TSS_SELECTOR);
 
-    x86_feature_init();
+    x86_feature_early_init();
     x86_mmu_early_init();
+
+#if X86_WITH_FPU
+    x86_fpu_early_init();
+#endif
 }
 
 
 void arch_init(void) {
+    x86_feature_init();
     x86_mmu_init();
 
-#ifdef X86_WITH_FPU
-    fpu_init();
+#if X86_WITH_FPU
+    x86_fpu_init();
 #endif
 }
 
diff --git a/arch/x86/feature.c b/arch/x86/feature.c
index 0cf046bd..89fee471 100644
--- a/arch/x86/feature.c
+++ b/arch/x86/feature.c
@@ -183,6 +183,10 @@ static void x86_cpu_detect(void) {
         dprintf(SPEW, "x86: max cpuid leaf %#x ext %#x hyp %#x\n",
                 max_cpuid_leaf, max_cpuid_leaf_ext, max_cpuid_leaf_hyp);
     }
+}
+
+void x86_feature_early_init(void) {
+    x86_cpu_detect();
 
     // cache a copy of the cpuid bits
     if (has_cpuid) {
@@ -208,10 +212,25 @@ static void x86_cpu_detect(void) {
     }
 }
 
-void x86_feature_early_init(void) {
-    x86_cpu_detect();
-}
-
 void x86_feature_init(void) {
 }
 
+bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num num, uint32_t subleaf, struct x86_cpuid_leaf* leaf) {
+    // make sure the leaf number is within the detected range of the three blocks we know about
+    if (num < X86_CPUID_HYP_BASE) {
+        if (num > max_cpuid_leaf) {
+            return false;
+        }
+    } else if (num < X86_CPUID_EXT_BASE) {
+        if (num > max_cpuid_leaf_hyp) {
+            return false;
+        }
+    } else if (num > max_cpuid_leaf_ext) {
+        return false;
+    }
+
+    cpuid_c((uint32_t)num, subleaf, &leaf->a, &leaf->b, &leaf->c, &leaf->d);
+    return true;
+}
+
+
diff --git a/arch/x86/fpu.c b/arch/x86/fpu.c
index 67c3a149..de41ec13 100644
--- a/arch/x86/fpu.c
+++ b/arch/x86/fpu.c
@@ -22,7 +22,9 @@
  */
 
 #include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -35,66 +37,81 @@
 
 
 /* CPUID EAX = 1 return values */
-#define ECX_SSE3 (0x00000001 << 0)
-#define ECX_SSSE3 (0x00000001 << 9)
-#define ECX_SSE4_1 (0x00000001 << 19)
-#define ECX_SSE4_2 (0x00000001 << 20)
-#define EDX_FXSR (0x00000001 << 24)
-#define EDX_SSE (0x00000001 << 25)
-#define EDX_SSE2 (0x00000001 << 26)
-#define EDX_FPU (0x00000001 << 0)
-
-#define FPU_CAP(ecx, edx) ((edx & EDX_FPU) != 0)
-
-#define SSE_CAP(ecx, edx) ( \
-    ((ecx & (ECX_SSE3 | ECX_SSSE3 | ECX_SSE4_1 | ECX_SSE4_2)) != 0) || \
-    ((edx & (EDX_SSE | EDX_SSE2)) != 0) \
-    )
-
-#define FXSAVE_CAP(ecx, edx) ((edx & EDX_FXSR) != 0)
-
 static int fp_supported;
 static thread_t *fp_owner;
 
 /* FXSAVE area comprises 512 bytes starting with 16-byte aligned */
 static uint8_t __ALIGNED(16) fpu_init_states[512]= {0};
 
-static void get_cpu_cap(uint32_t *ecx, uint32_t *edx) {
-    uint32_t a, b;
-
-    cpuid(1, &a, &b, ecx, edx);
-}
-
-void fpu_init(void) {
-    uint32_t ecx = 0, edx = 0;
-    uint16_t fcw;
-    uint32_t mxcsr;
-
-#ifdef ARCH_X86_64
-    uint64_t x;
-#else
-    uint32_t x;
-#endif
+void x86_fpu_early_init(void) {
 
     fp_supported = 0;
     fp_owner = NULL;
 
-    get_cpu_cap(&ecx, &edx);
+    // test a bunch of fpu features
+    const bool with_fpu = x86_feature_test(X86_FEATURE_FPU);
+    const bool with_sse = x86_feature_test(X86_FEATURE_SSE);
+    const bool with_sse2 = x86_feature_test(X86_FEATURE_SSE2);
+    const bool with_sse3 = x86_feature_test(X86_FEATURE_SSE3);
+    const bool with_ssse3 = x86_feature_test(X86_FEATURE_SSSE3);
+    const bool with_sse4_1 = x86_feature_test(X86_FEATURE_SSE4_1);
+    const bool with_sse4_2 = x86_feature_test(X86_FEATURE_SSE4_2);
+    const bool with_sse4a = x86_feature_test(X86_FEATURE_SSE4A);
+    const bool with_fxsave = x86_feature_test(X86_FEATURE_FXSR);
+    const bool with_xsave = x86_feature_test(X86_FEATURE_XSAVE);
 
-    if (!FPU_CAP(ecx, edx) || !SSE_CAP(ecx, edx) || !FXSAVE_CAP(ecx, edx))
+    dprintf(SPEW, "X86: fpu %u sse %u sse2 %u sse3 %u ssse3 %u sse4.1 %u sse4.2 %u sse4a %u\n",
+            with_fpu, with_sse, with_sse2, with_sse3, with_ssse3, with_sse4_1, with_sse4_2, with_sse4a);
+    dprintf(SPEW, "X86: fxsave %u xsave %u\n", with_fxsave, with_xsave);
+
+    // these are the mandatory ones to continue (for the moment)
+    if (!with_fpu || !with_sse || !with_fxsave) {
+        dprintf(SPEW, "no usable FPU detected (requires SSE + FXSAVE)\n");
         return;
+    }
 
     fp_supported = 1;
 
-    /* No x87 emul, monitor co-processor */
+    dprintf(SPEW, "X86: SSE + FXSAVE detected\n");
 
-    x = x86_get_cr0();
+    // detect and print some xsave information
+    // NOTE: currently unused; the xsaveopt/xsavec/xsaves bits live in EAX of subleaf 1, not subleaf 0
+    bool with_xsaveopt = false;
+    bool with_xsavec = false;
+    bool with_xsaves = false;
+    if (with_xsave) {
+        dprintf(SPEW, "X86: XSAVE detected\n");
+        struct x86_cpuid_leaf leaf;
+        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) { // subleaf 0: supported state-component bitmap and area sizes
+            dprintf(SPEW, "\txsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
+        }
+        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) { // subleaf 1: EAX bit 0 XSAVEOPT, bit 1 XSAVEC, bit 3 XSAVES
+            with_xsaveopt = BIT(leaf.a, 0);
+            with_xsavec = BIT(leaf.a, 1);
+            with_xsaves = BIT(leaf.a, 3);
+            dprintf(SPEW, "\txsaveopt %u xsavec %u xsaves %u\n", with_xsaveopt, with_xsavec, with_xsaves);
+            dprintf(SPEW, "\txsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
+        }
+
+        for (int i = 2; i < 64; i++) {
+            if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
+                if (leaf.a > 0) {
+                    dprintf(SPEW, "\txsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
+                    dprintf(SPEW, "\t\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
+                }
+            }
+        }
+    }
+
+    /* No x87 emul, monitor co-processor */
+    ulong x = x86_get_cr0();
     x &= ~X86_CR0_EM;
     x |= X86_CR0_NE;
     x |= X86_CR0_MP;
     x86_set_cr0(x);
 
     /* Init x87 */
+    uint16_t fcw;
     __asm__ __volatile__ ("finit");
     __asm__ __volatile__("fstcw %0" : "=m" (fcw));
 #if FPU_MASK_ALL_EXCEPTIONS
@@ -108,11 +125,12 @@ void fpu_init(void) {
 
     /* Init SSE */
     x = x86_get_cr4();
-    x |= X86_CR4_OSXMMEXPT;
-    x |= X86_CR4_OSFXSR;
-    x &= ~X86_CR4_OSXSAVE;
+    x |= X86_CR4_OSXMMEXPT; // supports exceptions
+    x |= X86_CR4_OSFXSR; // supports fxsave
+    x &= ~X86_CR4_OSXSAVE; // no support for xsave (currently)
     x86_set_cr4(x);
 
+    uint32_t mxcsr;
     __asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
 #if FPU_MASK_ALL_EXCEPTIONS
     /* mask all exceptions */
@@ -127,9 +145,13 @@ void fpu_init(void) {
     __asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));
 
     x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
+
     return;
 }
 
+void x86_fpu_init(void) {
+}
+
 void fpu_init_thread_states(thread_t *t) {
     t->arch.fpu_states = (vaddr_t *)ROUNDUP(((vaddr_t)t->arch.fpu_buffer), 16);
     memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
diff --git a/arch/x86/include/arch/fpu.h b/arch/x86/include/arch/fpu.h
index 1a6258e0..59f62961 100644
--- a/arch/x86/include/arch/fpu.h
+++ b/arch/x86/include/arch/fpu.h
@@ -24,7 +24,8 @@
 
 #include
 
-void fpu_init(void);
+void x86_fpu_early_init(void);
+void x86_fpu_init(void);
 void fpu_init_thread_states(thread_t *t);
 void fpu_context_switch(thread_t *old_thread, thread_t *new_thread);
 void fpu_dev_na_handler(void);
diff --git a/arch/x86/include/arch/x86/feature.h b/arch/x86/include/arch/x86/feature.h
index a249923a..c5725227 100644
--- a/arch/x86/include/arch/x86/feature.h
+++ b/arch/x86/include/arch/x86/feature.h
@@ -118,6 +118,10 @@ extern uint32_t max_cpuid_leaf;
 extern uint32_t max_cpuid_leaf_hyp;
 extern uint32_t max_cpuid_leaf_ext;
 
+/* Retrieve the specified subleaf. This function is not cached.
+ * Returns false if leaf num is invalid */
+bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num, uint32_t subleaf, struct x86_cpuid_leaf *);
+
 static inline const struct x86_cpuid_leaf* x86_get_cpuid_leaf(enum x86_cpuid_leaf_num leaf) {
     if (leaf < X86_CPUID_HYP_BASE) {
         if (unlikely(leaf > max_cpuid_leaf))
@@ -232,6 +236,7 @@ static inline bool x86_feature_test(struct x86_cpuid_bit bit) {
 #define X86_FEATURE_KVM_PV_CLOCK_STABLE X86_CPUID_BIT(0x40000001, 0, 24)
 
 #define X86_FEATURE_AMD_TOPO X86_CPUID_BIT(0x80000001, 2, 22)
+#define X86_FEATURE_SSE4A X86_CPUID_BIT(0x80000001, 3, 6)
 #define X86_FEATURE_SYSCALL X86_CPUID_BIT(0x80000001, 3, 11)
 #define X86_FEATURE_NX X86_CPUID_BIT(0x80000001, 3, 20)
 #define X86_FEATURE_HUGE_PAGE X86_CPUID_BIT(0x80000001, 3, 26)
diff --git a/arch/x86/rules.mk b/arch/x86/rules.mk
index 320f7111..74e8fb2e 100644
--- a/arch/x86/rules.mk
+++ b/arch/x86/rules.mk
@@ -118,7 +118,7 @@ ARCH_COMPILEFLAGS += -march=i686
 ARCH_OPTFLAGS := -O2
 GLOBAL_DEFINES += X86_LEGACY=0
 else ifeq ($(SUBARCH),x86-64)
-ARCH_COMPILEFLAGS += -march=x86-64
+ARCH_COMPILEFLAGS += -march=x86-64-v2
 ARCH_OPTFLAGS := -O2
 GLOBAL_DEFINES += X86_LEGACY=0
 endif
diff --git a/scripts/do-qemux86 b/scripts/do-qemux86
index 7d41d2d4..4d216e28 100755
--- a/scripts/do-qemux86
+++ b/scripts/do-qemux86
@@ -60,7 +60,7 @@ shift $((OPTIND-1))
 if (( $DO_64BIT )); then
     QEMU="qemu-system-x86_64"
     PROJECT="pc-x86-64-test"
-    CPU=qemu64
+    CPU=max
     MACHINE=q35
 elif (( $DO_LEGACY )); then
     QEMU="qemu-system-i386"
@@ -70,7 +70,7 @@ elif (( $DO_LEGACY )); then
 else
     QEMU="qemu-system-i386"
     PROJECT="pc-x86-test"
-    CPU=max
+    CPU=max
     MACHINE=q35
 fi
 