[arch][x86][fpu] spiff up the fpu code to detect things more cleanly

Doesn't really change the functionality of the code except that it will
try to work with fewer FPU features present.
Travis Geiselbrecht
2022-07-24 19:56:24 -07:00
parent c429ffcc7a
commit 7505c3087a
7 changed files with 104 additions and 52 deletions


@@ -49,16 +49,21 @@ void arch_early_init(void) {
     set_global_desc(TSS_SELECTOR, &system_tss, sizeof(system_tss), 1, 0, 0, SEG_TYPE_TSS, 0, 0);
     x86_ltr(TSS_SELECTOR);
 
-    x86_feature_init();
+    x86_feature_early_init();
     x86_mmu_early_init();
+
+#if X86_WITH_FPU
+    x86_fpu_early_init();
+#endif
 }
 
 void arch_init(void) {
+    x86_feature_init();
+
     x86_mmu_init();
 
-#ifdef X86_WITH_FPU
-    fpu_init();
+#if X86_WITH_FPU
+    x86_fpu_init();
 #endif
 }


@@ -183,6 +183,10 @@ static void x86_cpu_detect(void) {
         dprintf(SPEW, "x86: max cpuid leaf %#x ext %#x hyp %#x\n",
                 max_cpuid_leaf, max_cpuid_leaf_ext, max_cpuid_leaf_hyp);
     }
+}
+
+void x86_feature_early_init(void) {
+    x86_cpu_detect();
 
     // cache a copy of the cpuid bits
     if (has_cpuid) {
@@ -208,10 +212,25 @@ static void x86_cpu_detect(void) {
         }
     }
 }
 
-void x86_feature_early_init(void) {
-    x86_cpu_detect();
-}
-
 void x86_feature_init(void) {
 }
+
+bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num num, uint32_t subleaf, struct x86_cpuid_leaf* leaf) {
+    // make sure the leaf number is within the detected range of the three blocks we know about
+    if (num < X86_CPUID_HYP_BASE) {
+        if (num > max_cpuid_leaf) {
+            return false;
+        }
+    } else if (num < X86_CPUID_EXT_BASE) {
+        if (num > max_cpuid_leaf_hyp) {
+            return false;
+        }
+    } else if (num > max_cpuid_leaf_ext) {
+        return false;
+    }
+
+    cpuid_c((uint32_t)num, subleaf, &leaf->a, &leaf->b, &leaf->c, &leaf->d);
+    return true;
+}
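
The range check above means a caller can probe an optional leaf and simply get false back on CPUs that predate it, rather than reading garbage. A minimal usage sketch (illustrative, not part of this commit), reusing X86_CPUID_XSAVE and the a/b/c/d leaf fields the same way the fpu.c hunk below does:

    #include <arch/x86/feature.h>
    #include <lk/debug.h>

    // Probe CPUID leaf 0xD subleaf 0. Per the SDM, EBX holds the XSAVE area
    // size for the currently enabled state components and ECX the maximum
    // size any supported component set would need.
    static void print_xsave_sizes(void) {
        struct x86_cpuid_leaf leaf;
        if (!x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
            dprintf(SPEW, "xsave leaf not present\n");
            return;
        }
        dprintf(SPEW, "xsave area: enabled %u bytes, max %u bytes\n", leaf.b, leaf.c);
    }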


@@ -22,7 +22,9 @@
  */
 
 #include <lk/trace.h>
+#include <lk/bits.h>
 #include <arch/x86.h>
+#include <arch/x86/feature.h>
 #include <arch/fpu.h>
 #include <string.h>
 #include <kernel/thread.h>
@@ -35,66 +37,81 @@
-/* CPUID EAX = 1 return values */
-#define ECX_SSE3 (0x00000001 << 0)
-#define ECX_SSSE3 (0x00000001 << 9)
-#define ECX_SSE4_1 (0x00000001 << 19)
-#define ECX_SSE4_2 (0x00000001 << 20)
-#define EDX_FXSR (0x00000001 << 24)
-#define EDX_SSE (0x00000001 << 25)
-#define EDX_SSE2 (0x00000001 << 26)
-#define EDX_FPU (0x00000001 << 0)
-
-#define FPU_CAP(ecx, edx) ((edx & EDX_FPU) != 0)
-#define SSE_CAP(ecx, edx) ( \
-    ((ecx & (ECX_SSE3 | ECX_SSSE3 | ECX_SSE4_1 | ECX_SSE4_2)) != 0) || \
-    ((edx & (EDX_SSE | EDX_SSE2)) != 0) \
-    )
-#define FXSAVE_CAP(ecx, edx) ((edx & EDX_FXSR) != 0)
-
 static int fp_supported;
 static thread_t *fp_owner;
 
 /* FXSAVE area comprises 512 bytes starting with 16-byte aligned */
 static uint8_t __ALIGNED(16) fpu_init_states[512]= {0};
 
-static void get_cpu_cap(uint32_t *ecx, uint32_t *edx) {
-    uint32_t a, b;
-    cpuid(1, &a, &b, ecx, edx);
-}
-
-void fpu_init(void) {
-    uint32_t ecx = 0, edx = 0;
-    uint16_t fcw;
-    uint32_t mxcsr;
-
-#ifdef ARCH_X86_64
-    uint64_t x;
-#else
-    uint32_t x;
-#endif
-
+void x86_fpu_early_init(void) {
     fp_supported = 0;
     fp_owner = NULL;
 
-    get_cpu_cap(&ecx, &edx);
-    if (!FPU_CAP(ecx, edx) || !SSE_CAP(ecx, edx) || !FXSAVE_CAP(ecx, edx))
+    // test a bunch of fpu features
+    const bool with_fpu = x86_feature_test(X86_FEATURE_FPU);
+    const bool with_sse = x86_feature_test(X86_FEATURE_SSE);
+    const bool with_sse2 = x86_feature_test(X86_FEATURE_SSE2);
+    const bool with_sse3 = x86_feature_test(X86_FEATURE_SSE3);
+    const bool with_ssse3 = x86_feature_test(X86_FEATURE_SSSE3);
+    const bool with_sse4_1 = x86_feature_test(X86_FEATURE_SSE4_1);
+    const bool with_sse4_2 = x86_feature_test(X86_FEATURE_SSE4_2);
+    const bool with_sse4a = x86_feature_test(X86_FEATURE_SSE4A);
+    const bool with_fxsave = x86_feature_test(X86_FEATURE_FXSR);
+    const bool with_xsave = x86_feature_test(X86_FEATURE_XSAVE);
+
+    dprintf(SPEW, "X86: fpu %u sse %u sse2 %u sse3 %u ssse3 %u sse4.1 %u sse4.2 %u sse4a %u\n",
+            with_fpu, with_sse, with_sse2, with_sse3, with_ssse3, with_sse4_1, with_sse4_2, with_sse4a);
+    dprintf(SPEW, "X86: fxsave %u xsave %u\n", with_fxsave, with_xsave);
+
+    // these are the mandatory ones to continue (for the moment)
+    if (!with_fpu || !with_sse || !with_fxsave) {
+        dprintf(SPEW, "no usable FPU detected (requires SSE + FXSAVE)\n");
         return;
+    }
 
     fp_supported = 1;
+    dprintf(SPEW, "X86: SSE + FXSAVE detected\n");
 
-    /* No x87 emul, monitor co-processor */
-    x = x86_get_cr0();
+    // detect and print some xsave information
+    // NOTE: currently unused
+    bool with_xsaveopt = false;
+    bool with_xsavec = false;
+    bool with_xsaves = false;
+    if (with_xsave) {
+        dprintf(SPEW, "X86: XSAVE detected\n");
+
+        struct x86_cpuid_leaf leaf;
+        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
+            with_xsaveopt = BIT(leaf.a, 0);
+            with_xsavec = BIT(leaf.a, 1);
+            with_xsaves = BIT(leaf.a, 3);
+            dprintf(SPEW, "\txsaveopt %u xsavec %u xsaves %u\n", with_xsaveopt, with_xsavec, with_xsaves);
+            dprintf(SPEW, "\txsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
+        }
+        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) {
+            dprintf(SPEW, "\txsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
+        }
+        for (int i = 2; i < 64; i++) {
+            if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
+                if (leaf.a > 0) {
+                    dprintf(SPEW, "\txsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
+                    dprintf(SPEW, "\t\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
+                }
+            }
+        }
+    }
+
+    /* No x87 emul, monitor co-processor */
+    ulong x = x86_get_cr0();
     x &= ~X86_CR0_EM;
     x |= X86_CR0_NE;
     x |= X86_CR0_MP;
     x86_set_cr0(x);
 
     /* Init x87 */
+    uint16_t fcw;
     __asm__ __volatile__ ("finit");
     __asm__ __volatile__("fstcw %0" : "=m" (fcw));
 
 #if FPU_MASK_ALL_EXCEPTIONS
@@ -108,11 +125,12 @@ void fpu_init(void) {
     /* Init SSE */
     x = x86_get_cr4();
-    x |= X86_CR4_OSXMMEXPT;
-    x |= X86_CR4_OSFXSR;
-    x &= ~X86_CR4_OSXSAVE;
+    x |= X86_CR4_OSXMMEXPT; // supports exceptions
+    x |= X86_CR4_OSFXSR;    // supports fxsave
+    x &= ~X86_CR4_OSXSAVE;  // no support for xsave (currently)
     x86_set_cr4(x);
 
+    uint32_t mxcsr;
     __asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
 
 #if FPU_MASK_ALL_EXCEPTIONS
     /* mask all exceptions */
@@ -127,9 +145,13 @@ void fpu_init(void) {
     __asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));
     x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
     return;
 }
 
+void x86_fpu_init(void) {
+}
+
 void fpu_init_thread_states(thread_t *t) {
     t->arch.fpu_states = (vaddr_t *)ROUNDUP(((vaddr_t)t->arch.fpu_buffer), 16);
     memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
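
Note the early-init path still ends by setting CR0.TS, so the first floating-point instruction a thread executes raises #NM and the FPU is handed over lazily. A sketch of that pattern (an assumed shape for illustration only; the real logic lives in the unchanged fpu_dev_na_handler() and fpu_context_switch() in this file):

    #include <kernel/thread.h>

    // Lazy handoff on #NM: clear TS, spill the previous owner's registers,
    // restore ours. fp_owner_sketch stands in for the fp_owner static above.
    static thread_t *fp_owner_sketch;

    static void dev_na_sketch(void) {
        __asm__ __volatile__("clts"); // FPU instructions stop trapping
        thread_t *self = get_current_thread();
        if (fp_owner_sketch == self)
            return; // registers already hold this thread's state
        if (fp_owner_sketch)
            __asm__ __volatile__("fxsave (%0)" :: "r"(fp_owner_sketch->arch.fpu_states) : "memory");
        __asm__ __volatile__("fxrstor (%0)" :: "r"(self->arch.fpu_states));
        fp_owner_sketch = self;
    }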


@@ -24,7 +24,8 @@
 #include <kernel/thread.h>
 
-void fpu_init(void);
+void x86_fpu_early_init(void);
+void x86_fpu_init(void);
 void fpu_init_thread_states(thread_t *t);
 void fpu_context_switch(thread_t *old_thread, thread_t *new_thread);
 void fpu_dev_na_handler(void);


@@ -118,6 +118,10 @@ extern uint32_t max_cpuid_leaf;
 extern uint32_t max_cpuid_leaf_hyp;
 extern uint32_t max_cpuid_leaf_ext;
 
+/* Retrieve the specified subleaf. This function is not cached.
+ * Returns false if leaf num is invalid */
+bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num, uint32_t subleaf, struct x86_cpuid_leaf *);
+
 static inline const struct x86_cpuid_leaf* x86_get_cpuid_leaf(enum x86_cpuid_leaf_num leaf) {
     if (leaf < X86_CPUID_HYP_BASE) {
         if (unlikely(leaf > max_cpuid_leaf))
@@ -232,6 +236,7 @@ static inline bool x86_feature_test(struct x86_cpuid_bit bit) {
 #define X86_FEATURE_KVM_PV_CLOCK_STABLE X86_CPUID_BIT(0x40000001, 0, 24)
 #define X86_FEATURE_AMD_TOPO X86_CPUID_BIT(0x80000001, 2, 22)
+#define X86_FEATURE_SSE4A X86_CPUID_BIT(0x80000001, 2, 6)
 #define X86_FEATURE_SYSCALL X86_CPUID_BIT(0x80000001, 3, 11)
 #define X86_FEATURE_NX X86_CPUID_BIT(0x80000001, 3, 20)
 #define X86_FEATURE_HUGE_PAGE X86_CPUID_BIT(0x80000001, 3, 26)
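
For reference, the X86_CPUID_BIT() triplet is (leaf, register word, bit), where words 0-3 select EAX/EBX/ECX/EDX; SSE4A is reported in CPUID.80000001h ECX bit 6, hence word 2, matching the neighboring AMD_TOPO entry. With the bits cached at early init, a consumer is a one-line test, e.g. (illustrative only, not part of this commit):

    // Checks the cached copy of CPUID.80000001h ECX bit 6; no CPUID executed.
    if (x86_feature_test(X86_FEATURE_SSE4A)) {
        dprintf(SPEW, "X86: sse4a available\n");
    }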


@@ -118,7 +118,7 @@ ARCH_COMPILEFLAGS += -march=i686
 ARCH_OPTFLAGS := -O2
 GLOBAL_DEFINES += X86_LEGACY=0
 else ifeq ($(SUBARCH),x86-64)
-ARCH_COMPILEFLAGS += -march=x86-64
+ARCH_COMPILEFLAGS += -march=x86-64-v2
 ARCH_OPTFLAGS := -O2
 GLOBAL_DEFINES += X86_LEGACY=0
 endif
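
Raising the 64-bit baseline to x86-64-v2 lets the compiler assume SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, and CMPXCHG16B, consistent with the FPU code above treating SSE + FXSAVE as mandatory. A host-side sanity sketch (not part of the commit; uses the GCC/Clang CPU builtins) for checking whether a machine meets that baseline:

    #include <stdio.h>

    int main(void) {
        __builtin_cpu_init();
        // Features (beyond plain x86-64) that -march=x86-64-v2 compiles against.
        int ok = __builtin_cpu_supports("sse3") &&
                 __builtin_cpu_supports("ssse3") &&
                 __builtin_cpu_supports("sse4.1") &&
                 __builtin_cpu_supports("sse4.2") &&
                 __builtin_cpu_supports("popcnt");
        printf("x86-64-v2 baseline: %s\n", ok ? "met" : "not met");
        return !ok;
    }

The qemu script change below pairs with this: the default qemu64/qemu32 CPU models lack some of these features, while -cpu max exposes everything the emulator supports.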


@@ -60,7 +60,7 @@ shift $((OPTIND-1))
 if (( $DO_64BIT )); then
     QEMU="qemu-system-x86_64"
     PROJECT="pc-x86-64-test"
-    CPU=qemu64
+    CPU=max
     MACHINE=q35
 elif (( $DO_LEGACY )); then
     QEMU="qemu-system-i386"
@@ -70,7 +70,7 @@ elif (( $DO_LEGACY )); then
 else
     QEMU="qemu-system-i386"
     PROJECT="pc-x86-test"
-    CPU=qemu32
+    CPU=max
     MACHINE=q35
 fi