[arch][x86][fpu] spiff up the fpu code to detect things more cleanly
Doesn't really change the functionality of the code, except that it will try to work when fewer FPU features are present.
This commit is contained in:
@@ -49,16 +49,21 @@ void arch_early_init(void) {
|
|||||||
set_global_desc(TSS_SELECTOR, &system_tss, sizeof(system_tss), 1, 0, 0, SEG_TYPE_TSS, 0, 0);
|
set_global_desc(TSS_SELECTOR, &system_tss, sizeof(system_tss), 1, 0, 0, SEG_TYPE_TSS, 0, 0);
|
||||||
x86_ltr(TSS_SELECTOR);
|
x86_ltr(TSS_SELECTOR);
|
||||||
|
|
||||||
x86_feature_init();
|
x86_feature_early_init();
|
||||||
|
|
||||||
x86_mmu_early_init();
|
x86_mmu_early_init();
|
||||||
|
|
||||||
|
#if X86_WITH_FPU
|
||||||
|
x86_fpu_early_init();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void arch_init(void) {
|
void arch_init(void) {
|
||||||
|
x86_feature_init();
|
||||||
x86_mmu_init();
|
x86_mmu_init();
|
||||||
|
|
||||||
#ifdef X86_WITH_FPU
|
#if X86_WITH_FPU
|
||||||
fpu_init();
|
x86_fpu_init();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -183,6 +183,10 @@ static void x86_cpu_detect(void) {
|
|||||||
dprintf(SPEW, "x86: max cpuid leaf %#x ext %#x hyp %#x\n",
|
dprintf(SPEW, "x86: max cpuid leaf %#x ext %#x hyp %#x\n",
|
||||||
max_cpuid_leaf, max_cpuid_leaf_ext, max_cpuid_leaf_hyp);
|
max_cpuid_leaf, max_cpuid_leaf_ext, max_cpuid_leaf_hyp);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void x86_feature_early_init(void) {
|
||||||
|
x86_cpu_detect();
|
||||||
|
|
||||||
// cache a copy of the cpuid bits
|
// cache a copy of the cpuid bits
|
||||||
if (has_cpuid) {
|
if (has_cpuid) {
|
||||||
@@ -208,10 +212,25 @@ static void x86_cpu_detect(void) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void x86_feature_early_init(void) {
|
|
||||||
x86_cpu_detect();
|
|
||||||
}
|
|
||||||
|
|
||||||
void x86_feature_init(void) {
|
void x86_feature_init(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num num, uint32_t subleaf, struct x86_cpuid_leaf* leaf) {
|
||||||
|
// make sure the leaf number is within the detected range of the three blocks we know about
|
||||||
|
if (num < X86_CPUID_HYP_BASE) {
|
||||||
|
if (num > max_cpuid_leaf) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else if (num < X86_CPUID_EXT_BASE) {
|
||||||
|
if (num > max_cpuid_leaf_hyp) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else if (num > max_cpuid_leaf_ext) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuid_c((uint32_t)num, subleaf, &leaf->a, &leaf->b, &leaf->c, &leaf->d);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
104
arch/x86/fpu.c
104
arch/x86/fpu.c
@@ -22,7 +22,9 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <lk/trace.h>
|
#include <lk/trace.h>
|
||||||
|
#include <lk/bits.h>
|
||||||
#include <arch/x86.h>
|
#include <arch/x86.h>
|
||||||
|
#include <arch/x86/feature.h>
|
||||||
#include <arch/fpu.h>
|
#include <arch/fpu.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <kernel/thread.h>
|
#include <kernel/thread.h>
|
||||||
@@ -35,66 +37,81 @@
|
|||||||
|
|
||||||
/* CPUID EAX = 1 return values */
|
/* CPUID EAX = 1 return values */
|
||||||
|
|
||||||
#define ECX_SSE3 (0x00000001 << 0)
|
|
||||||
#define ECX_SSSE3 (0x00000001 << 9)
|
|
||||||
#define ECX_SSE4_1 (0x00000001 << 19)
|
|
||||||
#define ECX_SSE4_2 (0x00000001 << 20)
|
|
||||||
#define EDX_FXSR (0x00000001 << 24)
|
|
||||||
#define EDX_SSE (0x00000001 << 25)
|
|
||||||
#define EDX_SSE2 (0x00000001 << 26)
|
|
||||||
#define EDX_FPU (0x00000001 << 0)
|
|
||||||
|
|
||||||
#define FPU_CAP(ecx, edx) ((edx & EDX_FPU) != 0)
|
|
||||||
|
|
||||||
#define SSE_CAP(ecx, edx) ( \
|
|
||||||
((ecx & (ECX_SSE3 | ECX_SSSE3 | ECX_SSE4_1 | ECX_SSE4_2)) != 0) || \
|
|
||||||
((edx & (EDX_SSE | EDX_SSE2)) != 0) \
|
|
||||||
)
|
|
||||||
|
|
||||||
#define FXSAVE_CAP(ecx, edx) ((edx & EDX_FXSR) != 0)
|
|
||||||
|
|
||||||
static int fp_supported;
|
static int fp_supported;
|
||||||
static thread_t *fp_owner;
|
static thread_t *fp_owner;
|
||||||
|
|
||||||
/* The FXSAVE area is 512 bytes and must start on a 16-byte aligned address */
|
/* The FXSAVE area is 512 bytes and must start on a 16-byte aligned address */
|
||||||
static uint8_t __ALIGNED(16) fpu_init_states[512]= {0};
|
static uint8_t __ALIGNED(16) fpu_init_states[512]= {0};
|
||||||
|
|
||||||
static void get_cpu_cap(uint32_t *ecx, uint32_t *edx) {
|
void x86_fpu_early_init(void) {
|
||||||
uint32_t a, b;
|
|
||||||
|
|
||||||
cpuid(1, &a, &b, ecx, edx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void fpu_init(void) {
|
|
||||||
uint32_t ecx = 0, edx = 0;
|
|
||||||
uint16_t fcw;
|
|
||||||
uint32_t mxcsr;
|
|
||||||
|
|
||||||
#ifdef ARCH_X86_64
|
|
||||||
uint64_t x;
|
|
||||||
#else
|
|
||||||
uint32_t x;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
fp_supported = 0;
|
fp_supported = 0;
|
||||||
fp_owner = NULL;
|
fp_owner = NULL;
|
||||||
|
|
||||||
get_cpu_cap(&ecx, &edx);
|
// test a bunch of fpu features
|
||||||
|
const bool with_fpu = x86_feature_test(X86_FEATURE_FPU);
|
||||||
|
const bool with_sse = x86_feature_test(X86_FEATURE_SSE);
|
||||||
|
const bool with_sse2 = x86_feature_test(X86_FEATURE_SSE2);
|
||||||
|
const bool with_sse3 = x86_feature_test(X86_FEATURE_SSE3);
|
||||||
|
const bool with_ssse3 = x86_feature_test(X86_FEATURE_SSSE3);
|
||||||
|
const bool with_sse4_1 = x86_feature_test(X86_FEATURE_SSE4_1);
|
||||||
|
const bool with_sse4_2 = x86_feature_test(X86_FEATURE_SSE4_2);
|
||||||
|
const bool with_sse4a = x86_feature_test(X86_FEATURE_SSE4A);
|
||||||
|
const bool with_fxsave = x86_feature_test(X86_FEATURE_FXSR);
|
||||||
|
const bool with_xsave = x86_feature_test(X86_FEATURE_XSAVE);
|
||||||
|
|
||||||
if (!FPU_CAP(ecx, edx) || !SSE_CAP(ecx, edx) || !FXSAVE_CAP(ecx, edx))
|
dprintf(SPEW, "X86: fpu %u sse %u sse2 %u sse3 %u ssse3 %u sse4.1 %u sse4.2 %u sse4a %u\n",
|
||||||
|
with_fpu, with_sse, with_sse2, with_sse3, with_ssse3, with_sse4_1, with_sse4_2, with_sse4a);
|
||||||
|
dprintf(SPEW, "X86: fxsave %u xsave %u\n", with_fxsave, with_xsave);
|
||||||
|
|
||||||
|
// these are the mandatory ones to continue (for the moment)
|
||||||
|
if (!with_fpu || !with_sse || !with_fxsave) {
|
||||||
|
dprintf(SPEW, "no usable FPU detected (requires SSE + FXSAVE)\n");
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
fp_supported = 1;
|
fp_supported = 1;
|
||||||
|
|
||||||
/* No x87 emul, monitor co-processor */
|
dprintf(SPEW, "X86: SSE + FXSAVE detected\n");
|
||||||
|
|
||||||
x = x86_get_cr0();
|
// detect and print some xsave information
|
||||||
|
// NOTE: currently unused
|
||||||
|
bool with_xsaveopt = false;
|
||||||
|
bool with_xsavec = false;
|
||||||
|
bool with_xsaves = false;
|
||||||
|
if (with_xsave) {
|
||||||
|
dprintf(SPEW, "X86: XSAVE detected\n");
|
||||||
|
struct x86_cpuid_leaf leaf;
|
||||||
|
if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
|
||||||
|
with_xsaveopt = BIT(leaf.a, 0);
|
||||||
|
with_xsavec = BIT(leaf.a, 1);
|
||||||
|
with_xsaves = BIT(leaf.a, 3);
|
||||||
|
dprintf(SPEW, "\txsaveopt %u xsavec %u xsaves %u\n", with_xsaveopt, with_xsavec, with_xsaves);
|
||||||
|
dprintf(SPEW, "\txsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
|
||||||
|
}
|
||||||
|
if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) {
|
||||||
|
dprintf(SPEW, "\txsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 2; i < 64; i++) {
|
||||||
|
if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
|
||||||
|
if (leaf.a > 0) {
|
||||||
|
dprintf(SPEW, "\txsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
|
||||||
|
dprintf(SPEW, "\t\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No x87 emul, monitor co-processor */
|
||||||
|
ulong x = x86_get_cr0();
|
||||||
x &= ~X86_CR0_EM;
|
x &= ~X86_CR0_EM;
|
||||||
x |= X86_CR0_NE;
|
x |= X86_CR0_NE;
|
||||||
x |= X86_CR0_MP;
|
x |= X86_CR0_MP;
|
||||||
x86_set_cr0(x);
|
x86_set_cr0(x);
|
||||||
|
|
||||||
/* Init x87 */
|
/* Init x87 */
|
||||||
|
uint16_t fcw;
|
||||||
__asm__ __volatile__ ("finit");
|
__asm__ __volatile__ ("finit");
|
||||||
__asm__ __volatile__("fstcw %0" : "=m" (fcw));
|
__asm__ __volatile__("fstcw %0" : "=m" (fcw));
|
||||||
#if FPU_MASK_ALL_EXCEPTIONS
|
#if FPU_MASK_ALL_EXCEPTIONS
|
||||||
@@ -108,11 +125,12 @@ void fpu_init(void) {
|
|||||||
|
|
||||||
/* Init SSE */
|
/* Init SSE */
|
||||||
x = x86_get_cr4();
|
x = x86_get_cr4();
|
||||||
x |= X86_CR4_OSXMMEXPT;
|
x |= X86_CR4_OSXMMEXPT; // supports exceptions
|
||||||
x |= X86_CR4_OSFXSR;
|
x |= X86_CR4_OSFXSR; // supports fxsave
|
||||||
x &= ~X86_CR4_OSXSAVE;
|
x &= ~X86_CR4_OSXSAVE; // no support for xsave (currently)
|
||||||
x86_set_cr4(x);
|
x86_set_cr4(x);
|
||||||
|
|
||||||
|
uint32_t mxcsr;
|
||||||
__asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
|
||||||
#if FPU_MASK_ALL_EXCEPTIONS
|
#if FPU_MASK_ALL_EXCEPTIONS
|
||||||
/* mask all exceptions */
|
/* mask all exceptions */
|
||||||
@@ -127,9 +145,13 @@ void fpu_init(void) {
|
|||||||
__asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));
|
__asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));
|
||||||
|
|
||||||
x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
|
x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void x86_fpu_init(void) {
|
||||||
|
}
|
||||||
|
|
||||||
void fpu_init_thread_states(thread_t *t) {
|
void fpu_init_thread_states(thread_t *t) {
|
||||||
t->arch.fpu_states = (vaddr_t *)ROUNDUP(((vaddr_t)t->arch.fpu_buffer), 16);
|
t->arch.fpu_states = (vaddr_t *)ROUNDUP(((vaddr_t)t->arch.fpu_buffer), 16);
|
||||||
memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
|
memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
|
||||||
|
|||||||
@@ -24,7 +24,8 @@
|
|||||||
|
|
||||||
#include <kernel/thread.h>
|
#include <kernel/thread.h>
|
||||||
|
|
||||||
void fpu_init(void);
|
void x86_fpu_early_init(void);
|
||||||
|
void x86_fpu_init(void);
|
||||||
void fpu_init_thread_states(thread_t *t);
|
void fpu_init_thread_states(thread_t *t);
|
||||||
void fpu_context_switch(thread_t *old_thread, thread_t *new_thread);
|
void fpu_context_switch(thread_t *old_thread, thread_t *new_thread);
|
||||||
void fpu_dev_na_handler(void);
|
void fpu_dev_na_handler(void);
|
||||||
|
|||||||
@@ -118,6 +118,10 @@ extern uint32_t max_cpuid_leaf;
|
|||||||
extern uint32_t max_cpuid_leaf_hyp;
|
extern uint32_t max_cpuid_leaf_hyp;
|
||||||
extern uint32_t max_cpuid_leaf_ext;
|
extern uint32_t max_cpuid_leaf_ext;
|
||||||
|
|
||||||
|
/* Retrieve the specified subleaf. This function is not cached.
|
||||||
|
* Returns false if leaf num is invalid */
|
||||||
|
bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num, uint32_t subleaf, struct x86_cpuid_leaf *);
|
||||||
|
|
||||||
static inline const struct x86_cpuid_leaf* x86_get_cpuid_leaf(enum x86_cpuid_leaf_num leaf) {
|
static inline const struct x86_cpuid_leaf* x86_get_cpuid_leaf(enum x86_cpuid_leaf_num leaf) {
|
||||||
if (leaf < X86_CPUID_HYP_BASE) {
|
if (leaf < X86_CPUID_HYP_BASE) {
|
||||||
if (unlikely(leaf > max_cpuid_leaf))
|
if (unlikely(leaf > max_cpuid_leaf))
|
||||||
@@ -232,6 +236,7 @@ static inline bool x86_feature_test(struct x86_cpuid_bit bit) {
|
|||||||
#define X86_FEATURE_KVM_PV_CLOCK_STABLE X86_CPUID_BIT(0x40000001, 0, 24)
|
#define X86_FEATURE_KVM_PV_CLOCK_STABLE X86_CPUID_BIT(0x40000001, 0, 24)
|
||||||
|
|
||||||
#define X86_FEATURE_AMD_TOPO X86_CPUID_BIT(0x80000001, 2, 22)
|
#define X86_FEATURE_AMD_TOPO X86_CPUID_BIT(0x80000001, 2, 22)
|
||||||
|
#define X86_FEATURE_SSE4A X86_CPUID_BIT(0x80000001, 3, 6)
|
||||||
#define X86_FEATURE_SYSCALL X86_CPUID_BIT(0x80000001, 3, 11)
|
#define X86_FEATURE_SYSCALL X86_CPUID_BIT(0x80000001, 3, 11)
|
||||||
#define X86_FEATURE_NX X86_CPUID_BIT(0x80000001, 3, 20)
|
#define X86_FEATURE_NX X86_CPUID_BIT(0x80000001, 3, 20)
|
||||||
#define X86_FEATURE_HUGE_PAGE X86_CPUID_BIT(0x80000001, 3, 26)
|
#define X86_FEATURE_HUGE_PAGE X86_CPUID_BIT(0x80000001, 3, 26)
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ ARCH_COMPILEFLAGS += -march=i686
|
|||||||
ARCH_OPTFLAGS := -O2
|
ARCH_OPTFLAGS := -O2
|
||||||
GLOBAL_DEFINES += X86_LEGACY=0
|
GLOBAL_DEFINES += X86_LEGACY=0
|
||||||
else ifeq ($(SUBARCH),x86-64)
|
else ifeq ($(SUBARCH),x86-64)
|
||||||
ARCH_COMPILEFLAGS += -march=x86-64
|
ARCH_COMPILEFLAGS += -march=x86-64-v2
|
||||||
ARCH_OPTFLAGS := -O2
|
ARCH_OPTFLAGS := -O2
|
||||||
GLOBAL_DEFINES += X86_LEGACY=0
|
GLOBAL_DEFINES += X86_LEGACY=0
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ shift $((OPTIND-1))
|
|||||||
if (( $DO_64BIT )); then
|
if (( $DO_64BIT )); then
|
||||||
QEMU="qemu-system-x86_64"
|
QEMU="qemu-system-x86_64"
|
||||||
PROJECT="pc-x86-64-test"
|
PROJECT="pc-x86-64-test"
|
||||||
CPU=qemu64
|
CPU=max
|
||||||
MACHINE=q35
|
MACHINE=q35
|
||||||
elif (( $DO_LEGACY )); then
|
elif (( $DO_LEGACY )); then
|
||||||
QEMU="qemu-system-i386"
|
QEMU="qemu-system-i386"
|
||||||
@@ -70,7 +70,7 @@ elif (( $DO_LEGACY )); then
|
|||||||
else
|
else
|
||||||
QEMU="qemu-system-i386"
|
QEMU="qemu-system-i386"
|
||||||
PROJECT="pc-x86-test"
|
PROJECT="pc-x86-test"
|
||||||
CPU=qemu32
|
CPU=max
|
||||||
MACHINE=q35
|
MACHINE=q35
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user