[arch][x86] move the KVM clock stuff to arch from platform

One can argue it more logically fits there, and eventually more KVM
features will arrive in this file that have nothing to do with timers.
This commit is contained in:
Travis Geiselbrecht
2025-04-06 19:28:08 -07:00
parent 71e795de19
commit 3013662345
2 changed files with 123 additions and 104 deletions

View File

@@ -5,3 +5,125 @@
* license that can be found in the LICENSE file or at
* https://opensource.org/licenses/MIT
*/
#include "arch/x86/pv.h"
#include <lk/err.h>
#include <lk/trace.h>
#include <assert.h>
#include <stdint.h>
#include <inttypes.h>
#include <arch/x86/feature.h>
#include <kernel/vm.h>
#define LOCAL_TRACE 0
#if !X86_LEGACY
// Deals with paravirtualized clock sources and event timers on the PC platform,
// specifically KVM.
// From https://www.kernel.org/doc/html/v6.14/virt/kvm/x86/msr.html
struct pvclock_wall_clock {
uint32_t version;
uint32_t sec;
uint32_t nsec;
} __PACKED;
static_assert(sizeof(struct pvclock_wall_clock) == 12, "pvclock_wall_clock size mismatch");
struct pvclock_vcpu_time_info {
uint32_t version;
uint32_t pad0;
uint64_t tsc_timestamp;
uint64_t system_time;
uint32_t tsc_to_system_mul;
int8_t tsc_shift;
uint8_t flags;
uint8_t pad[2];
} __PACKED;
static_assert(sizeof(struct pvclock_vcpu_time_info) == 32, "pvclock_vcpu_time_info size mismatch");
#define VCPU_TIME_INFO_FLAG_STABLE 0x1
static volatile struct pvclock_wall_clock *wall_clock;
static volatile struct pvclock_vcpu_time_info *vcpu_time_info;
status_t pvclock_init(void) {
uint32_t clocksource_msr_base = 0;
if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE)) {
clocksource_msr_base = 0x11;
}
if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE2)) {
clocksource_msr_base = 0x4b564d00;
}
if (!clocksource_msr_base) {
return ERR_NOT_SUPPORTED;
}
dprintf(INFO, "pv_clock: clocksource detected, msr base %#x\n", clocksource_msr_base);
// map a page of memory and point the KVM clocksource msrs at it
void *clocksource_page;
status_t err = vmm_alloc(vmm_get_kernel_aspace(), "lapic", PAGE_SIZE, &clocksource_page, 0, 0, 0);
if (err != NO_ERROR) {
printf("pv_clock: failed to allocate page for clocksource msrs\n");
return err;
}
paddr_t paddr;
arch_mmu_query(&vmm_get_kernel_aspace()->arch_aspace, (vaddr_t)clocksource_page, &paddr, NULL);
LTRACEF("clocksource page %p, paddr %#" PRIxPTR "\n", clocksource_page, paddr);
write_msr(clocksource_msr_base, paddr);
write_msr(clocksource_msr_base + 1, paddr + sizeof(struct pvclock_wall_clock) + 1);
wall_clock = (struct pvclock_wall_clock *)clocksource_page;
vcpu_time_info = (struct pvclock_vcpu_time_info *)(wall_clock + 1);
dprintf(SPEW, "pv_clock: wall clock version %u, sec %u, nsec %u\n",
wall_clock->version, wall_clock->sec, wall_clock->nsec);
dprintf(SPEW, "pv_clock: vcpu time info version %u, tsc timestamp %llu, system time %llu\n",
vcpu_time_info->version, vcpu_time_info->tsc_timestamp, vcpu_time_info->system_time);
dprintf(SPEW, "pv_clock: tsc to system mul %u, tsc shift %d, flags %u\n",
vcpu_time_info->tsc_to_system_mul, vcpu_time_info->tsc_shift, vcpu_time_info->flags);
return NO_ERROR;
}
uint64_t pvclock_get_tsc_freq(void) {
if (!vcpu_time_info) {
return 0;
}
uint32_t tsc_mul = 0;
int8_t tsc_shift = 0;
uint32_t pre_version = 0, post_version = 0;
do {
pre_version = vcpu_time_info->version;
if (pre_version % 2 != 0) {
asm("pause");
continue;
}
tsc_mul = vcpu_time_info->tsc_to_system_mul;
tsc_shift = vcpu_time_info->tsc_shift;
post_version = vcpu_time_info->version;
} while (pre_version != post_version);
uint64_t tsc_khz = 1000000ULL << 32;
tsc_khz = tsc_khz / tsc_mul;
if (tsc_shift > 0) {
tsc_khz >>= tsc_shift;
} else {
tsc_khz <<= -tsc_shift;
}
return tsc_khz * 1000;
}
bool pv_clock_is_stable(void) {
if (!vcpu_time_info) {
return false;
}
bool is_stable = (vcpu_time_info->flags & VCPU_TIME_INFO_FLAG_STABLE) ||
x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE_STABLE);
return is_stable;
}
#endif // !X86_LEGACY

View File

@@ -20,6 +20,7 @@
#include <arch/x86.h>
#include <arch/x86/feature.h>
#include <arch/x86/lapic.h>
#include <arch/x86/pv.h>
#include <inttypes.h>
#include <lib/fixed_point.h>
@@ -78,115 +79,11 @@ lk_bigtime_t current_time_hires(void) {
}
}
// From https://www.kernel.org/doc/html/v6.14/virt/kvm/x86/msr.html
struct pvclock_wall_clock {
uint32_t version;
uint32_t sec;
uint32_t nsec;
} __PACKED;
static_assert(sizeof(struct pvclock_wall_clock) == 12, "pvclock_wall_clock size mismatch");
struct pvclock_vcpu_time_info {
uint32_t version;
uint32_t pad0;
uint64_t tsc_timestamp;
uint64_t system_time;
uint32_t tsc_to_system_mul;
int8_t tsc_shift;
uint8_t flags;
uint8_t pad[2];
} __PACKED;
static_assert(sizeof(struct pvclock_vcpu_time_info) == 32, "pvclock_vcpu_time_info size mismatch");
static volatile struct pvclock_wall_clock *wall_clock;
static volatile struct pvclock_vcpu_time_info *vcpu_time_info;
status_t pvclock_init(void) {
uint32_t clocksource_msr_base = 0;
if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE)) {
clocksource_msr_base = 0x11;
}
if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE2)) {
clocksource_msr_base = 0x4b564d00;
}
if (!clocksource_msr_base) {
return ERR_NOT_SUPPORTED;
}
dprintf(INFO, "pv_clock: clocksource detected, msr base %#x\n", clocksource_msr_base);
// map a page of memory and point the KVM clocksource msrs at it
void *clocksource_page;
status_t err = vmm_alloc(vmm_get_kernel_aspace(), "lapic", PAGE_SIZE, &clocksource_page, 0, 0, 0);
if (err != NO_ERROR) {
printf("pv_clock: failed to allocate page for clocksource msrs\n");
return err;
}
paddr_t paddr;
arch_mmu_query(&vmm_get_kernel_aspace()->arch_aspace, (vaddr_t)clocksource_page, &paddr, NULL);
LTRACEF("clocksource page %p, paddr %#" PRIxPTR "\n", clocksource_page, paddr);
write_msr(clocksource_msr_base, paddr);
write_msr(clocksource_msr_base + 1, paddr + sizeof(struct pvclock_wall_clock) + 1);
wall_clock = (struct pvclock_wall_clock *)clocksource_page;
vcpu_time_info = (struct pvclock_vcpu_time_info *)(wall_clock + 1);
dprintf(SPEW, "pv_clock: wall clock version %u, sec %u, nsec %u\n",
wall_clock->version, wall_clock->sec, wall_clock->nsec);
dprintf(SPEW, "pv_clock: vcpu time info version %u, tsc timestamp %llu, system time %llu\n",
vcpu_time_info->version, vcpu_time_info->tsc_timestamp, vcpu_time_info->system_time);
dprintf(SPEW, "pv_clock: tsc to system mul %u, tsc shift %d, flags %u\n",
vcpu_time_info->tsc_to_system_mul, vcpu_time_info->tsc_shift, vcpu_time_info->flags);
return NO_ERROR;
}
// Convert lk_time_t to TSC ticks
uint64_t time_to_tsc_ticks(lk_time_t time) {
return u64_mul_u32_fp32_64(time, timebase_to_tsc);
}
uint64_t pvclock_get_tsc_freq(void) {
uint32_t tsc_mul = 0;
int8_t tsc_shift = 0;
if (!vcpu_time_info) {
return 0;
}
uint32_t pre_version = 0, post_version = 0;
do {
pre_version = vcpu_time_info->version;
if (pre_version % 2 != 0) {
asm("pause");
continue;
}
tsc_mul = vcpu_time_info->tsc_to_system_mul;
tsc_shift = vcpu_time_info->tsc_shift;
post_version = vcpu_time_info->version;
} while (pre_version != post_version);
uint64_t tsc_khz = 1000000ULL << 32;
tsc_khz = tsc_khz / tsc_mul;
if (tsc_shift > 0) {
tsc_khz >>= tsc_shift;
} else {
tsc_khz <<= -tsc_shift;
}
return tsc_khz * 1000;
}
bool pv_clock_is_stable(void) {
if (!vcpu_time_info) {
return false;
}
bool is_stable = (vcpu_time_info->flags & (1<<0)) ||
x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE_STABLE);
return is_stable;
}
void pc_init_timer(unsigned int level) {
// Initialize the PIT, it's always present in PC hardware
pit_init();