[platform][pc] add support for TSC based clock

-Detect if under KVM hypervisor and read tick rate or
-calibrate tick against PIT
This commit is contained in:
Travis Geiselbrecht
2025-03-30 21:59:39 -07:00
parent 09412c194f
commit 2987f73d08
10 changed files with 276 additions and 37 deletions

View File

@@ -141,14 +141,6 @@ enum handler_return lapic_timer_handler(void *arg) {
}
// Early local APIC setup: only records whether the CPU reports a local APIC.
// Nothing is mapped or programmed in this function.
void lapic_init(void) {
LTRACE_ENTRY;
// check feature bit 9 in edx of leaf 1 for presence of lapic
// the result is cached in lapic_present for the rest of the driver to test
lapic_present = x86_feature_test(X86_FEATURE_APIC);
}
void lapic_init_postvm(uint level) {
if (!lapic_present)
return;
@@ -207,8 +199,6 @@ void lapic_init_postvm(uint level) {
lapic_set_oneshot_timer(1000000);
}
LK_INIT_HOOK(lapic, lapic_init_postvm, LK_INIT_LEVEL_VM);
void lapic_eoi(unsigned int vector) {
LTRACEF("vector %#x\n", vector);
if (!lapic_present) {

View File

@@ -91,6 +91,7 @@ static void local_apic_callback(const void *_entry, size_t entry_len, void *cook
const struct acpi_madt_local_apic_entry *entry = _entry;
struct detected_cpus *cpus = cookie;
// TODO: read the current APIC id and skip it, instead of assuming 0 is the boot cpu
if (entry->apic_id == 0) {
// skip the boot cpu
return;

View File

@@ -10,6 +10,7 @@
#include <lk/reg.h>
#include <lk/debug.h>
#include <lk/trace.h>
#include <assert.h>
#include <kernel/thread.h>
#include <kernel/spinlock.h>
#include <platform.h>
@@ -23,6 +24,9 @@
#define LOCAL_TRACE 0
// TODO: switch this logic to lib/fixed_point math
static platform_timer_callback t_callback;
static void *callback_arg;
static spin_lock_t lock = SPIN_LOCK_INITIAL_VALUE;
@@ -42,6 +46,7 @@ static uint64_t timer_delta_time;
#define INTERNAL_FREQ 1193182ULL
#define INTERNAL_FREQ_3X 3579546ULL
#define INTERNAL_FREQ_TICKS_PER_MS (INTERNAL_FREQ / 1000u)
/* Maximum amount of time that can be programmed on the timer to schedule the next
* interrupt, in milliseconds */
@@ -128,8 +133,8 @@ static void set_pit_frequency(uint32_t frequency) {
*/
timer_delta_time = (3685982306ULL * count) >> 10;
LTRACEF("dt 0x%016" PRIx64 "\n", timer_delta_time);
LTRACEF("divisor 0x%04" PRIx16 "\n", divisor);
LTRACEF("dt %#x.%08x\n", (uint32_t)(timer_delta_time >> 32), (uint32_t)(timer_delta_time & 0xffffffff));
LTRACEF("divisor %" PRIu16 "\n", divisor);
/*
* setup the Programmable Interval Timer
@@ -191,4 +196,54 @@ void pit_stop_timer(void) {
mask_interrupt(INT_PIT);
spin_unlock_irqrestore(&lock, state);
}
uint64_t pit_calibrate_tsc(void) {
DEBUG_ASSERT(arch_ints_disabled());
uint64_t tsc_ticks[5] = {0};
uint32_t countdown_ms[5] = {0};
uint64_t tsc_freq = 0;
for (uint i = 0; i < countof(tsc_ticks); i++) {
// calibrate the tsc frequency using the PIT
countdown_ms[i] = 2 * (i + 1);
uint16_t pic_ticks = INTERNAL_FREQ_TICKS_PER_MS * countdown_ms[i];
outp(I8253_CONTROL_REG, 0x30);
outp(I8253_DATA_REG, pic_ticks & 0xff); // LSB
outp(I8253_DATA_REG, pic_ticks >> 8); // MSB
// read the tsc
uint64_t tsc_start = __builtin_ia32_rdtsc();
// wait for countdown_ms
uint8_t status = 0;
do {
// Send a read-back command that latches the status of ch0
outp(I8253_CONTROL_REG, 0xe2);
status = inp(I8253_DATA_REG);
// Wait for bit 7 (output) to go high and for bit 6 (null count) to go low
} while ((status & 0xc0) != 0x80);
uint64_t tsc_end = __builtin_ia32_rdtsc();
tsc_ticks[i] = tsc_end - tsc_start;
}
// find the best time
uint best_index = 0;
for (uint i = 1; i < countof(tsc_ticks); i++) {
if (tsc_ticks[i] < tsc_ticks[best_index]) {
best_index = i;
}
}
// calculate the tsc frequency
tsc_freq = (tsc_ticks[best_index] * 1000) / countdown_ms[best_index];
dprintf(INFO, "PIT: calibrated TSC frequency: %" PRIu64 "Hz\n", tsc_freq);
// put the PIT back to 1ms countdown
set_pit_frequency(1000);
return tsc_freq;
}

View File

@@ -21,6 +21,7 @@ void pic_init(void);
void pic_enable(unsigned int vector, bool enable);
void pic_eoi(unsigned int vector);
void pic_mask_interrupts(void);
uint64_t pit_calibrate_tsc(void);
// local apic
void lapic_init(void);

View File

@@ -6,10 +6,10 @@ MODULE := $(LOCAL_DIR)
# legacy implies older hardware, pre pentium, pre pci
CPU ?= modern
MODULE_DEPS += \
lib/acpi_lite \
lib/bio \
lib/cbuf
MODULE_DEPS += lib/acpi_lite
MODULE_DEPS += lib/bio
MODULE_DEPS += lib/cbuf
MODULE_DEPS += lib/fixed_point
ifneq ($(CPU),legacy)
MODULE_DEPS += dev/bus/pci/drivers

View File

@@ -12,13 +12,17 @@
#include <lk/reg.h>
#include <lk/trace.h>
#include <kernel/thread.h>
#include <kernel/vm.h>
#include <platform.h>
#include <platform/timer.h>
#include <platform/pc.h>
#include "platform_p.h"
#include <arch/x86.h>
#include <arch/x86/feature.h>
#include <inttypes.h>
#include <lib/fixed_point.h>
#define LOCAL_TRACE 0
#define LOCAL_TRACE 1
// Deals with all of the various clock sources and event timers on the PC platform.
@@ -29,16 +33,19 @@ static enum clock_source {
CLOCK_SOURCE_HPET,
} clock_source = CLOCK_SOURCE_INITIAL;
struct fp_32_64 tsc_to_timebase;
struct fp_32_64 tsc_to_timebase_hires;
static const char *clock_source_name(void) {
switch (clock_source) {
case CLOCK_SOURCE_INITIAL:
return "initial";
case CLOCK_SOURCE_PIT:
return "pit";
return "PIT";
case CLOCK_SOURCE_TSC:
return "tsc";
return "TSC";
case CLOCK_SOURCE_HPET:
return "hpet";
return "HPET";
default:
return "unknown";
}
@@ -48,6 +55,8 @@ lk_time_t current_time(void) {
switch (clock_source) {
case CLOCK_SOURCE_PIT:
return pit_current_time();
case CLOCK_SOURCE_TSC:
return u32_mul_u64_fp32_64(__builtin_ia32_rdtsc(), tsc_to_timebase);
default:
return 0;
}
@@ -57,16 +66,174 @@ lk_bigtime_t current_time_hires(void) {
switch (clock_source) {
case CLOCK_SOURCE_PIT:
return pit_current_time_hires();
case CLOCK_SOURCE_TSC:
return u64_mul_u64_fp32_64(__builtin_ia32_rdtsc(), tsc_to_timebase_hires);
default:
return 0;
}
}
void pc_init_timer(unsigned int level) {
LTRACE_ENTRY;
// From https://www.kernel.org/doc/html/v6.14/virt/kvm/x86/msr.html
// Wall clock record that the hypervisor fills in at the guest physical
// address written to the wall clock MSR.
struct pvclock_wall_clock {
// update counter; per the KVM documentation an odd value means an update
// is in progress
uint32_t version;
// NOTE(review): per the KVM documentation sec/nsec hold the wall clock
// time at boot -- confirm before relying on it
uint32_t sec;
uint32_t nsec;
} __PACKED;
// the layout is shared with the hypervisor, so the size must match exactly
static_assert(sizeof(struct pvclock_wall_clock) == 12, "pvclock_wall_clock size mismatch");
// Per-vcpu time record that the hypervisor fills in at the guest physical
// address written to the system time MSR (layout from the KVM x86 MSR
// documentation).
struct pvclock_vcpu_time_info {
// update counter; readers retry while it is odd or changes across a read
uint32_t version;
uint32_t pad0;
// NOTE(review): per the KVM documentation, the TSC value and the system
// time (in nanoseconds) captured when this record was last updated
uint64_t tsc_timestamp;
uint64_t system_time;
// multiplier and shift that convert a TSC delta into nanoseconds; also used
// to derive the TSC frequency
uint32_t tsc_to_system_mul;
int8_t tsc_shift;
// bit 0 is treated as the "clock is stable" flag by pv_clock_is_stable()
uint8_t flags;
uint8_t pad[2];
} __PACKED;
// the layout is shared with the hypervisor, so the size must match exactly
static_assert(sizeof(struct pvclock_vcpu_time_info) == 32, "pvclock_vcpu_time_info size mismatch");
static volatile struct pvclock_wall_clock *wall_clock;
static volatile struct pvclock_vcpu_time_info *vcpu_time_info;
/*
 * Detect the KVM paravirtual clock and point its MSRs at a freshly allocated
 * page.
 *
 * On success wall_clock and vcpu_time_info point into that page, which the
 * hypervisor keeps updated.
 *
 * Returns:
 *   NO_ERROR          - the clock source was found and set up
 *   ERR_NOT_SUPPORTED - neither KVM clocksource feature bit is present
 *   other             - allocating or translating the shared page failed
 */
status_t pvclock_init(void) {
    // Prefer the newer MSR range when both interfaces are advertised.
    uint32_t clocksource_msr_base = 0;
    if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE)) {
        clocksource_msr_base = 0x11;
    }
    if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE2)) {
        clocksource_msr_base = 0x4b564d00;
    }
    if (!clocksource_msr_base) {
        return ERR_NOT_SUPPORTED;
    }
    dprintf(INFO, "pv_clock: clocksource detected, msr base %#x\n", clocksource_msr_base);

    // map a page of memory and point the KVM clocksource msrs at it
    void *clocksource_page;
    status_t err = vmm_alloc(vmm_get_kernel_aspace(), "pvclock", PAGE_SIZE, &clocksource_page, 0, 0, 0);
    if (err != NO_ERROR) {
        printf("pv_clock: failed to allocate page for clocksource msrs\n");
        return err;
    }

    // The MSRs take physical addresses. Never hand the hypervisor an address
    // that was not successfully translated, it would write into arbitrary
    // memory.
    paddr_t paddr = 0;
    err = arch_mmu_query(&vmm_get_kernel_aspace()->arch_aspace, (vaddr_t)clocksource_page, &paddr, NULL);
    if (err != NO_ERROR) {
        // NOTE: the page stays allocated on this (unexpected) path
        printf("pv_clock: failed to query physical address of clocksource page\n");
        return err;
    }
    LTRACEF("clocksource page %p, paddr %#" PRIxPTR "\n", clocksource_page, paddr);

    // The wall clock record sits at the start of the page with the per-vcpu
    // time info directly behind it. Bit 0 of the system time MSR is the
    // enable bit.
    write_msr(clocksource_msr_base, paddr);
    write_msr(clocksource_msr_base + 1, (paddr + sizeof(struct pvclock_wall_clock)) | 1);

    wall_clock = (struct pvclock_wall_clock *)clocksource_page;
    vcpu_time_info = (struct pvclock_vcpu_time_info *)(wall_clock + 1);

    dprintf(SPEW, "pv_clock: wall clock version %u, sec %u, nsec %u\n",
            wall_clock->version, wall_clock->sec, wall_clock->nsec);

    dprintf(SPEW, "pv_clock: vcpu time info version %u, tsc timestamp %" PRIu64 ", system time %" PRIu64 "\n",
            vcpu_time_info->version, vcpu_time_info->tsc_timestamp, vcpu_time_info->system_time);
    dprintf(SPEW, "pv_clock: tsc to system mul %u, tsc shift %d, flags %u\n",
            vcpu_time_info->tsc_to_system_mul, vcpu_time_info->tsc_shift, vcpu_time_info->flags);

    return NO_ERROR;
}
/*
 * Derive the TSC frequency, in Hz, from the scaling factors the hypervisor
 * publishes in the per-vcpu time info record.
 *
 * Returns 0 when the pv clock has not been set up or the published factors
 * cannot be used, so the caller can fall back to another calibration method.
 */
uint64_t pvclock_get_tsc_freq(void) {
    if (!vcpu_time_info) {
        return 0;
    }

    // Take a consistent snapshot of the factors. The hypervisor makes the
    // version odd while it is updating the record and bumps it again when
    // done, so a snapshot is only good if the version was even and did not
    // change across the reads. An odd version always forces another pass.
    uint32_t tsc_mul = 0;
    int8_t tsc_shift = 0;
    for (;;) {
        uint32_t pre_version = vcpu_time_info->version;
        if (pre_version % 2 != 0) {
            asm("pause");
            continue;
        }

        tsc_mul = vcpu_time_info->tsc_to_system_mul;
        tsc_shift = vcpu_time_info->tsc_shift;

        if (vcpu_time_info->version == pre_version) {
            break;
        }
    }

    // A zero multiplier would divide by zero, and a shift of 64 or more is
    // undefined on a 64 bit value. Neither describes a usable clock.
    if (tsc_mul == 0 || tsc_shift <= -64 || tsc_shift >= 64) {
        return 0;
    }

    // ns = ((tsc << tsc_shift) * tsc_mul) >> 32, so
    // kHz = (10^6 << 32) / tsc_mul, shifted the opposite way by tsc_shift
    uint64_t tsc_khz = (1000000ULL << 32) / tsc_mul;
    if (tsc_shift > 0) {
        tsc_khz >>= tsc_shift;
    } else {
        tsc_khz <<= -tsc_shift;
    }

    return tsc_khz * 1000;
}
/*
 * Report whether the paravirtual clock is flagged as stable.
 *
 * Returns false when the pv clock has not been set up. Otherwise it is
 * stable if bit 0 of the per-vcpu flags is set or the CPU advertises
 * X86_FEATURE_KVM_CLOCKSOURCE_STABLE.
 */
bool pv_clock_is_stable(void) {
    // no shared page, no pv clock
    if (!vcpu_time_info) {
        return false;
    }

    // the per-vcpu flag is checked first; the feature bit is only consulted
    // when the flag is clear
    if (vcpu_time_info->flags & 0x1) {
        return true;
    }

    return x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE_STABLE);
}
/*
 * Init hook that selects the platform clock source.
 *
 * The PIT is always brought up first and used as the default. On non-legacy
 * builds the TSC is used instead when it is reported invariant (by the CPU or
 * by a stable hypervisor pv clock) and its frequency can be determined, either
 * from the pv clock or by calibrating against the PIT.
 *
 * level: init level passed by the init hook machinery, unused here.
 */
void pc_init_timer(unsigned int level) {
    // Initialize the PIT, it's always present in PC hardware
    pit_init();
    clock_source = CLOCK_SOURCE_PIT;

    lapic_init();

#if !X86_LEGACY
    // XXX update note about what invariant TSC means
    bool invariant_tsc = x86_feature_test(X86_FEATURE_INVAR_TSC);
    LTRACEF("invariant TSC %d\n", invariant_tsc);

    // Test for hypervisor PV clock, which also effectively says if TSC is invariant across
    // all cpus.
    if (pvclock_init() == NO_ERROR) {
        bool pv_clock_stable = pv_clock_is_stable();
        invariant_tsc |= pv_clock_stable;

        printf("pv_clock: Clocksource is %sstable\n", (pv_clock_stable ? "" : "not "));
    }

    // XXX test for HPET and use it over PIT if present

    if (invariant_tsc) {
        // We're going to try to use the TSC as a time base, obtain the TSC frequency.
        // The pv clock reports 0 when it is absent or unusable.
        uint64_t tsc_hz = pvclock_get_tsc_freq();
        if (tsc_hz == 0) {
            // TODO: some x86 cores describe the TSC and lapic clocks in cpuid

            // Calibrate the TSC against the PIT, which should always be present
            tsc_hz = pit_calibrate_tsc();
            if (tsc_hz == 0) {
                dprintf(CRITICAL, "PC: failed to calibrate TSC frequency\n");
                goto out;
            }
        }

        dprintf(INFO, "PC: TSC frequency %" PRIu64 "Hz\n", tsc_hz);

        // NOTE(review): fp_32_64_div_32_32 presumably takes 32 bit operands
        // (confirm against lib/fixed_point). A TSC faster than ~4.29GHz would
        // then be silently truncated and the clock would run at the wrong
        // rate, so stay on the PIT instead.
        if (tsc_hz > 0xffffffffULL) {
            dprintf(CRITICAL, "PC: TSC frequency too high for 32 bit ratio, not using TSC\n");
            goto out;
        }

        // Compute the ratio of TSC to timebase
        fp_32_64_div_32_32(&tsc_to_timebase, 1000, tsc_hz);
        dprintf(INFO, "PC: TSC to timebase ratio %u.%08u...\n",
                tsc_to_timebase.l0, tsc_to_timebase.l32);

        fp_32_64_div_32_32(&tsc_to_timebase_hires, 1000*1000, tsc_hz);
        dprintf(INFO, "PC: TSC to hires timebase ratio %u.%08u...\n",
                tsc_to_timebase_hires.l0, tsc_to_timebase_hires.l32);

        clock_source = CLOCK_SOURCE_TSC;
    }
out:
#endif // !X86_LEGACY

    dprintf(INFO, "PC: using %s clock source\n", clock_source_name());
}
LK_INIT_HOOK(pc_timer, pc_init_timer, LK_INIT_LEVEL_VM);