From 0df68cc48dbbf49b5e0916ba2ed8e97484b96caf Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:43:19 +0000 Subject: [PATCH 01/18] feat: add narrow shared memory accessor to UninitializedSandbox Signed-off-by: danbugs --- src/hyperlight_host/src/sandbox/uninitialized.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 2167dca43..a280d9b03 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -169,6 +169,11 @@ impl<'a> From> for GuestEnvironment<'a, '_> { } impl UninitializedSandbox { + /// Returns a mutable reference to the sandbox's shared memory region. + pub fn shared_mem_mut(&mut self) -> &mut ExclusiveSharedMemory { + &mut self.mgr.shared_mem + } + // Creates a new uninitialized sandbox from a pre-built snapshot. // Note that since memory configuration is part of the snapshot the only configuration // that can be changed (from the original snapshot) is the configuration defines the behaviour of From 845885ebaa3f9afc2e9920a3a83c07b1e94b2b60 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 4 Mar 2026 16:25:29 +0000 Subject: [PATCH 02/18] feat: replace shared_mem_mut() with bounds-checked guest_memory_ptr() Replace the public shared_mem_mut() API (which exposed the full shared memory region) with a narrower guest_memory_ptr() method that takes an offset and length, bounds-checks against the allocated region, and returns the corresponding host pointer. Signed-off-by: danbugs --- .../src/sandbox/uninitialized.rs | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index a280d9b03..2e76af1c4 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -29,9 +29,10 @@ use crate::func::host_functions::{HostFunction, register_host_function}; use crate::func::{ParameterTuple, SupportedReturnType}; #[cfg(feature = "build-metadata")] use crate::log_build_details; +use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::{DEFAULT_GUEST_BLOB_MEM_FLAGS, MemoryRegionFlags}; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::shared_mem::ExclusiveSharedMemory; +use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; use crate::sandbox::SandboxConfiguration; use crate::{MultiUseSandbox, Result, new_error}; @@ -169,9 +170,41 @@ impl<'a> From> for GuestEnvironment<'a, '_> { } impl UninitializedSandbox { - /// Returns a mutable reference to the sandbox's shared memory region. - pub fn shared_mem_mut(&mut self) -> &mut ExclusiveSharedMemory { - &mut self.mgr.shared_mem + /// Returns a host-side pointer to a specific guest physical address (GPA) + /// within the sandbox's shared memory region. + /// + /// This is the safe way to obtain host-side access to guest memory. + /// The method validates that the GPA falls within the sandbox's + /// allocated memory region before returning the corresponding host pointer. + /// + /// # Safety + /// + /// The returned pointer is valid as long as the sandbox (and its underlying + /// shared memory mapping) remains alive. Dereferencing the pointer requires + /// `unsafe` code and the caller must ensure proper synchronization. + pub fn guest_memory_ptr(&mut self, gpa: usize) -> Result<*mut u8> { + let base = SandboxMemoryLayout::BASE_ADDRESS; + let mem_size = self.mgr.shared_mem.mem_size(); + + if gpa < base { + return Err(new_error!( + "GPA {:#x} is below the sandbox base address {:#x}", + gpa, + base + )); + } + + let offset = gpa - base; + if offset >= mem_size { + return Err(new_error!( + "GPA {:#x} (offset {:#x}) is beyond sandbox memory size {:#x}", + gpa, + offset, + mem_size + )); + } + + Ok(unsafe { self.mgr.shared_mem.base_ptr().add(offset) }) } // Creates a new uninitialized sandbox from a pre-built snapshot. From 954696762a65ac117a62446a9a1c474b7b66c96a Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 19:51:10 +0000 Subject: [PATCH 03/18] feat: use i686 layout for non-init-paging guests on x86_64 When the init-paging feature is disabled (e.g., 32-bit guests that manage their own paging), select the i686 layout module instead of amd64 on x86_64 hosts. This ensures MAX_GPA/MAX_GVA use 32-bit address space limits, and the scratch region is placed correctly at the top of 4 GiB physical memory. Key changes: - Set MAX_GVA = MAX_GPA (0xffff_ffff) in i686 layout so that identity-mapped guests get correct virtual addresses - Remove SNAPSHOT_PT_GVA_* from i686 layout (no page tables) - Gate SNAPSHOT_PT_GVA_* exports/usage behind init-paging feature - Propagate init-paging feature from hyperlight-host to hyperlight-common via Cargo feature forwarding - Make snapshot region writable for non-init-paging guests (no CoW page tables means the hardware needs direct write access) - Fix min_scratch_size signature to match amd64 layout Signed-off-by: danbugs --- src/hyperlight_common/src/arch/i686/layout.rs | 8 +++----- src/hyperlight_common/src/layout.rs | 13 +++++++++++-- src/hyperlight_host/Cargo.toml | 4 ++-- src/hyperlight_host/src/mem/shared_mem.rs | 17 ++++++++++++++++- src/hyperlight_host/src/sandbox/snapshot.rs | 1 + 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/hyperlight_common/src/arch/i686/layout.rs b/src/hyperlight_common/src/arch/i686/layout.rs index 68cc2cf1b..f3601c643 100644 --- a/src/hyperlight_common/src/arch/i686/layout.rs +++ b/src/hyperlight_common/src/arch/i686/layout.rs @@ -17,11 +17,9 @@ limitations under the License. // This file is just dummy definitions at the moment, in order to // allow compiling the guest for real mode boot scenarios. -pub const MAX_GVA: usize = 0xffff_efff; -pub const SNAPSHOT_PT_GVA_MIN: usize = 0xef00_0000; -pub const SNAPSHOT_PT_GVA_MAX: usize = 0xefff_efff; +pub const MAX_GVA: usize = 0xffff_ffff; pub const MAX_GPA: usize = 0xffff_ffff; -pub fn min_scratch_size() -> usize { - 1 * crate::vmem::PAGE_SIZE +pub fn min_scratch_size(_input_data_size: usize, _output_data_size: usize) -> usize { + crate::vmem::PAGE_SIZE } diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs index 215a80d87..aa62bea32 100644 --- a/src/hyperlight_common/src/layout.rs +++ b/src/hyperlight_common/src/layout.rs @@ -14,11 +14,20 @@ See the License for the specific language governing permissions and limitations under the License. */ -#[cfg_attr(target_arch = "x86_64", path = "arch/amd64/layout.rs")] #[cfg_attr(target_arch = "x86", path = "arch/i686/layout.rs")] +#[cfg_attr( + all(target_arch = "x86_64", feature = "init-paging"), + path = "arch/amd64/layout.rs" +)] +#[cfg_attr( + all(target_arch = "x86_64", not(feature = "init-paging")), + path = "arch/i686/layout.rs" +)] mod arch; -pub use arch::{MAX_GPA, MAX_GVA, SNAPSHOT_PT_GVA_MAX, SNAPSHOT_PT_GVA_MIN}; +pub use arch::{MAX_GPA, MAX_GVA}; +#[cfg(feature = "init-paging")] +pub use arch::{SNAPSHOT_PT_GVA_MAX, SNAPSHOT_PT_GVA_MIN}; // offsets down from the top of scratch memory for various things pub const SCRATCH_TOP_SIZE_OFFSET: u64 = 0x08; diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 84a112a05..8dab2eec0 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -40,7 +40,7 @@ tracing = { version = "0.1.44", features = ["log"] } tracing-log = "0.2.0" tracing-core = "0.1.36" tracing-opentelemetry = { version = "0.32.1", optional = true } -hyperlight-common = { workspace = true, default-features = true, features = [ "std", "init-paging" ] } +hyperlight-common = { workspace = true, default-features = true, features = [ "std" ] } hyperlight-guest-tracing = { workspace = true, default-features = true, optional = true } vmm-sys-util = "0.15.0" crossbeam-channel = "0.5.15" @@ -137,7 +137,7 @@ mshv3 = ["dep:mshv-bindings", "dep:mshv-ioctls"] gdb = ["dep:gdbstub", "dep:gdbstub_arch"] fuzzing = ["hyperlight-common/fuzzing"] build-metadata = ["dep:built"] -init-paging = [] +init-paging = ["hyperlight-common/init-paging"] [[bench]] name = "benchmarks" diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index 2cd3b8acc..8cd2d25a3 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -679,7 +679,22 @@ impl GuestSharedMemory { MemoryRegionType::Scratch => { MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE } - MemoryRegionType::Snapshot => MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE, + // For init-paging, the snapshot is read-only because guest page + // tables provide CoW semantics for writable pages. For + // non-init-paging there are no guest page tables, so the snapshot + // must be writable — otherwise writes (including the CPU setting + // the "Accessed" bit in GDT descriptors during segment loads) + // cause EPT violations that KVM retries forever. + MemoryRegionType::Snapshot => { + #[cfg(feature = "init-paging")] + { + MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE + } + #[cfg(not(feature = "init-paging"))] + { + MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE + } + } #[allow(clippy::panic)] // In the future, all the host side knowledge about memory // region types should collapse down to Snapshot vs diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index f9a48c871..2329a6b7a 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -265,6 +265,7 @@ fn filtered_mappings<'a>( return None; } // neither does the mapping of the snapshot's own page tables + #[cfg(feature = "init-paging")] if mapping.virt_base >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 && mapping.virt_base <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 { From ba43551cb301bc6a6e95778deb6ea0aec4417d06 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:51:48 +0000 Subject: [PATCH 04/18] feat: add PIC emulation, OutBAction variants, and guest halt mechanism - Add PvTimerConfig (port 107) and Halt (port 108) to OutBAction enum - Add userspace 8259A PIC emulation for MSHV/WHP (KVM uses in-kernel PIC) - Add halt() function in guest exit module using Halt port instead of HLT - Add default no-op IRQ handler at IDT vector 0x20 for PIC-remapped IRQ0 - Update dispatch epilogue to use Halt port before cli+hlt fallback - Add hw-interrupts feature flag to hyperlight-host Signed-off-by: danbugs --- src/hyperlight_common/src/outb.rs | 11 + src/hyperlight_guest/src/exit.rs | 28 +++ .../src/arch/amd64/dispatch.rs | 3 + .../src/arch/amd64/exception/entry.rs | 26 ++ src/hyperlight_host/Cargo.toml | 1 + src/hyperlight_host/src/hypervisor/mod.rs | 3 + src/hyperlight_host/src/hypervisor/pic.rs | 228 ++++++++++++++++++ src/hyperlight_host/src/sandbox/outb.rs | 5 + 8 files changed, 305 insertions(+) create mode 100644 src/hyperlight_host/src/hypervisor/pic.rs diff --git a/src/hyperlight_common/src/outb.rs b/src/hyperlight_common/src/outb.rs index b060ae5cf..9cba7f4a7 100644 --- a/src/hyperlight_common/src/outb.rs +++ b/src/hyperlight_common/src/outb.rs @@ -104,6 +104,15 @@ pub enum OutBAction { TraceMemoryAlloc = 105, #[cfg(feature = "mem_profile")] TraceMemoryFree = 106, + /// IO port for PV timer configuration. The guest writes a 32-bit + /// LE value representing the desired timer period in microseconds. + /// A value of 0 disables the timer. + PvTimerConfig = 107, + /// IO port the guest writes to signal "I'm done" to the host. + /// This replaces the `hlt` instruction for halt signaling so that + /// KVM's in-kernel LAPIC (which absorbs HLT exits) does not interfere + /// with hyperlight's halt-based guest-host protocol. + Halt = 108, } impl TryFrom for OutBAction { @@ -120,6 +129,8 @@ impl TryFrom for OutBAction { 105 => Ok(OutBAction::TraceMemoryAlloc), #[cfg(feature = "mem_profile")] 106 => Ok(OutBAction::TraceMemoryFree), + 107 => Ok(OutBAction::PvTimerConfig), + 108 => Ok(OutBAction::Halt), _ => Err(anyhow::anyhow!("Invalid OutBAction value: {}", val)), } } diff --git a/src/hyperlight_guest/src/exit.rs b/src/hyperlight_guest/src/exit.rs index e8d58e24e..785e10a19 100644 --- a/src/hyperlight_guest/src/exit.rs +++ b/src/hyperlight_guest/src/exit.rs @@ -18,6 +18,34 @@ use core::arch::asm; use core::ffi::{CStr, c_char}; use hyperlight_common::outb::OutBAction; +use tracing::instrument; + +/// Halt the execution of the guest and returns control to the host. +/// Halt is generally called for a successful completion of the guest's work, +/// this means we can instrument it as a trace point because the trace state +/// shall not be locked at this point (we are not in an exception context). +#[inline(never)] +#[instrument(skip_all, level = "Trace")] +pub fn halt() { + #[cfg(all(feature = "trace_guest", target_arch = "x86_64"))] + { + // Send data before halting + // If there is no data, this doesn't do anything + // The reason we do this here instead of in `hlt` asm function + // is to avoid allocating before halting, which leaks memory + // because the guest is not expected to resume execution after halting. + hyperlight_guest_tracing::flush(); + } + + // Signal "I'm done" to the host via an IO port write. This causes a + // VM exit on all backends (KVM, MSHV, WHP). The subsequent cli+hlt + // is dead code—hyperlight never re-enters the guest after seeing + // the Halt port—but serves as a safety fallback. + unsafe { + out32(OutBAction::Halt as u16, 0); + asm!("cli", "hlt", options(nostack)); + } +} /// Exits the VM with an Abort OUT action and code 0. #[unsafe(no_mangle)] diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index bb80b30ed..e904eeb6e 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -68,6 +68,9 @@ core::arch::global_asm!(" mov cr4, rdi flush_done: call {internal_dispatch_function}\n + mov dx, 108\n + out dx, al\n + cli\n hlt\n .cfi_endproc ", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs index 87f89f15c..ca72e62d8 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs @@ -172,6 +172,24 @@ global_asm!( hl_exception_handler = sym super::handle::hl_exception_handler, ); +// Default no-op IRQ handler for hardware interrupts (vectors 0x20-0x2F). +// Sends a non-specific EOI to the master PIC and returns. +// This prevents unhandled-interrupt faults when the in-kernel PIT fires +// before a guest has installed its own IRQ handler. +global_asm!( + ".globl _default_irq_handler", + "_default_irq_handler:", + "push rax", + "mov al, 0x20", + "out 0x20, al", // PIC EOI + "pop rax", + "iretq", +); + +unsafe extern "C" { + fn _default_irq_handler(); +} + pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { let idt = unsafe { &raw mut (*pc).idt }; let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { @@ -203,6 +221,14 @@ pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { set_idt_entry(Exception::VirtualizationException, _do_excp20); // Virtualization Exception set_idt_entry(Exception::SecurityException, _do_excp30); // Security Exception + // Install a default no-op IRQ handler at vector 0x20 (IRQ0 after PIC remapping). + // This ensures HLT can wake when an in-kernel PIT is active, even before the + // guest installs its own IRQ handler. + let irq_handler_addr = _default_irq_handler as *const () as u64; + unsafe { + (&raw mut (*idt).entries[0x20]).write_volatile(IdtEntry::new(irq_handler_addr)); + } + let idtr = IdtPointer { limit: (core::mem::size_of::() - 1) as u16, base: idt as u64, diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 8dab2eec0..24a4d238d 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -133,6 +133,7 @@ trace_guest = ["dep:opentelemetry", "dep:tracing-opentelemetry", "dep:hyperlight mem_profile = [ "trace_guest", "dep:framehop", "dep:fallible-iterator", "hyperlight-common/mem_profile" ] kvm = ["dep:kvm-bindings", "dep:kvm-ioctls"] mshv3 = ["dep:mshv-bindings", "dep:mshv-ioctls"] +hw-interrupts = [] # This enables easy debug in the guest gdb = ["dep:gdbstub", "dep:gdbstub_arch"] fuzzing = ["hyperlight-common/fuzzing"] diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index a582ddb09..7c233af3d 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -38,6 +38,9 @@ pub(crate) mod crashdump; pub(crate) mod hyperlight_vm; +#[cfg(all(feature = "hw-interrupts", any(mshv3, target_os = "windows")))] +pub(crate) mod pic; + use std::fmt::Debug; #[cfg(any(kvm, mshv3))] use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; diff --git a/src/hyperlight_host/src/hypervisor/pic.rs b/src/hyperlight_host/src/hypervisor/pic.rs new file mode 100644 index 000000000..7256ec3e9 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/pic.rs @@ -0,0 +1,228 @@ +/* +Copyright 2026 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Minimal userspace PIC (8259A) emulation for MSHV/WHP. +//! KVM provides in-kernel PIC via `create_irq_chip()`. + +// PIC I/O port constants +const PIC_MASTER_CMD: u16 = 0x20; +const PIC_MASTER_DATA: u16 = 0x21; +const PIC_SLAVE_CMD: u16 = 0xA0; +const PIC_SLAVE_DATA: u16 = 0xA1; + +/// Tracks where we are in the ICW (Initialization Command Word) sequence. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PicInitState { + /// Not in an init sequence; normal operation. + Ready, + /// Expecting ICW2 (vector base). + Icw2, + /// Expecting ICW3 (cascade info). + Icw3, + /// Expecting ICW4 (mode). + Icw4, +} + +/// Minimal 8259A PIC state for one chip (master or slave). +#[derive(Debug)] +pub(crate) struct Pic { + master_init_state: PicInitState, + master_vector_base: u8, + master_imr: u8, + + slave_init_state: PicInitState, + slave_vector_base: u8, + slave_imr: u8, +} + +impl Pic { + /// Create a new PIC pair with default state. + pub(crate) fn new() -> Self { + Self { + master_init_state: PicInitState::Ready, + master_vector_base: 0, + master_imr: 0xFF, // all masked + + slave_init_state: PicInitState::Ready, + slave_vector_base: 0, + slave_imr: 0xFF, // all masked + } + } + + /// Handle an I/O OUT to a PIC port. Returns `true` if the port was handled. + pub(crate) fn handle_io_out(&mut self, port: u16, data: u8) -> bool { + match port { + PIC_MASTER_CMD => { + if data & 0x10 != 0 { + // ICW1 -- start init sequence + self.master_init_state = PicInitState::Icw2; + } + // else: OCW (e.g. EOI) -- we accept but ignore + true + } + PIC_MASTER_DATA => { + match self.master_init_state { + PicInitState::Icw2 => { + self.master_vector_base = data; + self.master_init_state = PicInitState::Icw3; + } + PicInitState::Icw3 => { + self.master_init_state = PicInitState::Icw4; + } + PicInitState::Icw4 => { + self.master_init_state = PicInitState::Ready; + } + PicInitState::Ready => { + // IMR write + self.master_imr = data; + } + } + true + } + PIC_SLAVE_CMD => { + if data & 0x10 != 0 { + self.slave_init_state = PicInitState::Icw2; + } + true + } + PIC_SLAVE_DATA => { + match self.slave_init_state { + PicInitState::Icw2 => { + self.slave_vector_base = data; + self.slave_init_state = PicInitState::Icw3; + } + PicInitState::Icw3 => { + self.slave_init_state = PicInitState::Icw4; + } + PicInitState::Icw4 => { + self.slave_init_state = PicInitState::Ready; + } + PicInitState::Ready => { + self.slave_imr = data; + } + } + true + } + _ => false, + } + } + + /// Handle an I/O IN from a PIC port. Returns `Some(value)` if handled. + pub(crate) fn handle_io_in(&self, port: u16) -> Option { + match port { + PIC_MASTER_DATA => Some(self.master_imr), + PIC_SLAVE_DATA => Some(self.slave_imr), + PIC_MASTER_CMD | PIC_SLAVE_CMD => Some(0), // ISR reads return 0 + _ => None, + } + } + + /// Returns the master PIC vector base. + pub(crate) fn master_vector_base(&self) -> u8 { + self.master_vector_base + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pic_initial_state() { + let pic = Pic::new(); + assert_eq!(pic.master_vector_base, 0); + assert_eq!(pic.slave_vector_base, 0); + assert_eq!(pic.master_imr, 0xFF); + assert_eq!(pic.slave_imr, 0xFF); + } + + #[test] + fn pic_master_icw_sequence() { + let mut pic = Pic::new(); + // ICW1 + assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x11)); + // ICW2: vector base 0x20 + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x20)); + assert_eq!(pic.master_vector_base(), 0x20); + // ICW3 + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x04)); + // ICW4 + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x01)); + // Should be back to Ready -- IMR write + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0xFE)); // unmask IRQ0 + assert_eq!(pic.master_imr, 0xFE); + } + + #[test] + fn pic_slave_icw_sequence() { + let mut pic = Pic::new(); + assert!(pic.handle_io_out(PIC_SLAVE_CMD, 0x11)); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x28)); + assert_eq!(pic.slave_vector_base, 0x28); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x02)); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x01)); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0xFF)); + assert_eq!(pic.slave_imr, 0xFF); + } + + #[test] + fn pic_eoi_accepted() { + let mut pic = Pic::new(); + // EOI (OCW2, non-specific EOI = 0x20) + assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x20)); + } + + #[test] + fn pic_unhandled_port() { + let mut pic = Pic::new(); + assert!(!pic.handle_io_out(0x60, 0x00)); + assert_eq!(pic.handle_io_in(0x60), None); + } + + #[test] + fn pic_reinitialize_master() { + let mut pic = Pic::new(); + // First init + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + pic.handle_io_out(PIC_MASTER_DATA, 0x20); + pic.handle_io_out(PIC_MASTER_DATA, 0x04); + pic.handle_io_out(PIC_MASTER_DATA, 0x01); + assert_eq!(pic.master_vector_base(), 0x20); + // Reinit + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + pic.handle_io_out(PIC_MASTER_DATA, 0x30); + pic.handle_io_out(PIC_MASTER_DATA, 0x04); + pic.handle_io_out(PIC_MASTER_DATA, 0x01); + assert_eq!(pic.master_vector_base(), 0x30); + } + + #[test] + fn pic_imr_preserved_across_reinit() { + let mut pic = Pic::new(); + // Set IMR before init + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + pic.handle_io_out(PIC_MASTER_DATA, 0x20); + pic.handle_io_out(PIC_MASTER_DATA, 0x04); + pic.handle_io_out(PIC_MASTER_DATA, 0x01); + pic.handle_io_out(PIC_MASTER_DATA, 0xFE); + assert_eq!(pic.master_imr, 0xFE); + // Reinit -- IMR is set during ICW sequence, not explicitly preserved + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + // After ICW1, IMR should still read as whatever it was -- we only + // change it on Ready state writes. + assert_eq!(pic.handle_io_in(PIC_MASTER_DATA), Some(0xFE)); + } +} diff --git a/src/hyperlight_host/src/sandbox/outb.rs b/src/hyperlight_host/src/sandbox/outb.rs index 76ec53cee..5d5bd91d8 100644 --- a/src/hyperlight_host/src/sandbox/outb.rs +++ b/src/hyperlight_host/src/sandbox/outb.rs @@ -233,6 +233,11 @@ pub(crate) fn handle_outb( OutBAction::TraceMemoryAlloc => trace_info.handle_trace_mem_alloc(regs, mem_mgr), #[cfg(feature = "mem_profile")] OutBAction::TraceMemoryFree => trace_info.handle_trace_mem_free(regs, mem_mgr), + // PvTimerConfig and Halt are handled at the hypervisor level + // (in run_vcpu) and should never reach this handler. + OutBAction::PvTimerConfig | OutBAction::Halt => Err(HandleOutbError::InvalidPort(format!( + "port {port:#x} should be handled at hypervisor level" + ))), } } #[cfg(test)] From 276793f0d71de3aa39eb9ab1de5b700995aa0e52 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:53:15 +0000 Subject: [PATCH 05/18] feat(kvm): add hardware interrupt support with in-kernel IRQ chip - Create IRQ chip (PIC + IOAPIC + LAPIC) and PIT before vCPU creation - Add hw-interrupts run loop that handles HLT re-entry, Halt port, and PvTimerConfig port (ignored since in-kernel PIT handles scheduling) - Non-hw-interrupts path also recognizes Halt port for compatibility Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm.rs | 72 +++++++++++++++++++ .../src/hypervisor/virtual_machine/mod.rs | 4 +- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index a0382ab6c..d9ad48c9b 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -16,8 +16,11 @@ limitations under the License. use std::sync::LazyLock; +use hyperlight_common::outb::OutBAction; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; +#[cfg(feature = "hw-interrupts")] +use kvm_bindings::{KVM_PIT_SPEAKER_DUMMY, kvm_pit_config}; use kvm_bindings::{ kvm_debugregs, kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region, kvm_xsave, }; @@ -106,6 +109,26 @@ impl KvmVm { let vm_fd = hv .create_vm_with_type(0) .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + + // When hw-interrupts is enabled, create the in-kernel IRQ chip + // (PIC + IOAPIC + LAPIC) and PIT before creating the vCPU so the + // per-vCPU LAPIC is initialised in virtual-wire mode (LINT0 = ExtINT). + // The guest programs the PIC remap and PIT frequency via standard + // IO port writes, which the in-kernel devices handle transparently. + #[cfg(feature = "hw-interrupts")] + { + vm_fd + .create_irq_chip() + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + ..Default::default() + }; + vm_fd + .create_pit2(pit_config) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + } + let vcpu_fd = vm_fd .create_vcpu(0) .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; @@ -171,8 +194,57 @@ impl VirtualMachine for KvmVm { // it sets the guest span, no other traces or spans must be setup in between these calls. #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); + + // When hw-interrupts is enabled, the in-kernel PIC + PIT + LAPIC handle + // timer interrupts natively. The guest signals "I'm done" by writing to + // OutBAction::Halt (an IO port exit) instead of using HLT, because the in-kernel + // LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). + #[cfg(feature = "hw-interrupts")] + loop { + match self.vcpu_fd.run() { + Ok(VcpuExit::Hlt) => { + // The in-kernel LAPIC normally handles HLT internally. + // If we somehow get here, just re-enter the guest. + continue; + } + Ok(VcpuExit::IoOut(port, data)) => { + if port == OutBAction::Halt as u16 { + return Ok(VmExit::Halt()); + } + if port == OutBAction::PvTimerConfig as u16 { + // Ignore: the in-kernel PIT handles timer scheduling. + // The guest writes here for MSHV/WHP compatibility. + continue; + } + return Ok(VmExit::IoOut(port, data.to_vec())); + } + Ok(VcpuExit::MmioRead(addr, _)) => return Ok(VmExit::MmioRead(addr)), + Ok(VcpuExit::MmioWrite(addr, _)) => return Ok(VmExit::MmioWrite(addr)), + #[cfg(gdb)] + Ok(VcpuExit::Debug(debug_exit)) => { + return Ok(VmExit::Debug { + dr6: debug_exit.dr6, + exception: debug_exit.exception, + }); + } + Err(e) => match e.errno() { + libc::EINTR => return Ok(VmExit::Cancelled()), + libc::EAGAIN => continue, + _ => return Err(RunVcpuError::Unknown(e.into())), + }, + Ok(other) => { + return Ok(VmExit::Unknown(format!( + "Unknown KVM VCPU exit: {:?}", + other + ))); + } + } + } + + #[cfg(not(feature = "hw-interrupts"))] match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => Ok(VmExit::Halt()), + Ok(VcpuExit::IoOut(port, _)) if port == OutBAction::Halt as u16 => Ok(VmExit::Halt()), Ok(VcpuExit::IoOut(port, data)) => Ok(VmExit::IoOut(port, data.to_vec())), Ok(VcpuExit::MmioRead(addr, _)) => Ok(VmExit::MmioRead(addr)), Ok(VcpuExit::MmioWrite(addr, _)) => Ok(VmExit::MmioWrite(addr)), diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index 82e05c104..0f9514578 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -136,10 +136,10 @@ pub(crate) enum VmExit { Unknown(String), /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN #[cfg_attr( - target_os = "windows", + any(target_os = "windows", feature = "hw-interrupts"), expect( dead_code, - reason = "Retry() is never constructed on Windows, but it is still matched on (which dead_code lint ignores)" + reason = "Retry() is never constructed on Windows or with hw-interrupts (EAGAIN causes continue instead)" ) )] Retry(), From 6787b585ab4d124c8de23a61d2f7f2e33369f9cd Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:55:35 +0000 Subject: [PATCH 06/18] feat(mshv): add hardware interrupt support with SynIC timer - Enable LAPIC in partition flags for SynIC direct-mode timer delivery - Configure LAPIC (SVR, TPR, LINT0/1, LVT Timer) during VM creation - Install MSR intercept on IA32_APIC_BASE to prevent guest from disabling the LAPIC globally - Add SynIC STIMER0 configuration via PvTimerConfig IO port - Add userspace PIC emulation integration for MSHV - Restructure run_vcpu into a loop for HLT re-entry and hw IO handling - Bridge PIC EOI to LAPIC EOI for SynIC timer interrupt acknowledgment - Handle PIT/speaker/debug IO ports in userspace Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/mshv.rs | 534 +++++++++++++++--- 1 file changed, 452 insertions(+), 82 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 705764dd6..8b4689dc1 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -18,16 +18,32 @@ limitations under the License. use std::fmt::Debug; use std::sync::LazyLock; +use hyperlight_common::outb::OutBAction; +#[cfg(feature = "hw-interrupts")] +use mshv_bindings::LapicState; #[cfg(gdb)] use mshv_bindings::{DebugRegisters, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT}; use mshv_bindings::{ FloatingPointUnit, SpecialRegisters, StandardRegisters, XSave, hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, - hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - hv_partition_synthetic_processor_features, hv_register_assoc, - hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region, + hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_register_assoc, + hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_create_partition_v2, + mshv_user_mem_region, }; +#[cfg(feature = "hw-interrupts")] +use mshv_bindings::{ + HV_INTERCEPT_ACCESS_MASK_WRITE, hv_intercept_parameters, + hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, hv_message_type_HVMSG_X64_MSR_INTERCEPT, + mshv_install_intercept, +}; +#[cfg(feature = "hw-interrupts")] +use mshv_bindings::{ + hv_register_name_HV_REGISTER_SCONTROL, hv_register_name_HV_REGISTER_STIMER0_CONFIG, + hv_register_name_HV_REGISTER_STIMER0_COUNT, hv_register_name_HV_X64_REGISTER_RAX, +}; +#[cfg(feature = "hw-interrupts")] +use mshv_ioctls::make_default_synthetic_features_mask; use mshv_ioctls::{Mshv, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] @@ -35,6 +51,8 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, @@ -67,33 +85,71 @@ pub(crate) fn is_hypervisor_present() -> bool { pub(crate) struct MshvVm { vm_fd: VmFd, vcpu_fd: VcpuFd, + #[cfg(feature = "hw-interrupts")] + pic: Pic, + #[cfg(feature = "hw-interrupts")] + synic_timer_active: bool, } static MSHV: LazyLock> = LazyLock::new(|| Mshv::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); +/// Write a u32 to a LAPIC register at the given APIC offset. +#[cfg(feature = "hw-interrupts")] +fn write_lapic_u32(regs: &mut [::std::os::raw::c_char; 1024], offset: usize, val: u32) { + let bytes = val.to_le_bytes(); + regs[offset] = bytes[0] as _; + regs[offset + 1] = bytes[1] as _; + regs[offset + 2] = bytes[2] as _; + regs[offset + 3] = bytes[3] as _; +} + +/// Read a u32 from a LAPIC register at the given APIC offset. +#[cfg(feature = "hw-interrupts")] +fn read_lapic_u32(regs: &[::std::os::raw::c_char; 1024], offset: usize) -> u32 { + u32::from_le_bytes([ + regs[offset] as u8, + regs[offset + 1] as u8, + regs[offset + 2] as u8, + regs[offset + 3] as u8, + ]) +} + impl MshvVm { /// Create a new instance of a MshvVm #[instrument(skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn new() -> std::result::Result { let mshv = MSHV.as_ref().map_err(|e| e.clone())?; - let pr = Default::default(); - // It's important to avoid create_vm() and explicitly use - // create_vm_with_args() with an empty arguments structure - // here, because otherwise the partition is set up with a SynIC. + #[allow(unused_mut)] + let mut pr: mshv_create_partition_v2 = Default::default(); + // Enable LAPIC for hw-interrupts — required for SynIC direct-mode + // timer delivery via APIC vector. MSHV_PT_BIT_LAPIC = bit 0. + #[cfg(feature = "hw-interrupts")] + { + pr.pt_flags = 1u64; // LAPIC + } let vm_fd = mshv .create_vm_with_args(&pr) .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; let vcpu_fd = { - let features: hv_partition_synthetic_processor_features = Default::default(); + // Use the full default synthetic features mask (includes SynIC, + // synthetic timers, direct mode, hypercall regs, etc.) when + // hw-interrupts is enabled. Without hw-interrupts, leave all + // features at zero. + #[cfg(feature = "hw-interrupts")] + let feature_val = make_default_synthetic_features_mask(); + #[cfg(not(feature = "hw-interrupts"))] + let feature_val = 0u64; + vm_fd .set_partition_property( hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - unsafe { features.as_uint64[0] }, + feature_val, ) .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + vm_fd .initialize() .map_err(|e| CreateVmError::InitializeVm(e.into()))?; @@ -103,7 +159,62 @@ impl MshvVm { .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))? }; - Ok(Self { vm_fd, vcpu_fd }) + // Initialize the virtual LAPIC when hw-interrupts is enabled. + // LAPIC defaults to disabled (SVR bit 8 = 0), which means no APIC + // interrupts (including SynIC direct-mode timer) can be delivered. + #[cfg(feature = "hw-interrupts")] + { + let mut lapic: LapicState = vcpu_fd + .get_lapic() + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + // SVR (offset 0xF0): bit 8 = enable APIC, bits 0-7 = spurious vector + write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); + // TPR (offset 0x80): 0 = accept all interrupt priorities + write_lapic_u32(&mut lapic.regs, 0x80, 0); + // DFR (offset 0xE0): 0xFFFFFFFF = flat model + write_lapic_u32(&mut lapic.regs, 0xE0, 0xFFFF_FFFF); + // LDR (offset 0xD0): set logical APIC ID for flat model + write_lapic_u32(&mut lapic.regs, 0xD0, 1 << 24); + // LINT0 (offset 0x350): masked — we don't forward PIC through LAPIC; + // our PIC is emulated in userspace and not wired to LINT0. + write_lapic_u32(&mut lapic.regs, 0x350, 0x0001_0000); + // LINT1 (offset 0x360): NMI delivery, not masked + write_lapic_u32(&mut lapic.regs, 0x360, 0x400); + // LVT Timer (offset 0x320): masked — we use SynIC timer instead + write_lapic_u32(&mut lapic.regs, 0x320, 0x0001_0000); + // LVT Error (offset 0x370): masked + write_lapic_u32(&mut lapic.regs, 0x370, 0x0001_0000); + + vcpu_fd + .set_lapic(&lapic) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + // Install MSR intercept for IA32_APIC_BASE (MSR 0x1B) to prevent + // the guest from globally disabling the LAPIC. The Nanvix kernel + // disables the APIC when no I/O APIC is detected, but we need + // the LAPIC enabled for SynIC direct-mode timer delivery. + // + // This may fail with AccessDenied on some kernel versions; in + // that case we fall back to re-enabling the LAPIC in the timer + // setup path (handle_hw_io_out). + let _ = vm_fd.install_intercept(mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_WRITE, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, + intercept_parameter: hv_intercept_parameters { + msr_index: 0x1B, // IA32_APIC_BASE + }, + }); + } + + Ok(Self { + vm_fd, + vcpu_fd, + #[cfg(feature = "hw-interrupts")] + pic: Pic::new(), + #[cfg(feature = "hw-interrupts")] + synic_timer_active: false, + }) } } @@ -128,6 +239,7 @@ impl VirtualMachine for MshvVm { .map_err(|e| UnmapMemoryError::Hypervisor(e.into())) } + #[cfg_attr(not(feature = "hw-interrupts"), allow(clippy::never_loop))] fn run_vcpu( &mut self, #[cfg(feature = "trace_guest")] tc: &mut SandboxTraceContext, @@ -137,6 +249,8 @@ impl VirtualMachine for MshvVm { hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; + #[cfg(feature = "hw-interrupts")] + const MSR_INTERCEPT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_MSR_INTERCEPT; #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; @@ -144,82 +258,181 @@ impl VirtualMachine for MshvVm { // it sets the guest span, no other traces or spans must be setup in between these calls. #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); - let exit_reason = self.vcpu_fd.run(); - - let result = match exit_reason { - Ok(m) => match m.header.message_type { - HALT_MESSAGE => VmExit::Halt(), - IO_PORT_INTERCEPT_MESSAGE => { - let io_message = m - .to_ioport_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let port_number = io_message.port_number; - let rip = io_message.header.rip; - let rax = io_message.rax; - let instruction_length = io_message.header.instruction_length() as u64; - - // mshv, unlike kvm, does not automatically increment RIP - self.vcpu_fd - .set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_RIP, - value: hv_register_value { - reg64: rip + instruction_length, - }, - ..Default::default() - }]) - .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - VmExit::IoOut(port_number, rax.to_le_bytes().to_vec()) - } - UNMAPPED_GPA_MESSAGE => { - let mimo_message = m - .to_memory_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let addr = mimo_message.guest_physical_address; - match MemoryRegionFlags::try_from(mimo_message) - .map_err(|_| RunVcpuError::ParseGpaAccessInfo)? - { - MemoryRegionFlags::READ => VmExit::MmioRead(addr), - MemoryRegionFlags::WRITE => VmExit::MmioWrite(addr), - _ => VmExit::Unknown("Unknown MMIO access".to_string()), + + loop { + let exit_reason = self.vcpu_fd.run(); + + match exit_reason { + Ok(m) => { + let msg_type = m.header.message_type; + match msg_type { + HALT_MESSAGE => { + // With SynIC timer active, re-enter the guest. + // The hypervisor will deliver pending timer + // interrupts on the next run(), waking the + // vCPU from HLT. + #[cfg(feature = "hw-interrupts")] + if self.synic_timer_active { + continue; + } + return Ok(VmExit::Halt()); + } + IO_PORT_INTERCEPT_MESSAGE => { + let io_message = m + .to_ioport_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let port_number = io_message.port_number; + let rip = io_message.header.rip; + let rax = io_message.rax; + let instruction_length = io_message.header.instruction_length() as u64; + let is_write = io_message.header.intercept_access_type != 0; + + // mshv, unlike kvm, does not automatically increment RIP + self.vcpu_fd + .set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RIP, + value: hv_register_value { + reg64: rip + instruction_length, + }, + ..Default::default() + }]) + .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; + + // OutBAction::Halt always means "I'm done", regardless + // of whether a timer is active. + if is_write && port_number == OutBAction::Halt as u16 { + return Ok(VmExit::Halt()); + } + + #[cfg(feature = "hw-interrupts")] + { + if is_write { + let data = rax.to_le_bytes(); + if self.handle_hw_io_out(port_number, &data) { + continue; + } + } else if let Some(val) = self.handle_hw_io_in(port_number) { + self.vcpu_fd + .set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RAX, + value: hv_register_value { reg64: val }, + ..Default::default() + }]) + .map_err(|e| RunVcpuError::Unknown(e.into()))?; + continue; + } + } + + // Suppress unused variable warning when hw-interrupts is disabled + let _ = is_write; + + return Ok(VmExit::IoOut(port_number, rax.to_le_bytes().to_vec())); + } + UNMAPPED_GPA_MESSAGE => { + let mimo_message = m + .to_memory_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let addr = mimo_message.guest_physical_address; + return match MemoryRegionFlags::try_from(mimo_message) + .map_err(|_| RunVcpuError::ParseGpaAccessInfo)? + { + MemoryRegionFlags::READ => Ok(VmExit::MmioRead(addr)), + MemoryRegionFlags::WRITE => Ok(VmExit::MmioWrite(addr)), + _ => Ok(VmExit::Unknown("Unknown MMIO access".to_string())), + }; + } + INVALID_GPA_ACCESS_MESSAGE => { + let mimo_message = m + .to_memory_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let gpa = mimo_message.guest_physical_address; + let access_info = MemoryRegionFlags::try_from(mimo_message) + .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; + return match access_info { + MemoryRegionFlags::READ => Ok(VmExit::MmioRead(gpa)), + MemoryRegionFlags::WRITE => Ok(VmExit::MmioWrite(gpa)), + _ => Ok(VmExit::Unknown("Unknown MMIO access".to_string())), + }; + } + #[cfg(feature = "hw-interrupts")] + MSR_INTERCEPT_MESSAGE => { + // Guest is writing to MSR 0x1B (IA32_APIC_BASE). + // Force bit 11 (global APIC enable) to stay set, + // preventing the guest from disabling the LAPIC. + let msr_msg = m + .to_msr_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let rip = msr_msg.header.rip; + let instruction_length = msr_msg.header.instruction_length() as u64; + let msr_val = (msr_msg.rdx << 32) | (msr_msg.rax & 0xFFFF_FFFF); + + // Force APIC global enable (bit 11) to remain set, + // preserving the standard base address. + let forced_val = msr_val | (1 << 11) | 0xFEE00000; + self.vcpu_fd + .set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RIP, + value: hv_register_value { + reg64: rip + instruction_length, + }, + ..Default::default() + }]) + .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; + + use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { reg64: forced_val }, + ..Default::default() + }]); + continue; + } + #[cfg(gdb)] + EXCEPTION_INTERCEPT => { + let ex_info = m + .to_exception_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let DebugRegisters { dr6, .. } = self + .vcpu_fd + .get_debug_regs() + .map_err(|e| RunVcpuError::GetDr6(e.into()))?; + return Ok(VmExit::Debug { + dr6, + exception: ex_info.exception_vector as u32, + }); + } + other => { + return Ok(VmExit::Unknown(format!( + "Unknown MSHV VCPU exit: {:?}", + other + ))); + } } } - INVALID_GPA_ACCESS_MESSAGE => { - let mimo_message = m - .to_memory_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let gpa = mimo_message.guest_physical_address; - let access_info = MemoryRegionFlags::try_from(mimo_message) - .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; - match access_info { - MemoryRegionFlags::READ => VmExit::MmioRead(gpa), - MemoryRegionFlags::WRITE => VmExit::MmioWrite(gpa), - _ => VmExit::Unknown("Unknown MMIO access".to_string()), + Err(e) => match e.errno() { + libc::EINTR => { + // When the SynIC timer is active, EINTR may be + // a spurious signal. Continue the run loop to + // let the hypervisor deliver any pending timer + // interrupt. + #[cfg(feature = "hw-interrupts")] + if self.synic_timer_active { + continue; + } + return Ok(VmExit::Cancelled()); } - } - #[cfg(gdb)] - EXCEPTION_INTERCEPT => { - let ex_info = m - .to_exception_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let DebugRegisters { dr6, .. } = self - .vcpu_fd - .get_debug_regs() - .map_err(|e| RunVcpuError::GetDr6(e.into()))?; - VmExit::Debug { - dr6, - exception: ex_info.exception_vector as u32, + libc::EAGAIN => { + #[cfg(not(feature = "hw-interrupts"))] + { + return Ok(VmExit::Retry()); + } + #[cfg(feature = "hw-interrupts")] + continue; } - } - other => VmExit::Unknown(format!("Unknown MSHV VCPU exit: {:?}", other)), - }, - Err(e) => match e.errno() { - // InterruptHandle::kill() sends a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR - libc::EINTR => VmExit::Cancelled(), - libc::EAGAIN => VmExit::Retry(), - _ => Err(RunVcpuError::Unknown(e.into()))?, - }, - }; - Ok(result) + _ => return Err(RunVcpuError::Unknown(e.into())), + }, + } + } } fn regs(&self) -> std::result::Result { @@ -463,3 +676,160 @@ impl DebuggableVm for MshvVm { } } } + +#[cfg(feature = "hw-interrupts")] +impl MshvVm { + /// Standard x86 APIC base MSR value: base address 0xFEE00000 + + /// BSP flag (bit 8) + global enable (bit 11). + const APIC_BASE_DEFAULT: u64 = 0xFEE00900; + + /// Perform LAPIC EOI: clear the highest-priority in-service bit. + /// Called when the guest sends PIC EOI, since SynIC direct-mode timer + /// delivers through the LAPIC and the guest only acknowledges via PIC. + fn do_lapic_eoi(&self) { + if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { + // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). + // Scan from highest priority (ISR[7]) to lowest (ISR[0]). + for i in (0u32..8).rev() { + let offset = 0x100 + (i as usize) * 0x10; + let isr_val = read_lapic_u32(&lapic.regs, offset); + if isr_val != 0 { + let bit = 31 - isr_val.leading_zeros(); + write_lapic_u32(&mut lapic.regs, offset, isr_val & !(1u32 << bit)); + let _ = self.vcpu_fd.set_lapic(&lapic); + break; + } + } + } + } + + /// Handle a hardware-interrupt IO IN request. + /// Returns `Some(value)` if the port was handled (PIC or PIT read), + /// `None` if the port should be passed through to the guest handler. + fn handle_hw_io_in(&self, port: u16) -> Option { + if let Some(val) = self.pic.handle_io_in(port) { + return Some(val as u64); + } + // PIT data port read -- return 0 + if port == 0x40 { + return Some(0); + } + None + } + + /// Build SynIC timer config register value. + /// + /// Layout (from Hyper-V TLFS): + /// - Bit 0: enable + /// - Bit 1: periodic + /// - Bit 3: auto_enable + /// - Bits 4-11: apic_vector + /// - Bit 12: direct_mode + fn build_stimer_config(vector: u8) -> u64 { + 1 // enable + | (1 << 1) // periodic + | (1 << 3) // auto_enable + | ((vector as u64) << 4) // apic_vector + | (1 << 12) // direct_mode + } + + /// Convert a timer period in microseconds to the SynIC timer count + /// in 100-nanosecond units (Hyper-V reference time). + fn period_us_to_100ns(period_us: u32) -> u64 { + (period_us as u64) * 10 + } + + fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { + if port == OutBAction::PvTimerConfig as u16 { + if data.len() >= 4 { + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + if period_us > 0 { + // Re-enable LAPIC if the guest disabled it (via WRMSR + // to MSR 0x1B clearing bit 11). This happens when the + // Nanvix kernel doesn't detect an I/O APIC. + // + // The hypervisor may return 0 for APIC_BASE when the + // APIC is globally disabled, so we always restore the + // standard value (0xFEE00900). + use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; + let mut apic_base_reg = [hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { reg64: 0 }, + ..Default::default() + }]; + if self.vcpu_fd.get_reg(&mut apic_base_reg).is_ok() { + let cur = unsafe { apic_base_reg[0].value.reg64 }; + if cur & (1 << 11) == 0 { + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { + reg64: Self::APIC_BASE_DEFAULT, + }, + ..Default::default() + }]); + } + } + // Re-initialize LAPIC SVR (may have been zeroed when + // guest disabled the APIC globally) + if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { + let svr = read_lapic_u32(&lapic.regs, 0xF0); + if svr & 0x100 == 0 { + write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); + write_lapic_u32(&mut lapic.regs, 0x80, 0); // TPR + let _ = self.vcpu_fd.set_lapic(&lapic); + } + } + + // Enable SynIC on this vCPU (SCONTROL bit 0 = enable) + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_REGISTER_SCONTROL, + value: hv_register_value { reg64: 1 }, + ..Default::default() + }]); + + // Arm STIMER0 immediately (the guest needs timer + // interrupts for thread preemption, not just idle + // wakeup). + let vector = self.pic.master_vector_base(); + let config = Self::build_stimer_config(vector); + let count_100ns = Self::period_us_to_100ns(period_us); + + let _ = self.vcpu_fd.set_reg(&[ + hv_register_assoc { + name: hv_register_name_HV_REGISTER_STIMER0_CONFIG, + value: hv_register_value { reg64: config }, + ..Default::default() + }, + hv_register_assoc { + name: hv_register_name_HV_REGISTER_STIMER0_COUNT, + value: hv_register_value { reg64: count_100ns }, + ..Default::default() + }, + ]); + self.synic_timer_active = true; + } + } + return true; + } + if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { + // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), + // also perform LAPIC EOI since SynIC timer delivers via LAPIC and + // the guest only knows about the PIC. + if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.synic_timer_active { + self.do_lapic_eoi(); + } + return true; + } + if port == 0x43 || port == 0x40 { + return true; + } + if port == 0x61 { + return true; + } + if port == 0x80 { + return true; + } + false + } +} + From ff19a7a13ed680d68346cd90f60dacaba1e140c8 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:58:47 +0000 Subject: [PATCH 07/18] feat(whp): add hardware interrupt support with software timer Add WHP hardware interrupt support using a host-side software timer thread that periodically injects interrupts via WHvRequestInterrupt. Key changes: - Detect LAPIC emulation support via WHvGetCapability - Initialize LAPIC via bulk interrupt-controller state API (WHvGet/SetVirtualProcessorInterruptControllerState2) since individual APIC register writes fail with ACCESS_DENIED - Software timer thread for periodic interrupt injection - LAPIC EOI handling for PIC-only guest acknowledgment - PIC emulation integration for MSHV/WHP shared 8259A - Filter APIC_BASE from set_sregs when LAPIC emulation active - HLT re-entry when timer is active Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/whp.rs | 668 +++++++++++++++--- 1 file changed, 573 insertions(+), 95 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 07f4b5a97..b59aec00d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -15,7 +15,12 @@ limitations under the License. */ use std::os::raw::c_void; +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; +#[cfg(feature = "hw-interrupts")] +use std::sync::atomic::{AtomicBool, Ordering}; +use hyperlight_common::outb::OutBAction; #[cfg(feature = "trace_guest")] use tracing::Span; #[cfg(feature = "trace_guest")] @@ -28,6 +33,8 @@ use windows_result::HRESULT; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ Align16, CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, @@ -71,6 +78,20 @@ pub(crate) struct WhpVm { partition: WHV_PARTITION_HANDLE, // Surrogate process for memory mapping surrogate_process: SurrogateProcess, + #[cfg(feature = "hw-interrupts")] + pic: Pic, + /// Whether the WHP host supports LAPIC emulation mode. + #[cfg(feature = "hw-interrupts")] + lapic_emulation: bool, + /// Whether the LAPIC timer is actively delivering periodic interrupts. + #[cfg(feature = "hw-interrupts")] + lapic_timer_active: bool, + /// Signal to stop the software timer thread. + #[cfg(feature = "hw-interrupts")] + timer_stop: Option>, + /// Handle for the software timer thread. + #[cfg(feature = "hw-interrupts")] + timer_thread: Option>, } // Safety: `WhpVm` is !Send because it holds `SurrogateProcess` which contains a raw pointer @@ -83,33 +104,241 @@ unsafe impl Send for WhpVm {} impl WhpVm { pub(crate) fn new() -> Result { const NUM_CPU: u32 = 1; - let partition = unsafe { - let partition = - WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - partition, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(partition).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(partition, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - partition + + // Check whether the WHP host supports LAPIC emulation. + // Bit 1 of WHV_CAPABILITY_FEATURES corresponds to LocalApicEmulation. + // LAPIC emulation is required for the LAPIC timer to deliver interrupts. + #[cfg(feature = "hw-interrupts")] + const LAPIC_EMULATION_BIT: u64 = 1 << 1; + + #[cfg(feature = "hw-interrupts")] + let mut lapic_emulation = { + let mut capability: WHV_CAPABILITY = Default::default(); + let ok = unsafe { + WHvGetCapability( + WHvCapabilityCodeFeatures, + &mut capability as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + None, + ) + }; + if ok.is_ok() { + unsafe { capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0 } + } else { + false + } }; - // Create the surrogate process with the total memory size - let mgr = get_surrogate_process_manager() - .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; - let surrogate_process = mgr - .get_surrogate_process() - .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + // Create partition, set CPU count, setup, and create vCPU. + // When hw-interrupts is enabled and LAPIC emulation is supported, + // we create a partition with LAPIC emulation mode. This is + // required for the LAPIC timer. + let partition = unsafe { + #[cfg(feature = "hw-interrupts")] + { + if lapic_emulation { + let p = + WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + + let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic + let partition_ok = WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &apic_mode as *const _ as *const _, + std::mem::size_of_val(&apic_mode) as _, + ) + .is_ok() + && WHvSetupPartition(p).is_ok() + && WHvCreateVirtualProcessor(p, 0, 0).is_ok(); + + if partition_ok { + log::info!("[WHP] LAPIC partition created, running init_lapic_bulk"); + // Initialize the LAPIC via the bulk interrupt-controller + // state API (individual APIC register writes via + // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). + if let Err(e) = Self::init_lapic_bulk(p) { + log::warn!( + "[WHP] Bulk LAPIC init failed ({e}); \ + falling back to non-LAPIC mode." + ); + let _ = WHvDeleteVirtualProcessor(p, 0); + let _ = WHvDeletePartition(p); + lapic_emulation = false; + + let p = WHvCreatePartition() + .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } else { + log::info!("[WHP] Bulk LAPIC init succeeded, lapic_emulation=true"); + p + } + } else { + log::warn!("LAPIC emulation setup failed, falling back to non-LAPIC mode"); + let _ = WHvDeleteVirtualProcessor(p, 0); + let _ = WHvDeletePartition(p); + lapic_emulation = false; + + let p = WHvCreatePartition() + .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } + } else { + let p = + WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } + } + + #[cfg(not(feature = "hw-interrupts"))] + { + let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } + }; - Ok(WhpVm { + let vm = WhpVm { partition, - surrogate_process, - }) + surrogate_process: { + let mgr = get_surrogate_process_manager() + .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + mgr.get_surrogate_process() + .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))? + }, + #[cfg(feature = "hw-interrupts")] + pic: Pic::new(), + #[cfg(feature = "hw-interrupts")] + lapic_emulation, + #[cfg(feature = "hw-interrupts")] + lapic_timer_active: false, + #[cfg(feature = "hw-interrupts")] + timer_stop: None, + #[cfg(feature = "hw-interrupts")] + timer_thread: None, + }; + + Ok(vm) + } + + /// Maximum size for the interrupt controller state blob. + const LAPIC_STATE_MAX_SIZE: u32 = 4096; + + /// Initialize the LAPIC via the bulk interrupt-controller state API. + /// Individual APIC register writes via `WHvSetVirtualProcessorRegisters` + /// fail with ACCESS_DENIED on WHP when LAPIC emulation is enabled, + /// so we use `WHvGet/SetVirtualProcessorInterruptControllerState2` + /// to read-modify-write the entire LAPIC register page. + #[cfg(feature = "hw-interrupts")] + unsafe fn init_lapic_bulk(partition: WHV_PARTITION_HANDLE) -> windows_result::Result<()> { + let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; + let mut written: u32 = 0; + + unsafe { + WHvGetVirtualProcessorInterruptControllerState2( + partition, + 0, + state.as_mut_ptr() as *mut c_void, + Self::LAPIC_STATE_MAX_SIZE, + Some(&mut written), + )?; + } + state.truncate(written as usize); + log::info!( + "[WHP] init_lapic_bulk: got {} bytes of LAPIC state", + written + ); + + // SVR (offset 0xF0): enable APIC (bit 8) + spurious vector 0xFF + Self::write_lapic_u32(&mut state, 0xF0, 0x1FF); + // TPR (offset 0x80): 0 = accept all interrupt priorities + Self::write_lapic_u32(&mut state, 0x80, 0); + // DFR (offset 0xE0): 0xFFFFFFFF = flat model + Self::write_lapic_u32(&mut state, 0xE0, 0xFFFF_FFFF); + // LDR (offset 0xD0): set logical APIC ID for flat model + Self::write_lapic_u32(&mut state, 0xD0, 1 << 24); + // LINT0 (offset 0x350): masked + Self::write_lapic_u32(&mut state, 0x350, 0x0001_0000); + // LINT1 (offset 0x360): NMI delivery, not masked + Self::write_lapic_u32(&mut state, 0x360, 0x400); + // LVT Timer (offset 0x320): masked initially (configured on PvTimerConfig) + Self::write_lapic_u32(&mut state, 0x320, 0x0001_0000); + // LVT Error (offset 0x370): masked + Self::write_lapic_u32(&mut state, 0x370, 0x0001_0000); + + unsafe { + WHvSetVirtualProcessorInterruptControllerState2( + partition, + 0, + state.as_ptr() as *const c_void, + state.len() as u32, + )?; + } + + Ok(()) + } + + /// Read a u32 from a LAPIC register page at the given APIC offset. + #[cfg(feature = "hw-interrupts")] + fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { + u32::from_le_bytes([ + state[offset], + state[offset + 1], + state[offset + 2], + state[offset + 3], + ]) + } + + /// Write a u32 to a LAPIC register page at the given APIC offset. + #[cfg(feature = "hw-interrupts")] + fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { + state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); } /// Helper for setting arbitrary registers. Makes sure the same number @@ -224,85 +453,155 @@ impl VirtualMachine for WhpVm { // it sets the guest span, no other traces or spans must be setup in between these calls. #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); - unsafe { - WHvRunVirtualProcessor( - self.partition, - 0, - &mut exit_context as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - ) - .map_err(|e| RunVcpuError::Unknown(e.into()))?; - } - let result = match exit_context.ExitReason { - WHvRunVpExitReasonX64IoPortAccess => unsafe { - let instruction_length = exit_context.VpContext._bitfield & 0xF; - let rip = exit_context.VpContext.Rip + instruction_length as u64; - self.set_registers(&[( - WHvX64RegisterRip, - Align16(WHV_REGISTER_VALUE { Reg64: rip }), - )]) - .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - VmExit::IoOut( - exit_context.Anonymous.IoPortAccess.PortNumber, - exit_context + + loop { + unsafe { + WHvRunVirtualProcessor( + self.partition, + 0, + &mut exit_context as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + ) + .map_err(|e| RunVcpuError::Unknown(e.into()))?; + } + + match exit_context.ExitReason { + WHvRunVpExitReasonX64IoPortAccess => unsafe { + let instruction_length = exit_context.VpContext._bitfield & 0xF; + let rip = exit_context.VpContext.Rip + instruction_length as u64; + let port = exit_context.Anonymous.IoPortAccess.PortNumber; + let rax = exit_context.Anonymous.IoPortAccess.Rax; + let is_write = exit_context .Anonymous .IoPortAccess - .Rax - .to_le_bytes() - .to_vec(), - ) - }, - WHvRunVpExitReasonX64Halt => VmExit::Halt(), - WHvRunVpExitReasonMemoryAccess => { - let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; - let access_info = unsafe { - WHV_MEMORY_ACCESS_TYPE( - // 2 first bits are the access type, see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/memoryaccess#syntax - (exit_context.Anonymous.MemoryAccess.AccessInfo.AsUINT32 & 0b11) as i32, - ) - }; - let access_info = MemoryRegionFlags::try_from(access_info) - .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; - match access_info { - MemoryRegionFlags::READ => VmExit::MmioRead(gpa), - MemoryRegionFlags::WRITE => VmExit::MmioWrite(gpa), - _ => VmExit::Unknown("Unknown memory access type".to_string()), + .AccessInfo + .Anonymous + ._bitfield + & 1 + != 0; + + self.set_registers(&[( + WHvX64RegisterRip, + Align16(WHV_REGISTER_VALUE { Reg64: rip }), + )]) + .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; + + // OutBAction::Halt always means "I'm done", regardless + // of whether a timer is active. + if is_write && port == OutBAction::Halt as u16 { + return Ok(VmExit::Halt()); + } + + #[cfg(feature = "hw-interrupts")] + { + if is_write { + let data = rax.to_le_bytes(); + if self.handle_hw_io_out(port, &data) { + continue; + } + } else if let Some(val) = self.handle_hw_io_in(port) { + self.set_registers(&[( + WHvX64RegisterRax, + Align16(WHV_REGISTER_VALUE { Reg64: val }), + )]) + .map_err(|e| RunVcpuError::Unknown(e.into()))?; + continue; + } + } + + // Suppress unused variable warnings when hw-interrupts is disabled + let _ = is_write; + + return Ok(VmExit::IoOut(port, rax.to_le_bytes().to_vec())); + }, + WHvRunVpExitReasonX64Halt => { + // With software timer active, re-enter the guest. + // WHvRunVirtualProcessor will block until the timer + // thread injects an interrupt via WHvRequestInterrupt, + // waking the vCPU from HLT. + #[cfg(feature = "hw-interrupts")] + if self.lapic_timer_active { + continue; + } + return Ok(VmExit::Halt()); } - } - // Execution was cancelled by the host. - WHvRunVpExitReasonCanceled => VmExit::Cancelled(), - #[cfg(gdb)] - WHvRunVpExitReasonException => { - let exception = unsafe { exit_context.Anonymous.VpException }; - - // Get the DR6 register to see which breakpoint was hit - let dr6 = { - let names = [WHvX64RegisterDr6]; - let mut out: [Align16; 1] = unsafe { std::mem::zeroed() }; - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - names.as_ptr(), - 1, - out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + WHvRunVpExitReasonMemoryAccess => { + let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; + let access_info = unsafe { + WHV_MEMORY_ACCESS_TYPE( + (exit_context.Anonymous.MemoryAccess.AccessInfo.AsUINT32 & 0b11) as i32, ) - .map_err(|e| RunVcpuError::GetDr6(e.into()))?; + }; + let access_info = MemoryRegionFlags::try_from(access_info) + .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; + return match access_info { + MemoryRegionFlags::READ => Ok(VmExit::MmioRead(gpa)), + MemoryRegionFlags::WRITE => Ok(VmExit::MmioWrite(gpa)), + _ => Ok(VmExit::Unknown("Unknown memory access type".to_string())), + }; + } + // Execution was cancelled by the host. + WHvRunVpExitReasonCanceled => { + return Ok(VmExit::Cancelled()); + } + #[cfg(gdb)] + WHvRunVpExitReasonException => { + let exception = unsafe { exit_context.Anonymous.VpException }; + + // Get the DR6 register to see which breakpoint was hit + let dr6 = { + let names = [WHvX64RegisterDr6]; + let mut out: [Align16; 1] = + unsafe { std::mem::zeroed() }; + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + names.as_ptr(), + 1, + out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + ) + .map_err(|e| RunVcpuError::GetDr6(e.into()))?; + } + unsafe { out[0].0.Reg64 } + }; + + return Ok(VmExit::Debug { + dr6, + exception: exception.ExceptionType as u32, + }); + } + WHV_RUN_VP_EXIT_REASON(_) => { + let rip = exit_context.VpContext.Rip; + log::error!( + "WHP unknown exit reason {}: RIP={:#x}", + exit_context.ExitReason.0, + rip, + ); + if let Ok(regs) = self.regs() { + log::error!( + " RAX={:#x} RCX={:#x} RDX={:#x}", + regs.rax, + regs.rcx, + regs.rdx + ); } - unsafe { out[0].0.Reg64 } - }; - - VmExit::Debug { - dr6, - exception: exception.ExceptionType as u32, + if let Ok(sregs) = self.sregs() { + log::error!( + " CR0={:#x} CR4={:#x} EFER={:#x} APIC_BASE={:#x}", + sregs.cr0, + sregs.cr4, + sregs.efer, + sregs.apic_base + ); + } + return Ok(VmExit::Unknown(format!( + "Unknown exit reason '{}' at RIP={:#x}", + exit_context.ExitReason.0, rip + ))); } } - WHV_RUN_VP_EXIT_REASON(_) => VmExit::Unknown(format!( - "Unknown exit reason '{}'", - exit_context.ExitReason.0 - )), - }; - Ok(result) + } } fn regs(&self) -> std::result::Result { @@ -411,6 +710,26 @@ impl VirtualMachine for WhpVm { fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> std::result::Result<(), RegisterError> { let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = sregs.into(); + + // When LAPIC emulation is active, skip writing APIC_BASE. + // The generic CommonSpecialRegisters defaults APIC_BASE to 0 + // which would globally disable the LAPIC. On some WHP hosts, + // host-side APIC register writes are blocked entirely + // (ACCESS_DENIED), so attempting to write even a valid value + // would fail and abort set_sregs. Omitting it lets the VP + // keep its default APIC_BASE and the guest configures it. + #[cfg(feature = "hw-interrupts")] + if self.lapic_emulation { + let filtered: Vec<_> = whp_regs + .iter() + .copied() + .filter(|(name, _)| *name != WHvX64RegisterApicBase) + .collect(); + self.set_registers(&filtered) + .map_err(|e| RegisterError::SetSregs(e.into()))?; + return Ok(()); + } + self.set_registers(&whp_regs) .map_err(|e| RegisterError::SetSregs(e.into()))?; Ok(()) @@ -762,8 +1081,166 @@ impl DebuggableVm for WhpVm { } } +#[cfg(feature = "hw-interrupts")] +impl WhpVm { + /// Get the LAPIC state via the bulk interrupt-controller state API. + fn get_lapic_state(&self) -> windows_result::Result> { + let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; + let mut written: u32 = 0; + + unsafe { + WHvGetVirtualProcessorInterruptControllerState2( + self.partition, + 0, + state.as_mut_ptr() as *mut c_void, + Self::LAPIC_STATE_MAX_SIZE, + Some(&mut written), + )?; + } + state.truncate(written as usize); + Ok(state) + } + + /// Set the LAPIC state via the bulk interrupt-controller state API. + fn set_lapic_state(&self, state: &[u8]) -> windows_result::Result<()> { + unsafe { + WHvSetVirtualProcessorInterruptControllerState2( + self.partition, + 0, + state.as_ptr() as *const c_void, + state.len() as u32, + ) + } + } + + /// Perform LAPIC EOI: clear the highest-priority in-service bit. + /// Called when the guest sends PIC EOI, since the LAPIC timer + /// delivers through the LAPIC and the guest only acknowledges via PIC. + fn do_lapic_eoi(&self) { + if let Ok(mut state) = self.get_lapic_state() { + // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). + // Scan from highest priority (ISR[7]) to lowest (ISR[0]). + for i in (0u32..8).rev() { + let offset = 0x100 + (i as usize) * 0x10; + let isr_val = Self::read_lapic_u32(&state, offset); + if isr_val != 0 { + let bit = 31 - isr_val.leading_zeros(); + Self::write_lapic_u32(&mut state, offset, isr_val & !(1u32 << bit)); + let _ = self.set_lapic_state(&state); + break; + } + } + } + } + + /// Handle a hardware-interrupt IO IN request. + /// Returns `Some(value)` if the port was handled (PIC or PIT read), + /// `None` if the port should be passed through to the guest handler. + fn handle_hw_io_in(&self, port: u16) -> Option { + if let Some(val) = self.pic.handle_io_in(port) { + return Some(val as u64); + } + // PIT data port read -- return 0 + if port == 0x40 { + return Some(0); + } + None + } + + /// Stop the software timer thread if running. + fn stop_timer_thread(&mut self) { + if let Some(stop) = self.timer_stop.take() { + stop.store(true, Ordering::Relaxed); + } + if let Some(handle) = self.timer_thread.take() { + let _ = handle.join(); + } + self.lapic_timer_active = false; + } + + fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { + if port == OutBAction::PvTimerConfig as u16 { + if data.len() >= 4 { + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + log::info!( + "[WHP] PvTimerConfig: period_us={period_us}, lapic_emulation={}", + self.lapic_emulation + ); + + // Stop any existing timer thread before (re-)configuring. + self.stop_timer_thread(); + + if period_us > 0 && self.lapic_emulation { + // WHP's bulk LAPIC state API does not start the LAPIC + // timer countdown (CurCount stays 0). Instead we use a + // host-side software timer thread that periodically + // injects interrupts via WHvRequestInterrupt. + let partition_raw = self.partition.0; + let vector = self.pic.master_vector_base() as u32; + let stop = Arc::new(AtomicBool::new(false)); + let stop_clone = stop.clone(); + let period = std::time::Duration::from_micros(period_us as u64); + + let handle = std::thread::spawn(move || { + while !stop_clone.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop_clone.load(Ordering::Relaxed) { + break; + } + let partition = WHV_PARTITION_HANDLE(partition_raw); + let interrupt = WHV_INTERRUPT_CONTROL { + _bitfield: 0, // Type=Fixed, DestMode=Physical, Trigger=Edge + Destination: 0, + Vector: vector, + }; + let _ = unsafe { + WHvRequestInterrupt( + partition, + &interrupt, + std::mem::size_of::() as u32, + ) + }; + } + }); + + self.timer_stop = Some(stop); + self.timer_thread = Some(handle); + self.lapic_timer_active = true; + log::info!( + "[WHP] Software timer started (period={period_us}us, vector={vector:#x})" + ); + } + } + return true; + } + if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { + // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), + // also perform LAPIC EOI since the software timer delivers + // through the LAPIC and the guest only acknowledges via PIC. + if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.lapic_timer_active { + self.do_lapic_eoi(); + } + return true; + } + if port == 0x43 || port == 0x40 { + return true; + } + if port == 0x61 { + return true; + } + if port == 0x80 { + return true; + } + false + } +} + impl Drop for WhpVm { fn drop(&mut self) { + // Stop the software timer thread before tearing down the partition. + #[cfg(feature = "hw-interrupts")] + self.stop_timer_thread(); + // HyperlightVm::drop() calls set_dropped() before this runs. // set_dropped() ensures no WHvCancelRunVirtualProcessor calls are in progress // or will be made in the future, so it's safe to delete the partition. @@ -812,3 +1289,4 @@ unsafe fn try_load_whv_map_gpa_range2() -> windows_result::Result Date: Tue, 3 Mar 2026 22:15:23 +0000 Subject: [PATCH 08/18] test: add hardware interrupt unit and integration tests Signed-off-by: danbugs --- .../src/hypervisor/hyperlight_vm.rs | 6 + src/hyperlight_host/src/hypervisor/mod.rs | 1 + .../src/hypervisor/virtual_machine/kvm.rs | 16 ++ .../src/hypervisor/virtual_machine/mshv.rs | 99 ++++++++++ .../src/hypervisor/virtual_machine/whp.rs | 88 +++++++++ src/tests/rust_guests/simpleguest/src/main.rs | 171 +++++++++++++++++- 6 files changed, 378 insertions(+), 3 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index d3a210926..05628de7e 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -2272,6 +2272,7 @@ mod tests { // Tests // ========================================================================== + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_simple() { // push rax; hlt - aligns stack to 16 bytes @@ -2417,6 +2418,7 @@ mod tests { use super::*; + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -2476,6 +2478,7 @@ mod tests { assert_regs_reset(hyperlight_vm.vm.as_ref()); } + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_fpu() { #[cfg(kvm)] @@ -2607,6 +2610,7 @@ mod tests { } } + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_debug_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -2649,6 +2653,7 @@ mod tests { assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); } + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_sregs() { // Build code that modifies special registers and halts @@ -2702,6 +2707,7 @@ mod tests { /// Verifies guest-visible FPU state (via FXSAVE) is properly reset. /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE. + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_fpu_guest_visible_state() { let mut ctx = hyperlight_vm_with_mem_mgr_fxsave(); diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 7c233af3d..04d13f9ec 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -472,6 +472,7 @@ pub(crate) mod tests { use crate::sandbox::{SandboxConfiguration, UninitializedSandbox}; use crate::{Result, is_hypervisor_present, new_error}; + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn test_initialise() -> Result<()> { if !is_hypervisor_present() { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index d9ad48c9b..0f70a464e 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -500,3 +500,19 @@ impl DebuggableVm for KvmVm { Ok(()) } } + +#[cfg(test)] +#[cfg(feature = "hw-interrupts")] +mod hw_interrupt_tests { + use super::*; + + #[test] + fn halt_port_is_not_standard_device() { + // OutBAction::Halt port must not overlap in-kernel PIC/PIT/speaker ports + const HALT: u16 = OutBAction::Halt as u16; + const _: () = assert!(HALT != 0x20 && HALT != 0x21); + const _: () = assert!(HALT != 0xA0 && HALT != 0xA1); + const _: () = assert!(HALT != 0x40 && HALT != 0x41 && HALT != 0x42 && HALT != 0x43); + const _: () = assert!(HALT != 0x61); + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 8b4689dc1..5e19041ac 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -833,3 +833,102 @@ impl MshvVm { } } +#[cfg(test)] +#[cfg(feature = "hw-interrupts")] +mod hw_interrupt_tests { + use super::*; + + #[test] + fn write_read_lapic_u32_roundtrip() { + let mut regs = [0i8; 1024]; + write_lapic_u32(&mut regs, 0xF0, 0xDEAD_BEEF); + assert_eq!(read_lapic_u32(®s, 0xF0), 0xDEAD_BEEF); + } + + #[test] + fn write_read_lapic_u32_multiple_offsets() { + let mut regs = [0i8; 1024]; + write_lapic_u32(&mut regs, 0x80, 0x1234_5678); + write_lapic_u32(&mut regs, 0xF0, 0xABCD_EF01); + write_lapic_u32(&mut regs, 0xE0, 0xFFFF_FFFF); + assert_eq!(read_lapic_u32(®s, 0x80), 0x1234_5678); + assert_eq!(read_lapic_u32(®s, 0xF0), 0xABCD_EF01); + assert_eq!(read_lapic_u32(®s, 0xE0), 0xFFFF_FFFF); + } + + #[test] + fn write_read_lapic_u32_zero() { + let mut regs = [0xFFu8 as i8; 1024]; + write_lapic_u32(&mut regs, 0x80, 0); + assert_eq!(read_lapic_u32(®s, 0x80), 0); + } + + #[test] + fn write_read_lapic_u32_does_not_clobber_neighbors() { + let mut regs = [0i8; 1024]; + write_lapic_u32(&mut regs, 0x80, 0xAAAA_BBBB); + // Check that bytes before and after are untouched + assert_eq!(regs[0x7F], 0); + assert_eq!(regs[0x84], 0); + } + + #[test] + fn synic_timer_config_bitfield() { + let vector: u8 = 0x20; + let config = MshvVm::build_stimer_config(vector); + + assert_ne!(config & 1, 0, "enable bit should be set"); + assert_ne!(config & (1 << 1), 0, "periodic bit should be set"); + assert_eq!(config & (1 << 2), 0, "lazy bit should be clear"); + assert_ne!(config & (1 << 3), 0, "auto_enable bit should be set"); + assert_eq!((config >> 4) & 0xFF, 0x20, "apic_vector should be 0x20"); + assert_ne!(config & (1 << 12), 0, "direct_mode bit should be set"); + } + + #[test] + fn synic_timer_config_different_vectors() { + for vector in [0x20u8, 0x30, 0x40, 0xFF] { + let config = MshvVm::build_stimer_config(vector); + assert_eq!( + (config >> 4) & 0xFF, + vector as u64, + "vector mismatch for {:#x}", + vector + ); + } + } + + #[test] + fn timer_count_us_to_100ns() { + assert_eq!(MshvVm::period_us_to_100ns(1000), 10_000); + assert_eq!(MshvVm::period_us_to_100ns(10_000), 100_000); + assert_eq!(MshvVm::period_us_to_100ns(1), 10); + } + + #[test] + fn apic_base_default_value() { + let base = MshvVm::APIC_BASE_DEFAULT; + assert_ne!(base & (1 << 8), 0, "BSP flag should be set"); + assert_ne!(base & (1 << 11), 0, "global enable should be set"); + assert_eq!( + base & 0xFFFFF000, + 0xFEE00000, + "base address should be 0xFEE00000" + ); + } + + #[test] + fn lapic_svr_init_value() { + // SVR = 0x1FF: bit 8 = enable APIC, bits 0-7 = spurious vector 0xFF + let svr: u32 = 0x1FF; + assert_ne!(svr & 0x100, 0, "APIC enable bit should be set"); + assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); + } + + #[test] + fn lapic_lvt_masked_value() { + // Masked LVT entry: bit 16 = 1 + let masked: u32 = 0x0001_0000; + assert_ne!(masked & (1 << 16), 0, "mask bit should be set"); + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index b59aec00d..41d999840 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -1290,3 +1290,91 @@ unsafe fn try_load_whv_map_gpa_range2() -> windows_result::Result() as u32, + None, + ) + }; + assert!( + result.is_ok(), + "WHvGetCapability(Features) failed: {result:?}" + ); + let raw = unsafe { capability.Features.AsUINT64 }; + let has_lapic = raw & (1 << 1) != 0; // bit 1 = LocalApicEmulation + println!("WHP Features raw: {raw:#018x}"); + println!("WHP LocalApicEmulation supported: {has_lapic}"); + assert!( + has_lapic, + "This host does not support WHP LocalApicEmulation. \ + hw-interrupts LAPIC timer tests will be skipped. \ + Windows 11 22H2+ or a recent Windows Server build is typically required." + ); + } +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 6d53c94c6..2b694be42 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -32,7 +32,7 @@ use alloc::{format, vec}; use core::alloc::Layout; use core::ffi::c_char; use core::hint::black_box; -use core::sync::atomic::{AtomicU64, Ordering}; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; use hyperlight_common::flatbuffer_wrappers::function_types::{ @@ -499,6 +499,157 @@ fn outb_with_port(port: u32, value: u32) { } } +// ============================================================================= +// Hardware timer interrupt test infrastructure +// ============================================================================= + +/// Counter incremented by the timer interrupt handler. +static TIMER_IRQ_COUNT: AtomicU32 = AtomicU32::new(0); + +// Timer IRQ handler (vector 0x20 = IRQ0 after PIC remapping). +// Increments the global counter, sends PIC EOI, and returns from interrupt. +// +// This handler is intentionally minimal: it only touches RAX, uses `lock inc` +// for the atomic counter update, and sends a non-specific EOI to the master PIC. +// +// NOTE: global_asm! on x86_64 in Rust defaults to Intel syntax. +core::arch::global_asm!( + ".globl _timer_irq_handler", + "_timer_irq_handler:", + "push rax", + "lock inc dword ptr [rip + {counter}]", + "mov al, 0x20", + "out 0x20, al", + "pop rax", + "iretq", + counter = sym TIMER_IRQ_COUNT, +); + +unsafe extern "C" { + fn _timer_irq_handler(); +} + +/// IDT pointer structure for SIDT/LIDT instructions. +#[repr(C, packed)] +struct IdtPtr { + limit: u16, + base: u64, +} + +/// Test hardware timer interrupt delivery. +/// +/// This function: +/// 1. Initializes the PIC (remaps IRQ0 to vector 0x20) +/// 2. Installs an IDT entry for vector 0x20 pointing to `_timer_irq_handler` +/// 3. Programs PIT channel 0 as a rate generator at the requested period +/// 4. Arms the PV timer by writing the period to OutBAction::PvTimerConfig port (for MSHV/WHP) +/// 5. Enables interrupts (STI) and busy-waits for timer delivery +/// 6. Disables interrupts (CLI) and returns the interrupt count +/// +/// Parameters: +/// - `period_us`: timer period in microseconds (written to OutBAction::PvTimerConfig port) +/// - `max_spin`: maximum busy-wait iterations before giving up +/// +/// Returns the number of timer interrupts received. +#[guest_function("TestTimerInterrupts")] +fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 { + // Reset counter + TIMER_IRQ_COUNT.store(0, Ordering::SeqCst); + + // 1) Initialize PIC — remap IRQ0 to vector 0x20 + unsafe { + // Master PIC + core::arch::asm!("out 0x20, al", in("al") 0x11u8, options(nomem, nostack)); // ICW1 + core::arch::asm!("out 0x21, al", in("al") 0x20u8, options(nomem, nostack)); // ICW2: base 0x20 + core::arch::asm!("out 0x21, al", in("al") 0x04u8, options(nomem, nostack)); // ICW3 + core::arch::asm!("out 0x21, al", in("al") 0x01u8, options(nomem, nostack)); // ICW4 + core::arch::asm!("out 0x21, al", in("al") 0xFEu8, options(nomem, nostack)); // IMR: unmask IRQ0 + + // Slave PIC + core::arch::asm!("out 0xA0, al", in("al") 0x11u8, options(nomem, nostack)); // ICW1 + core::arch::asm!("out 0xA1, al", in("al") 0x28u8, options(nomem, nostack)); // ICW2: base 0x28 + core::arch::asm!("out 0xA1, al", in("al") 0x02u8, options(nomem, nostack)); // ICW3 + core::arch::asm!("out 0xA1, al", in("al") 0x01u8, options(nomem, nostack)); // ICW4 + core::arch::asm!("out 0xA1, al", in("al") 0xFFu8, options(nomem, nostack)); // IMR: mask all + } + + // 2) Install IDT entry for vector 0x20 (timer interrupt) + let handler_addr = _timer_irq_handler as *const () as u64; + + // Read current IDT base via SIDT + let mut idtr = IdtPtr { limit: 0, base: 0 }; + unsafe { + core::arch::asm!( + "sidt [{}]", + in(reg) &mut idtr as *mut IdtPtr, + options(nostack, preserves_flags) + ); + } + + // Write a 16-byte IDT entry at vector 0x20 (offset = 0x20 * 16 = 0x200) + let entry_ptr = (idtr.base as usize + 0x20 * 16) as *mut u8; + unsafe { + // offset_low (bits 0-15 of handler) + core::ptr::write_volatile(entry_ptr as *mut u16, handler_addr as u16); + // selector: 0x08 = kernel code segment + core::ptr::write_volatile(entry_ptr.add(2) as *mut u16, 0x08); + // IST=0, reserved=0 + core::ptr::write_volatile(entry_ptr.add(4), 0); + // type_attr: 0x8E = interrupt gate, present, DPL=0 + core::ptr::write_volatile(entry_ptr.add(5), 0x8E); + // offset_mid (bits 16-31) + core::ptr::write_volatile(entry_ptr.add(6) as *mut u16, (handler_addr >> 16) as u16); + // offset_high (bits 32-63) + core::ptr::write_volatile(entry_ptr.add(8) as *mut u32, (handler_addr >> 32) as u32); + // reserved + core::ptr::write_volatile(entry_ptr.add(12) as *mut u32, 0); + } + + // 3) Program PIT channel 0 as rate generator (mode 2). + // Divisor = period_us * 1_193_182 / 1_000_000 (PIT oscillator is 1.193182 MHz). + // On KVM the in-kernel PIT handles these IO writes directly. + // On MSHV/WHP these ports are silently absorbed (timer is set via OutBAction::PvTimerConfig). + let divisor = ((period_us as u64) * 1_193_182 / 1_000_000).clamp(1, 0xFFFF) as u16; + unsafe { + // Command: channel 0, lobyte/hibyte access, mode 2 (rate generator) + core::arch::asm!("out 0x43, al", in("al") 0x34u8, options(nomem, nostack)); + // Channel 0 data: low byte of divisor + core::arch::asm!("out 0x40, al", in("al") (divisor & 0xFF) as u8, options(nomem, nostack)); + // Channel 0 data: high byte of divisor + core::arch::asm!("out 0x40, al", in("al") (divisor >> 8) as u8, options(nomem, nostack)); + } + + // 4) Arm timer: write period_us to OutBAction::PvTimerConfig port + unsafe { + core::arch::asm!( + "out dx, eax", + in("dx") hyperlight_common::outb::OutBAction::PvTimerConfig as u16, + in("eax") period_us as u32, + options(nomem, nostack, preserves_flags) + ); + } + + // 5) Enable interrupts and wait for at least one timer tick + unsafe { + core::arch::asm!("sti", options(nomem, nostack)); + } + + let max = max_spin as u32; + for _ in 0..max { + if TIMER_IRQ_COUNT.load(Ordering::SeqCst) > 0 { + break; + } + core::hint::spin_loop(); + } + + // 6) Disable interrupts and return count + unsafe { + core::arch::asm!("cli", options(nomem, nostack)); + } + + TIMER_IRQ_COUNT.load(Ordering::SeqCst) as i32 +} + static mut COUNTER: i32 = 0; #[guest_function("AddToStatic")] @@ -823,7 +974,14 @@ fn corrupt_output_size_prefix() -> i32 { buf[12..16].copy_from_slice(&[0u8; 4]); buf[16..24].copy_from_slice(&8_u64.to_le_bytes()); - core::arch::asm!("hlt", options(noreturn)); + core::arch::asm!( + "out dx, eax", + "cli", + "hlt", + in("dx") hyperlight_common::outb::OutBAction::Halt as u16, + in("eax") 0u32, + options(noreturn), + ); } } @@ -839,7 +997,14 @@ fn corrupt_output_back_pointer() -> i32 { buf[8..16].copy_from_slice(&[0u8; 8]); buf[16..24].copy_from_slice(&0xDEAD_u64.to_le_bytes()); - core::arch::asm!("hlt", options(noreturn)); + core::arch::asm!( + "out dx, eax", + "cli", + "hlt", + in("dx") hyperlight_common::outb::OutBAction::Halt as u16, + in("eax") 0u32, + options(noreturn), + ); } } From 1de9625abd5f92d90f72fa88fdac0d08e786cfd1 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 22:31:18 +0000 Subject: [PATCH 09/18] ci: add hw-interrupts test step to CI and Justfile Signed-off-by: danbugs --- .github/workflows/dep_build_test.yml | 5 +++++ Justfile | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/dep_build_test.yml b/.github/workflows/dep_build_test.yml index 0c72baa66..e4d64fe75 100644 --- a/.github/workflows/dep_build_test.yml +++ b/.github/workflows/dep_build_test.yml @@ -105,6 +105,11 @@ jobs: # with only one driver enabled (kvm/mshv3 features are unix-only, no-op on Windows) just test ${{ inputs.config }} ${{ inputs.hypervisor == 'mshv3' && 'mshv3' || 'kvm' }} + - name: Run Rust tests with hw-interrupts + run: | + # with hw-interrupts feature enabled (+ explicit driver on Linux) + just test ${{ inputs.config }} ${{ runner.os == 'Linux' && (inputs.hypervisor == 'mshv3' && 'mshv3,hw-interrupts' || 'kvm,hw-interrupts') || 'hw-interrupts' }} + - name: Run Rust Gdb tests env: RUST_LOG: debug diff --git a/Justfile b/Justfile index dda6897bb..fa3bf617d 100644 --- a/Justfile +++ b/Justfile @@ -87,6 +87,9 @@ test-like-ci config=default-target hypervisor="kvm": @# with only one driver enabled + build-metadata + init-paging just test {{config}} build-metadata,init-paging,{{ if hypervisor == "mshv3" {"mshv3"} else {"kvm"} }} + @# with hw-interrupts enabled (+ explicit driver on Linux) + {{ if os() == "linux" { if hypervisor == "mshv3" { "just test " + config + " mshv3,hw-interrupts" } else { "just test " + config + " kvm,hw-interrupts" } } else { "just test " + config + " hw-interrupts" } }} + @# make sure certain cargo features compile just check @@ -151,6 +154,9 @@ build-test-like-ci config=default-target hypervisor="kvm": @# Run Rust tests with single driver {{ if os() == "linux" { "just test " + config+ " " + if hypervisor == "mshv3" { "mshv3" } else { "kvm" } } else { "" } }} + @# Run Rust tests with hw-interrupts + {{ if os() == "linux" { if hypervisor == "mshv3" { "just test " + config + " mshv3,hw-interrupts" } else { "just test " + config + " kvm,hw-interrupts" } } else { "just test " + config + " hw-interrupts" } }} + @# Run Rust Gdb tests just test-rust-gdb-debugging {{config}} @@ -283,6 +289,7 @@ check: {{ cargo-cmd }} check -p hyperlight-host --features print_debug {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features gdb {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features trace_guest,mem_profile {{ target-triple-flag }} + {{ cargo-cmd }} check -p hyperlight-host --features hw-interrupts {{ target-triple-flag }} fmt-check: rustup +nightly component list | grep -q "rustfmt.*installed" || rustup component add rustfmt --toolchain nightly From 78915b47878fd897b4de4ecebf77c20a38f23c83 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 4 Mar 2026 01:14:39 +0000 Subject: [PATCH 10/18] fix: add halt port IO write and restore hw_timer_interrupts test Add Halt outb (port 108) before cli/hlt in guest init and dummyguest so KVM's in-kernel LAPIC does not absorb the HLT exit. Also restore the hw_timer_interrupts integration test that was inadvertently dropped. Signed-off-by: danbugs --- .../src/arch/amd64/init.rs | 3 +++ src/hyperlight_host/tests/integration_test.rs | 25 +++++++++++++++++++ src/tests/rust_guests/dummyguest/src/main.rs | 10 +++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 09f3c4775..fa8b86c4d 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -173,6 +173,9 @@ core::arch::global_asm!(" mov rsp, r8\n xor ebp, ebp\n call {generic_init}\n + mov dx, 108\n + out dx, al\n + cli\n hlt\n .cfi_endproc\n ", generic_init = sym crate::generic_init); diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 80f997360..9d63e46fc 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1768,3 +1768,28 @@ fn interrupt_cancel_delete_race() { handle.join().unwrap(); } } + +/// Test that hardware timer interrupts are delivered to the guest via OutBAction::PvTimerConfig port. +/// +/// The guest function `TestTimerInterrupts`: +/// 1. Initializes the PIC (IRQ0 -> vector 0x20) +/// 2. Installs an IDT entry for vector 0x20 +/// 3. Arms the timer via OutBAction::PvTimerConfig port +/// 4. Enables interrupts and busy-waits +/// 5. Returns the number of timer interrupts received +/// +/// Requires the `hw-interrupts` feature on the host side. +#[test] +#[cfg(feature = "hw-interrupts")] +fn hw_timer_interrupts() { + with_rust_sandbox(|mut sbox| { + // 1ms period, up to 100M spin iterations (should fire well before that) + let count: i32 = sbox + .call("TestTimerInterrupts", (1000_i32, 100_000_000_i32)) + .unwrap(); + assert!( + count > 0, + "Expected at least one timer interrupt, got {count}" + ); + }); +} \ No newline at end of file diff --git a/src/tests/rust_guests/dummyguest/src/main.rs b/src/tests/rust_guests/dummyguest/src/main.rs index 01063dbdb..2d9db910e 100644 --- a/src/tests/rust_guests/dummyguest/src/main.rs +++ b/src/tests/rust_guests/dummyguest/src/main.rs @@ -30,8 +30,16 @@ fn panic(_info: &PanicInfo) -> ! { } fn halt() { + // OutBAction::Halt = 108; using raw constant to avoid pulling in + // anyhow (via hyperlight_common's TryFrom impl) which requires alloc. unsafe { - asm!("hlt"); + asm!( + "out dx, eax", + "cli", + "hlt", + in("dx") 108u16, + in("eax") 0u32, + ); } } From d68e899d9cb14a671f2c01e085a3e55d49ff6cf9 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 4 Mar 2026 02:04:24 +0000 Subject: [PATCH 11/18] style: add trailing newline to integration_test.rs Signed-off-by: danbugs --- src/hyperlight_host/tests/integration_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 9d63e46fc..c39946096 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1792,4 +1792,4 @@ fn hw_timer_interrupts() { "Expected at least one timer interrupt, got {count}" ); }); -} \ No newline at end of file +} From c6d6077917a9d75369ba526fd04cfb57d87f8094 Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 5 Mar 2026 01:30:50 +0000 Subject: [PATCH 12/18] experiment: replace in-kernel PIT with irqfd + host timer thread Replace create_pit2() with a host-side timer thread that fires an EventFd registered via register_irqfd for GSI 0 (IRQ0). The in-kernel IRQ chip (PIC + IOAPIC + LAPIC) is kept for proper interrupt routing. When the guest writes to PvTimerConfig (port 107), the host parses the requested period and spawns a timer thread that periodically writes to the EventFd. Guest PIT port writes (0x40-0x43) exit to userspace and are silently ignored. Tested with nanvix hello-c.elf: works correctly. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm.rs | 121 +++++++++++++++--- 1 file changed, 103 insertions(+), 18 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index 0f70a464e..0d416027d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -15,12 +15,14 @@ limitations under the License. */ use std::sync::LazyLock; +#[cfg(feature = "hw-interrupts")] +use std::sync::atomic::{AtomicBool, Ordering}; +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; use hyperlight_common::outb::OutBAction; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; -#[cfg(feature = "hw-interrupts")] -use kvm_bindings::{KVM_PIT_SPEAKER_DUMMY, kvm_pit_config}; use kvm_bindings::{ kvm_debugregs, kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region, kvm_xsave, }; @@ -29,6 +31,8 @@ use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] use tracing_opentelemetry::OpenTelemetrySpanExt; +#[cfg(feature = "hw-interrupts")] +use vmm_sys_util::eventfd::EventFd; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; @@ -87,16 +91,35 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A KVM implementation of a single-vcpu VM -#[derive(Debug)] pub(crate) struct KvmVm { vm_fd: VmFd, vcpu_fd: VcpuFd, + /// EventFd registered via irqfd for GSI 0 (IRQ0). A timer thread + /// writes to this to inject periodic timer interrupts. + #[cfg(feature = "hw-interrupts")] + timer_irq_eventfd: EventFd, + /// Signals the timer thread to stop. + #[cfg(feature = "hw-interrupts")] + timer_stop: Arc, + /// Handle to the background timer thread (if started). + #[cfg(feature = "hw-interrupts")] + timer_thread: Option>, + // KVM, as opposed to mshv/whp, has no get_guest_debug() ioctl, so we must track the state ourselves #[cfg(gdb)] debug_regs: kvm_guest_debug, } +impl std::fmt::Debug for KvmVm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KvmVm") + .field("vm_fd", &self.vm_fd) + .field("vcpu_fd", &self.vcpu_fd) + .finish_non_exhaustive() + } +} + static KVM: LazyLock> = LazyLock::new(|| Kvm::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); @@ -111,23 +134,35 @@ impl KvmVm { .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; // When hw-interrupts is enabled, create the in-kernel IRQ chip - // (PIC + IOAPIC + LAPIC) and PIT before creating the vCPU so the + // (PIC + IOAPIC + LAPIC) before creating the vCPU so the // per-vCPU LAPIC is initialised in virtual-wire mode (LINT0 = ExtINT). - // The guest programs the PIC remap and PIT frequency via standard - // IO port writes, which the in-kernel devices handle transparently. + // The guest programs the PIC remap via standard IO port writes, + // which the in-kernel PIC handles transparently. + // + // Instead of creating an in-kernel PIT (create_pit2), we use a + // host-side timer thread + irqfd to inject IRQ0 at the rate + // requested by the guest via OutBAction::PvTimerConfig (port 107). + // This eliminates the in-kernel PIT device. Guest PIT port writes + // (0x40, 0x43) become no-ops handled in the run loop. #[cfg(feature = "hw-interrupts")] - { + let timer_irq_eventfd = { vm_fd .create_irq_chip() .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - let pit_config = kvm_pit_config { - flags: KVM_PIT_SPEAKER_DUMMY, - ..Default::default() - }; + + // Create an EventFd and register it via irqfd for GSI 0 (IRQ0). + // When the timer thread writes to this EventFd, the in-kernel + // PIC will assert IRQ0, which is delivered as the vector the + // guest configured during PIC remap (typically vector 0x20). + let eventfd = EventFd::new(0) + .map_err(|e| CreateVmError::InitializeVm( + kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into() + ))?; vm_fd - .create_pit2(pit_config) + .register_irqfd(&eventfd, 0) .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - } + eventfd + }; let vcpu_fd = vm_fd .create_vcpu(0) @@ -155,6 +190,12 @@ impl KvmVm { Ok(Self { vm_fd, vcpu_fd, + #[cfg(feature = "hw-interrupts")] + timer_irq_eventfd, + #[cfg(feature = "hw-interrupts")] + timer_stop: Arc::new(AtomicBool::new(false)), + #[cfg(feature = "hw-interrupts")] + timer_thread: None, #[cfg(gdb)] debug_regs: kvm_guest_debug::default(), }) @@ -195,9 +236,11 @@ impl VirtualMachine for KvmVm { #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); - // When hw-interrupts is enabled, the in-kernel PIC + PIT + LAPIC handle - // timer interrupts natively. The guest signals "I'm done" by writing to - // OutBAction::Halt (an IO port exit) instead of using HLT, because the in-kernel + // When hw-interrupts is enabled, the in-kernel PIC + LAPIC deliver + // interrupts triggered by the host-side timer thread via irqfd. + // There is no in-kernel PIT; guest PIT port writes are no-ops. + // The guest signals "I'm done" by writing to OutBAction::Halt + // (an IO port exit) instead of using HLT, because the in-kernel // LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). #[cfg(feature = "hw-interrupts")] loop { @@ -209,11 +252,43 @@ impl VirtualMachine for KvmVm { } Ok(VcpuExit::IoOut(port, data)) => { if port == OutBAction::Halt as u16 { + // Stop the timer thread before returning. + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } return Ok(VmExit::Halt()); } if port == OutBAction::PvTimerConfig as u16 { - // Ignore: the in-kernel PIT handles timer scheduling. - // The guest writes here for MSHV/WHP compatibility. + // The guest is configuring the timer period. + // Extract the period in microseconds (LE u32). + if data.len() >= 4 { + let period_us = u32::from_le_bytes( + data[..4].try_into().unwrap(), + ) as u64; + if period_us > 0 && self.timer_thread.is_none() { + let eventfd = self + .timer_irq_eventfd + .try_clone() + .expect("failed to clone timer EventFd"); + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = eventfd.write(1); + } + })); + } + } + continue; + } + // PIT ports (0x40-0x43): no in-kernel PIT, so these + // exit to userspace. Silently ignore them. + if (0x40..=0x43).contains(&port) { continue; } return Ok(VmExit::IoOut(port, data.to_vec())); @@ -501,6 +576,16 @@ impl DebuggableVm for KvmVm { } } +#[cfg(feature = "hw-interrupts")] +impl Drop for KvmVm { + fn drop(&mut self) { + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } + } +} + #[cfg(test)] #[cfg(feature = "hw-interrupts")] mod hw_interrupt_tests { From 0b0e36b5532e6c91a4bda7e2e1e291a973fb29d7 Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 5 Mar 2026 03:45:00 +0000 Subject: [PATCH 13/18] =?UTF-8?q?experiment:=20MSHV=20=E2=80=94=20replace?= =?UTF-8?q?=20SynIC=20timer=20with=20request=5Fvirtual=5Finterrupt=20+=20h?= =?UTF-8?q?ost=20timer=20thread?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the SynIC-based timer (STIMER0_CONFIG/COUNT + SCONTROL) with a host timer thread that periodically calls request_virtual_interrupt() (HVCALL_ASSERT_VIRTUAL_INTERRUPT, hypercall 148). This makes MSHV consistent with the KVM irqfd and WHP software timer patterns — all three platforms now use: (1) start timer on PvTimerConfig, (2) host thread periodically injects interrupt, (3) stop on Halt/Drop. Changes: - Remove SynIC dependency: make_default_synthetic_features_mask, SCONTROL, STIMER0_CONFIG, STIMER0_COUNT imports and usage - Set synthetic_proc_features to 0 (no SynIC features needed) - Wrap vm_fd in Arc for thread-safe sharing - Add timer_stop + timer_thread fields (same pattern as KVM) - PvTimerConfig handler spawns timer thread with request_virtual_interrupt - Add Drop impl for timer cleanup - Remove build_stimer_config() and period_us_to_100ns() helpers - Remove SynIC-related unit tests Kept unchanged: LAPIC init, MSR intercept, PIC emulation, EOI bridging (all still required for interrupt delivery). NOT RUNTIME TESTED — MSHV not available on this system. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/mshv.rs | 202 ++++++++---------- 1 file changed, 94 insertions(+), 108 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 5e19041ac..6a92f08fd 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -16,7 +16,11 @@ limitations under the License. #[cfg(gdb)] use std::fmt::Debug; +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; use std::sync::LazyLock; +#[cfg(feature = "hw-interrupts")] +use std::sync::atomic::{AtomicBool, Ordering}; use hyperlight_common::outb::OutBAction; #[cfg(feature = "hw-interrupts")] @@ -39,11 +43,10 @@ use mshv_bindings::{ }; #[cfg(feature = "hw-interrupts")] use mshv_bindings::{ - hv_register_name_HV_REGISTER_SCONTROL, hv_register_name_HV_REGISTER_STIMER0_CONFIG, - hv_register_name_HV_REGISTER_STIMER0_COUNT, hv_register_name_HV_X64_REGISTER_RAX, + hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, hv_register_name_HV_X64_REGISTER_RAX, }; #[cfg(feature = "hw-interrupts")] -use mshv_ioctls::make_default_synthetic_features_mask; +use mshv_ioctls::InterruptRequest; use mshv_ioctls::{Mshv, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] @@ -81,14 +84,31 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A MSHV implementation of a single-vcpu VM -#[derive(Debug)] pub(crate) struct MshvVm { + /// VmFd wrapped in Arc so the timer thread can call + /// `request_virtual_interrupt` from a background thread. + #[cfg(feature = "hw-interrupts")] + vm_fd: Arc, + #[cfg(not(feature = "hw-interrupts"))] vm_fd: VmFd, vcpu_fd: VcpuFd, #[cfg(feature = "hw-interrupts")] pic: Pic, + /// Signals the timer thread to stop. #[cfg(feature = "hw-interrupts")] - synic_timer_active: bool, + timer_stop: Arc, + /// Handle to the background timer thread (if started). + #[cfg(feature = "hw-interrupts")] + timer_thread: Option>, +} + +impl std::fmt::Debug for MshvVm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MshvVm") + .field("vm_fd", &self.vm_fd) + .field("vcpu_fd", &self.vcpu_fd) + .finish_non_exhaustive() + } } static MSHV: LazyLock> = @@ -123,8 +143,8 @@ impl MshvVm { #[allow(unused_mut)] let mut pr: mshv_create_partition_v2 = Default::default(); - // Enable LAPIC for hw-interrupts — required for SynIC direct-mode - // timer delivery via APIC vector. MSHV_PT_BIT_LAPIC = bit 0. + // Enable LAPIC for hw-interrupts — required for interrupt delivery + // via request_virtual_interrupt. MSHV_PT_BIT_LAPIC = bit 0. #[cfg(feature = "hw-interrupts")] { pr.pt_flags = 1u64; // LAPIC @@ -134,12 +154,11 @@ impl MshvVm { .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; let vcpu_fd = { - // Use the full default synthetic features mask (includes SynIC, - // synthetic timers, direct mode, hypercall regs, etc.) when - // hw-interrupts is enabled. Without hw-interrupts, leave all - // features at zero. + // No synthetic features needed — timer interrupts are injected + // directly via request_virtual_interrupt(), not through SynIC. + // The LAPIC (enabled via pt_flags) handles interrupt delivery. #[cfg(feature = "hw-interrupts")] - let feature_val = make_default_synthetic_features_mask(); + let feature_val = 0u64; #[cfg(not(feature = "hw-interrupts"))] let feature_val = 0u64; @@ -161,7 +180,7 @@ impl MshvVm { // Initialize the virtual LAPIC when hw-interrupts is enabled. // LAPIC defaults to disabled (SVR bit 8 = 0), which means no APIC - // interrupts (including SynIC direct-mode timer) can be delivered. + // interrupts can be delivered (request_virtual_interrupt would fail). #[cfg(feature = "hw-interrupts")] { let mut lapic: LapicState = vcpu_fd @@ -181,7 +200,7 @@ impl MshvVm { write_lapic_u32(&mut lapic.regs, 0x350, 0x0001_0000); // LINT1 (offset 0x360): NMI delivery, not masked write_lapic_u32(&mut lapic.regs, 0x360, 0x400); - // LVT Timer (offset 0x320): masked — we use SynIC timer instead + // LVT Timer (offset 0x320): masked — we use host timer thread instead write_lapic_u32(&mut lapic.regs, 0x320, 0x0001_0000); // LVT Error (offset 0x370): masked write_lapic_u32(&mut lapic.regs, 0x370, 0x0001_0000); @@ -193,7 +212,7 @@ impl MshvVm { // Install MSR intercept for IA32_APIC_BASE (MSR 0x1B) to prevent // the guest from globally disabling the LAPIC. The Nanvix kernel // disables the APIC when no I/O APIC is detected, but we need - // the LAPIC enabled for SynIC direct-mode timer delivery. + // the LAPIC enabled for request_virtual_interrupt delivery. // // This may fail with AccessDenied on some kernel versions; in // that case we fall back to re-enabling the LAPIC in the timer @@ -208,12 +227,17 @@ impl MshvVm { } Ok(Self { + #[cfg(feature = "hw-interrupts")] + vm_fd: Arc::new(vm_fd), + #[cfg(not(feature = "hw-interrupts"))] vm_fd, vcpu_fd, #[cfg(feature = "hw-interrupts")] pic: Pic::new(), #[cfg(feature = "hw-interrupts")] - synic_timer_active: false, + timer_stop: Arc::new(AtomicBool::new(false)), + #[cfg(feature = "hw-interrupts")] + timer_thread: None, }) } } @@ -267,12 +291,12 @@ impl VirtualMachine for MshvVm { let msg_type = m.header.message_type; match msg_type { HALT_MESSAGE => { - // With SynIC timer active, re-enter the guest. + // With timer thread active, re-enter the guest. // The hypervisor will deliver pending timer // interrupts on the next run(), waking the // vCPU from HLT. #[cfg(feature = "hw-interrupts")] - if self.synic_timer_active { + if self.timer_thread.is_some() { continue; } return Ok(VmExit::Halt()); @@ -301,6 +325,14 @@ impl VirtualMachine for MshvVm { // OutBAction::Halt always means "I'm done", regardless // of whether a timer is active. if is_write && port_number == OutBAction::Halt as u16 { + // Stop the timer thread before returning. + #[cfg(feature = "hw-interrupts")] + { + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } + } return Ok(VmExit::Halt()); } @@ -411,12 +443,12 @@ impl VirtualMachine for MshvVm { } Err(e) => match e.errno() { libc::EINTR => { - // When the SynIC timer is active, EINTR may be + // When the timer thread is active, EINTR may be // a spurious signal. Continue the run loop to // let the hypervisor deliver any pending timer // interrupt. #[cfg(feature = "hw-interrupts")] - if self.synic_timer_active { + if self.timer_thread.is_some() { continue; } return Ok(VmExit::Cancelled()); @@ -684,8 +716,9 @@ impl MshvVm { const APIC_BASE_DEFAULT: u64 = 0xFEE00900; /// Perform LAPIC EOI: clear the highest-priority in-service bit. - /// Called when the guest sends PIC EOI, since SynIC direct-mode timer - /// delivers through the LAPIC and the guest only acknowledges via PIC. + /// Called when the guest sends PIC EOI, since the timer thread + /// delivers interrupts through the LAPIC and the guest only + /// acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). @@ -717,33 +750,11 @@ impl MshvVm { None } - /// Build SynIC timer config register value. - /// - /// Layout (from Hyper-V TLFS): - /// - Bit 0: enable - /// - Bit 1: periodic - /// - Bit 3: auto_enable - /// - Bits 4-11: apic_vector - /// - Bit 12: direct_mode - fn build_stimer_config(vector: u8) -> u64 { - 1 // enable - | (1 << 1) // periodic - | (1 << 3) // auto_enable - | ((vector as u64) << 4) // apic_vector - | (1 << 12) // direct_mode - } - - /// Convert a timer period in microseconds to the SynIC timer count - /// in 100-nanosecond units (Hyper-V reference time). - fn period_us_to_100ns(period_us: u32) -> u64 { - (period_us as u64) * 10 - } - fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == OutBAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - if period_us > 0 { + if period_us > 0 && self.timer_thread.is_none() { // Re-enable LAPIC if the guest disabled it (via WRMSR // to MSR 0x1B clearing bit 11). This happens when the // Nanvix kernel doesn't detect an I/O APIC. @@ -780,42 +791,40 @@ impl MshvVm { } } - // Enable SynIC on this vCPU (SCONTROL bit 0 = enable) - let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_REGISTER_SCONTROL, - value: hv_register_value { reg64: 1 }, - ..Default::default() - }]); - - // Arm STIMER0 immediately (the guest needs timer - // interrupts for thread preemption, not just idle - // wakeup). - let vector = self.pic.master_vector_base(); - let config = Self::build_stimer_config(vector); - let count_100ns = Self::period_us_to_100ns(period_us); - - let _ = self.vcpu_fd.set_reg(&[ - hv_register_assoc { - name: hv_register_name_HV_REGISTER_STIMER0_CONFIG, - value: hv_register_value { reg64: config }, - ..Default::default() - }, - hv_register_assoc { - name: hv_register_name_HV_REGISTER_STIMER0_COUNT, - value: hv_register_value { reg64: count_100ns }, - ..Default::default() - }, - ]); - self.synic_timer_active = true; + // Start a host timer thread that periodically injects + // interrupts via request_virtual_interrupt (HVCALL 148). + // This replaces the SynIC timer approach and makes MSHV + // consistent with the KVM irqfd and WHP software timer + // patterns. + let vm_fd = self.vm_fd.clone(); + let vector = self.pic.master_vector_base() as u32; + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us as u64); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = vm_fd.request_virtual_interrupt(&InterruptRequest { + interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, + apic_id: 0, + vector, + level_triggered: false, + logical_destination_mode: false, + long_mode: false, + }); + } + })); } } return true; } if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), - // also perform LAPIC EOI since SynIC timer delivers via LAPIC and - // the guest only knows about the PIC. - if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.synic_timer_active { + // also perform LAPIC EOI since the timer thread delivers via LAPIC + // and the guest only acknowledges via PIC. + if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.timer_thread.is_some() { self.do_lapic_eoi(); } return true; @@ -833,6 +842,16 @@ impl MshvVm { } } +#[cfg(feature = "hw-interrupts")] +impl Drop for MshvVm { + fn drop(&mut self) { + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } + } +} + #[cfg(test)] #[cfg(feature = "hw-interrupts")] mod hw_interrupt_tests { @@ -872,39 +891,6 @@ mod hw_interrupt_tests { assert_eq!(regs[0x84], 0); } - #[test] - fn synic_timer_config_bitfield() { - let vector: u8 = 0x20; - let config = MshvVm::build_stimer_config(vector); - - assert_ne!(config & 1, 0, "enable bit should be set"); - assert_ne!(config & (1 << 1), 0, "periodic bit should be set"); - assert_eq!(config & (1 << 2), 0, "lazy bit should be clear"); - assert_ne!(config & (1 << 3), 0, "auto_enable bit should be set"); - assert_eq!((config >> 4) & 0xFF, 0x20, "apic_vector should be 0x20"); - assert_ne!(config & (1 << 12), 0, "direct_mode bit should be set"); - } - - #[test] - fn synic_timer_config_different_vectors() { - for vector in [0x20u8, 0x30, 0x40, 0xFF] { - let config = MshvVm::build_stimer_config(vector); - assert_eq!( - (config >> 4) & 0xFF, - vector as u64, - "vector mismatch for {:#x}", - vector - ); - } - } - - #[test] - fn timer_count_us_to_100ns() { - assert_eq!(MshvVm::period_us_to_100ns(1000), 10_000); - assert_eq!(MshvVm::period_us_to_100ns(10_000), 100_000); - assert_eq!(MshvVm::period_us_to_100ns(1), 10); - } - #[test] fn apic_base_default_value() { let base = MshvVm::APIC_BASE_DEFAULT; From f9282424d2a3befbe5317a1fecd6ff607f20bdf6 Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 5 Mar 2026 06:45:10 +0000 Subject: [PATCH 14/18] =?UTF-8?q?experiment:=20eliminate=20PIC=20state=20m?= =?UTF-8?q?achine=20=E2=80=94=20hardcode=20vector=200x20,=20no-op=20PIC=20?= =?UTF-8?q?ports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the Pic struct from both MSHV and WHP. The nanvix guest always remaps IRQ0 to vector 0x20 via PIC ICW2, so the host can hardcode the timer vector instead of tracking the ICW initialization sequence. PIC ports (0x20/0x21/0xA0/0xA1) are now accepted as no-ops. The only PIC-related logic retained is LAPIC EOI bridging: when the guest sends a non-specific EOI on port 0x20, the host clears the LAPIC ISR bit. This is 3 lines of logic vs the ~130-line pic.rs state machine. This addresses the PR 1272 reviewer concern about emulating "a good fraction of the IC controller" — the PIC state machine was the unnecessary complexity, not the timer delivery mechanism. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/mshv.rs | 50 +++++++++++-------- .../src/hypervisor/virtual_machine/whp.rs | 50 +++++++++++-------- 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 6a92f08fd..d26284d4e 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -54,8 +54,6 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, @@ -92,8 +90,6 @@ pub(crate) struct MshvVm { #[cfg(not(feature = "hw-interrupts"))] vm_fd: VmFd, vcpu_fd: VcpuFd, - #[cfg(feature = "hw-interrupts")] - pic: Pic, /// Signals the timer thread to stop. #[cfg(feature = "hw-interrupts")] timer_stop: Arc, @@ -233,8 +229,6 @@ impl MshvVm { vm_fd, vcpu_fd, #[cfg(feature = "hw-interrupts")] - pic: Pic::new(), - #[cfg(feature = "hw-interrupts")] timer_stop: Arc::new(AtomicBool::new(false)), #[cfg(feature = "hw-interrupts")] timer_thread: None, @@ -715,6 +709,11 @@ impl MshvVm { /// BSP flag (bit 8) + global enable (bit 11). const APIC_BASE_DEFAULT: u64 = 0xFEE00900; + /// Hardcoded timer interrupt vector. The nanvix guest always + /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same + /// vector directly — no PIC state machine needed. + const TIMER_VECTOR: u32 = 0x20; + /// Perform LAPIC EOI: clear the highest-priority in-service bit. /// Called when the guest sends PIC EOI, since the timer thread /// delivers interrupts through the LAPIC and the guest only @@ -737,17 +736,21 @@ impl MshvVm { } /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled (PIC or PIT read), - /// `None` if the port should be passed through to the guest handler. + /// Returns `Some(value)` if the port was handled, `None` if the + /// port should be passed through to the guest handler. + /// + /// No PIC state machine — PIC data ports return 0xFF (all masked), + /// PIC command ports return 0 (no pending IRQ), PIT returns 0. fn handle_hw_io_in(&self, port: u16) -> Option { - if let Some(val) = self.pic.handle_io_in(port) { - return Some(val as u64); - } - // PIT data port read -- return 0 - if port == 0x40 { - return Some(0); + match port { + // PIC master/slave data ports — return "all masked" + 0x21 | 0xA1 => Some(0xFF), + // PIC master/slave command ports — return 0 (ISR/IRR read) + 0x20 | 0xA0 => Some(0), + // PIT data port read — return 0 + 0x40 => Some(0), + _ => None, } - None } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { @@ -797,7 +800,7 @@ impl MshvVm { // consistent with the KVM irqfd and WHP software timer // patterns. let vm_fd = self.vm_fd.clone(); - let vector = self.pic.master_vector_base() as u32; + let vector = Self::TIMER_VECTOR; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); self.timer_thread = Some(std::thread::spawn(move || { @@ -820,11 +823,16 @@ impl MshvVm { } return true; } - if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { - // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), - // also perform LAPIC EOI since the timer thread delivers via LAPIC - // and the guest only acknowledges via PIC. - if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.timer_thread.is_some() { + // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. + // No PIC state machine — we only need LAPIC EOI bridging when + // the guest sends a non-specific EOI on the master PIC command + // port (0x20, OCW2 byte with bits 7:5 = 001). + if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { + if port == 0x20 + && !data.is_empty() + && (data[0] & 0xE0) == 0x20 + && self.timer_thread.is_some() + { self.do_lapic_eoi(); } return true; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 41d999840..af74c7e7d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -33,8 +33,6 @@ use windows_result::HRESULT; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ Align16, CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, @@ -78,8 +76,6 @@ pub(crate) struct WhpVm { partition: WHV_PARTITION_HANDLE, // Surrogate process for memory mapping surrogate_process: SurrogateProcess, - #[cfg(feature = "hw-interrupts")] - pic: Pic, /// Whether the WHP host supports LAPIC emulation mode. #[cfg(feature = "hw-interrupts")] lapic_emulation: bool, @@ -253,8 +249,6 @@ impl WhpVm { .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))? }, #[cfg(feature = "hw-interrupts")] - pic: Pic::new(), - #[cfg(feature = "hw-interrupts")] lapic_emulation, #[cfg(feature = "hw-interrupts")] lapic_timer_active: false, @@ -1083,6 +1077,11 @@ impl DebuggableVm for WhpVm { #[cfg(feature = "hw-interrupts")] impl WhpVm { + /// Hardcoded timer interrupt vector. The nanvix guest always + /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same + /// vector directly — no PIC state machine needed. + const TIMER_VECTOR: u32 = 0x20; + /// Get the LAPIC state via the bulk interrupt-controller state API. fn get_lapic_state(&self) -> windows_result::Result> { let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; @@ -1134,17 +1133,21 @@ impl WhpVm { } /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled (PIC or PIT read), - /// `None` if the port should be passed through to the guest handler. + /// Returns `Some(value)` if the port was handled, `None` if the + /// port should be passed through to the guest handler. + /// + /// No PIC state machine — PIC data ports return 0xFF (all masked), + /// PIC command ports return 0 (no pending IRQ), PIT returns 0. fn handle_hw_io_in(&self, port: u16) -> Option { - if let Some(val) = self.pic.handle_io_in(port) { - return Some(val as u64); - } - // PIT data port read -- return 0 - if port == 0x40 { - return Some(0); + match port { + // PIC master/slave data ports — return "all masked" + 0x21 | 0xA1 => Some(0xFF), + // PIC master/slave command ports — return 0 (ISR/IRR read) + 0x20 | 0xA0 => Some(0), + // PIT data port read — return 0 + 0x40 => Some(0), + _ => None, } - None } /// Stop the software timer thread if running. @@ -1176,7 +1179,7 @@ impl WhpVm { // host-side software timer thread that periodically // injects interrupts via WHvRequestInterrupt. let partition_raw = self.partition.0; - let vector = self.pic.master_vector_base() as u32; + let vector = Self::TIMER_VECTOR; let stop = Arc::new(AtomicBool::new(false)); let stop_clone = stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); @@ -1213,11 +1216,16 @@ impl WhpVm { } return true; } - if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { - // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), - // also perform LAPIC EOI since the software timer delivers - // through the LAPIC and the guest only acknowledges via PIC. - if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.lapic_timer_active { + // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. + // No PIC state machine — we only need LAPIC EOI bridging when + // the guest sends a non-specific EOI on the master PIC command + // port (0x20, OCW2 byte with bits 7:5 = 001). + if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { + if port == 0x20 + && !data.is_empty() + && (data[0] & 0xE0) == 0x20 + && self.lapic_timer_active + { self.do_lapic_eoi(); } return true; From 708221d554af47f130478567a1ef7c74681be6ec Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 17:25:08 +0000 Subject: [PATCH 15/18] fix: delete unused pic.rs file The PIC state machine was eliminated in the previous experimental commits (MSHV/WHP now hardcode vector 0x20 and no-op PIC ports). Remove the orphaned pic.rs file and its mod declaration. Signed-off-by: danbugs --- src/hyperlight_host/src/hypervisor/mod.rs | 3 - src/hyperlight_host/src/hypervisor/pic.rs | 228 ---------------------- 2 files changed, 231 deletions(-) delete mode 100644 src/hyperlight_host/src/hypervisor/pic.rs diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 04d13f9ec..600bcd939 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -38,9 +38,6 @@ pub(crate) mod crashdump; pub(crate) mod hyperlight_vm; -#[cfg(all(feature = "hw-interrupts", any(mshv3, target_os = "windows")))] -pub(crate) mod pic; - use std::fmt::Debug; #[cfg(any(kvm, mshv3))] use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; diff --git a/src/hyperlight_host/src/hypervisor/pic.rs b/src/hyperlight_host/src/hypervisor/pic.rs deleted file mode 100644 index 7256ec3e9..000000000 --- a/src/hyperlight_host/src/hypervisor/pic.rs +++ /dev/null @@ -1,228 +0,0 @@ -/* -Copyright 2026 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -//! Minimal userspace PIC (8259A) emulation for MSHV/WHP. -//! KVM provides in-kernel PIC via `create_irq_chip()`. - -// PIC I/O port constants -const PIC_MASTER_CMD: u16 = 0x20; -const PIC_MASTER_DATA: u16 = 0x21; -const PIC_SLAVE_CMD: u16 = 0xA0; -const PIC_SLAVE_DATA: u16 = 0xA1; - -/// Tracks where we are in the ICW (Initialization Command Word) sequence. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum PicInitState { - /// Not in an init sequence; normal operation. - Ready, - /// Expecting ICW2 (vector base). - Icw2, - /// Expecting ICW3 (cascade info). - Icw3, - /// Expecting ICW4 (mode). - Icw4, -} - -/// Minimal 8259A PIC state for one chip (master or slave). -#[derive(Debug)] -pub(crate) struct Pic { - master_init_state: PicInitState, - master_vector_base: u8, - master_imr: u8, - - slave_init_state: PicInitState, - slave_vector_base: u8, - slave_imr: u8, -} - -impl Pic { - /// Create a new PIC pair with default state. - pub(crate) fn new() -> Self { - Self { - master_init_state: PicInitState::Ready, - master_vector_base: 0, - master_imr: 0xFF, // all masked - - slave_init_state: PicInitState::Ready, - slave_vector_base: 0, - slave_imr: 0xFF, // all masked - } - } - - /// Handle an I/O OUT to a PIC port. Returns `true` if the port was handled. - pub(crate) fn handle_io_out(&mut self, port: u16, data: u8) -> bool { - match port { - PIC_MASTER_CMD => { - if data & 0x10 != 0 { - // ICW1 -- start init sequence - self.master_init_state = PicInitState::Icw2; - } - // else: OCW (e.g. EOI) -- we accept but ignore - true - } - PIC_MASTER_DATA => { - match self.master_init_state { - PicInitState::Icw2 => { - self.master_vector_base = data; - self.master_init_state = PicInitState::Icw3; - } - PicInitState::Icw3 => { - self.master_init_state = PicInitState::Icw4; - } - PicInitState::Icw4 => { - self.master_init_state = PicInitState::Ready; - } - PicInitState::Ready => { - // IMR write - self.master_imr = data; - } - } - true - } - PIC_SLAVE_CMD => { - if data & 0x10 != 0 { - self.slave_init_state = PicInitState::Icw2; - } - true - } - PIC_SLAVE_DATA => { - match self.slave_init_state { - PicInitState::Icw2 => { - self.slave_vector_base = data; - self.slave_init_state = PicInitState::Icw3; - } - PicInitState::Icw3 => { - self.slave_init_state = PicInitState::Icw4; - } - PicInitState::Icw4 => { - self.slave_init_state = PicInitState::Ready; - } - PicInitState::Ready => { - self.slave_imr = data; - } - } - true - } - _ => false, - } - } - - /// Handle an I/O IN from a PIC port. Returns `Some(value)` if handled. - pub(crate) fn handle_io_in(&self, port: u16) -> Option { - match port { - PIC_MASTER_DATA => Some(self.master_imr), - PIC_SLAVE_DATA => Some(self.slave_imr), - PIC_MASTER_CMD | PIC_SLAVE_CMD => Some(0), // ISR reads return 0 - _ => None, - } - } - - /// Returns the master PIC vector base. - pub(crate) fn master_vector_base(&self) -> u8 { - self.master_vector_base - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn pic_initial_state() { - let pic = Pic::new(); - assert_eq!(pic.master_vector_base, 0); - assert_eq!(pic.slave_vector_base, 0); - assert_eq!(pic.master_imr, 0xFF); - assert_eq!(pic.slave_imr, 0xFF); - } - - #[test] - fn pic_master_icw_sequence() { - let mut pic = Pic::new(); - // ICW1 - assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x11)); - // ICW2: vector base 0x20 - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x20)); - assert_eq!(pic.master_vector_base(), 0x20); - // ICW3 - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x04)); - // ICW4 - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x01)); - // Should be back to Ready -- IMR write - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0xFE)); // unmask IRQ0 - assert_eq!(pic.master_imr, 0xFE); - } - - #[test] - fn pic_slave_icw_sequence() { - let mut pic = Pic::new(); - assert!(pic.handle_io_out(PIC_SLAVE_CMD, 0x11)); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x28)); - assert_eq!(pic.slave_vector_base, 0x28); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x02)); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x01)); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0xFF)); - assert_eq!(pic.slave_imr, 0xFF); - } - - #[test] - fn pic_eoi_accepted() { - let mut pic = Pic::new(); - // EOI (OCW2, non-specific EOI = 0x20) - assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x20)); - } - - #[test] - fn pic_unhandled_port() { - let mut pic = Pic::new(); - assert!(!pic.handle_io_out(0x60, 0x00)); - assert_eq!(pic.handle_io_in(0x60), None); - } - - #[test] - fn pic_reinitialize_master() { - let mut pic = Pic::new(); - // First init - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - pic.handle_io_out(PIC_MASTER_DATA, 0x20); - pic.handle_io_out(PIC_MASTER_DATA, 0x04); - pic.handle_io_out(PIC_MASTER_DATA, 0x01); - assert_eq!(pic.master_vector_base(), 0x20); - // Reinit - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - pic.handle_io_out(PIC_MASTER_DATA, 0x30); - pic.handle_io_out(PIC_MASTER_DATA, 0x04); - pic.handle_io_out(PIC_MASTER_DATA, 0x01); - assert_eq!(pic.master_vector_base(), 0x30); - } - - #[test] - fn pic_imr_preserved_across_reinit() { - let mut pic = Pic::new(); - // Set IMR before init - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - pic.handle_io_out(PIC_MASTER_DATA, 0x20); - pic.handle_io_out(PIC_MASTER_DATA, 0x04); - pic.handle_io_out(PIC_MASTER_DATA, 0x01); - pic.handle_io_out(PIC_MASTER_DATA, 0xFE); - assert_eq!(pic.master_imr, 0xFE); - // Reinit -- IMR is set during ICW sequence, not explicitly preserved - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - // After ICW1, IMR should still read as whatever it was -- we only - // change it on Ready state writes. - assert_eq!(pic.handle_io_in(PIC_MASTER_DATA), Some(0xFE)); - } -} From d669e93a395528bb8148c93d2ac0c8ff30fb0eed Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 17:26:51 +0000 Subject: [PATCH 16/18] style: rustfmt fixes in kvm.rs Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index 0d416027d..ae20b65ab 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -14,11 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; use std::sync::LazyLock; #[cfg(feature = "hw-interrupts")] use std::sync::atomic::{AtomicBool, Ordering}; -#[cfg(feature = "hw-interrupts")] -use std::sync::Arc; use hyperlight_common::outb::OutBAction; #[cfg(gdb)] @@ -154,10 +154,11 @@ impl KvmVm { // When the timer thread writes to this EventFd, the in-kernel // PIC will assert IRQ0, which is delivered as the vector the // guest configured during PIC remap (typically vector 0x20). - let eventfd = EventFd::new(0) - .map_err(|e| CreateVmError::InitializeVm( - kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into() - ))?; + let eventfd = EventFd::new(0).map_err(|e| { + CreateVmError::InitializeVm( + kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into(), + ) + })?; vm_fd .register_irqfd(&eventfd, 0) .map_err(|e| CreateVmError::InitializeVm(e.into()))?; @@ -263,9 +264,8 @@ impl VirtualMachine for KvmVm { // The guest is configuring the timer period. // Extract the period in microseconds (LE u32). if data.len() >= 4 { - let period_us = u32::from_le_bytes( - data[..4].try_into().unwrap(), - ) as u64; + let period_us = + u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; if period_us > 0 && self.timer_thread.is_none() { let eventfd = self .timer_irq_eventfd From 3f45b9f6939d4b05163ba33d0380a9c3f66ee57e Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 22:24:20 +0000 Subject: [PATCH 17/18] refactor: address PR 1272 review feedback - Remove _default_irq_handler (timer thread, not PIT, injects IRQs) - Remove cli;hlt after outb port 108 in guest asm and exit.rs - Revert to #[derive(Debug)] for all backends - Revert feature_val to upstream unsafe { features.as_uint64[0] } - Remove MSR intercept entirely from MSHV - Extract shared hw_interrupts module (LAPIC helpers, IO port handling, EOI bridging) to reduce duplication between MSHV and WHP - Replace lapic_timer_active with timer_thread.is_some() - Error on missing LAPIC emulation in WHP instead of fallback path - Remove lapic_emulation field from WhpVm - Remove Nanvix mentions from comments - Add Intel SDM references to LAPIC register writes Signed-off-by: danbugs --- src/hyperlight_guest/src/exit.rs | 18 +- .../src/arch/amd64/dispatch.rs | 2 - .../src/arch/amd64/exception/entry.rs | 26 -- .../src/arch/amd64/init.rs | 2 - .../virtual_machine/hw_interrupts.rs | 253 ++++++++++++ .../src/hypervisor/virtual_machine/kvm.rs | 24 +- .../src/hypervisor/virtual_machine/mod.rs | 4 + .../src/hypervisor/virtual_machine/mshv.rs | 265 ++---------- .../src/hypervisor/virtual_machine/whp.rs | 384 ++++-------------- 9 files changed, 394 insertions(+), 584 deletions(-) create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs diff --git a/src/hyperlight_guest/src/exit.rs b/src/hyperlight_guest/src/exit.rs index 785e10a19..1926340a4 100644 --- a/src/hyperlight_guest/src/exit.rs +++ b/src/hyperlight_guest/src/exit.rs @@ -20,10 +20,15 @@ use core::ffi::{CStr, c_char}; use hyperlight_common::outb::OutBAction; use tracing::instrument; -/// Halt the execution of the guest and returns control to the host. -/// Halt is generally called for a successful completion of the guest's work, -/// this means we can instrument it as a trace point because the trace state -/// shall not be locked at this point (we are not in an exception context). +/// Signal successful completion of the guest's work and return control +/// to the host. This replaces the previous `hlt`-based exit: under the +/// `hw-interrupts` feature, `hlt` becomes a wait-for-interrupt (the +/// in-kernel IRQ chip wakes the vCPU), so we use an explicit IO-port +/// write (port 108) to trigger a VM exit that the host treats as a +/// clean shutdown. +/// +/// This function never returns — the host does not re-enter the guest +/// after seeing the Halt port. #[inline(never)] #[instrument(skip_all, level = "Trace")] pub fn halt() { @@ -37,13 +42,8 @@ pub fn halt() { hyperlight_guest_tracing::flush(); } - // Signal "I'm done" to the host via an IO port write. This causes a - // VM exit on all backends (KVM, MSHV, WHP). The subsequent cli+hlt - // is dead code—hyperlight never re-enters the guest after seeing - // the Halt port—but serves as a safety fallback. unsafe { out32(OutBAction::Halt as u16, 0); - asm!("cli", "hlt", options(nostack)); } } diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index e904eeb6e..a39de9020 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -70,7 +70,5 @@ core::arch::global_asm!(" call {internal_dispatch_function}\n mov dx, 108\n out dx, al\n - cli\n - hlt\n .cfi_endproc ", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs index ca72e62d8..87f89f15c 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs @@ -172,24 +172,6 @@ global_asm!( hl_exception_handler = sym super::handle::hl_exception_handler, ); -// Default no-op IRQ handler for hardware interrupts (vectors 0x20-0x2F). -// Sends a non-specific EOI to the master PIC and returns. -// This prevents unhandled-interrupt faults when the in-kernel PIT fires -// before a guest has installed its own IRQ handler. -global_asm!( - ".globl _default_irq_handler", - "_default_irq_handler:", - "push rax", - "mov al, 0x20", - "out 0x20, al", // PIC EOI - "pop rax", - "iretq", -); - -unsafe extern "C" { - fn _default_irq_handler(); -} - pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { let idt = unsafe { &raw mut (*pc).idt }; let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { @@ -221,14 +203,6 @@ pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { set_idt_entry(Exception::VirtualizationException, _do_excp20); // Virtualization Exception set_idt_entry(Exception::SecurityException, _do_excp30); // Security Exception - // Install a default no-op IRQ handler at vector 0x20 (IRQ0 after PIC remapping). - // This ensures HLT can wake when an in-kernel PIT is active, even before the - // guest installs its own IRQ handler. - let irq_handler_addr = _default_irq_handler as *const () as u64; - unsafe { - (&raw mut (*idt).entries[0x20]).write_volatile(IdtEntry::new(irq_handler_addr)); - } - let idtr = IdtPointer { limit: (core::mem::size_of::() - 1) as u16, base: idt as u64, diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index fa8b86c4d..bfa458059 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -175,7 +175,5 @@ core::arch::global_asm!(" call {generic_init}\n mov dx, 108\n out dx, al\n - cli\n - hlt\n .cfi_endproc\n ", generic_init = sym crate::generic_init); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs new file mode 100644 index 000000000..e20718fce --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs @@ -0,0 +1,253 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Shared helpers for the `hw-interrupts` feature, used by MSHV and WHP +//! backends. KVM uses an in-kernel IRQ chip so it handles most of +//! this transparently. + +/// Hardcoded timer interrupt vector. The guest remaps IRQ0 to +/// vector 0x20 via PIC ICW2, so we use that same vector directly +/// — no PIC state machine needed. +pub(crate) const TIMER_VECTOR: u32 = 0x20; + +/// Handle an IO IN request for hardware-interrupt related ports. +/// Returns `Some(value)` if the port was handled, `None` if the +/// port should be passed through to the guest handler. +/// +/// No PIC state machine — PIC data ports return 0xFF (all masked), +/// PIC command ports return 0 (no pending IRQ), PIT returns 0. +pub(crate) fn handle_io_in(port: u16) -> Option { + match port { + // PIC master/slave data ports — return "all masked" + 0x21 | 0xA1 => Some(0xFF), + // PIC master/slave command ports — return 0 (ISR/IRR read) + 0x20 | 0xA0 => Some(0), + // PIT data port read — return 0 + 0x40 => Some(0), + _ => None, + } +} + +/// Handle IO OUT requests for common legacy hardware ports +/// (PIC, PIT, PC speaker, diagnostic port). +/// +/// Returns `true` if the port was fully handled, `false` otherwise. +/// When the guest sends a non-specific EOI on the master PIC command +/// port (0x20, OCW2 byte with bits 7:5 = 001) and a timer is active, +/// `eoi_callback` is invoked to bridge the PIC EOI to the LAPIC. +pub(crate) fn handle_common_io_out( + port: u16, + data: &[u8], + timer_active: bool, + eoi_callback: impl FnOnce(), +) -> bool { + // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. + // We only need LAPIC EOI bridging when the guest sends a + // non-specific EOI on the master PIC command port. + if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { + if port == 0x20 && !data.is_empty() && (data[0] & 0xE0) == 0x20 && timer_active { + eoi_callback(); + } + return true; + } + // PIT ports + if port == 0x43 || port == 0x40 { + return true; + } + // PC speaker + if port == 0x61 { + return true; + } + // Diagnostic port + if port == 0x80 { + return true; + } + false +} + +// --------------------------------------------------------------------------- +// LAPIC register helpers +// +// LAPIC register offsets from Intel SDM Vol. 3A, Table 11-1. +// These operate on a raw byte slice representing the LAPIC register page. +// --------------------------------------------------------------------------- + +/// Write a u32 to a LAPIC register page at the given APIC offset. +pub(crate) fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { + state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); +} + +/// Read a u32 from a LAPIC register page at the given APIC offset. +pub(crate) fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { + u32::from_le_bytes([ + state[offset], + state[offset + 1], + state[offset + 2], + state[offset + 3], + ]) +} + +/// Initialize LAPIC registers in a raw register page to sensible +/// defaults for timer interrupt delivery. +/// +/// Register values from Intel SDM Vol. 3A, Table 11-1: +/// - SVR (0xF0): bit 8 = enable APIC, bits 0-7 = spurious vector 0xFF +/// - TPR (0x80): 0 = accept all interrupt priorities +/// - DFR (0xE0): 0xFFFF_FFFF = flat model +/// - LDR (0xD0): logical APIC ID for flat model +/// - LINT0 (0x350): masked — not wired to PIC +/// - LINT1 (0x360): NMI delivery, not masked +/// - LVT Timer (0x320): masked — host timer thread injects instead +/// - LVT Error (0x370): masked +pub(crate) fn init_lapic_registers(state: &mut [u8]) { + write_lapic_u32(state, 0xF0, 0x1FF); + write_lapic_u32(state, 0x80, 0); + write_lapic_u32(state, 0xE0, 0xFFFF_FFFF); + write_lapic_u32(state, 0xD0, 1 << 24); + write_lapic_u32(state, 0x350, 0x0001_0000); + write_lapic_u32(state, 0x360, 0x400); + write_lapic_u32(state, 0x320, 0x0001_0000); + write_lapic_u32(state, 0x370, 0x0001_0000); +} + +/// Perform LAPIC EOI: clear the highest-priority in-service bit. +/// +/// The ISR is at LAPIC offset 0x100, organized as 8 × 32-bit words +/// (one per 16 bytes). Scans from highest priority (ISR\[7\]) to +/// lowest (ISR\[0\]). +pub(crate) fn lapic_eoi(state: &mut [u8]) { + for i in (0u32..8).rev() { + let offset = 0x100 + (i as usize) * 0x10; + let isr_val = read_lapic_u32(state, offset); + if isr_val != 0 { + let bit = 31 - isr_val.leading_zeros(); + write_lapic_u32(state, offset, isr_val & !(1u32 << bit)); + break; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn write_read_lapic_roundtrip() { + let mut state = vec![0u8; 1024]; + write_lapic_u32(&mut state, 0xF0, 0xDEAD_BEEF); + assert_eq!(read_lapic_u32(&state, 0xF0), 0xDEAD_BEEF); + } + + #[test] + fn write_read_lapic_multiple_offsets() { + let mut state = vec![0u8; 1024]; + write_lapic_u32(&mut state, 0x80, 0x1234_5678); + write_lapic_u32(&mut state, 0xF0, 0xABCD_EF01); + write_lapic_u32(&mut state, 0xE0, 0xFFFF_FFFF); + assert_eq!(read_lapic_u32(&state, 0x80), 0x1234_5678); + assert_eq!(read_lapic_u32(&state, 0xF0), 0xABCD_EF01); + assert_eq!(read_lapic_u32(&state, 0xE0), 0xFFFF_FFFF); + } + + #[test] + fn write_read_lapic_zero() { + let mut state = vec![0xFFu8; 1024]; + write_lapic_u32(&mut state, 0x80, 0); + assert_eq!(read_lapic_u32(&state, 0x80), 0); + } + + #[test] + fn write_does_not_clobber_neighbors() { + let mut state = vec![0u8; 1024]; + write_lapic_u32(&mut state, 0x80, 0xAAAA_BBBB); + assert_eq!(state[0x7F], 0); + assert_eq!(state[0x84], 0); + } + + #[test] + fn lapic_eoi_clears_highest_isr_bit() { + let mut state = vec![0u8; 1024]; + // Set bit 5 in ISR[0] (offset 0x100) + write_lapic_u32(&mut state, 0x100, 1 << 5); + lapic_eoi(&mut state); + assert_eq!(read_lapic_u32(&state, 0x100), 0); + } + + #[test] + fn lapic_eoi_clears_only_highest() { + let mut state = vec![0u8; 1024]; + // Set bits in ISR[0] and ISR[1] + write_lapic_u32(&mut state, 0x100, 0b11); // bits 0 and 1 + write_lapic_u32(&mut state, 0x110, 1 << 2); // bit 2 in ISR[1] + lapic_eoi(&mut state); + // ISR[1] should be cleared (higher priority), ISR[0] untouched + assert_eq!(read_lapic_u32(&state, 0x110), 0); + assert_eq!(read_lapic_u32(&state, 0x100), 0b11); + } + + #[test] + fn init_lapic_registers_sets_svr() { + let mut state = vec![0u8; 1024]; + init_lapic_registers(&mut state); + let svr = read_lapic_u32(&state, 0xF0); + assert_ne!(svr & 0x100, 0, "APIC enable bit should be set"); + assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); + } + + #[test] + fn handle_io_in_pic_ports() { + assert_eq!(handle_io_in(0x21), Some(0xFF)); + assert_eq!(handle_io_in(0xA1), Some(0xFF)); + assert_eq!(handle_io_in(0x20), Some(0)); + assert_eq!(handle_io_in(0xA0), Some(0)); + assert_eq!(handle_io_in(0x40), Some(0)); + assert_eq!(handle_io_in(0x42), None); + } + + #[test] + fn handle_common_io_out_pic_eoi() { + let mut eoi_called = false; + // Non-specific EOI (0x20) on master PIC command port + let handled = handle_common_io_out(0x20, &[0x20], true, || eoi_called = true); + assert!(handled); + assert!(eoi_called); + } + + #[test] + fn handle_common_io_out_no_eoi_when_timer_inactive() { + let mut eoi_called = false; + let handled = handle_common_io_out(0x20, &[0x20], false, || eoi_called = true); + assert!(handled); + assert!(!eoi_called); + } + + #[test] + fn handle_common_io_out_pit_ports() { + assert!(handle_common_io_out(0x40, &[], false, || {})); + assert!(handle_common_io_out(0x43, &[], false, || {})); + } + + #[test] + fn handle_common_io_out_speaker_and_diag() { + assert!(handle_common_io_out(0x61, &[], false, || {})); + assert!(handle_common_io_out(0x80, &[], false, || {})); + } + + #[test] + fn handle_common_io_out_unknown_port() { + assert!(!handle_common_io_out(0x100, &[], false, || {})); + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index ae20b65ab..70e4b9522 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -40,6 +40,8 @@ use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, }; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::HypervisorError; #[cfg(all(test, feature = "init-paging"))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; use crate::hypervisor::virtual_machine::{ @@ -91,6 +93,7 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A KVM implementation of a single-vcpu VM +#[derive(Debug)] pub(crate) struct KvmVm { vm_fd: VmFd, vcpu_fd: VcpuFd, @@ -111,15 +114,6 @@ pub(crate) struct KvmVm { debug_regs: kvm_guest_debug, } -impl std::fmt::Debug for KvmVm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("KvmVm") - .field("vm_fd", &self.vm_fd) - .field("vcpu_fd", &self.vcpu_fd) - .finish_non_exhaustive() - } -} - static KVM: LazyLock> = LazyLock::new(|| Kvm::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); @@ -263,14 +257,12 @@ impl VirtualMachine for KvmVm { if port == OutBAction::PvTimerConfig as u16 { // The guest is configuring the timer period. // Extract the period in microseconds (LE u32). - if data.len() >= 4 { - let period_us = - u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; + if let Ok(bytes) = data[..4].try_into() { + let period_us = u32::from_le_bytes(bytes) as u64; if period_us > 0 && self.timer_thread.is_none() { - let eventfd = self - .timer_irq_eventfd - .try_clone() - .expect("failed to clone timer EventFd"); + let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { + RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) + })?; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us); self.timer_thread = Some(std::thread::spawn(move || { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index 0f9514578..469f84d9d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -38,6 +38,10 @@ pub(crate) mod mshv; #[cfg(target_os = "windows")] pub(crate) mod whp; +/// Shared helpers for hardware interrupt support (MSHV and WHP) +#[cfg(feature = "hw-interrupts")] +pub(crate) mod hw_interrupts; + static AVAILABLE_HYPERVISOR: OnceLock> = OnceLock::new(); /// Returns which type of hypervisor is available, if any diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index d26284d4e..4382eb43d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -31,17 +31,12 @@ use mshv_bindings::{ FloatingPointUnit, SpecialRegisters, StandardRegisters, XSave, hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, - hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_register_assoc, + hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, + hv_partition_synthetic_processor_features, hv_register_assoc, hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_create_partition_v2, mshv_user_mem_region, }; #[cfg(feature = "hw-interrupts")] -use mshv_bindings::{ - HV_INTERCEPT_ACCESS_MASK_WRITE, hv_intercept_parameters, - hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, hv_message_type_HVMSG_X64_MSR_INTERCEPT, - mshv_install_intercept, -}; -#[cfg(feature = "hw-interrupts")] use mshv_bindings::{ hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, hv_register_name_HV_X64_REGISTER_RAX, }; @@ -82,6 +77,7 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A MSHV implementation of a single-vcpu VM +#[derive(Debug)] pub(crate) struct MshvVm { /// VmFd wrapped in Arc so the timer thread can call /// `request_virtual_interrupt` from a background thread. @@ -98,37 +94,24 @@ pub(crate) struct MshvVm { timer_thread: Option>, } -impl std::fmt::Debug for MshvVm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MshvVm") - .field("vm_fd", &self.vm_fd) - .field("vcpu_fd", &self.vcpu_fd) - .finish_non_exhaustive() - } -} - static MSHV: LazyLock> = LazyLock::new(|| Mshv::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); -/// Write a u32 to a LAPIC register at the given APIC offset. +/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&[u8]` slice +/// for use with the shared LAPIC helpers. #[cfg(feature = "hw-interrupts")] -fn write_lapic_u32(regs: &mut [::std::os::raw::c_char; 1024], offset: usize, val: u32) { - let bytes = val.to_le_bytes(); - regs[offset] = bytes[0] as _; - regs[offset + 1] = bytes[1] as _; - regs[offset + 2] = bytes[2] as _; - regs[offset + 3] = bytes[3] as _; +fn lapic_regs_as_u8(regs: &[::std::os::raw::c_char; 1024]) -> &[u8] { + // Safety: c_char (i8) and u8 have the same size and alignment; + // LAPIC register values are treated as raw bytes. + unsafe { &*(regs as *const [::std::os::raw::c_char; 1024] as *const [u8; 1024]) } } -/// Read a u32 from a LAPIC register at the given APIC offset. +/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&mut [u8]` slice +/// for use with the shared LAPIC helpers. #[cfg(feature = "hw-interrupts")] -fn read_lapic_u32(regs: &[::std::os::raw::c_char; 1024], offset: usize) -> u32 { - u32::from_le_bytes([ - regs[offset] as u8, - regs[offset + 1] as u8, - regs[offset + 2] as u8, - regs[offset + 3] as u8, - ]) +fn lapic_regs_as_u8_mut(regs: &mut [::std::os::raw::c_char; 1024]) -> &mut [u8] { + // Safety: same as above. + unsafe { &mut *(regs as *mut [::std::os::raw::c_char; 1024] as *mut [u8; 1024]) } } impl MshvVm { @@ -150,18 +133,12 @@ impl MshvVm { .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; let vcpu_fd = { - // No synthetic features needed — timer interrupts are injected - // directly via request_virtual_interrupt(), not through SynIC. - // The LAPIC (enabled via pt_flags) handles interrupt delivery. - #[cfg(feature = "hw-interrupts")] - let feature_val = 0u64; - #[cfg(not(feature = "hw-interrupts"))] - let feature_val = 0u64; + let features: hv_partition_synthetic_processor_features = Default::default(); vm_fd .set_partition_property( hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - feature_val, + unsafe { features.as_uint64[0] }, ) .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; @@ -179,47 +156,17 @@ impl MshvVm { // interrupts can be delivered (request_virtual_interrupt would fail). #[cfg(feature = "hw-interrupts")] { + use super::hw_interrupts::init_lapic_registers; + let mut lapic: LapicState = vcpu_fd .get_lapic() .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - // SVR (offset 0xF0): bit 8 = enable APIC, bits 0-7 = spurious vector - write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); - // TPR (offset 0x80): 0 = accept all interrupt priorities - write_lapic_u32(&mut lapic.regs, 0x80, 0); - // DFR (offset 0xE0): 0xFFFFFFFF = flat model - write_lapic_u32(&mut lapic.regs, 0xE0, 0xFFFF_FFFF); - // LDR (offset 0xD0): set logical APIC ID for flat model - write_lapic_u32(&mut lapic.regs, 0xD0, 1 << 24); - // LINT0 (offset 0x350): masked — we don't forward PIC through LAPIC; - // our PIC is emulated in userspace and not wired to LINT0. - write_lapic_u32(&mut lapic.regs, 0x350, 0x0001_0000); - // LINT1 (offset 0x360): NMI delivery, not masked - write_lapic_u32(&mut lapic.regs, 0x360, 0x400); - // LVT Timer (offset 0x320): masked — we use host timer thread instead - write_lapic_u32(&mut lapic.regs, 0x320, 0x0001_0000); - // LVT Error (offset 0x370): masked - write_lapic_u32(&mut lapic.regs, 0x370, 0x0001_0000); + init_lapic_registers(lapic_regs_as_u8_mut(&mut lapic.regs)); vcpu_fd .set_lapic(&lapic) .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - - // Install MSR intercept for IA32_APIC_BASE (MSR 0x1B) to prevent - // the guest from globally disabling the LAPIC. The Nanvix kernel - // disables the APIC when no I/O APIC is detected, but we need - // the LAPIC enabled for request_virtual_interrupt delivery. - // - // This may fail with AccessDenied on some kernel versions; in - // that case we fall back to re-enabling the LAPIC in the timer - // setup path (handle_hw_io_out). - let _ = vm_fd.install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_WRITE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, - intercept_parameter: hv_intercept_parameters { - msr_index: 0x1B, // IA32_APIC_BASE - }, - }); } Ok(Self { @@ -267,8 +214,6 @@ impl VirtualMachine for MshvVm { hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; - #[cfg(feature = "hw-interrupts")] - const MSR_INTERCEPT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_MSR_INTERCEPT; #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; @@ -337,7 +282,9 @@ impl VirtualMachine for MshvVm { if self.handle_hw_io_out(port_number, &data) { continue; } - } else if let Some(val) = self.handle_hw_io_in(port_number) { + } else if let Some(val) = + super::hw_interrupts::handle_io_in(port_number) + { self.vcpu_fd .set_reg(&[hv_register_assoc { name: hv_register_name_HV_X64_REGISTER_RAX, @@ -380,39 +327,6 @@ impl VirtualMachine for MshvVm { _ => Ok(VmExit::Unknown("Unknown MMIO access".to_string())), }; } - #[cfg(feature = "hw-interrupts")] - MSR_INTERCEPT_MESSAGE => { - // Guest is writing to MSR 0x1B (IA32_APIC_BASE). - // Force bit 11 (global APIC enable) to stay set, - // preventing the guest from disabling the LAPIC. - let msr_msg = m - .to_msr_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; - let rip = msr_msg.header.rip; - let instruction_length = msr_msg.header.instruction_length() as u64; - let msr_val = (msr_msg.rdx << 32) | (msr_msg.rax & 0xFFFF_FFFF); - - // Force APIC global enable (bit 11) to remain set, - // preserving the standard base address. - let forced_val = msr_val | (1 << 11) | 0xFEE00000; - self.vcpu_fd - .set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_RIP, - value: hv_register_value { - reg64: rip + instruction_length, - }, - ..Default::default() - }]) - .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - - use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; - let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_APIC_BASE, - value: hv_register_value { reg64: forced_val }, - ..Default::default() - }]); - continue; - } #[cfg(gdb)] EXCEPTION_INTERCEPT => { let ex_info = m @@ -709,47 +623,14 @@ impl MshvVm { /// BSP flag (bit 8) + global enable (bit 11). const APIC_BASE_DEFAULT: u64 = 0xFEE00900; - /// Hardcoded timer interrupt vector. The nanvix guest always - /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same - /// vector directly — no PIC state machine needed. - const TIMER_VECTOR: u32 = 0x20; - /// Perform LAPIC EOI: clear the highest-priority in-service bit. /// Called when the guest sends PIC EOI, since the timer thread /// delivers interrupts through the LAPIC and the guest only /// acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { - // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). - // Scan from highest priority (ISR[7]) to lowest (ISR[0]). - for i in (0u32..8).rev() { - let offset = 0x100 + (i as usize) * 0x10; - let isr_val = read_lapic_u32(&lapic.regs, offset); - if isr_val != 0 { - let bit = 31 - isr_val.leading_zeros(); - write_lapic_u32(&mut lapic.regs, offset, isr_val & !(1u32 << bit)); - let _ = self.vcpu_fd.set_lapic(&lapic); - break; - } - } - } - } - - /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled, `None` if the - /// port should be passed through to the guest handler. - /// - /// No PIC state machine — PIC data ports return 0xFF (all masked), - /// PIC command ports return 0 (no pending IRQ), PIT returns 0. - fn handle_hw_io_in(&self, port: u16) -> Option { - match port { - // PIC master/slave data ports — return "all masked" - 0x21 | 0xA1 => Some(0xFF), - // PIC master/slave command ports — return 0 (ISR/IRR read) - 0x20 | 0xA0 => Some(0), - // PIT data port read — return 0 - 0x40 => Some(0), - _ => None, + super::hw_interrupts::lapic_eoi(lapic_regs_as_u8_mut(&mut lapic.regs)); + let _ = self.vcpu_fd.set_lapic(&lapic); } } @@ -759,8 +640,8 @@ impl MshvVm { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us > 0 && self.timer_thread.is_none() { // Re-enable LAPIC if the guest disabled it (via WRMSR - // to MSR 0x1B clearing bit 11). This happens when the - // Nanvix kernel doesn't detect an I/O APIC. + // to MSR 0x1B clearing bit 11). Some guests clear + // the global APIC enable when no I/O APIC is detected. // // The hypervisor may return 0 for APIC_BASE when the // APIC is globally disabled, so we always restore the @@ -786,21 +667,18 @@ impl MshvVm { // Re-initialize LAPIC SVR (may have been zeroed when // guest disabled the APIC globally) if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { - let svr = read_lapic_u32(&lapic.regs, 0xF0); + let regs = lapic_regs_as_u8(&lapic.regs); + let svr = super::hw_interrupts::read_lapic_u32(regs, 0xF0); if svr & 0x100 == 0 { - write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); - write_lapic_u32(&mut lapic.regs, 0x80, 0); // TPR + let regs_mut = lapic_regs_as_u8_mut(&mut lapic.regs); + super::hw_interrupts::write_lapic_u32(regs_mut, 0xF0, 0x1FF); + super::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR let _ = self.vcpu_fd.set_lapic(&lapic); } } - // Start a host timer thread that periodically injects - // interrupts via request_virtual_interrupt (HVCALL 148). - // This replaces the SynIC timer approach and makes MSHV - // consistent with the KVM irqfd and WHP software timer - // patterns. let vm_fd = self.vm_fd.clone(); - let vector = Self::TIMER_VECTOR; + let vector = super::hw_interrupts::TIMER_VECTOR; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); self.timer_thread = Some(std::thread::spawn(move || { @@ -823,30 +701,8 @@ impl MshvVm { } return true; } - // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. - // No PIC state machine — we only need LAPIC EOI bridging when - // the guest sends a non-specific EOI on the master PIC command - // port (0x20, OCW2 byte with bits 7:5 = 001). - if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { - if port == 0x20 - && !data.is_empty() - && (data[0] & 0xE0) == 0x20 - && self.timer_thread.is_some() - { - self.do_lapic_eoi(); - } - return true; - } - if port == 0x43 || port == 0x40 { - return true; - } - if port == 0x61 { - return true; - } - if port == 0x80 { - return true; - } - false + let timer_active = self.timer_thread.is_some(); + super::hw_interrupts::handle_common_io_out(port, data, timer_active, || self.do_lapic_eoi()) } } @@ -866,37 +722,15 @@ mod hw_interrupt_tests { use super::*; #[test] - fn write_read_lapic_u32_roundtrip() { + fn lapic_regs_conversion_roundtrip() { let mut regs = [0i8; 1024]; - write_lapic_u32(&mut regs, 0xF0, 0xDEAD_BEEF); - assert_eq!(read_lapic_u32(®s, 0xF0), 0xDEAD_BEEF); - } - - #[test] - fn write_read_lapic_u32_multiple_offsets() { - let mut regs = [0i8; 1024]; - write_lapic_u32(&mut regs, 0x80, 0x1234_5678); - write_lapic_u32(&mut regs, 0xF0, 0xABCD_EF01); - write_lapic_u32(&mut regs, 0xE0, 0xFFFF_FFFF); - assert_eq!(read_lapic_u32(®s, 0x80), 0x1234_5678); - assert_eq!(read_lapic_u32(®s, 0xF0), 0xABCD_EF01); - assert_eq!(read_lapic_u32(®s, 0xE0), 0xFFFF_FFFF); - } - - #[test] - fn write_read_lapic_u32_zero() { - let mut regs = [0xFFu8 as i8; 1024]; - write_lapic_u32(&mut regs, 0x80, 0); - assert_eq!(read_lapic_u32(®s, 0x80), 0); - } - - #[test] - fn write_read_lapic_u32_does_not_clobber_neighbors() { - let mut regs = [0i8; 1024]; - write_lapic_u32(&mut regs, 0x80, 0xAAAA_BBBB); - // Check that bytes before and after are untouched - assert_eq!(regs[0x7F], 0); - assert_eq!(regs[0x84], 0); + let bytes = lapic_regs_as_u8_mut(&mut regs); + super::super::hw_interrupts::write_lapic_u32(bytes, 0xF0, 0xDEAD_BEEF); + let bytes = lapic_regs_as_u8(®s); + assert_eq!( + super::super::hw_interrupts::read_lapic_u32(bytes, 0xF0), + 0xDEAD_BEEF + ); } #[test] @@ -910,19 +744,4 @@ mod hw_interrupt_tests { "base address should be 0xFEE00000" ); } - - #[test] - fn lapic_svr_init_value() { - // SVR = 0x1FF: bit 8 = enable APIC, bits 0-7 = spurious vector 0xFF - let svr: u32 = 0x1FF; - assert_ne!(svr & 0x100, 0, "APIC enable bit should be set"); - assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); - } - - #[test] - fn lapic_lvt_masked_value() { - // Masked LVT entry: bit 16 = 1 - let masked: u32 = 0x0001_0000; - assert_ne!(masked & (1 << 16), 0, "mask bit should be set"); - } } diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index af74c7e7d..66c0a0a0b 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -76,12 +76,6 @@ pub(crate) struct WhpVm { partition: WHV_PARTITION_HANDLE, // Surrogate process for memory mapping surrogate_process: SurrogateProcess, - /// Whether the WHP host supports LAPIC emulation mode. - #[cfg(feature = "hw-interrupts")] - lapic_emulation: bool, - /// Whether the LAPIC timer is actively delivering periodic interrupts. - #[cfg(feature = "hw-interrupts")] - lapic_timer_active: bool, /// Signal to stop the software timer thread. #[cfg(feature = "hw-interrupts")] timer_stop: Option>, @@ -101,126 +95,62 @@ impl WhpVm { pub(crate) fn new() -> Result { const NUM_CPU: u32 = 1; - // Check whether the WHP host supports LAPIC emulation. - // Bit 1 of WHV_CAPABILITY_FEATURES corresponds to LocalApicEmulation. - // LAPIC emulation is required for the LAPIC timer to deliver interrupts. - #[cfg(feature = "hw-interrupts")] - const LAPIC_EMULATION_BIT: u64 = 1 << 1; + let partition = unsafe { + #[cfg(feature = "hw-interrupts")] + { + // Check whether the WHP host supports LAPIC emulation. + // Bit 1 of WHV_CAPABILITY_FEATURES = LocalApicEmulation. + // LAPIC emulation is required for timer interrupt delivery. + const LAPIC_EMULATION_BIT: u64 = 1 << 1; - #[cfg(feature = "hw-interrupts")] - let mut lapic_emulation = { - let mut capability: WHV_CAPABILITY = Default::default(); - let ok = unsafe { - WHvGetCapability( + let mut capability: WHV_CAPABILITY = Default::default(); + let has_lapic = WHvGetCapability( WHvCapabilityCodeFeatures, &mut capability as *mut _ as *mut c_void, std::mem::size_of::() as u32, None, ) - }; - if ok.is_ok() { - unsafe { capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0 } - } else { - false - } - }; - - // Create partition, set CPU count, setup, and create vCPU. - // When hw-interrupts is enabled and LAPIC emulation is supported, - // we create a partition with LAPIC emulation mode. This is - // required for the LAPIC timer. - let partition = unsafe { - #[cfg(feature = "hw-interrupts")] - { - if lapic_emulation { - let p = - WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - - let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic - let partition_ok = WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeLocalApicEmulationMode, - &apic_mode as *const _ as *const _, - std::mem::size_of_val(&apic_mode) as _, - ) - .is_ok() - && WHvSetupPartition(p).is_ok() - && WHvCreateVirtualProcessor(p, 0, 0).is_ok(); - - if partition_ok { - log::info!("[WHP] LAPIC partition created, running init_lapic_bulk"); - // Initialize the LAPIC via the bulk interrupt-controller - // state API (individual APIC register writes via - // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). - if let Err(e) = Self::init_lapic_bulk(p) { - log::warn!( - "[WHP] Bulk LAPIC init failed ({e}); \ - falling back to non-LAPIC mode." - ); - let _ = WHvDeleteVirtualProcessor(p, 0); - let _ = WHvDeletePartition(p); - lapic_emulation = false; - - let p = WHvCreatePartition() - .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p) - .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p - } else { - log::info!("[WHP] Bulk LAPIC init succeeded, lapic_emulation=true"); - p - } - } else { - log::warn!("LAPIC emulation setup failed, falling back to non-LAPIC mode"); - let _ = WHvDeleteVirtualProcessor(p, 0); - let _ = WHvDeletePartition(p); - lapic_emulation = false; - - let p = WHvCreatePartition() - .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, + .is_ok() + && (capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0); + + if !has_lapic { + return Err(CreateVmError::InitializeVm( + windows_result::Error::new( + HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED + "WHP LocalApicEmulation capability is required for hw-interrupts", ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p - } - } else { - let p = - WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p + .into(), + )); } + + let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + + let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &apic_mode as *const _ as *const _, + std::mem::size_of_val(&apic_mode) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + + // Initialize the LAPIC via the bulk interrupt-controller + // state API (individual APIC register writes via + // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). + Self::init_lapic_bulk(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + p } #[cfg(not(feature = "hw-interrupts"))] @@ -249,10 +179,6 @@ impl WhpVm { .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))? }, #[cfg(feature = "hw-interrupts")] - lapic_emulation, - #[cfg(feature = "hw-interrupts")] - lapic_timer_active: false, - #[cfg(feature = "hw-interrupts")] timer_stop: None, #[cfg(feature = "hw-interrupts")] timer_thread: None, @@ -284,27 +210,8 @@ impl WhpVm { )?; } state.truncate(written as usize); - log::info!( - "[WHP] init_lapic_bulk: got {} bytes of LAPIC state", - written - ); - // SVR (offset 0xF0): enable APIC (bit 8) + spurious vector 0xFF - Self::write_lapic_u32(&mut state, 0xF0, 0x1FF); - // TPR (offset 0x80): 0 = accept all interrupt priorities - Self::write_lapic_u32(&mut state, 0x80, 0); - // DFR (offset 0xE0): 0xFFFFFFFF = flat model - Self::write_lapic_u32(&mut state, 0xE0, 0xFFFF_FFFF); - // LDR (offset 0xD0): set logical APIC ID for flat model - Self::write_lapic_u32(&mut state, 0xD0, 1 << 24); - // LINT0 (offset 0x350): masked - Self::write_lapic_u32(&mut state, 0x350, 0x0001_0000); - // LINT1 (offset 0x360): NMI delivery, not masked - Self::write_lapic_u32(&mut state, 0x360, 0x400); - // LVT Timer (offset 0x320): masked initially (configured on PvTimerConfig) - Self::write_lapic_u32(&mut state, 0x320, 0x0001_0000); - // LVT Error (offset 0x370): masked - Self::write_lapic_u32(&mut state, 0x370, 0x0001_0000); + super::hw_interrupts::init_lapic_registers(&mut state); unsafe { WHvSetVirtualProcessorInterruptControllerState2( @@ -318,23 +225,6 @@ impl WhpVm { Ok(()) } - /// Read a u32 from a LAPIC register page at the given APIC offset. - #[cfg(feature = "hw-interrupts")] - fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { - u32::from_le_bytes([ - state[offset], - state[offset + 1], - state[offset + 2], - state[offset + 3], - ]) - } - - /// Write a u32 to a LAPIC register page at the given APIC offset. - #[cfg(feature = "hw-interrupts")] - fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { - state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); - } - /// Helper for setting arbitrary registers. Makes sure the same number /// of names and values are passed (at the expense of some performance). fn set_registers( @@ -493,7 +383,7 @@ impl VirtualMachine for WhpVm { if self.handle_hw_io_out(port, &data) { continue; } - } else if let Some(val) = self.handle_hw_io_in(port) { + } else if let Some(val) = super::hw_interrupts::handle_io_in(port) { self.set_registers(&[( WHvX64RegisterRax, Align16(WHV_REGISTER_VALUE { Reg64: val }), @@ -514,7 +404,7 @@ impl VirtualMachine for WhpVm { // thread injects an interrupt via WHvRequestInterrupt, // waking the vCPU from HLT. #[cfg(feature = "hw-interrupts")] - if self.lapic_timer_active { + if self.timer_thread.is_some() { continue; } return Ok(VmExit::Halt()); @@ -567,13 +457,13 @@ impl VirtualMachine for WhpVm { } WHV_RUN_VP_EXIT_REASON(_) => { let rip = exit_context.VpContext.Rip; - log::error!( + tracing::error!( "WHP unknown exit reason {}: RIP={:#x}", exit_context.ExitReason.0, rip, ); if let Ok(regs) = self.regs() { - log::error!( + tracing::error!( " RAX={:#x} RCX={:#x} RDX={:#x}", regs.rax, regs.rcx, @@ -581,7 +471,7 @@ impl VirtualMachine for WhpVm { ); } if let Ok(sregs) = self.sregs() { - log::error!( + tracing::error!( " CR0={:#x} CR4={:#x} EFER={:#x} APIC_BASE={:#x}", sregs.cr0, sregs.cr4, @@ -705,15 +595,13 @@ impl VirtualMachine for WhpVm { let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = sregs.into(); - // When LAPIC emulation is active, skip writing APIC_BASE. - // The generic CommonSpecialRegisters defaults APIC_BASE to 0 - // which would globally disable the LAPIC. On some WHP hosts, - // host-side APIC register writes are blocked entirely - // (ACCESS_DENIED), so attempting to write even a valid value - // would fail and abort set_sregs. Omitting it lets the VP - // keep its default APIC_BASE and the guest configures it. + // When LAPIC emulation is active (always with hw-interrupts), + // skip writing APIC_BASE. The generic CommonSpecialRegisters + // defaults APIC_BASE to 0 which would globally disable the LAPIC. + // On some WHP hosts, host-side APIC register writes are blocked + // entirely (ACCESS_DENIED). #[cfg(feature = "hw-interrupts")] - if self.lapic_emulation { + { let filtered: Vec<_> = whp_regs .iter() .copied() @@ -721,12 +609,15 @@ impl VirtualMachine for WhpVm { .collect(); self.set_registers(&filtered) .map_err(|e| RegisterError::SetSregs(e.into()))?; - return Ok(()); + Ok(()) } - self.set_registers(&whp_regs) - .map_err(|e| RegisterError::SetSregs(e.into()))?; - Ok(()) + #[cfg(not(feature = "hw-interrupts"))] + { + self.set_registers(&whp_regs) + .map_err(|e| RegisterError::SetSregs(e.into()))?; + Ok(()) + } } fn debug_regs(&self) -> std::result::Result { @@ -1077,11 +968,6 @@ impl DebuggableVm for WhpVm { #[cfg(feature = "hw-interrupts")] impl WhpVm { - /// Hardcoded timer interrupt vector. The nanvix guest always - /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same - /// vector directly — no PIC state machine needed. - const TIMER_VECTOR: u32 = 0x20; - /// Get the LAPIC state via the bulk interrupt-controller state API. fn get_lapic_state(&self) -> windows_result::Result> { let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; @@ -1117,36 +1003,8 @@ impl WhpVm { /// delivers through the LAPIC and the guest only acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut state) = self.get_lapic_state() { - // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). - // Scan from highest priority (ISR[7]) to lowest (ISR[0]). - for i in (0u32..8).rev() { - let offset = 0x100 + (i as usize) * 0x10; - let isr_val = Self::read_lapic_u32(&state, offset); - if isr_val != 0 { - let bit = 31 - isr_val.leading_zeros(); - Self::write_lapic_u32(&mut state, offset, isr_val & !(1u32 << bit)); - let _ = self.set_lapic_state(&state); - break; - } - } - } - } - - /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled, `None` if the - /// port should be passed through to the guest handler. - /// - /// No PIC state machine — PIC data ports return 0xFF (all masked), - /// PIC command ports return 0 (no pending IRQ), PIT returns 0. - fn handle_hw_io_in(&self, port: u16) -> Option { - match port { - // PIC master/slave data ports — return "all masked" - 0x21 | 0xA1 => Some(0xFF), - // PIC master/slave command ports — return 0 (ISR/IRR read) - 0x20 | 0xA0 => Some(0), - // PIT data port read — return 0 - 0x40 => Some(0), - _ => None, + super::hw_interrupts::lapic_eoi(&mut state); + let _ = self.set_lapic_state(&state); } } @@ -1158,28 +1016,19 @@ impl WhpVm { if let Some(handle) = self.timer_thread.take() { let _ = handle.join(); } - self.lapic_timer_active = false; } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == OutBAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - log::info!( - "[WHP] PvTimerConfig: period_us={period_us}, lapic_emulation={}", - self.lapic_emulation - ); // Stop any existing timer thread before (re-)configuring. self.stop_timer_thread(); - if period_us > 0 && self.lapic_emulation { - // WHP's bulk LAPIC state API does not start the LAPIC - // timer countdown (CurCount stays 0). Instead we use a - // host-side software timer thread that periodically - // injects interrupts via WHvRequestInterrupt. + if period_us > 0 { let partition_raw = self.partition.0; - let vector = Self::TIMER_VECTOR; + let vector = super::hw_interrupts::TIMER_VECTOR; let stop = Arc::new(AtomicBool::new(false)); let stop_clone = stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); @@ -1208,38 +1057,12 @@ impl WhpVm { self.timer_stop = Some(stop); self.timer_thread = Some(handle); - self.lapic_timer_active = true; - log::info!( - "[WHP] Software timer started (period={period_us}us, vector={vector:#x})" - ); } } return true; } - // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. - // No PIC state machine — we only need LAPIC EOI bridging when - // the guest sends a non-specific EOI on the master PIC command - // port (0x20, OCW2 byte with bits 7:5 = 001). - if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { - if port == 0x20 - && !data.is_empty() - && (data[0] & 0xE0) == 0x20 - && self.lapic_timer_active - { - self.do_lapic_eoi(); - } - return true; - } - if port == 0x43 || port == 0x40 { - return true; - } - if port == 0x61 { - return true; - } - if port == 0x80 { - return true; - } - false + let timer_active = self.timer_thread.is_some(); + super::hw_interrupts::handle_common_io_out(port, data, timer_active, || self.do_lapic_eoi()) } } @@ -1304,59 +1127,11 @@ mod hw_interrupt_tests { use super::*; #[test] - fn lapic_register_helpers() { + fn lapic_register_helpers_delegate() { + use crate::hypervisor::virtual_machine::hw_interrupts; let mut state = vec![0u8; 1024]; - WhpVm::write_lapic_u32(&mut state, 0xF0, 0x1FF); - assert_eq!(WhpVm::read_lapic_u32(&state, 0xF0), 0x1FF); - - WhpVm::write_lapic_u32(&mut state, 0x320, 0x0002_0020); - assert_eq!(WhpVm::read_lapic_u32(&state, 0x320), 0x0002_0020); - } - - #[test] - fn lapic_lvt_timer_periodic() { - // LVT Timer: vector 0x20, periodic (bit 17), unmasked - let vector: u32 = 0x20; - let lvt = vector | (1 << 17); - assert_eq!(lvt & 0xFF, 0x20, "vector should be 0x20"); - assert_ne!(lvt & (1 << 17), 0, "periodic bit should be set"); - assert_eq!(lvt & (1 << 16), 0, "mask bit should be clear"); - } - - #[test] - fn lapic_timer_initial_count() { - // Hyper-V LAPIC timer is 10 MHz with divide-by-1. - // 1 tick = 100 ns, so period_us * 10 = initial count. - let period_us: u32 = 1000; // 1 ms - let initial_count = period_us * 10; - assert_eq!(initial_count, 10_000, "1ms should be 10000 ticks"); - - let period_us: u32 = 100; // 100 us - let initial_count = period_us * 10; - assert_eq!(initial_count, 1_000, "100us should be 1000 ticks"); - } - - #[test] - fn lapic_svr_value() { - let svr: u64 = 0x1FF; - assert_ne!(svr & (1 << 8), 0, "APIC should be enabled"); - assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); - } - - #[test] - fn apic_base_re_enable_value() { - // When re-enabling: base 0xFEE00000 + BSP (bit 8) + enable (bit 11) - let apic_base: u64 = 0xFEE00900; - assert_ne!(apic_base & (1 << 8), 0, "BSP flag"); - assert_ne!(apic_base & (1 << 11), 0, "global enable"); - assert_eq!(apic_base & 0xFFFFF000, 0xFEE00000, "base address"); - } - - #[test] - fn lapic_divide_config() { - // 0x0B = divide-by-1 - let divide: u32 = 0x0B; - assert_eq!(divide, 0x0B, "divide config should be 0x0B (divide-by-1)"); + hw_interrupts::write_lapic_u32(&mut state, 0xF0, 0x1FF); + assert_eq!(hw_interrupts::read_lapic_u32(&state, 0xF0), 0x1FF); } #[test] @@ -1376,13 +1151,10 @@ mod hw_interrupt_tests { ); let raw = unsafe { capability.Features.AsUINT64 }; let has_lapic = raw & (1 << 1) != 0; // bit 1 = LocalApicEmulation - println!("WHP Features raw: {raw:#018x}"); - println!("WHP LocalApicEmulation supported: {has_lapic}"); assert!( has_lapic, "This host does not support WHP LocalApicEmulation. \ - hw-interrupts LAPIC timer tests will be skipped. \ - Windows 11 22H2+ or a recent Windows Server build is typically required." + hw-interrupts requires Windows 11 22H2+ or a recent Windows Server build." ); } } From ca21917721e807affb92cd62bf7d6b979b18cdfa Mon Sep 17 00:00:00 2001 From: danbugs Date: Sat, 7 Mar 2026 08:32:54 +0000 Subject: [PATCH 18/18] fix: reset timer_stop flag before spawning timer thread During evolve(), the guest init sequence halts via OutBAction::Halt, which sets timer_stop=true. When the guest later configures a timer via PvTimerConfig, the timer thread inherits the already-true stop flag and exits immediately without ever firing. Reset the flag to false right before spawning the timer thread in both KVM and MSHV backends. WHP was not affected because it creates a fresh AtomicBool each time. Signed-off-by: danbugs --- src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs | 3 +++ src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index 70e4b9522..be64581b2 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -260,6 +260,9 @@ impl VirtualMachine for KvmVm { if let Ok(bytes) = data[..4].try_into() { let period_us = u32::from_le_bytes(bytes) as u64; if period_us > 0 && self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) })?; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 4382eb43d..0c698d94b 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -639,6 +639,9 @@ impl MshvVm { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us > 0 && self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); // Re-enable LAPIC if the guest disabled it (via WRMSR // to MSR 0x1B clearing bit 11). Some guests clear // the global APIC enable when no I/O APIC is detected.