Skip to content

Commit 36b3fe3

Browse files
committed
virtio-fs: signal exit_evt when guest reports an exit code
When a virtual machine (VM) passes its final exit status via the `KRUN_EXIT_CODE_IOCTL` ioctl on the virtio-fs root filesystem, the host records it in the shared `exit_code` `AtomicI32` but does not signal the VMM's `exit_evt` `EventFd`. The VMM only stops when `exit_evt` becomes readable, so the reported exit status by itself does not wake up the `EventManager` and terminate the VM. This is a generic bug in the virtio-fs/VMM exit interaction, not a code path specific to any particular architecture (on x86-64, however, the i8042 reset path is configured as a VMM exit event, which masks the problem). User-visible symptoms depend on the architecture/platform: on platforms without an equivalent mechanism, or where the VM continues its reboot/shutdown processing after reporting the exit code, the VM may keep running even after reporting completion. We observed the latter scenario on LoongArch, where the virtual machine continued its reboot/shutdown path and eventually hit a soft lockup instead of the VMM exiting immediately. Signed-off-by: Zewei Yang <yangzewei@loongson.cn>
1 parent 788cf91 commit 36b3fe3

3 files changed

Lines changed: 21 additions & 1 deletion

File tree

src/devices/src/virtio/fs/device.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ pub struct Fs {
4949
worker_thread: Option<JoinHandle<()>>,
5050
worker_stopfd: EventFd,
5151
exit_code: Arc<AtomicI32>,
52+
exit_evt: EventFd,
5253
#[cfg(target_os = "macos")]
5354
map_sender: Option<Sender<WorkerMessage>>,
5455
}
@@ -58,6 +59,7 @@ impl Fs {
5859
fs_id: String,
5960
shared_dir: String,
6061
exit_code: Arc<AtomicI32>,
62+
exit_evt: EventFd,
6163
allow_root_dir_delete: bool,
6264
) -> super::Result<Fs> {
6365
let avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX);
@@ -83,6 +85,7 @@ impl Fs {
8385
worker_thread: None,
8486
worker_stopfd: EventFd::new(EFD_NONBLOCK).map_err(FsError::EventFd)?,
8587
exit_code,
88+
exit_evt,
8689
#[cfg(target_os = "macos")]
8790
map_sender: None,
8891
})
@@ -185,6 +188,7 @@ impl VirtioDevice for Fs {
185188
self.passthrough_cfg.clone(),
186189
self.worker_stopfd.try_clone().unwrap(),
187190
self.exit_code.clone(),
191+
self.exit_evt.try_clone().unwrap(),
188192
#[cfg(target_os = "macos")]
189193
self.map_sender.clone(),
190194
);

src/devices/src/virtio/fs/worker.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crossbeam_channel::Sender;
44
use utils::worker_message::WorkerMessage;
55

66
use std::os::fd::AsRawFd;
7-
use std::sync::atomic::AtomicI32;
7+
use std::sync::atomic::{AtomicI32, Ordering};
88
use std::sync::Arc;
99
use std::thread;
1010

@@ -28,6 +28,7 @@ pub struct FsWorker {
2828
server: Server<PassthroughFs>,
2929
stop_fd: EventFd,
3030
exit_code: Arc<AtomicI32>,
31+
exit_evt: EventFd,
3132
#[cfg(target_os = "macos")]
3233
map_sender: Option<Sender<WorkerMessage>>,
3334
}
@@ -43,6 +44,7 @@ impl FsWorker {
4344
passthrough_cfg: passthrough::Config,
4445
stop_fd: EventFd,
4546
exit_code: Arc<AtomicI32>,
47+
exit_evt: EventFd,
4648
#[cfg(target_os = "macos")] map_sender: Option<Sender<WorkerMessage>>,
4749
) -> Self {
4850
Self {
@@ -54,6 +56,7 @@ impl FsWorker {
5456
server: Server::new(PassthroughFs::new(passthrough_cfg).unwrap()),
5557
stop_fd,
5658
exit_code,
59+
exit_evt,
5760
#[cfg(target_os = "macos")]
5861
map_sender,
5962
}
@@ -155,6 +158,7 @@ impl FsWorker {
155158
.map_err(FsError::QueueWriter)
156159
.unwrap();
157160

161+
let exit_evt_before = self.exit_code.load(Ordering::SeqCst);
158162
if let Err(e) = self.server.handle_message(
159163
reader,
160164
writer,
@@ -173,6 +177,12 @@ impl FsWorker {
173177
if queue.needs_notification(&self.mem).unwrap() {
174178
self.interrupt.signal_used_queue();
175179
}
180+
let exit_evt_after = self.exit_code.load(Ordering::SeqCst);
181+
if exit_evt_before == i32::MAX && exit_evt_after != i32::MAX {
182+
if let Err(e) = self.exit_evt.write(exit_evt_after as u64) {
183+
error!("failed to signal exit event: {e:?}");
184+
}
185+
}
176186
}
177187
}
178188
}

src/vmm/src/builder.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,11 +1885,17 @@ fn attach_fs_devices(
18851885
use self::StartMicrovmError::*;
18861886

18871887
for (i, config) in fs_devs.iter().enumerate() {
1888+
let exit_evt = vmm
1889+
.exit_evt
1890+
.try_clone()
1891+
.map_err(Error::EventFd)
1892+
.map_err(StartMicrovmError::Internal)?;
18881893
let fs = Arc::new(Mutex::new(
18891894
devices::virtio::Fs::new(
18901895
config.fs_id.clone(),
18911896
config.shared_dir.clone(),
18921897
exit_code.clone(),
1898+
exit_evt,
18931899
config.allow_root_dir_delete,
18941900
)
18951901
.unwrap(),

0 commit comments

Comments
 (0)