Skip to content

Commit 35db324

Browse files
committed
[shimV2] adds SCSI disk manager
Add the SCSI manager that manages the full lifecycle of SCSI disk attachments on a Hyper-V VM. It abstracts host-side slot allocation, guest-side mount/unmount (with platform-specific paths for LCOW and WCOW), reference counting for shared disks, and two-phase teardown (guest unplug followed by host detach). Signed-off-by: Harsh Rawat <harshrawat@microsoft.com>
1 parent 87708ff commit 35db324

11 files changed

Lines changed: 1107 additions & 23 deletions

File tree

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
//go:build windows
2+
3+
// Package scsi manages the full lifecycle of SCSI disk mappings on a
4+
// Hyper-V VM, from host-side slot allocation through guest-side mounting.
5+
//
6+
// # Architecture
7+
//
8+
// [Manager] is the primary entry point, exposing two methods:
9+
//
10+
// - [Manager.MapToGuest]: allocates a SCSI slot (if needed), attaches the
11+
// disk to the VM's SCSI bus, and mounts the specified partition inside the
12+
// guest. The caller supplies a stable mappingID that identifies the mapping
13+
// across retries.
14+
// - [Manager.UnmapFromGuest]: unmounts the partition from the guest, and
15+
// when all mappings for an attachment are released, unplugs the SCSI
16+
// device and detaches the disk from the VM.
17+
//
18+
// All operations are serialized by a single mutex on the [Manager]. Guest
19+
// paths are always auto-generated; callers cannot supply their own.
20+
//
21+
// # Layered State Model
22+
//
23+
// The state is tracked at two layers:
24+
//
25+
// - [attachment]: represents a disk on the VM's SCSI bus (one per [VMSlot]).
26+
// States: attachPending → attachAttached → attachDetaching → attachUnplugged → attachDetached; reserved slots stay in attachReserved.
27+
// - [mount]: represents a partition mounted inside the guest (keyed by
28+
// partition index within an attachment).
29+
// States: mountPending → mountMounted → mountUnmounted.
30+
//
31+
// A third structure, [mapping], links a caller-supplied mappingID to an
32+
// [attachment] and partition index. It carries no lifecycle state of its own;
33+
// the [attachment] and [mount] state machines drive all transitions.
34+
//
35+
// # Retry / Idempotency
36+
//
37+
// Both [Manager.MapToGuest] and [Manager.UnmapFromGuest] are designed to be
38+
// retriable. On failure, the [attachment] and [mount] states remain at their
39+
// pre-operation position (no poisoning). A subsequent call with the same
40+
// mappingID resumes from where the previous attempt stopped.
41+
//
42+
// Calling [Manager.MapToGuest] with the same mappingID after a successful call
43+
// is a no-op that returns the existing guest path.
44+
//
45+
// # Attachment Lifecycle
46+
//
47+
// ┌──────────────────┐
48+
// │ attachPending │ ← stays here on attach failure (retriable)
49+
// └────────┬─────────┘
50+
// │ disk added to VM SCSI bus
51+
// ▼
52+
// ┌──────────────────┐
53+
// │ attachAttached │
54+
// └────────┬─────────┘
55+
// (mounts driven here)
56+
// │ all partitions released;
57+
// │ detach initiated
58+
// ▼
59+
// ┌──────────────────┐
60+
// │ attachDetaching │ ← stays here on unplug failure (retriable)
61+
// └────────┬─────────┘
62+
// │ SCSI device unplugged from guest
63+
// ▼
64+
// ┌──────────────────┐
65+
// │ attachUnplugged │
66+
// └────────┬─────────┘
67+
// │ disk removed from VM SCSI bus
68+
// ▼
69+
// ┌──────────────────┐
70+
// │ attachDetached │
71+
// └──────────────────┘
72+
// (entry removed from map)
73+
//
74+
// ┌──────────────────┐
75+
// │ attachReserved │ ← no transitions; pre-reserved at construction
76+
// └──────────────────┘
77+
//
78+
// # Mount Lifecycle
79+
//
80+
// ┌──────────────────┐
81+
// │ mountPending │ ← stays here on mount failure (retriable)
82+
// └────────┬─────────┘
83+
// │ guest mount succeeds
84+
// ▼
85+
// ┌──────────────────┐
86+
// │ mountMounted │
87+
// └────────┬─────────┘
88+
// │ refCount → 0;
89+
// │ guest unmount
90+
// ▼
91+
// ┌──────────────────┐
92+
// │ mountUnmounted │
93+
// └──────────────────┘
94+
// (partition entry removed from attachment)
95+
//
96+
// # Reference Counting
97+
//
98+
// Multiple mappingIDs may target the same disk and partition. [Manager.MapToGuest]
99+
// detects duplicates and increments a reference count on the [mount] instead of
100+
// issuing duplicate guest operations; the guest path is shared.
101+
//
102+
// [Manager.UnmapFromGuest] decrements the count and only unmounts when it reaches
103+
// zero.
104+
//
105+
// # Platform Variants
106+
//
107+
// Guest-side mount, unmount, and unplug steps differ between LCOW and WCOW
108+
// guests and are selected via build tags (default for the LCOW shim;
109+
// "wcow" tag for the WCOW shim):
110+
//
111+
// - LCOW: mounts via AddLCOWMappedVirtualDisk, unmounts via
112+
// RemoveLCOWMappedVirtualDisk, and unplugs via RemoveSCSIDevice.
113+
// - WCOW: mounts via AddWCOWMappedVirtualDisk (or
114+
// AddWCOWMappedVirtualDiskForContainerScratch for scratch disks),
115+
// unmounts via RemoveWCOWMappedVirtualDisk; unplug is a no-op because
116+
// Windows handles SCSI hot-unplug automatically when the host removes
117+
// the disk from the VM.
118+
//
119+
// # Usage
120+
//
121+
// mgr := scsi.New(vmID, vmScsi, linuxGuestScsi, windowsGuestScsi, numControllers, reservedSlots)
122+
//
123+
// diskConfig := scsi.DiskConfig{HostPath: "/path/to/disk.vhdx", Type: scsi.DiskTypeVirtualDisk}
124+
// mountConfig := scsi.MountConfig{ReadOnly: true}
125+
//
126+
// // Map the disk to the guest (allocate slot + attach + mount):
127+
// guestPath, err := mgr.MapToGuest(ctx, "container-abc/layer-0", diskConfig, mountConfig)
128+
// if err != nil {
129+
// // Retry with the same mappingID to resume:
130+
// guestPath, err = mgr.MapToGuest(ctx, "container-abc/layer-0", diskConfig, mountConfig)
131+
// }
132+
//
133+
// // Unmap (unmount + unplug + detach when last mapping):
134+
// if err := mgr.UnmapFromGuest(ctx, "container-abc/layer-0"); err != nil {
135+
// // Retry:
136+
// _ = mgr.UnmapFromGuest(ctx, "container-abc/layer-0")
137+
// }
138+
package scsi
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//go:build windows
2+
3+
package scsi
4+
5+
import (
6+
"sync"
7+
)
8+
9+
// Manager implements the methods to manage the full SCSI disk lifecycle —
10+
// slot allocation, VM attach, guest mount, and teardown — across one or more
11+
// controllers on a Hyper-V VM. All operations are serialized by a single mutex.
12+
type Manager struct {
13+
// mu serializes all public operations on the Manager.
14+
mu sync.Mutex
15+
16+
// vmID identifies the HCS compute system. Immutable after construction.
17+
vmID string
18+
19+
// numControllers is the number of SCSI controllers on the VM. Immutable after construction.
20+
numControllers int
21+
22+
// attachmentMap tracks SCSI slot occupancy keyed by controller and LUN. Guarded by mu.
23+
attachmentMap map[VMSlot]*attachment
24+
25+
// mappingMap indexes active mappings by caller-supplied ID. Guarded by mu.
26+
mappingMap map[string]*mapping
27+
28+
// nextMountIdx is a monotonic counter for generating unique guest mount paths. Guarded by mu.
29+
nextMountIdx int
30+
31+
// vmSCSI is the host-side interface for adding and removing disks from the VM. Immutable after construction.
32+
vmSCSI vmSCSI
33+
34+
// linuxGuestSCSI is the guest-side interface for SCSI operations in LCOW guests. Immutable after construction.
35+
linuxGuestSCSI linuxGuestSCSI
36+
37+
// windowsGuestSCSI is the guest-side interface for SCSI operations in WCOW guests. Immutable after construction.
38+
windowsGuestSCSI windowsGuestSCSI
39+
}
40+
41+
// New creates a new [Manager] for the given VM and controllers.
42+
func New(
43+
vmID string,
44+
vmScsi vmSCSI,
45+
linuxGuestScsi linuxGuestSCSI,
46+
windowsGuestScsi windowsGuestSCSI,
47+
numControllers int,
48+
reservedSlots []VMSlot,
49+
) *Manager {
50+
m := &Manager{
51+
vmID: vmID,
52+
numControllers: numControllers,
53+
attachmentMap: make(map[VMSlot]*attachment),
54+
mappingMap: make(map[string]*mapping),
55+
vmSCSI: vmScsi,
56+
linuxGuestSCSI: linuxGuestScsi,
57+
windowsGuestSCSI: windowsGuestScsi,
58+
}
59+
60+
// Pre-populate attachmentMap with reserved slots so they are never allocated.
61+
for _, s := range reservedSlots {
62+
m.attachmentMap[s] = &attachment{
63+
controller: s.Controller,
64+
lun: s.LUN,
65+
state: attachReserved,
66+
partitions: make(map[uint64]*mount),
67+
}
68+
}
69+
70+
return m
71+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//go:build windows && !wcow
2+
3+
package scsi
4+
5+
import (
6+
"context"
7+
"fmt"
8+
9+
"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
10+
)
11+
12+
// mountFmt is the guest path template for SCSI mounts on LCOW. The %d verb
// takes an integer that makes each mount path unique.
// NOTE(review): presumably filled from Manager.nextMountIdx — confirm at the
// call site, which is not visible in this file.
const mountFmt = "/run/mounts/scsi/m%d"
14+
15+
// mountInGuest mounts a SCSI disk partition into the Linux guest at the path
16+
// stored in [mount.guestPath].
17+
func (m *Manager) mountInGuest(ctx context.Context, controller, lun uint, mnt *mount) error {
18+
settings := guestresource.LCOWMappedVirtualDisk{
19+
MountPath: mnt.guestPath,
20+
Controller: uint8(controller),
21+
Lun: uint8(lun),
22+
Partition: mnt.config.Partition,
23+
ReadOnly: mnt.config.ReadOnly,
24+
Encrypted: mnt.config.Encrypted,
25+
Options: mnt.config.Options,
26+
EnsureFilesystem: mnt.config.EnsureFilesystem,
27+
Filesystem: mnt.config.Filesystem,
28+
BlockDev: mnt.config.BlockDev,
29+
}
30+
if err := m.linuxGuestSCSI.AddLCOWMappedVirtualDisk(ctx, settings); err != nil {
31+
return fmt.Errorf("add LCOW mapped virtual disk controller=%d lun=%d: %w", controller, lun, err)
32+
}
33+
return nil
34+
}
35+
36+
// unmountFromGuest unmounts the SCSI disk partition from the Linux guest.
37+
func (m *Manager) unmountFromGuest(ctx context.Context, controller, lun uint, mnt *mount) error {
38+
settings := guestresource.LCOWMappedVirtualDisk{
39+
MountPath: mnt.guestPath,
40+
Controller: uint8(controller),
41+
Lun: uint8(lun),
42+
ReadOnly: mnt.config.ReadOnly,
43+
Partition: mnt.config.Partition,
44+
BlockDev: mnt.config.BlockDev,
45+
}
46+
if err := m.linuxGuestSCSI.RemoveLCOWMappedVirtualDisk(ctx, settings); err != nil {
47+
return fmt.Errorf("remove LCOW mapped virtual disk controller=%d lun=%d path=%q: %w",
48+
controller, lun, mnt.guestPath, err)
49+
}
50+
return nil
51+
}
52+
53+
// unplugFromGuest ejects a SCSI device from the Linux guest before the host
54+
// removes it from the VM.
55+
func (m *Manager) unplugFromGuest(ctx context.Context, controller, lun uint) error {
56+
settings := guestresource.SCSIDevice{
57+
Controller: uint8(controller),
58+
Lun: uint8(lun),
59+
}
60+
61+
// RemoveSCSIDevice sends a guest modification request that the GCS handles
62+
// by first remapping the logical controller number to the actual kernel-visible
63+
// controller index (HCS and the Linux kernel assign them independently), then
64+
// writing "1" to /sys/bus/scsi/devices/<id>/delete. That sysfs write is a
65+
// guest-initiated hot-unplug: the kernel removes the device from its bus and
66+
// flushes any in-flight I/O before the host removes the disk from the VM.
67+
if err := m.linuxGuestSCSI.RemoveSCSIDevice(ctx, settings); err != nil {
68+
return fmt.Errorf("remove scsi device at controller=%d lun=%d from lcow guest: %w", controller, lun, err)
69+
}
70+
return nil
71+
}

0 commit comments

Comments
 (0)