Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions lib/hypervisor/firecracker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,21 @@ type snapshotCreateParams struct {

// snapshotLoadParams is the JSON body that loadSnapshot sends with
// PUT /snapshot/load. Exactly one of MemFilePath and MemBackend is expected
// to be populated: toSnapshotLoadParams sets MemBackend when a uffd socket
// path is supplied and MemFilePath otherwise.
type snapshotLoadParams struct {
// MemFilePath is the path of the snapshot's guest memory file. It is left
// empty (and therefore omitted from the JSON) when MemBackend is set.
MemFilePath string `json:"mem_file_path,omitempty"`
// MemBackend, when non-nil, selects an alternative memory backend instead
// of the mem-file path. nil means the field is omitted from the request.
MemBackend *memBackend `json:"mem_backend,omitempty"`
// SnapshotPath is the path of the snapshot state file; always sent.
SnapshotPath string `json:"snapshot_path"`
// EnableDiffSnapshots is set to true by toSnapshotLoadParams.
EnableDiffSnapshots bool `json:"enable_diff_snapshots,omitempty"`
// ResumeVM is set to false by toSnapshotLoadParams, so it is omitted from
// the request. NOTE(review): presumably the VM is resumed by a separate
// call after load — confirm against the caller.
ResumeVM bool `json:"resume_vm,omitempty"`
// NetworkOverrides carries the per-interface host device overrides that
// are forwarded unchanged from the caller.
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
}

// memBackend selects how firecracker materializes guest memory during
// restore. backend_type "Uffd" hands page-fault handling off to a
// userfaultfd page server reachable at backend_path (Unix domain socket).
//
// Both fields are always serialized (no omitempty); the struct is only
// attached to a load request when a uffd socket path has been configured.
type memBackend struct {
// BackendType names the backend kind. The only value produced by this
// package's visible code is "Uffd".
BackendType string `json:"backend_type"`
// BackendPath is the location the backend is reached at; for "Uffd" this
// is the page server's Unix domain socket path.
BackendPath string `json:"backend_path"`
}

type networkOverride struct {
IfaceID string `json:"iface_id"`
HostDevName string `json:"host_dev_name"`
Expand All @@ -103,6 +112,11 @@ type instanceInfo struct {
// restoreMetadata is the per-instance restore state that PrepareFork
// persists (via saveRestoreMetadataState) and that RestoreVM consults when
// it issues the snapshot load request.
type restoreMetadata struct {
// NetworkOverrides is passed through to loadSnapshot as the
// network_overrides of the load request.
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
// SnapshotSourceDataDir is consumed by withSnapshotSourceDirAlias during
// restore. NOTE(review): presumably the data dir of the instance the
// snapshot was originally taken from — confirm against PrepareFork.
SnapshotSourceDataDir string `json:"snapshot_source_data_dir,omitempty"`
// UffdSocketPath, when non-empty, makes loadSnapshot send a Uffd
// mem_backend pointing at the page server instead of letting
// firecracker mmap the mem-file directly. PrepareFork records it
// per fork so RestoreVM can pick it up after a hypeman restart.
UffdSocketPath string `json:"uffd_socket_path,omitempty"`
}

func toBootSource(cfg hypervisor.VMConfig) bootSource {
Expand Down Expand Up @@ -212,14 +226,25 @@ func toSnapshotCreateParams(snapshotDir string) snapshotCreateParams {
}
}

func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride) snapshotLoadParams {
return snapshotLoadParams{
MemFilePath: snapshotMemoryPath(snapshotDir),
// toSnapshotLoadParams assembles the snapshot/load request body for the
// snapshot stored under snapshotDir.
//
// networkOverrides is forwarded as-is. uffdSocketPath chooses the memory
// source: when empty the request names the snapshot's mem-file; when set the
// request carries a "Uffd" memory backend pointing at that socket and no
// mem-file path. Diff snapshots are always enabled and the VM is not resumed
// by the load itself.
func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride, uffdSocketPath string) snapshotLoadParams {
	load := snapshotLoadParams{
		SnapshotPath:        snapshotStatePath(snapshotDir),
		EnableDiffSnapshots: true,
		ResumeVM:            false,
		NetworkOverrides:    networkOverrides,
	}

	// No page server configured: firecracker reads guest memory straight
	// from the snapshot's mem-file.
	if uffdSocketPath == "" {
		load.MemFilePath = snapshotMemoryPath(snapshotDir)
		return load
	}

	// Firecracker rejects load requests that set both mem_file_path and a
	// uffd backend. The page server takes the file path through its own
	// configuration, so mem_file_path stays empty here.
	backend := memBackend{
		BackendType: "Uffd",
		BackendPath: uffdSocketPath,
	}
	load.MemBackend = &backend
	return load
}

func snapshotStatePath(snapshotDir string) string {
Expand Down
9 changes: 8 additions & 1 deletion lib/hypervisor/firecracker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,19 @@ func TestSnapshotParamPaths(t *testing.T) {

load := toSnapshotLoadParams("/tmp/snapshot-latest", []networkOverride{
{IfaceID: "eth0", HostDevName: "hype-abc123"},
})
}, "")
assert.Equal(t, "/tmp/snapshot-latest/state", load.SnapshotPath)
assert.Equal(t, "/tmp/snapshot-latest/memory", load.MemFilePath)
assert.Nil(t, load.MemBackend)
assert.True(t, load.EnableDiffSnapshots)
assert.False(t, load.ResumeVM)
require.Len(t, load.NetworkOverrides, 1)

loadUffd := toSnapshotLoadParams("/tmp/snapshot-latest", nil, "/run/uffd/abc.sock")
assert.Equal(t, "", loadUffd.MemFilePath, "mem_file_path must be empty when a uffd backend is set")
require.NotNil(t, loadUffd.MemBackend)
assert.Equal(t, "Uffd", loadUffd.MemBackend.BackendType)
assert.Equal(t, "/run/uffd/abc.sock", loadUffd.MemBackend.BackendPath)
}

func TestToBalloonConfig(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions lib/hypervisor/firecracker/firecracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ func (f *Firecracker) instanceStart(ctx context.Context) error {
return f.postAction(ctx, "InstanceStart")
}

func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides)
func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride, uffdSocketPath string) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides, uffdSocketPath)
if _, err := f.do(ctx, http.MethodPut, "/snapshot/load", params, http.StatusNoContent); err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions lib/hypervisor/firecracker/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ func (s *Starter) PrepareFork(ctx context.Context, req hypervisor.ForkPrepareReq
changed = true
}
}
if meta.UffdSocketPath != req.UffdSocketPath {
meta.UffdSocketPath = req.UffdSocketPath
changed = true
}

if changed {
if err := saveRestoreMetadataState(instanceDir, meta); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/firecracker/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
snapshotSourceAliasMu.Lock()
defer snapshotSourceAliasMu.Unlock()
return withSnapshotSourceDirAlias(meta, filepath.Dir(socketPath), func() error {
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides)
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides, meta.UffdSocketPath)
})
}()
if err != nil {
Expand Down
6 changes: 6 additions & 0 deletions lib/hypervisor/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ type ForkPrepareRequest struct {

SerialLogPath string
Network *ForkNetworkConfig

// UffdSocketPath is set when the fork should restore from a userfaultfd
// page-server socket instead of mmap'ing its mem-file directly. The
// hypervisor records this so RestoreVM can attach a uffd memory backend
// in the snapshot/load request. Empty means use the default mmap path.
UffdSocketPath string
}

// ForkPrepareResult describes which optional fork rewrites were actually applied.
Expand Down
8 changes: 7 additions & 1 deletion lib/instances/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,15 @@ func (m *manager) deleteInstance(
}

// 9. If this instance was a fork of a template, drop the template's
// fork refcount so the template can eventually be deleted.
// fork refcount so the template can eventually be deleted, and
// detach it from the uffd page server if one is running.
if stored.ForkOfTemplate != "" {
m.dropTemplateForkRefcount(ctx, stored.ForkOfTemplate)
if m.uffd != nil {
if err := m.uffd.releaseUffdForFork(stored.ForkOfTemplate, id); err != nil {
log.WarnContext(ctx, "failed to release uffd page server for fork", "instance_id", id, "template_id", stored.ForkOfTemplate, "error", err)
}
}
}

log.InfoContext(ctx, "instance deleted successfully", "instance_id", id)
Expand Down
109 changes: 109 additions & 0 deletions lib/instances/firecracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"strings"
"syscall"
"testing"
"time"

Expand Down Expand Up @@ -544,3 +545,111 @@ func TestFirecrackerSnapshotFeature(t *testing.T) {
forkName: "fc-snapshot-fork",
})
}

// TestFirecrackerForkFromTemplate exercises the full template-driven fork
// path: standby a firecracker source, promote it to a template, fork off it,
// and assert the fork (a) reaches Running, (b) has its mem-file hardlinked
// to the template's snapshot mem-file (the fan-out optimisation), (c) bumped
// the template's fork refcount, (d) registered with the per-template uffd
// page server, and (e) on delete, drops the refcount and detaches from uffd.
//
// Cleanup for both the source and the fork is registered as soon as each
// instance has been created, so a failure while waiting for Running does not
// leak the instance.
func TestFirecrackerForkFromTemplate(t *testing.T) {
	t.Parallel()
	requireFirecrackerIntegrationPrereqs(t)

	mgr, tmpDir := setupTestManagerForFirecracker(t)
	ctx := context.Background()
	p := paths.New(tmpDir)

	imageManager, err := images.NewManager(p, 1, nil)
	require.NoError(t, err)
	createNginxImageAndWait(t, ctx, imageManager)

	systemManager := system.NewManager(p)
	require.NoError(t, systemManager.EnsureSystemFiles(ctx))
	require.NoError(t, mgr.networkManager.Initialize(ctx, nil))

	source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{
		Name:           "fc-tpl-src",
		Image:          integrationTestImageRef(t, "docker.io/library/nginx:alpine"),
		Size:           1024 * 1024 * 1024,
		HotplugSize:    256 * 1024 * 1024,
		OverlaySize:    5 * 1024 * 1024 * 1024,
		Vcpus:          1,
		NetworkEnabled: true,
		Hypervisor:     hypervisor.TypeFirecracker,
	})
	require.NoError(t, err)
	// Register cleanup before waiting: if the instance never reaches Running
	// the require below aborts the test, and the instance must still be
	// deleted. Best-effort, so the delete error is intentionally ignored.
	sourceID := source.Id
	t.Cleanup(func() { _ = mgr.DeleteInstance(context.Background(), sourceID) })
	_, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
	require.NoError(t, err)

	// Standby is a precondition for promotion.
	source, err = mgr.StandbyInstance(ctx, sourceID, StandbyInstanceRequest{})
	require.NoError(t, err)
	require.Equal(t, StateStandby, source.State)
	require.True(t, source.HasSnapshot)

	tpl, err := mgr.promoteToTemplate(ctx, sourceID, PromoteToTemplateRequest{Name: "fc-tpl-e2e"})
	require.NoError(t, err)
	require.NotNil(t, tpl)
	require.Equal(t, sourceID, tpl.SourceInstanceID)
	require.Equal(t, hypervisor.TypeFirecracker, tpl.HypervisorType)
	require.Equal(t, 0, tpl.ForkCount)

	// Fork from the template (no source instance id passed).
	forked, err := mgr.ForkInstance(ctx, "", ForkInstanceRequest{
		Name:        "fc-tpl-fork",
		TemplateID:  tpl.ID,
		TargetState: StateRunning,
	})
	require.NoError(t, err)
	// Same reasoning as for the source: register cleanup before any further
	// assertion can abort the test. deletedFork suppresses the redundant
	// delete once the test has removed the fork itself.
	forkID := forked.Id
	deletedFork := false
	t.Cleanup(func() {
		if !deletedFork {
			_ = mgr.DeleteInstance(context.Background(), forkID)
		}
	})
	forked, err = waitForInstanceState(ctx, mgr, forkID, StateRunning, integrationTestTimeout(30*time.Second))
	require.NoError(t, err)
	require.Equal(t, StateRunning, forked.State)

	// (b) The fork's mem-file must share the source's inode (hardlink), not
	// be a copy. We can't compare paths because the link is by inode; we
	// compare st_ino + st_dev between the two instances' mem-files.
	//
	// Firecracker retains the post-restore snapshot dir as snapshot-base
	// (see restoreRetainedSnapshotBase), so after the Standby -> Running
	// transition the hardlink lives under snapshot-base/, not snapshot-latest/.
	// Hardlinks survive the rename because they bind to the inode.
	forkMemPath := filepath.Join(p.InstanceSnapshotBase(forkID), templateSharedMemFileName)
	srcMemPath := filepath.Join(p.InstanceSnapshotLatest(sourceID), templateSharedMemFileName)
	// Lstat, not Stat: Stat follows symlinks, so a symlink to the source
	// mem-file would report a regular file with the source's inode and the
	// checks below could never tell it apart from a hardlink.
	forkInfo, err := os.Lstat(forkMemPath)
	require.NoError(t, err, "fork mem-file should exist at snapshot-base/memory after restore")
	assert.True(t, forkInfo.Mode().IsRegular(), "fork mem-file should be a regular file (hardlink), not a symlink")
	srcInfo, err := os.Stat(srcMemPath)
	require.NoError(t, err)
	// Checked assertions so an unexpected FileInfo implementation fails the
	// test with a message instead of panicking.
	forkSys, ok := forkInfo.Sys().(*syscall.Stat_t)
	require.True(t, ok, "fork mem-file FileInfo.Sys() should be *syscall.Stat_t")
	srcSys, ok := srcInfo.Sys().(*syscall.Stat_t)
	require.True(t, ok, "source mem-file FileInfo.Sys() should be *syscall.Stat_t")
	assert.Equal(t, srcSys.Ino, forkSys.Ino, "fork mem-file should share the source's inode (hardlink, not copy)")
	assert.Equal(t, srcSys.Dev, forkSys.Dev, "fork mem-file should be on the same filesystem as source")

	// (c) Refcount on the template must be bumped to 1.
	tplAfterFork, err := mgr.getTemplate(ctx, tpl.ID)
	require.NoError(t, err)
	assert.Equal(t, 1, tplAfterFork.ForkCount, "template fork refcount should be 1 after one fork")

	// (d) The per-template uffd page server should be tracking this fork.
	require.NotNil(t, mgr.uffd)
	assert.True(t, mgr.uffd.hasFork(tpl.ID, forkID), "uffd tracker should report fork as registered against its template")

	// (e) Deleting the fork drops the refcount and detaches from uffd.
	require.NoError(t, mgr.DeleteInstance(ctx, forkID))
	deletedFork = true

	tplAfterDelete, err := mgr.getTemplate(ctx, tpl.ID)
	require.NoError(t, err)
	assert.Equal(t, 0, tplAfterDelete.ForkCount, "template fork refcount should drop back to 0")
	assert.False(t, mgr.uffd.hasFork(tpl.ID, forkID), "uffd tracker should no longer track the deleted fork")
}
10 changes: 10 additions & 0 deletions lib/instances/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,15 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
if forkMeta.NetworkEnabled {
netCfg = &hypervisor.ForkNetworkConfig{TAPDevice: network.GenerateTAPName(forkID)}
}
uffdSocketPath, err := m.acquireForkUffdIfApplicable(ctx, tpl, forkID, stored.HypervisorType)
if err != nil {
return nil, fmt.Errorf("attach uffd page server: %w", err)
}
if uffdSocketPath != "" {
cu.Add(func() {
_ = m.uffd.releaseUffdForFork(tpl.ID, forkID)
})
}
if _, err := starter.PrepareFork(ctx, hypervisor.ForkPrepareRequest{
SnapshotConfigPath: snapshotConfigPath,
SourceDataDir: stored.DataDir,
Expand All @@ -344,6 +353,7 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
VsockSocket: forkMeta.VsockSocket,
SerialLogPath: m.paths.InstanceAppLog(forkID),
Network: netCfg,
UffdSocketPath: uffdSocketPath,
}); err != nil {
if errors.Is(err, hypervisor.ErrNotSupported) {
return nil, fmt.Errorf("%w: fork is not supported for hypervisor %s", ErrNotSupported, stored.HypervisorType)
Expand Down
6 changes: 6 additions & 0 deletions lib/instances/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ type manager struct {
// fork/delete). Constructed lazily so existing managers without
// template support keep working unchanged.
templateRegistry templates.Registry

// uffd is the per-template userfaultfd page-server tracker. nil on
// non-Linux hosts; on Linux it is started lazily for forks that
// resolve to a template and torn down once no forks remain.
uffd *uffdTracker
}

// platformStarters is populated by platform-specific init functions.
Expand Down Expand Up @@ -209,6 +214,7 @@ func NewManagerWithConfig(p *paths.Paths, imageManager images.Manager, systemMan
nativeCodecPaths: make(map[string]string),
lifecycleEvents: newLifecycleSubscribersWithBufferSize(managerConfig.LifecycleEventBufferSize),
templateRegistry: templates.NewFileRegistry(p.TemplatesDir()),
uffd: newUffdTracker(),
}
m.deleteSnapshotFn = m.deleteSnapshot

Expand Down
20 changes: 14 additions & 6 deletions lib/instances/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,22 @@ func (m *manager) resolveForkFromTemplateRequest(ctx context.Context, instanceID
}

// installForkSharedMemFile arranges the fork's snapshot directory so the
// guest mem-file is a symlink into the template's snapshot directory
// guest mem-file is a hardlink to the template's snapshot mem-file
// instead of a per-fork copy. firecracker mmaps the mem-file MAP_PRIVATE
// during restore, so all forks COW from the same backing file.
// during restore, so all forks COW from the same backing inode.
//
// Layout: dst is the fork's data dir. The snapshot dir is at
// <dst>/snapshots/snapshot-latest, and the mem-file lives at
// <snapshot dir>/memory. The symlink target is the template's source
// instance's standby snapshot mem-file.
// <snapshot dir>/memory. The hardlink shares the inode with the
// template's source instance's standby snapshot mem-file.
//
// We use a hardlink rather than a symlink because RestoreVM temporarily
// aliases the source data dir to the fork data dir while firecracker
// loads the snapshot (see withSnapshotSourceDirAlias). A symlink whose
// target traverses the source dir would resolve back into the fork dir
// during that window and trip ELOOP; a hardlink resolves by inode so
// the alias has no effect on it. Hardlinks require both paths on the
// same filesystem, which holds for our standard data-dir layout.
func (m *manager) installForkSharedMemFile(forkDataDir string, tpl *templates.Template) error {
if tpl == nil {
return nil
Expand All @@ -276,8 +284,8 @@ func (m *manager) installForkSharedMemFile(forkDataDir string, tpl *templates.Te
// Tolerate a leftover entry (e.g. from a partial copy that wasn't fully
// skipped on a different filesystem layout).
_ = os.Remove(dstMem)
if err := os.Symlink(srcMem, dstMem); err != nil {
return fmt.Errorf("symlink shared mem-file: %w", err)
if err := os.Link(srcMem, dstMem); err != nil {
return fmt.Errorf("hardlink shared mem-file: %w", err)
}
return nil
}
Expand Down
6 changes: 3 additions & 3 deletions lib/instances/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ type ForkInstanceRequest struct {

// TemplateID resolves the source instance from the template registry by
// id-or-name. When set, the source instance id passed to ForkInstance is
// ignored (must be empty). The fork's mem-file is shared with the
// template's mem-file via symlink instead of being copied per-fork, so
// many forks fan out from the same warm guest memory.
// ignored (must be empty). The fork's mem-file is hardlinked to the
// template's mem-file instead of being copied per-fork, so many forks
// fan out from the same warm guest memory.
TemplateID string
}

Expand Down
Loading
Loading