Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ public abstract class AbstractVmInstance implements VmInstance {
APIDestroyVmInstanceMsg.class.getName(),
DestroyVmInstanceMsg.class.getName());

// Registering state: only metadata-related reads, destroy (for cleanup/rollback),
// and ChangeVmMetaDataMsg (for state transitions during registration) are allowed.
allowedOperations.addState(VmInstanceState.Registering,
ChangeVmMetaDataMsg.class.getName(),
APIDestroyVmInstanceMsg.class.getName(),
DestroyVmInstanceMsg.class.getName());

stateChangeChecker.addState(VmInstanceStateEvent.unknown.toString(),
VmInstanceState.Created.toString(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package org.zstack.compute.vm;

import org.springframework.beans.factory.annotation.Autowire;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Configurable;
import org.zstack.core.db.Q;
import org.zstack.header.core.Completion;
import org.zstack.header.core.workflow.FlowTrigger;
import org.zstack.header.core.workflow.NoRollbackFlow;
import org.zstack.header.errorcode.ErrorCode;
import org.zstack.header.vm.MetadataStorageHandler;
import org.zstack.header.vm.VmInstanceConstant;
import org.zstack.header.vm.VmInstanceSpec;
import org.zstack.header.volume.VolumeVO;
import org.zstack.header.volume.VolumeVO_;
import org.zstack.header.volume.VolumeType;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

import java.util.Map;

/**
 * Cleans up a VM's metadata file on primary storage when the VM is expunged
 * (physically deleted).
 *
 * <p>Design notes (Part 02b §8.3):</p>
 * <ul>
 * <li>Runs inside the ExpungeVm flow chain, after the Root/Memory/Cache volume deletion.</li>
 * <li>The metadata location is resolved through the primary storage (PS) of the root volume.</li>
 * <li><b>Best-effort</b>: a failed deletion is only logged at WARN level and never blocks
 * the physical removal of the VM.</li>
 * <li>Dirty rows are removed automatically by FK CASCADE and are not handled by this flow.</li>
 * </ul>
 *
 * <p>Timing (Δ-5): metadata is cleaned up at Expunge (physical delete) rather than at
 * Destroy (soft delete). A destroyed VM can still be brought back via Recover, so deleting
 * earlier would leave the recovered VM without its metadata.</p>
 */
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
public class VmExpungeMetadataFlow extends NoRollbackFlow {
    private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);

    @Autowired
    private MetadataStorageHandler metadataStorageHandler;

    /**
     * Resolves the primary storage of the VM's root volume and asks the metadata storage
     * handler to delete the VM's metadata there. Every path ends in {@code trigger.next()}.
     *
     * <p>The VM_METADATA feature switch is deliberately not consulted: expunge is
     * irreversible, so leftover metadata is always cleaned up on a best-effort basis,
     * even when the feature is turned off.</p>
     *
     * @param trigger flow trigger used to continue the chain
     * @param data    flow data map; must hold the {@link VmInstanceSpec} under
     *                {@code VmInstanceConstant.Params.VmInstanceSpec}
     */
    @Override
    public void run(FlowTrigger trigger, Map data) {
        final VmInstanceSpec vmSpec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString());
        final String vmUuid = vmSpec.getVmInventory().getUuid();
        final String rootVolUuid = vmSpec.getVmInventory().getRootVolumeUuid();

        if (rootVolUuid == null) {
            // The VM is in an intermediate state without a root volume: nothing to locate.
            skipCleanup(trigger, String.format(
                    "[MetadataExpunge] vm[uuid:%s] has no root volume, skipping metadata cleanup", vmUuid));
            return;
        }

        final String primaryStorageUuid = findPrimaryStorageUuid(rootVolUuid);
        if (primaryStorageUuid == null) {
            // The root volume row is gone or carries no PS information.
            skipCleanup(trigger, String.format(
                    "[MetadataExpunge] vm[uuid:%s] root volume[uuid:%s] has no primaryStorageUuid, skipping metadata cleanup",
                    vmUuid, rootVolUuid));
            return;
        }

        deleteMetadataBestEffort(trigger, vmUuid, primaryStorageUuid);
    }

    /** Logs the reason at DEBUG level and moves on to the next flow without touching storage. */
    private void skipCleanup(FlowTrigger trigger, String reason) {
        logger.debug(reason);
        trigger.next();
    }

    /** Reads the primaryStorageUuid column of the given volume; yields null when the query finds no value. */
    private String findPrimaryStorageUuid(String volumeUuid) {
        return Q.New(VolumeVO.class)
                .select(VolumeVO_.primaryStorageUuid)
                .eq(VolumeVO_.uuid, volumeUuid)
                .findValue();
    }

    /** Requests the metadata deletion and continues the chain on both success and failure. */
    private void deleteMetadataBestEffort(final FlowTrigger trigger, final String vmUuid, final String psUuid) {
        logger.info(String.format("[MetadataExpunge] deleting metadata for vm[uuid:%s] on ps[uuid:%s]", vmUuid, psUuid));

        metadataStorageHandler.deleteMetadata(psUuid, vmUuid, new Completion(trigger) {
            @Override
            public void success() {
                logger.info(String.format("[MetadataExpunge] metadata deleted for vm[uuid:%s] on ps[uuid:%s]", vmUuid, psUuid));
                trigger.next();
            }

            @Override
            public void fail(ErrorCode errorCode) {
                // Best-effort: a failure here must not block the physical removal of the VM.
                logger.warn(String.format(
                        "[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s], continuing expunge. Error: %s",
                        vmUuid, psUuid, errorCode));
                trigger.next();
            }
        });
    }
}
135 changes: 135 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,139 @@ public class VmGlobalConfig {
@GlobalConfigValidation(validValues = {"None", "AuthenticAMD"})
@BindResourceConfig(value = {VmInstanceVO.class})
public static GlobalConfig VM_CPUID_VENDOR = new GlobalConfig(CATEGORY, "vm.cpuid.vendor");

// Setting under key "deletion.gcInterval"; validation requires a number greater than 1.
@GlobalConfigValidation(numberGreaterThan = 1)
public static GlobalConfig GC_INTERVAL = new GlobalConfig(CATEGORY, "deletion.gcInterval");

// ---------------------------------------------------------------------------------------
// VM metadata settings.
// NOTE: the time-valued settings below mix units. Most are seconds, but the pendingApi,
// zombieClaim, staleClaim and triggerFlush thresholds are minutes. The unit is spelled out
// in each config key suffix (...Sec / ...Seconds / ...Minutes) and in the description.
// ---------------------------------------------------------------------------------------

// Switch for the VM metadata feature; only the literal values "true" and "false" are accepted.
@GlobalConfigValidation(validValues = {"true", "false"})
public static GlobalConfig VM_METADATA = new GlobalConfig(CATEGORY, "vm.metadata");

// --- Concurrency limits (both must be greater than 0) ---
@GlobalConfigDef(defaultValue = "5", type = Integer.class,
description = "Max concurrent metadata writes per primary storage per MN")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PS_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.ps.maxConcurrent");

@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Max concurrent VM metadata updates globally per MN")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_GLOBAL_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.global.maxConcurrent");

// --- Flush scheduling and retry limit ---
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Initial GC delay in seconds after API success")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_GC_INITIAL_DELAY_SEC = new GlobalConfig(CATEGORY, "vm.metadata.gc.initialDelaySec");

@GlobalConfigDef(defaultValue = "5", type = Integer.class,
description = "Max retry count before giving up metadata flush")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.maxRetry");

// --- Dirty-row poller ---
@GlobalConfigDef(defaultValue = "5", type = Long.class,
description = "Dirty poller interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DIRTY_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.dirty.pollIntervalSec");

@GlobalConfigDef(defaultValue = "20", type = Integer.class,
description = "Max dirty rows to claim per poller cycle")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DIRTY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.dirty.batchSize");

// --- Path fingerprint check ---
@GlobalConfigDef(defaultValue = "300", type = Long.class,
description = "Path fingerprint check interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PATH_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.pathCheck.intervalSec");

@GlobalConfigDef(defaultValue = "500", type = Integer.class,
description = "Path fingerprint check keyset pagination batch size")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PATH_CHECK_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.pathCheck.batchSize");

// --- Full refresh after upgrade ---
@GlobalConfigDef(defaultValue = "600", type = Long.class,
description = "Delay in seconds before full refresh after upgrade, waiting for rolling upgrade to complete")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshDelaySec");

@GlobalConfigDef(defaultValue = "1000", type = Integer.class,
description = "Upgrade full refresh SQL batch size")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshBatchSize");

// --- Management-node takeover and stale recovery ---
@GlobalConfigDef(defaultValue = "5", type = Long.class,
description = "Delay in seconds after nodeLeft before takeover, reduces zombie MN race condition")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_NODE_LEFT_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.nodeLeft.delaySec");

@GlobalConfigDef(defaultValue = "1800", type = Long.class,
description = "MetadataStaleRecoveryTask scan interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.intervalSec");

@GlobalConfigDef(defaultValue = "100", type = Integer.class,
description = "MetadataStaleRecoveryTask rows per scan batch")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.batchSize");

@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Max consecutive stale recovery cycles per VM before circuit-break")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.maxCycles");

// Unit is MINUTES (see key suffix), unlike most settings above.
@GlobalConfigDef(defaultValue = "45", type = Long.class,
description = "Pending API timeout cleanup threshold in minutes")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PENDING_API_TIMEOUT = new GlobalConfig(CATEGORY, "vm.metadata.pendingApi.timeoutMinutes");

// --- Exponential backoff parameters for retries ---
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Exponential backoff base delay in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_RETRY_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.retry.baseDelaySeconds");

@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Exponential backoff max exponent")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_RETRY_MAX_EXPONENT = new GlobalConfig(CATEGORY, "vm.metadata.retry.maxExponent");

// --- Batched initialisation when the feature is switched from false to true ---
@GlobalConfigDef(defaultValue = "200", type = Integer.class,
description = "Batch size per round when enabling metadata (false to true init)")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_INIT_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.init.batchSize");

@GlobalConfigDef(defaultValue = "5", type = Long.class,
description = "Delay in seconds between init batches to prevent IO storm")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_INIT_BATCH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.init.batchDelaySec");

// --- Orphan detection and claim-age thresholds ---
@GlobalConfigDef(defaultValue = "3600", type = Long.class,
description = "Orphan metadata detection interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.orphanCheck.intervalSec");

// The three thresholds below are in MINUTES. Defaults: 15 (zombie), 30 (stale), 10 (inline takeover).
@GlobalConfigDef(defaultValue = "15", type = Long.class,
description = "Zombie claim threshold in minutes: claimed dirty rows older than this are released")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.zombieClaim.thresholdMinutes");

@GlobalConfigDef(defaultValue = "30", type = Long.class,
description = "Stale claim threshold in minutes for background recovery task")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.staleClaim.thresholdMinutes");

@GlobalConfigDef(defaultValue = "10", type = Long.class,
description = "Inline stale claim takeover threshold in minutes for triggerFlushForVm hot path")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_TRIGGER_FLUSH_STALE = new GlobalConfig(CATEGORY, "vm.metadata.triggerFlush.staleMinutes");

// --- deleteMetadata retry settings ---
// NOTE(review): the description names "ExpungeVmInstanceFlow"; confirm that this is the
// intended consumer of these two settings and that the class name is current.
@GlobalConfigDef(defaultValue = "3", type = Integer.class,
description = "Max retry count for deleteMetadata in ExpungeVmInstanceFlow")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DELETE_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.delete.maxRetry");

@GlobalConfigDef(defaultValue = "30", type = Long.class,
description = "Base delay in seconds for deleteMetadata retry backoff")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DELETE_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.delete.baseDelaySec");

// Bookkeeping value described as internal-use only: defaults to the empty string and,
// unlike the settings above, carries no @GlobalConfigValidation.
@GlobalConfigDef(defaultValue = "", type = String.class,
description = "Last completed upgrade refresh version, prevents duplicate triggers across MNs. Internal use only")
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.zstack.header.message.APIMessage;
import org.zstack.header.network.l2.*;
import org.zstack.header.network.l3.*;
import org.zstack.header.storage.primary.APIRegisterVmInstanceMsg;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO_;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
Expand Down
Loading