Skip to content

Commit a57104d

Browse files
committed
<fix>[compute]: meta
Resolves: ZSV-10000 Change-Id: I746a6d7266686d637a6d76756861636a74766b74
1 parent e3362fe commit a57104d

157 files changed

Lines changed: 15296 additions & 35 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

compute/src/main/java/org/zstack/compute/vm/AbstractVmInstance.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,12 @@ public abstract class AbstractVmInstance implements VmInstance {
205205
APIDestroyVmInstanceMsg.class.getName(),
206206
DestroyVmInstanceMsg.class.getName());
207207

208+
// Registering state: only ChangeVmMetaDataMsg (for state transitions during registration)
209+
// and the destroy messages (for cleanup/rollback) are allowed; no read message is registered here.
210+
allowedOperations.addState(VmInstanceState.Registering,
211+
ChangeVmMetaDataMsg.class.getName(),
212+
APIDestroyVmInstanceMsg.class.getName(),
213+
DestroyVmInstanceMsg.class.getName());
208214

209215
stateChangeChecker.addState(VmInstanceStateEvent.unknown.toString(),
210216
VmInstanceState.Created.toString(),
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package org.zstack.compute.vm;
2+
3+
import org.springframework.beans.factory.annotation.Autowire;
4+
import org.springframework.beans.factory.annotation.Autowired;
5+
import org.springframework.beans.factory.annotation.Configurable;
6+
import org.zstack.core.db.Q;
7+
import org.zstack.header.core.Completion;
8+
import org.zstack.header.core.workflow.FlowTrigger;
9+
import org.zstack.header.core.workflow.NoRollbackFlow;
10+
import org.zstack.header.errorcode.ErrorCode;
11+
import org.zstack.header.vm.MetadataStorageHandler;
12+
import org.zstack.header.vm.VmInstanceConstant;
13+
import org.zstack.header.vm.VmInstanceSpec;
14+
import org.zstack.header.volume.VolumeVO;
15+
import org.zstack.header.volume.VolumeVO_;
16+
import org.zstack.header.volume.VolumeType;
17+
import org.zstack.utils.Utils;
18+
import org.zstack.utils.logging.CLogger;
19+
20+
import java.util.Map;
21+
22+
/**
23+
* VM 彻底删除(Expunge)时清理主存储上的元数据文件。
24+
*
25+
* <p>设计要点(Part 02b §8.3):</p>
26+
* <ul>
27+
* <li>在 ExpungeVm 流程链中执行,位于 Root/Memory/Cache Volume 删除之后</li>
28+
* <li>通过根卷所在 PS 定位元数据位置</li>
29+
* <li><b>best-effort</b>:删除失败仅 WARN 日志,不阻塞 VM 物理清除</li>
30+
* <li>dirty 行由 FK CASCADE 自动清理,本 Flow 不处理</li>
31+
* </ul>
32+
*
33+
* <p>删除时机说明(Δ-5):元数据在 Expunge(物理删除)而非 Destroy(软删除)
34+
* 阶段清理。Destroy 时 VM 可通过 Recover 恢复,过早删除会导致恢复后元数据丢失。</p>
35+
*/
36+
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
37+
public class VmExpungeMetadataFlow extends NoRollbackFlow {
38+
private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);
39+
40+
@Autowired
41+
private MetadataStorageHandler metadataStorageHandler;
42+
43+
@Override
44+
public void run(FlowTrigger trigger, Map data) {
45+
final VmInstanceSpec spec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString());
46+
final String vmUuid = spec.getVmInventory().getUuid();
47+
48+
// 功能开关检查:即使功能关闭,也尝试清理已有的元数据文件(best-effort)
49+
// 不检查 VM_METADATA 开关——Expunge 是不可逆操作,应始终尝试清理残留
50+
51+
// 通过根卷查找 PS UUID
52+
String rootVolumeUuid = spec.getVmInventory().getRootVolumeUuid();
53+
if (rootVolumeUuid == null) {
54+
// VM 处于中间状态,无根卷,跳过
55+
logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] has no root volume, skipping metadata cleanup", vmUuid));
56+
trigger.next();
57+
return;
58+
}
59+
60+
String psUuid = Q.New(VolumeVO.class)
61+
.eq(VolumeVO_.uuid, rootVolumeUuid)
62+
.select(VolumeVO_.primaryStorageUuid)
63+
.findValue();
64+
65+
if (psUuid == null) {
66+
// 根卷已被删除或无 PS 信息,跳过
67+
logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] root volume[uuid:%s] has no primaryStorageUuid, " +
68+
"skipping metadata cleanup", vmUuid, rootVolumeUuid));
69+
trigger.next();
70+
return;
71+
}
72+
73+
logger.info(String.format("[MetadataExpunge] deleting metadata for vm[uuid:%s] on ps[uuid:%s]", vmUuid, psUuid));
74+
75+
metadataStorageHandler.deleteMetadata(psUuid, vmUuid, new Completion(trigger) {
76+
@Override
77+
public void success() {
78+
logger.info(String.format("[MetadataExpunge] metadata deleted for vm[uuid:%s] on ps[uuid:%s]", vmUuid, psUuid));
79+
trigger.next();
80+
}
81+
82+
@Override
83+
public void fail(ErrorCode errorCode) {
84+
// best-effort:失败不阻塞 VM 物理清除
85+
logger.warn(String.format("[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s], " +
86+
"continuing expunge. Error: %s", vmUuid, psUuid, errorCode));
87+
trigger.next();
88+
}
89+
});
90+
}
91+
}

compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,139 @@ public class VmGlobalConfig {
133133
@GlobalConfigValidation(validValues = {"None", "AuthenticAMD"})
134134
@BindResourceConfig(value = {VmInstanceVO.class})
135135
public static GlobalConfig VM_CPUID_VENDOR = new GlobalConfig(CATEGORY, "vm.cpuid.vendor");
136+
137+
@GlobalConfigValidation(numberGreaterThan = 1)
138+
public static GlobalConfig GC_INTERVAL = new GlobalConfig(CATEGORY, "deletion.gcInterval");
139+
140+
@GlobalConfigValidation(validValues = {"true", "false"})
141+
public static GlobalConfig VM_METADATA = new GlobalConfig(CATEGORY, "vm.metadata");
142+
143+
@GlobalConfigDef(defaultValue = "5", type = Integer.class,
144+
description = "Max concurrent metadata writes per primary storage per MN")
145+
@GlobalConfigValidation(numberGreaterThan = 0)
146+
public static GlobalConfig VM_METADATA_PS_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.ps.maxConcurrent");
147+
148+
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
149+
description = "Max concurrent VM metadata updates globally per MN")
150+
@GlobalConfigValidation(numberGreaterThan = 0)
151+
public static GlobalConfig VM_METADATA_GLOBAL_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.global.maxConcurrent");
152+
153+
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
154+
description = "Initial GC delay in seconds after API success")
155+
@GlobalConfigValidation(numberGreaterThan = 0)
156+
public static GlobalConfig VM_METADATA_GC_INITIAL_DELAY_SEC = new GlobalConfig(CATEGORY, "vm.metadata.gc.initialDelaySec");
157+
158+
@GlobalConfigDef(defaultValue = "5", type = Integer.class,
159+
description = "Max retry count before giving up metadata flush")
160+
@GlobalConfigValidation(numberGreaterThan = 0)
161+
public static GlobalConfig VM_METADATA_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.maxRetry");
162+
163+
@GlobalConfigDef(defaultValue = "5", type = Long.class,
164+
description = "Dirty poller interval in seconds")
165+
@GlobalConfigValidation(numberGreaterThan = 0)
166+
public static GlobalConfig VM_METADATA_DIRTY_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.dirty.pollIntervalSec");
167+
168+
@GlobalConfigDef(defaultValue = "20", type = Integer.class,
169+
description = "Max dirty rows to claim per poller cycle")
170+
@GlobalConfigValidation(numberGreaterThan = 0)
171+
public static GlobalConfig VM_METADATA_DIRTY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.dirty.batchSize");
172+
173+
@GlobalConfigDef(defaultValue = "300", type = Long.class,
174+
description = "Path fingerprint check interval in seconds")
175+
@GlobalConfigValidation(numberGreaterThan = 0)
176+
public static GlobalConfig VM_METADATA_PATH_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.pathCheck.intervalSec");
177+
178+
@GlobalConfigDef(defaultValue = "500", type = Integer.class,
179+
description = "Path fingerprint check keyset pagination batch size")
180+
@GlobalConfigValidation(numberGreaterThan = 0)
181+
public static GlobalConfig VM_METADATA_PATH_CHECK_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.pathCheck.batchSize");
182+
183+
@GlobalConfigDef(defaultValue = "600", type = Long.class,
184+
description = "Delay in seconds before full refresh after upgrade, waiting for rolling upgrade to complete")
185+
@GlobalConfigValidation(numberGreaterThan = 0)
186+
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshDelaySec");
187+
188+
@GlobalConfigDef(defaultValue = "1000", type = Integer.class,
189+
description = "Upgrade full refresh SQL batch size")
190+
@GlobalConfigValidation(numberGreaterThan = 0)
191+
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshBatchSize");
192+
193+
@GlobalConfigDef(defaultValue = "5", type = Long.class,
194+
description = "Delay in seconds after nodeLeft before takeover, reduces zombie MN race condition")
195+
@GlobalConfigValidation(numberGreaterThan = 0)
196+
public static GlobalConfig VM_METADATA_NODE_LEFT_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.nodeLeft.delaySec");
197+
198+
@GlobalConfigDef(defaultValue = "1800", type = Long.class,
199+
description = "MetadataStaleRecoveryTask scan interval in seconds")
200+
@GlobalConfigValidation(numberGreaterThan = 0)
201+
public static GlobalConfig VM_METADATA_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.intervalSec");
202+
203+
@GlobalConfigDef(defaultValue = "100", type = Integer.class,
204+
description = "MetadataStaleRecoveryTask rows per scan batch")
205+
@GlobalConfigValidation(numberGreaterThan = 0)
206+
public static GlobalConfig VM_METADATA_STALE_RECOVERY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.batchSize");
207+
208+
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
209+
description = "Max consecutive stale recovery cycles per VM before circuit-break")
210+
@GlobalConfigValidation(numberGreaterThan = 0)
211+
public static GlobalConfig VM_METADATA_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.maxCycles");
212+
213+
@GlobalConfigDef(defaultValue = "45", type = Long.class,
214+
description = "Pending API timeout cleanup threshold in minutes")
215+
@GlobalConfigValidation(numberGreaterThan = 0)
216+
public static GlobalConfig VM_METADATA_PENDING_API_TIMEOUT = new GlobalConfig(CATEGORY, "vm.metadata.pendingApi.timeoutMinutes");
217+
218+
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
219+
description = "Exponential backoff base delay in seconds")
220+
@GlobalConfigValidation(numberGreaterThan = 0)
221+
public static GlobalConfig VM_METADATA_RETRY_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.retry.baseDelaySeconds");
222+
223+
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
224+
description = "Exponential backoff max exponent")
225+
@GlobalConfigValidation(numberGreaterThan = 0)
226+
public static GlobalConfig VM_METADATA_RETRY_MAX_EXPONENT = new GlobalConfig(CATEGORY, "vm.metadata.retry.maxExponent");
227+
228+
@GlobalConfigDef(defaultValue = "200", type = Integer.class,
229+
description = "Batch size per round when enabling metadata (false to true init)")
230+
@GlobalConfigValidation(numberGreaterThan = 0)
231+
public static GlobalConfig VM_METADATA_INIT_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.init.batchSize");
232+
233+
@GlobalConfigDef(defaultValue = "5", type = Long.class,
234+
description = "Delay in seconds between init batches to prevent IO storm")
235+
@GlobalConfigValidation(numberGreaterThan = 0)
236+
public static GlobalConfig VM_METADATA_INIT_BATCH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.init.batchDelaySec");
237+
238+
@GlobalConfigDef(defaultValue = "3600", type = Long.class,
239+
description = "Orphan metadata detection interval in seconds")
240+
@GlobalConfigValidation(numberGreaterThan = 0)
241+
public static GlobalConfig VM_METADATA_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.orphanCheck.intervalSec");
242+
243+
@GlobalConfigDef(defaultValue = "15", type = Long.class,
244+
description = "Zombie claim threshold in minutes: claimed dirty rows older than this are released")
245+
@GlobalConfigValidation(numberGreaterThan = 0)
246+
public static GlobalConfig VM_METADATA_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.zombieClaim.thresholdMinutes");
247+
248+
@GlobalConfigDef(defaultValue = "30", type = Long.class,
249+
description = "Stale claim threshold in minutes for background recovery task")
250+
@GlobalConfigValidation(numberGreaterThan = 0)
251+
public static GlobalConfig VM_METADATA_STALE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.staleClaim.thresholdMinutes");
252+
253+
@GlobalConfigDef(defaultValue = "10", type = Long.class,
254+
description = "Inline stale claim takeover threshold in minutes for triggerFlushForVm hot path")
255+
@GlobalConfigValidation(numberGreaterThan = 0)
256+
public static GlobalConfig VM_METADATA_TRIGGER_FLUSH_STALE = new GlobalConfig(CATEGORY, "vm.metadata.triggerFlush.staleMinutes");
257+
258+
@GlobalConfigDef(defaultValue = "3", type = Integer.class,
259+
description = "Max retry count for deleteMetadata in ExpungeVmInstanceFlow")
260+
@GlobalConfigValidation(numberGreaterThan = 0)
261+
public static GlobalConfig VM_METADATA_DELETE_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.delete.maxRetry");
262+
263+
@GlobalConfigDef(defaultValue = "30", type = Long.class,
264+
description = "Base delay in seconds for deleteMetadata retry backoff")
265+
@GlobalConfigValidation(numberGreaterThan = 0)
266+
public static GlobalConfig VM_METADATA_DELETE_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.delete.baseDelaySec");
267+
268+
@GlobalConfigDef(defaultValue = "", type = String.class,
269+
description = "Last completed upgrade refresh version, prevents duplicate triggers across MNs. Internal use only")
270+
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");
136271
}

compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.zstack.header.message.APIMessage;
2222
import org.zstack.header.network.l2.*;
2323
import org.zstack.header.network.l3.*;
24+
import org.zstack.header.storage.primary.APIRegisterVmInstanceMsg;
2425
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO;
2526
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO_;
2627
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;

0 commit comments

Comments
 (0)