From 31b5f4862f3ec03509b22a04a288b94ec9c61191 Mon Sep 17 00:00:00 2001
From: walter
Date: Thu, 12 Mar 2026 15:25:18 +0800
Subject: [PATCH] [feat](cloud) implement decouple_instance (#61221)

When an instance is cloned via clone_instance, it maintains references
to the source instance and snapshot. After snapshot compaction
completes, these references can be safely removed to decouple the cloned
instance, making it fully independent.

Usage Example:
```
curl -X POST "http://ms_host:port/decouple_instance?instance_id=my_cloned_instance&token=xxx"
```

Requirements:
- instance_id: The ID of the cloned instance to decouple
- The instance must have been created via clone_instance
- The instance's snapshot_compact_status must be SNAPSHOT_COMPACT_DONE
---
 cloud/src/meta-service/meta_service_http.cpp |  15 +++
 cloud/src/snapshot/snapshot_manager.cpp      | 101 +++++++++++++++++++
 cloud/src/snapshot/snapshot_manager.h        |   8 ++
 3 files changed, 124 insertions(+)

diff --git a/cloud/src/meta-service/meta_service_http.cpp b/cloud/src/meta-service/meta_service_http.cpp
index 991a5fc83e64ef..2656b63f8bd5ff 100644
--- a/cloud/src/meta-service/meta_service_http.cpp
+++ b/cloud/src/meta-service/meta_service_http.cpp
@@ -747,6 +747,19 @@ static HttpResponse process_compact_snapshot(MetaServiceImpl* service, brpc::Con
     return http_json_reply(resp.status());
 }
 
+// Handles `decouple_instance`: reads the `instance_id` query parameter, rejects an empty value
+// with INVALID_ARGUMENT, and otherwise replies with the code and message returned by the
+// snapshot manager's decouple_instance.
+static HttpResponse process_decouple_instance(MetaServiceImpl* service, brpc::Controller* ctrl) {
+    auto& uri = ctrl->http_request().uri();
+    std::string instance_id(http_query(uri, "instance_id"));
+    if (instance_id.empty()) {
+        return http_json_reply(MetaServiceCode::INVALID_ARGUMENT, "instance_id is empty");
+    }
+    auto [code, msg] = service->snapshot_manager()->decouple_instance(instance_id);
+    return http_json_reply(code, msg);
+}
+
 static HttpResponse process_set_snapshot_property(MetaServiceImpl* service,
                                                   brpc::Controller* ctrl) {
     AlterInstanceRequest req;
@@ -987,6 +1000,8 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller,
             {"v1/set_multi_version_status", process_set_multi_version_status},
             {"compact_snapshot", process_compact_snapshot},
             {"v1/compact_snapshot", process_compact_snapshot},
+            {"decouple_instance", process_decouple_instance},
+            {"v1/decouple_instance", process_decouple_instance},
             // misc
             {"abort_txn", process_abort_txn},
             {"abort_tablet_job", process_abort_tablet_job},
diff --git a/cloud/src/snapshot/snapshot_manager.cpp b/cloud/src/snapshot/snapshot_manager.cpp
index 94d1f4f9c6d012..ac2b9e22d442f1 100644
--- a/cloud/src/snapshot/snapshot_manager.cpp
+++ b/cloud/src/snapshot/snapshot_manager.cpp
@@ -17,6 +17,10 @@
 
 #include "snapshot/snapshot_manager.h"
 
+#include <fmt/format.h>
+
+#include "common/logging.h"
+#include "meta-store/keys.h"
 #include "meta-store/versionstamp.h"
 #include "recycler/checker.h"
 #include "recycler/recycler.h"
@@ -122,6 +126,103 @@ std::pair<MetaServiceCode, std::string> SnapshotManager::compact_snapshot(
     return {MetaServiceCode::UNDEFINED_ERR, "Not implemented"};
 }
 
+// Decouples a cloned instance from the snapshot it was cloned from.
+//
+// In a single transaction it removes the snapshot reference key built from (source instance,
+// snapshot versionstamp, this instance), clears source_instance_id/source_snapshot_id in the
+// instance PB, adds 1 to system_meta_service_instance_update_key() and writes the instance back.
+// Returns {OK, ""} on success, otherwise the failing step's error code and a message.
+std::pair<MetaServiceCode, std::string> SnapshotManager::decouple_instance(std::string_view id) {
+    std::string instance_id(id);
+    LOG_INFO("decouple_instance").tag("instance_id", instance_id);
+
+    // 1. Create transaction and get current instance info
+    std::unique_ptr<Transaction> txn;
+    TxnErrorCode err = txn_kv_->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        return {MetaServiceCode::KV_TXN_CREATE_ERR, "failed to create txn"};
+    }
+
+    std::string key = instance_key({instance_id});
+    std::string value;
+    err = txn->get(key, &value);
+    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+        return {MetaServiceCode::CLUSTER_NOT_FOUND,
+                fmt::format("instance not found, instance_id={}", instance_id)};
+    } else if (err != TxnErrorCode::TXN_OK) {
+        return {MetaServiceCode::KV_TXN_GET_ERR,
+                fmt::format("failed to get instance info, instance_id={}, err={}", instance_id,
+                            err)};
+    }
+
+    InstanceInfoPB instance;
+    if (!instance.ParseFromString(value)) {
+        return {MetaServiceCode::PROTOBUF_PARSE_ERR, "failed to parse instance info"};
+    }
+
+    // 2. Check the instance was created via clone_instance
+    if (!instance.has_source_instance_id() || instance.source_instance_id().empty() ||
+        !instance.has_source_snapshot_id() || instance.source_snapshot_id().empty()) {
+        return {MetaServiceCode::INVALID_ARGUMENT,
+                fmt::format("instance {} was not a cloned instance (created via clone_instance)",
+                            instance_id)};
+    }
+
+    // 3. Check snapshot_compact_status is SNAPSHOT_COMPACT_DONE
+    if (instance.snapshot_compact_status() != SnapshotCompactStatus::SNAPSHOT_COMPACT_DONE) {
+        return {MetaServiceCode::INVALID_ARGUMENT,
+                fmt::format("instance {} snapshot_compact_status is not SNAPSHOT_COMPACT_DONE, "
+                            "current status={}",
+                            instance_id,
+                            SnapshotCompactStatus_Name(instance.snapshot_compact_status()))};
+    }
+
+    // 4. Remove the snapshot reference key in the source instance
+    // Copy the ids instead of binding references: step 5 clears these PB fields, and the ids
+    // are still needed afterwards for the final log.
+    const std::string source_instance_id = instance.source_instance_id();
+    const std::string source_snapshot_id = instance.source_snapshot_id();
+
+    Versionstamp snapshot_versionstamp;
+    if (!parse_snapshot_versionstamp(source_snapshot_id, &snapshot_versionstamp)) {
+        return {MetaServiceCode::UNDEFINED_ERR,
+                fmt::format("failed to parse source_snapshot_id={} to versionstamp",
+                            source_snapshot_id)};
+    }
+
+    versioned::SnapshotReferenceKeyInfo ref_key_info {source_instance_id, snapshot_versionstamp,
+                                                      instance_id};
+    std::string reference_key = versioned::snapshot_reference_key(ref_key_info);
+    txn->remove(reference_key);
+
+    // 5. Clear source_snapshot_id and source_instance_id from the instance PB
+    instance.clear_source_snapshot_id();
+    instance.clear_source_instance_id();
+
+    // 6. Persist the updated instance
+    std::string updated_val;
+    if (!instance.SerializeToString(&updated_val)) {
+        return {MetaServiceCode::PROTOBUF_SERIALIZE_ERR,
+                fmt::format("failed to serialize updated instance, instance_id={}", instance_id)};
+    }
+
+    txn->atomic_add(system_meta_service_instance_update_key(), 1);
+    txn->put(key, updated_val);
+
+    err = txn->commit();
+    if (err != TxnErrorCode::TXN_OK) {
+        return {MetaServiceCode::KV_TXN_COMMIT_ERR,
+                fmt::format("failed to commit txn, instance_id={}, err={}", instance_id, err)};
+    }
+
+    LOG_INFO("decouple_instance completed successfully")
+            .tag("instance_id", instance_id)
+            .tag("source_instance_id", source_instance_id)
+            .tag("source_snapshot_id", source_snapshot_id);
+
+    return {MetaServiceCode::OK, ""};
+}
+
 std::pair<MetaServiceCode, std::string> SnapshotManager::set_multi_version_status(
         std::string_view instance_id, MultiVersionStatus multi_version_status) {
     return {MetaServiceCode::UNDEFINED_ERR, "Not implemented"};
diff --git a/cloud/src/snapshot/snapshot_manager.h b/cloud/src/snapshot/snapshot_manager.h
index 0e7ad2c12a366a..29e8771c8c477d 100644
--- a/cloud/src/snapshot/snapshot_manager.h
+++ b/cloud/src/snapshot/snapshot_manager.h
@@ -55,6 +55,14 @@ class SnapshotManager {
     // Manually trigger snapshot compact for an instance.
     virtual std::pair<MetaServiceCode, std::string> compact_snapshot(std::string_view instance_id);
 
+    // Decouple a cloned instance from its source snapshot.
+    //
+    // It removes the snapshot reference key in the source instance, and clears
+    // source_snapshot_id and source_instance_id from the cloned instance PB.
+    // The instance must have been created via clone_instance, and its snapshot_compact_status
+    // must be SNAPSHOT_COMPACT_DONE.
+    std::pair<MetaServiceCode, std::string> decouple_instance(std::string_view instance_id);
+
     virtual std::pair<MetaServiceCode, std::string> set_multi_version_status(
             std::string_view instance_id, MultiVersionStatus multi_version_status);
 