From 69c00c98abc87d19d872b8f10e6f3c87310330a9 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 24 Apr 2026 07:22:59 -0600 Subject: [PATCH 01/12] Promote to stable --- .github/workflows/build_commit.yml | 12 ++++---- .github/workflows/build_dependencies.yml | 32 ++++++++++---------- .github/workflows/merge_build.yml | 28 ++++++++--------- .github/workflows/merge_build_3x.yml | 35 ---------------------- .github/workflows/merge_build_6x.yml | 38 ------------------------ .github/workflows/pr_build.yml | 2 +- .jenkins/Jenkinsfile | 1 + conanfile.py | 12 ++++---- 8 files changed, 44 insertions(+), 116 deletions(-) delete mode 100644 .github/workflows/merge_build_3x.yml delete mode 100644 .github/workflows/merge_build_6x.yml diff --git a/.github/workflows/build_commit.yml b/.github/workflows/build_commit.yml index 17d159d56..3cf237356 100644 --- a/.github/workflows/build_commit.yml +++ b/.github/workflows/build_commit.yml @@ -21,9 +21,9 @@ on: jobs: SislDeps: - uses: eBay/sisl/.github/workflows/build_dependencies.yml@master + uses: eBay/sisl/.github/workflows/build_dependencies.yml@stable/v13.x with: - branch: master + branch: stable/v13.x platform: ${{ inputs.platform }} build-type: ${{ inputs.build-type }} malloc-impl: ${{ inputs.malloc-impl }} @@ -32,9 +32,9 @@ jobs: NuraftMesgDeps: needs: SislDeps - uses: eBay/nuraft_mesg/.github/workflows/build_dependencies.yml@main + uses: eBay/nuraft_mesg/.github/workflows/build_dependencies.yml@stable/v4.x with: - branch: main + branch: stable/v4.x platform: ${{ inputs.platform }} build-type: ${{ inputs.build-type }} malloc-impl: ${{ inputs.malloc-impl }} @@ -43,9 +43,9 @@ jobs: IOMgrDeps: needs: SislDeps - uses: eBay/iomanager/.github/workflows/build_dependencies.yml@master + uses: eBay/iomanager/.github/workflows/build_dependencies.yml@stable/v12.x with: - branch: master + branch: stable/v12.x platform: ${{ inputs.platform }} build-type: ${{ inputs.build-type }} malloc-impl: ${{ inputs.malloc-impl }} diff --git a/.github/workflows/build_dependencies.yml b/.github/workflows/build_dependencies.yml index 2c2876b03..fac9c7bda 100644 --- a/.github/workflows/build_dependencies.yml +++ b/.github/workflows/build_dependencies.yml @@ -83,40 +83,40 @@ jobs: timeout-minutes: 1440 steps: - name: Retrieve Code - uses: actions/checkout@main + uses: actions/checkout@v4 with: ref: ${{ inputs.branch }} if: ${{ inputs.testing == 'True' }} - name: Retrieve Recipe - uses: actions/checkout@main + uses: actions/checkout@v4 with: repository: eBay/Homestore ref: ${{ inputs.branch }} if: ${{ inputs.testing == 'False' }} - name: Setup Conan - uses: ebay/sisl/.github/actions/setup_conan2@master + uses: ebay/sisl/.github/actions/setup_conan2@stable/v13.x with: platform: ${{ inputs.platform }} if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Load Homestore Cache id: restore-cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: ${{ inputs.testing }} key_prefix: HomestoreDeps-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }}-${{ inputs.prerelease }} - name: Load Sisl Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: load_any: 'True' key_prefix: SislDeps13-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }} if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Retrieve Dependencies - uses: actions/checkout@main + uses: actions/checkout@v4 with: repository: eBay/iomanager path: import/iomgr @@ -124,15 +124,15 @@ jobs: if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Retrieve Dependencies - uses: actions/checkout@main + uses: actions/checkout@v4 with: repository: eBay/nuraft_mesg path: import/nuraft_mesg - ref: main + ref: stable/v4.x if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Load IOMgr Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/iomgr @@ -141,7 +141,7 @@ jobs: if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Load NuraftMesg Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/nuraft_mesg @@ -160,8 +160,8 @@ jobs: sudo rm -rf /usr/lib/google-cloud-sdk python -m pip install pyelftools - conan export --user oss --channel master import/iomgr - conan export --user oss --channel main import/nuraft_mesg + conan export import/iomgr + conan export import/nuraft_mesg if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Build Cache @@ -182,20 +182,20 @@ jobs: if: ${{ steps.restore-cache.outputs.cache-hit != 'true' }} - name: Save Conan Cache - uses: eBay/sisl/.github/actions/store_conan2@master + uses: eBay/sisl/.github/actions/store_conan2@stable/v13.x with: key_prefix: HomestoreDeps-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }}-${{ inputs.prerelease }} if: ${{ github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }} - name: Reload Sisl Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: load_any: 'True' key_prefix: SislDeps13-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }} if: ${{ inputs.testing == 'True' && github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }} - name: Reload IOMgr Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/iomgr @@ -204,7 +204,7 @@ jobs: if: ${{ inputs.testing == 'True' && github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }} - name: Reload NuraftMesg Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/nuraft_mesg diff --git a/.github/workflows/merge_build.yml b/.github/workflows/merge_build.yml index 0c0fe0c12..5f4fc008f 100644 --- a/.github/workflows/merge_build.yml +++ b/.github/workflows/merge_build.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: push: branches: - - master + - stable/v7.x jobs: Build: @@ -36,16 +36,16 @@ jobs: malloc-impl: ${{ matrix.malloc-impl }} prerelease: ${{ matrix.prerelease }} tooling: ${{ matrix.tooling }} - # ChainBuild: - # runs-on: "ubuntu-22.04" - # steps: - # - name: Start HomeObject Build - # run: | - # curl -L \ - # -X POST \ - # -H "Accept: application/vnd.github+json" \ - # -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}"\ - # -H "X-GitHub-Api-Version: 2022-11-28" \ - # https://api.github.com/repos/eBay/homeobject/actions/workflows/conan_build.yml/dispatches \ - # -d '{"ref":"main","inputs":{}}' - # if: ${{ github.ref == 'refs/heads/master' }} + # ChainBuild: + # runs-on: "ubuntu-22.04" + # steps: + # - name: Start HomeObject Build + # run: | + # curl -L \ + # -X POST \ + # -H "Accept: application/vnd.github+json" \ + # -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}"\ + # -H "X-GitHub-Api-Version: 2022-11-28" \ + # https://api.github.com/repos/eBay/homeobject/actions/workflows/conan_build.yml/dispatches \ + # -d '{"ref":"main","inputs":{}}' + # if: ${{ github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/merge_build_3x.yml b/.github/workflows/merge_build_3x.yml deleted file mode 100644 index 2744dd33d..000000000 --- a/.github/workflows/merge_build_3x.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Homestore 3.x Build - -on: - workflow_dispatch: - push: - branches: - - stable/v3.x - -jobs: - Build: - strategy: - fail-fast: false - matrix: - platform: ["ubuntu-22.04", "ubuntu-20.04"] - build-type: ["Debug", "Release"] - malloc-impl: ["libc", "tcmalloc"] - prerelease: ["True", "False"] - exclude: - - build-type: Debug - platform: ubuntu-20.04 - - build-type: Debug - malloc-impl: tcmalloc - - malloc-impl: tcmalloc - platform: ubuntu-20.04 - - malloc-impl: libc - build-type: Release - platform: ubuntu-22.04 - - prerelease: "True" - platform: ubuntu-20.04 - uses: ./.github/workflows/build_commit.yml - with: - platform: ${{ matrix.platform }} - build-type: ${{ matrix.build-type }} - malloc-impl: ${{ matrix.malloc-impl }} - prerelease: ${{ matrix.prerelease }} diff --git a/.github/workflows/merge_build_6x.yml b/.github/workflows/merge_build_6x.yml deleted file mode 100644 index a2a9ea4af..000000000 --- a/.github/workflows/merge_build_6x.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Homestore 6.x Build - -on: - workflow_dispatch: - push: - branches: - - stable/v6.x - -jobs: - Build: - strategy: - fail-fast: false - matrix: - platform: ["ubuntu-22.04"] - build-type: ["Debug", "Release"] - malloc-impl: ["libc", "tcmalloc"] - prerelease: ["True", "False"] - tooling: ["Sanitize", "Coverage", "None"] - exclude: - - build-type: Debug - prerelease: "False" - - build-type: Debug - tooling: None - - build-type: Debug - malloc-impl: tcmalloc - - build-type: Release - malloc-impl: libc - - build-type: Release - tooling: Sanitize - - build-type: Release - tooling: Coverage - uses: eBay/homestore/.github/workflows/build_commit.yml@stable/v6.x - with: - platform: ${{ matrix.platform }} - build-type: ${{ matrix.build-type }} - malloc-impl: ${{ matrix.malloc-impl }} - prerelease: ${{ matrix.prerelease }} - tooling: ${{ matrix.tooling }} diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index e9b5896b6..2c77e78ea 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: pull_request: branches: - - master + - stable/v7.x jobs: Build: diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index 6e1879f96..d39990397 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -56,6 +56,7 @@ pipeline { steps { script { TAG = "${VER}@" + CONAN_FLAGS="--name ${PROJECT}" } } } diff --git a/conanfile.py b/conanfile.py index 448753f7f..6b4fac0e3 100644 --- a/conanfile.py +++ b/conanfile.py @@ -48,17 +48,17 @@ def configure(self): raise ConanInvalidConfiguration("Coverage/Sanitizer requires Testing!") def build_requirements(self): - self.test_requires("benchmark/1.9.4") - self.test_requires("gtest/1.17.0") + self.test_requires("benchmark/[^1.9]") + self.test_requires("gtest/[^1.17]") def requirements(self): - self.requires("iomgr/[^12]@oss/master", transitive_headers=True) - self.requires("sisl/[^13.2.4]@oss/master", transitive_headers=True) - self.requires("nuraft_mesg/[^4]@oss/main", transitive_headers=True) + self.requires("iomgr/[^12.0]", transitive_headers=True) + self.requires("sisl/[^13.2]", transitive_headers=True) + self.requires("nuraft_mesg/[^4.0]", transitive_headers=True) self.requires("farmhash/cci.20190513@", transitive_headers=True) if self.settings.arch in ['x86', 'x86_64']: - self.requires("isa-l/2.30.0", transitive_headers=True) + self.requires("isa-l/[^2.30]", transitive_headers=True) # Tests require OpenSSL 3.x self.requires("openssl/[^3.1]", override=True) From 74d80c736afdf782e9b052842047534792e44639 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 24 Apr 2026 10:02:03 -0700 Subject: [PATCH 02/12] Remove sanitize from internal, already gated in gh. --- .jenkins/Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index d39990397..beaad8e98 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -65,7 +65,6 @@ pipeline { steps { sh "hostname ; \ echo $NODE_NAME ; \ - conan create ${BUILD_MISSING} -s:h build_type=Debug -o ${PROJECT}/*:sanitize=True ${CONAN_FLAGS} . ; \ conan create ${BUILD_MISSING} -s:h build_type=Debug ${CONAN_FLAGS} . ; \ conan create ${BUILD_MISSING} -s:h build_type=RelWithDebInfo -o sisl/*:malloc_impl=tcmalloc ${CONAN_FLAGS} . ; \ conan create ${BUILD_MISSING} -s:h build_type=RelWithDebInfo -o sisl/*:prerelease=True -o sisl/*:malloc_impl=tcmalloc ${CONAN_FLAGS} . ; \ From 2b0478fa81d9048ad226c04cdb3e36dfc280284d Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 24 Apr 2026 10:56:34 -0700 Subject: [PATCH 03/12] Fix iomanager ref --- .github/workflows/build_dependencies.yml | 2 +- .github/workflows/version_change_check.yml | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_dependencies.yml b/.github/workflows/build_dependencies.yml index fac9c7bda..c3070cb72 100644 --- a/.github/workflows/build_dependencies.yml +++ b/.github/workflows/build_dependencies.yml @@ -120,7 +120,7 @@ jobs: with: repository: eBay/iomanager path: import/iomgr - ref: master + ref: stable/v12.x if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Retrieve Dependencies diff --git a/.github/workflows/version_change_check.yml b/.github/workflows/version_change_check.yml index 1e8552f6f..c007a8545 100644 --- a/.github/workflows/version_change_check.yml +++ b/.github/workflows/version_change_check.yml @@ -10,12 +10,18 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@main + uses: actions/checkout@v4 with: fetch-depth: 2 - - name: Check if file is modified + - name: Check if version bump is required run: | - if git diff -r HEAD^1 HEAD -- conanfile.py | egrep "[ ]+version = "; then + changed=$(git diff --name-only HEAD^1 HEAD) + non_test=$(echo "$changed" | grep -Ev '^src/tests/|CMakeLists\.txt$|^\.github/|^\.jenkins|\.md$' || true) + if [ -z "$non_test" ]; then + echo "Only test files changed — version bump not required" + exit 0 + fi + if git diff -r HEAD^1 HEAD -- conanfile.py | grep -E "[ ]+version = "; then echo "Version is updated with this PR, OK" else echo "Conan version is not updated with this PR. Please update that to allow PR merge" From 77d056ab1b836c8882ea4d7fa54da2f9bb1547b8 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Thu, 7 May 2026 18:26:33 +0800 Subject: [PATCH 04/12] SDSTOR-21921 Improve LOG_EVERY_N logic with memory optimization and time-based rate limiting This change enhances LOG_EVERY_N macros to fix memory leaks and add new capabilities: - Refactor check_logged_already() to check_and_format_log() which consolidates rate-limiting decision and suffix formatting into single function - Use hash-based keys instead of full message strings for memory efficiency - Clear entire map every 300s to prevent unbounded memory growth - Store millisecond offsets and counts for time-based rate limiting - Log first occurrence (count==1) in addition to every Nth occurrence - Add HS_LOG_EVERY_N_SEC for time-based rate limiting (log every N seconds) - Add HS_LOG_EVERY_N_OR_SEC for hybrid rate limiting (every N occurrences OR every M seconds) - Use consistent suffix format: "Last logged X.Xs ago, N occurrences" Memory impact: Reduces per-entry storage from ~124 bytes to ~20 bytes (6x improvement) Resolves: #880 --- .gitignore | 3 + conanfile.py | 2 +- src/lib/common/homestore_assert.hpp | 132 +++++++++++++++++++++++----- 3 files changed, 116 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 78b8a1f38..47363d6a8 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,6 @@ CMakeUserPresets.json # claude .claude/** + +# worktrees +.worktrees/ diff --git a/conanfile.py b/conanfile.py index 6b4fac0e3..0d1b7df1d 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.6" + version = "7.5.7" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/common/homestore_assert.hpp b/src/lib/common/homestore_assert.hpp index 9ffa46273..9bacf4727 100644 --- a/src/lib/common/homestore_assert.hpp +++ b/src/lib/common/homestore_assert.hpp @@ -143,21 +143,63 @@ fmt::make_format_args(detail_name, detail_val)))) \ (); \ fmt::vformat_to(fmt::appender{buf}, fmt::string_view{msgcb}, fmt::make_format_args(args...)); \ - const auto count{check_logged_already(buf)}; \ - if (count % freq == 0) { \ - if (count) { \ - fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Repeated {} times in this thread"}, \ - fmt::make_format_args(freq)); \ - } \ - return true; \ - } \ - return false; \ + return check_and_format_log(buf, freq, 0); \ }), \ msg, ##__VA_ARGS__); \ } #define HS_LOG_EVERY_N(level, mod, freq, msg, ...) HS_DETAILED_LOG_EVERY_N(level, mod, freq, , , , , msg, ##__VA_ARGS__) +#define HS_DETAILED_LOG_EVERY_N_SEC(level, mod, interval_sec, submod_name, submod_val, detail_name, detail_val, msg, \ + ...) \ + { \ + LOG##level##MOD_FMT( \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(mod), base, mod), \ + ([&](fmt::memory_buffer& buf, const char* const msgcb, auto&&... args) -> bool { \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}:{}] "}, \ + fmt::make_format_args(unmove(file_name(__FILE__)), unmove(__LINE__))); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(submod_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(submod_name, submod_val)))) \ + (); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(detail_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(detail_name, detail_val)))) \ + (); \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{msgcb}, fmt::make_format_args(args...)); \ + return check_and_format_log(buf, 0, interval_sec); \ + }), \ + msg, ##__VA_ARGS__); \ + } + +#define HS_LOG_EVERY_N_SEC(level, mod, interval_sec, msg, ...) \ + HS_DETAILED_LOG_EVERY_N_SEC(level, mod, interval_sec, , , , , msg, ##__VA_ARGS__) + +#define HS_DETAILED_LOG_EVERY_N_OR_SEC(level, mod, freq, interval_sec, submod_name, submod_val, detail_name, \ + detail_val, msg, ...) \ + { \ + LOG##level##MOD_FMT( \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(mod), base, mod), \ + ([&](fmt::memory_buffer& buf, const char* const msgcb, auto&&... args) -> bool { \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}:{}] "}, \ + fmt::make_format_args(unmove(file_name(__FILE__)), unmove(__LINE__))); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(submod_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(submod_name, submod_val)))) \ + (); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(detail_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(detail_name, detail_val)))) \ + (); \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{msgcb}, fmt::make_format_args(args...)); \ + return check_and_format_log(buf, freq, interval_sec); \ + }), \ + msg, ##__VA_ARGS__); \ + } + +#define HS_LOG_EVERY_N_OR_SEC(level, mod, freq, interval_sec, msg, ...) \ + HS_DETAILED_LOG_EVERY_N_OR_SEC(level, mod, freq, interval_sec, , , , , msg, ##__VA_ARGS__) + #define HS_SUBMOD_LOG(level, mod, req, submod_name, submod_val, msg, ...) \ HS_DETAILED_LOG(level, mod, req, submod_name, submod_val, , , msg, ##__VA_ARGS__) #define HS_REQ_LOG(level, mod, req, msg, ...) HS_SUBMOD_LOG(level, mod, req, , , msg, ##__VA_ARGS__) @@ -310,16 +352,66 @@ #define HS_REL_ASSERT_NULL(val, ...) HS_ASSERT_NULL(RELEASE_ASSERT_CMP, val, ##__VA_ARGS__) #define HS_REL_ASSERT_NOTNULL(val, ...) HS_ASSERT_NOTNULL(RELEASE_ASSERT_CMP, val, ##__VA_ARGS__) -[[maybe_unused]] static uint64_t check_logged_already(const fmt::memory_buffer& buf) { +[[maybe_unused]] static bool check_and_format_log(fmt::memory_buffer& buf, uint64_t freq = 0, + uint64_t interval_sec = 0) { static constexpr uint64_t COUNTER_RESET_SEC{300}; // Reset every 5 minutes - static thread_local std::unordered_map< std::string, std::pair< Clock::time_point, uint64_t > > log_map{}; - - const std::string_view msg{buf.data()}; - auto [it, happened] = log_map.emplace(msg, std::make_pair(Clock::now(), 0)); - HS_REL_ASSERT(it != std::cend(log_map), "Expected entry to be either present or new insertion to succeed"); - auto& [tm, count] = it->second; - auto result = count; - count = (get_elapsed_time_sec(tm) > COUNTER_RESET_SEC) ? static_cast< decltype(count) >(0) : count + 1; - tm = Clock::now(); - return result; + static thread_local Clock::time_point last_cleanup{Clock::now()}; + static thread_local std::unordered_map< size_t, std::pair< uint32_t, uint64_t > > log_map{}; + // hash -> (last_log_ms, count) + + const auto now = Clock::now(); + if (get_elapsed_time_sec(last_cleanup) > COUNTER_RESET_SEC) { + log_map.clear(); + last_cleanup = now; + } + + // Hash the buffer content BEFORE appending any suffix + const std::string_view msg{buf.data(), buf.size()}; + const size_t msg_hash = std::hash< std::string_view >{}(msg); + + // Milliseconds since last cleanup (max ~49 days with uint32_t) + const uint32_t now_ms = + std::chrono::duration_cast< std::chrono::milliseconds >(now - last_cleanup).count(); + + auto [it, happened] = log_map.emplace(msg_hash, std::make_pair(now_ms, 1)); + uint32_t elapsed_ms = 0; + uint64_t count = 1; + + if (!happened) { + // Entry exists - increment count and calculate elapsed time + elapsed_ms = now_ms - it->second.first; // Time since last log emission + it->second.second++; // Increment count + count = it->second.second; + } + + // Decide if we should log + bool should_log = false; + if (count == 1) { + // Always log first occurrence + should_log = true; + } else if (freq > 0 && count % freq == 0) { + // Count-based: log every Nth occurrence + should_log = true; + } else if (interval_sec > 0 && elapsed_ms >= static_cast< uint32_t >(interval_sec) * 1000) { + // Time-based: log if enough time has passed since last emission + should_log = true; + } + + // If logging, update timestamp and append suffix + if (should_log) { + // Append formatted suffix if not first occurrence + if (count > 1) { + // Always show elapsed time and count since last log + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Last logged {:.1f}s ago, {} occurrences"}, + fmt::make_format_args(elapsed_ms / 1000.0, count)); + } + + // Update state after logging (so next log shows "since this log") + if (!happened) { + it->second.first = now_ms; // Update timestamp + it->second.second = 1; // Reset count (next occurrence will be "1 since this log") + } + } + + return should_log; } From ba63a7d1ab8c19769eb6a37d807cca39707885bf Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Fri, 8 May 2026 01:38:04 +0800 Subject: [PATCH 05/12] Fix fmt::make_format_args compilation error Use milliseconds directly in format string instead of converting to seconds, avoiding rvalue binding issue with fmt::make_format_args. Co-Authored-By: Claude Opus 4.6 --- src/lib/common/homestore_assert.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/common/homestore_assert.hpp b/src/lib/common/homestore_assert.hpp index 9bacf4727..27789e3ab 100644 --- a/src/lib/common/homestore_assert.hpp +++ b/src/lib/common/homestore_assert.hpp @@ -402,8 +402,8 @@ // Append formatted suffix if not first occurrence if (count > 1) { // Always show elapsed time and count since last log - fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Last logged {:.1f}s ago, {} occurrences"}, - fmt::make_format_args(elapsed_ms / 1000.0, count)); + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Last logged {}ms ago, {} occurrences"}, + fmt::make_format_args(elapsed_ms, count)); } // Update state after logging (so next log shows "since this log") From f7f03e68e220e2802a2b78766ae27619bbaf880e Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Fri, 8 May 2026 01:47:11 +0800 Subject: [PATCH 06/12] Fix off-by-one error in count tracking and improve memory cleanup - Fix count tracking: reset to 0 instead of 1 to avoid off-by-one in rate limiting (was logging at 10, 19, 28... instead of 10, 20, 30...) - Use happened flag to detect first occurrence instead of count==1 - Use swap with empty map to actually release memory during cleanup (clear() only removes entries but retains bucket array capacity) Co-Authored-By: Claude Opus 4.6 --- src/lib/common/homestore_assert.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib/common/homestore_assert.hpp b/src/lib/common/homestore_assert.hpp index 27789e3ab..9d2aead03 100644 --- a/src/lib/common/homestore_assert.hpp +++ b/src/lib/common/homestore_assert.hpp @@ -361,7 +361,7 @@ const auto now = Clock::now(); if (get_elapsed_time_sec(last_cleanup) > COUNTER_RESET_SEC) { - log_map.clear(); + std::unordered_map< size_t, std::pair< uint32_t, uint64_t > >().swap(log_map); // Actually release memory last_cleanup = now; } @@ -373,9 +373,9 @@ const uint32_t now_ms = std::chrono::duration_cast< std::chrono::milliseconds >(now - last_cleanup).count(); - auto [it, happened] = log_map.emplace(msg_hash, std::make_pair(now_ms, 1)); + auto [it, happened] = log_map.emplace(msg_hash, std::make_pair(now_ms, 0)); uint32_t elapsed_ms = 0; - uint64_t count = 1; + uint64_t count = 0; if (!happened) { // Entry exists - increment count and calculate elapsed time @@ -386,8 +386,8 @@ // Decide if we should log bool should_log = false; - if (count == 1) { - // Always log first occurrence + if (happened) { + // Always log first occurrence (new entry) should_log = true; } else if (freq > 0 && count % freq == 0) { // Count-based: log every Nth occurrence @@ -400,7 +400,7 @@ // If logging, update timestamp and append suffix if (should_log) { // Append formatted suffix if not first occurrence - if (count > 1) { + if (!happened && count > 0) { // Always show elapsed time and count since last log fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Last logged {}ms ago, {} occurrences"}, fmt::make_format_args(elapsed_ms, count)); @@ -409,7 +409,7 @@ // Update state after logging (so next log shows "since this log") if (!happened) { it->second.first = now_ms; // Update timestamp - it->second.second = 1; // Reset count (next occurrence will be "1 since this log") + it->second.second = 0; // Reset count (next occurrence will be "1 since this log") } } From 9b5a32978162f2a3e37ffdcc9d73671f7e9432e5 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Fri, 8 May 2026 10:25:31 +0800 Subject: [PATCH 07/12] Add missing functional include and handle edge case - Add #include for std::hash to avoid relying on transitive includes - Handle edge case when both freq=0 and interval_sec=0: always log (fallback behavior) instead of only logging first occurrence then suppressing all subsequent logs Co-Authored-By: Claude Opus 4.6 --- src/lib/common/homestore_assert.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/common/homestore_assert.hpp b/src/lib/common/homestore_assert.hpp index 9d2aead03..7b4633266 100644 --- a/src/lib/common/homestore_assert.hpp +++ b/src/lib/common/homestore_assert.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -389,6 +390,9 @@ if (happened) { // Always log first occurrence (new entry) should_log = true; + } else if (freq == 0 && interval_sec == 0) { + // Both rate limiters disabled: always log (fallback behavior) + should_log = true; } else if (freq > 0 && count % freq == 0) { // Count-based: log every Nth occurrence should_log = true; From 568f9ff0a690ec1263f27bff50109c6d15da147b Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Fri, 8 May 2026 13:23:59 +0800 Subject: [PATCH 08/12] Add documentation and warning for interval_sec > 300s - Add function comment documenting the 300s cleanup limitation - Log one-time warning when interval_sec exceeds cleanup period - Clarifies expected behavior when using large interval_sec values Co-Authored-By: Claude Opus 4.6 --- src/lib/common/homestore_assert.hpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lib/common/homestore_assert.hpp b/src/lib/common/homestore_assert.hpp index 7b4633266..690021f1a 100644 --- a/src/lib/common/homestore_assert.hpp +++ b/src/lib/common/homestore_assert.hpp @@ -353,6 +353,13 @@ #define HS_REL_ASSERT_NULL(val, ...) HS_ASSERT_NULL(RELEASE_ASSERT_CMP, val, ##__VA_ARGS__) #define HS_REL_ASSERT_NOTNULL(val, ...) HS_ASSERT_NOTNULL(RELEASE_ASSERT_CMP, val, ##__VA_ARGS__) +/** + * Rate-limited logging helper with count-based and time-based controls. + * + * State is cleaned up every 300 seconds to prevent unbounded memory growth. + * This means time-based rate limiting works correctly only when interval_sec < 300. + * If interval_sec >= 300, the behavior effectively becomes "log first occurrence after each 5min reset." + */ [[maybe_unused]] static bool check_and_format_log(fmt::memory_buffer& buf, uint64_t freq = 0, uint64_t interval_sec = 0) { static constexpr uint64_t COUNTER_RESET_SEC{300}; // Reset every 5 minutes @@ -360,6 +367,16 @@ static thread_local std::unordered_map< size_t, std::pair< uint32_t, uint64_t > > log_map{}; // hash -> (last_log_ms, count) + // Warn once if interval_sec exceeds cleanup period + if (interval_sec > COUNTER_RESET_SEC) { + static thread_local bool warned = false; + if (!warned) { + LOGWARN("interval_sec={} exceeds cleanup period ({}s) - time-based rate limiting may not work as expected", + interval_sec, COUNTER_RESET_SEC); + warned = true; + } + } + const auto now = Clock::now(); if (get_elapsed_time_sec(last_cleanup) > COUNTER_RESET_SEC) { std::unordered_map< size_t, std::pair< uint32_t, uint64_t > >().swap(log_map); // Actually release memory From 0bc81f18b4c5f666b7a82b4de6c7ec3c3399e3c0 Mon Sep 17 00:00:00 2001 From: yuwmao <148639999+yuwmao@users.noreply.github.com> Date: Mon, 11 May 2026 09:38:26 +0800 Subject: [PATCH 09/12] SDSTOR-21760: Fix table not found issue during index recovery (#877) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here is the issue description: journal–table metadata mismatch due to CP vs destroy ordering A split hits crash flip and marks its parent buffer with m_crash_flag_on during transact_bufs (src/lib/index/wb_cache.cpp:237-247). The same logical window removes the table: index_table::destroy() immediately removes its superblock from meta via MetaBlkService::remove_sub_sb (src/include/homestore/index/index_table.hpp:135-147 → src/lib/meta/meta_blk_service.cpp:872+). CP flush later starts and writes the txn_journal to meta first, then begins flushing dirty buffers; when the flagged parent buffer is reached, it crashes (src/lib/index/wb_cache.cpp:860-871, 896-903). On restart, recovery replays the persisted txn_journal and attempts to repair the table by ordinal, but the table superblock is gone and the table isn’t loaded → HS_DBG_ASSERT in repair_index_node (src/lib/index/index_service.cpp:205-212). Key ordering rules that cause the mismatch - Table destroy persistence is immediate at destroy(): meta superblock is removed synchronously (not tied to CP). - Index CP flush ordering is fixed: (1) persist txn_journal; (2) flush dirty buffers; crash can occur at (2). - Thus it’s possible to have a persisted journal entry for a table whose superblock was already removed. Co-authored-by: yuwmao --- conanfile.py | 2 +- src/include/homestore/index/index_table.hpp | 7 ++ src/tests/test_index_crash_recovery.cpp | 88 +++++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index 0d1b7df1d..66e884eb9 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.7" + version = "7.5.8" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/include/homestore/index/index_table.hpp b/src/include/homestore/index/index_table.hpp index 3075f00ab..b79381962 100644 --- a/src/include/homestore/index/index_table.hpp +++ b/src/include/homestore/index/index_table.hpp @@ -140,6 +140,13 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { auto cpg = cp_mgr().cp_guard(); Btree< K, V >::destroy_btree(cpg.context(cp_consumer_t::INDEX_SVC)); } + // Flush the current CP to completion before removing the superblock. + // This closes the journal–table mismatch window: any txn_journal entries + // for this table's ordinal are written and the CP is marked done on disk. + // If the process crashes after this point, recovery will not replay the + // old journal (CP is complete), so repair_index_node will never be called + // for an ordinal whose superblock no longer exists. + cp_mgr().trigger_cp_flush(true /* force */).get(); m_sb.destroy(); m_sb_buffer->m_valid = false; decr_pending_request_num(); diff --git a/src/tests/test_index_crash_recovery.cpp b/src/tests/test_index_crash_recovery.cpp index 32dc3141a..0d90cfa9d 100644 --- a/src/tests/test_index_crash_recovery.cpp +++ b/src/tests/test_index_crash_recovery.cpp @@ -377,6 +377,18 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT LOGINFO("destroy btree - erase shadow map {}", m_shadow_filename); } + // Install a brand-new empty btree (without destroying the old one first). + // Used after recovery to replace a stale/destroyed m_bt so TearDown() is safe. + void install_fresh_btree(uint32_t erase_up_to_key = 0) { + auto uuid = boost::uuids::random_generator()(); + auto parent_uuid = boost::uuids::random_generator()(); + this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); + hs()->index_service().add_index_table(this->m_bt); + if (erase_up_to_key > 0) { this->m_shadow_map.range_erase(0, erase_up_to_key - 1); } + this->m_shadow_map.save(m_shadow_filename); + LOGINFO("Installed fresh btree with uuid {}", boost::uuids::to_string(uuid)); + } + void restart_homestore(uint32_t shutdown_delay_sec = 3) override { this->params(HS_SERVICE::INDEX).index_svc_cbs = new TestIndexServiceCallbacks(this); LOGINFO("\n\n\n\n\n\n shutdown homestore for index service Test\n\n\n\n\n"); @@ -1001,6 +1013,82 @@ TYPED_TEST(IndexCrashTest, MergeRemoveBasic) { } } +// Regression test for the bug: when an index table is destroyed concurrently with a CP flush, +// the txn_journal (written at the START of the flush) can contain entries for the destroyed +// table's ordinal while the table's meta superblock is already gone. On recovery the journal +// is replayed but the table cannot be found in m_ordinal_index_map, causing +// repair_index_node / sanity_check to assert. +// +// Sequence that triggers the bug: +// 1. Build a multi-level tree and flush a baseline CP. +// 2. Set a crash flip so the next CP crashes mid-flush (after writing the journal). +// 3. Insert more keys → splits happen → transact_bufs is called → txn_journal entry added +// for this table's ordinal AND m_crash_flag_on is set on the split parent buf. +// 4. Destroy the table: meta superblock removed from disk, but the dirty split bufs +// (with m_crash_flag_on) remain in the CP dirty list. +// 5. Trigger CP → journal written to disk → do_flush_one_buf hits the flagged buf → crash. +// 6. Recovery: journal has entries for the destroyed table → without fix: assert/abort. +// With fix: skip gracefully. +TYPED_TEST(IndexCrashTest, DestroyTableWithPendingCpCrash) { + // Use a small fixed preload to avoid triggering unrelated block-allocator issues + // that surface when inserting hundreds of consecutive right-edge keys into a + // fully-flushed large tree. With max_keys_in_node=20 and 100 preloaded keys + // we get 5 full leaf nodes; inserting 20 more triggers 1-2 leaf splits which is + // enough to (a) populate a txn_journal entry and (b) mark a parent buffer for crash. + static constexpr uint32_t preload_size = 100; + static constexpr uint32_t extra_keys = 20; + + // Step 1: Build a small tree and flush a clean baseline CP. + LOGINFO("Step 1: Preload {} keys and flush baseline CP", preload_size); + for (auto k = 0u; k < preload_size; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + test_common::HSTestHelper::trigger_cp(true); + this->m_shadow_map.save(this->m_shadow_filename); + + // Step 2: Set the crash flip BEFORE the operations that will cause splits. + // When a split occurs, transact_bufs checks this flip and sets m_crash_flag_on on + // the parent buf. The actual crash fires later when the CP flushes that buf. + LOGINFO("Step 2: Set crash flip on split-at-parent"); + this->set_basic_flip("crash_flush_on_split_at_parent"); + + // Step 3: Insert extra keys to cause splits and populate the txn_journal. + // The rightmost leaf is full after the preload, so the first new key triggers a + // split → transact_bufs is called → crash flip fires once → m_crash_flag_on set + // on the parent buf; subsequent splits (if any) accumulate more journal entries + // but no additional crash flags (flip fires only once, count=1). + LOGINFO("Step 3: Insert {} extra keys to cause splits (txn_journal populated)", extra_keys); + for (auto k = preload_size; k < preload_size + extra_keys; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + + // Step 4: Destroy the table. m_sb.destroy() removes the meta superblock from disk. + // The dirty split bufs (including the one with m_crash_flag_on) remain in the CP + // dirty list because free_buf only marks m_node_freed — it does not touch the list. + LOGINFO("Step 4: Destroy index table — meta superblock removed from disk"); + hs()->index_service().remove_index_table(this->m_bt); + this->m_bt->destroy(); + + // Step 5: Trigger the CP (do not wait). async_cp_flush will: + // (a) write the txn_journal to disk (entries include the destroyed table's ordinal), + // (b) start flushing nodes, hit the buf with m_crash_flag_on → simulated crash. + LOGINFO("Step 5: Trigger CP (crash expected after journal is written)"); + test_common::HSTestHelper::trigger_cp(false /* no wait */); + + // Step 6: Wait for the simulated crash and homestore recovery. + // Without fix : repair_index_node / sanity_check asserts → process aborts (SIGABRT). + // With fix : missing table is skipped gracefully and recovery completes normally. + LOGINFO("Step 6: Waiting for crash-recovery"); + this->wait_for_crash_recovery(true /* check_will_crash */); + LOGINFO("Step 6: Recovery succeeded without aborting — destroyed table was handled gracefully"); + + // Step 7: Install a fresh empty btree so that the shared TearDown() can safely call + // root_node_id() and tree_key_count() without hitting the stale destroyed btree. + LOGINFO("Step 7: Installing a fresh empty btree for teardown"); + this->install_fresh_btree(preload_size + extra_keys); + test_common::HSTestHelper::trigger_cp(true); +} + TYPED_TEST(IndexCrashTest, MetricsTest) { const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >(); std::vector< uint32_t > vec(num_entries); From 9c5bf785eeaa2f58e844d7bfdb57dab107890b8c Mon Sep 17 00:00:00 2001 From: ywz <649521587@qq.com> Date: Thu, 21 May 2026 11:11:18 +0800 Subject: [PATCH 10/12] SDSTOR-21863: Revert AppendBlkAllocator Reset logic to prevent race condition between upper layer reset and self operation on it (cp_flush and free) (#886) Fix https://github.com/eBay/HomeObject/issues/401 Co-authored-by: yawzhang Co-authored-by: Claude Sonnet 4.6 --- conanfile.py | 2 +- src/lib/blkalloc/append_blk_allocator.cpp | 25 +++++++--- src/lib/device/virtual_dev.cpp | 12 ++++- src/tests/test_data_service.cpp | 60 ++++++++++++++++++++--- 4 files changed, 83 insertions(+), 16 deletions(-) diff --git a/conanfile.py b/conanfile.py index 66e884eb9..0d20a477d 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.8" + version = "7.5.9" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/blkalloc/append_blk_allocator.cpp b/src/lib/blkalloc/append_blk_allocator.cpp index 50b6e81cb..40a718b92 100644 --- a/src/lib/blkalloc/append_blk_allocator.cpp +++ b/src/lib/blkalloc/append_blk_allocator.cpp @@ -155,17 +155,30 @@ bool AppendBlkAllocator::is_blk_alloced(const BlkId& in_bid, bool) const { } void AppendBlkAllocator::reset() { + // Reset in-place: restore all in-memory counters to the initial empty state and mark dirty + // so the next cp_flush persists commit_offset=0 to the existing superblock. + // + // We intentionally do NOT destroy the superblock or deregister the handler here, unlike + // BitmapBlkAllocator::reset(). AppendBlkAllocator is managed by the upper layer (HomeObject + // GC), which reuses the same chunk across multiple GC cycles. Keeping the same allocator + // instance alive avoids the race described in https://github.com/eBay/HomeObject/issues/401: + // + // T1: cp_flush holds a shared_ptr to the old allocator + // T2: reset() destroys m_sb [old behavior] + // T3: gc_repl_reqs calls free() on old allocator -> m_is_dirty=true + // T4: cp_flush writes to the destroyed m_sb -> crash + // + // With in-place reset the superblock is always valid, so T4 is safe regardless of T3. std::lock_guard lg(m_sb_mtx); - m_is_dirty.store(false); - m_sb.destroy(); - meta_service().deregister_handler(get_name()); + m_last_append_offset.store(0); + m_freeable_nblks.store(0); + m_commit_offset.store(0); + m_is_dirty.store(true); } bool AppendBlkAllocator::is_blk_alloced_on_disk(BlkId const& bid, bool use_lock) const { std::lock_guard lg(m_sb_mtx); - if (!m_sb.is_empty()) { return bid.blk_num() < m_sb->commit_offset; } - // if allocator is reset, the sb will be destroyed, and all the blks will be treated as free; - return false; + return bid.blk_num() < m_sb->commit_offset; } std::string AppendBlkAllocator::get_name() const { return "AppendBlkAlloc_chunk_" + std::to_string(m_chunk_id); } diff --git a/src/lib/device/virtual_dev.cpp b/src/lib/device/virtual_dev.cpp index 73d39fc55..b3c4bb3ac 100644 --- a/src/lib/device/virtual_dev.cpp +++ b/src/lib/device/virtual_dev.cpp @@ -129,6 +129,16 @@ void VirtualDev::add_chunk(cshared< Chunk >& chunk, bool is_fresh_chunk) { } void VirtualDev::reset_chunk_blk_allocator(Chunk* chunk) { + if (m_allocator_type == blk_allocator_type_t::append) { + // AppendBlkAllocator resets in-place via its reset() method and no new allocator instance is created. + // Creating a new instance would require deregistering the old handler and registering a + // new one, which introduces a window where a concurrent cp_flush holding the old + // allocator's shared_ptr could write to an already-destroyed superblock. + // See https://github.com/eBay/HomeObject/issues/401. + + LOGINFO("AppendBlkAllocator resets in-place, skip creating new instance for chunk {}", chunk->chunk_id()); + return; + } auto ba = create_blk_allocator(m_allocator_type, block_size(), chunk->physical_dev()->optimal_page_size(), chunk->physical_dev()->align_size(), chunk->size(), m_auto_recovery, chunk->chunk_id(), true /* is_init */, m_use_slab_in_blk_allocator); @@ -743,8 +753,6 @@ void VirtualDev::cp_flush(VDevCPContext* v_cp_ctx) { // pass down cp so that underlying components can get their customized CP context if needed; m_chunk_selector->foreach_chunks([this, cp](cshared< Chunk >& chunk) { HS_LOG(TRACE, device, "Flushing chunk: {}, vdev: {}", chunk->chunk_id(), m_vdev_info.name); - // Hold shared_ptr to prevent allocator from being reset during cp_flush. - // This fixes race with GC's purge_reserved_chunk() which calls reset_block_allocator(). auto alloc_ptr = chunk->blk_allocator_shared(); #ifdef _PRERELEASE diff --git a/src/tests/test_data_service.cpp b/src/tests/test_data_service.cpp index 53b5fe19d..8efad5ba6 100644 --- a/src/tests/test_data_service.cpp +++ b/src/tests/test_data_service.cpp @@ -1123,8 +1123,8 @@ TEST_F(BlkDataServiceAppendTest, TestResetCompletedBeforeCpFlush) { LOGINFO("Scenario 1: PASSED - Order: get_ptr -> reset_block_allocator -> cp_flush"); } -// Scenario 2: Order: cp_flush holds shared_ptr -> old allocator reset -> cp_flush on old -> create new allocator -// Test that cp_flush executes after old allocator reset, then new allocator is created +// Scenario 2: Order: cp_flush holds shared_ptr -> allocator reset (in-place) -> cp_flush completes on same allocator +// Test that cp_flush completes safely after in-place reset of the same allocator TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { vdev_info vinfo; auto data_vdev = inst().open_vdev(vinfo, true); @@ -1185,12 +1185,12 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { LOGINFO("Scenario 2: Old allocator reset() completed"); old_allocator_reset.store(true); - // Wait for cp_flush to complete on old allocator before creating new one + // Wait for cp_flush to complete before reset thread continues for (int i = 0; i < 200 && !cp_flush_completed.load(); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } - RELEASE_ASSERT(cp_flush_completed.load(), "cp_flush should complete before creating new allocator"); - LOGINFO("Scenario 2: cp_flush completed, proceeding to create new allocator"); + RELEASE_ASSERT(cp_flush_completed.load(), "cp_flush should complete before reset thread continues"); + LOGINFO("Scenario 2: cp_flush completed, reset thread proceeding"); })); std::thread flush_thread([&]() { @@ -1205,7 +1205,7 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { std::thread reset_thread([&]() { chunk->reset_block_allocator(); reset_completed.store(true); - LOGINFO("Scenario 2: reset_block_allocator completed (new allocator created)"); + LOGINFO("Scenario 2: reset_block_allocator completed (in-place reset)"); }); flush_thread.join(); @@ -1219,7 +1219,7 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { ASSERT_TRUE(cp_flush_completed.load()); ASSERT_TRUE(reset_completed.load()); ASSERT_TRUE(old_allocator_reset.load()); - LOGINFO("Scenario 2: PASSED - Order: get_ptr -> old_allocator_reset -> cp_flush -> create new allocator"); + LOGINFO("Scenario 2: PASSED - Order: get_ptr -> allocator_reset (in-place) -> cp_flush on same allocator"); } // Scenario 3: Order: cp_flush acquires mutex first -> reset_block_allocator blocks on mutex @@ -1299,6 +1299,52 @@ TEST_F(BlkDataServiceAppendTest, TestCpFlushBlocksResetWithMutex) { LOGINFO("Scenario 3: PASSED - mutex protected m_sb from concurrent reset_block_allocator during cp_flush"); } +// Scenario 4: Order: cp_flush holds shared_ptr -> reset() completes -> free() re-sets dirty on old allocator +// -> cp_flush runs on old allocator +// Test the race from https://github.com/eBay/HomeObject/issues/401: +// with the old AppendBlkAllocator::reset() (destroy + recreate): +// - reset() destroys m_sb of the old allocator +// - free() sets m_is_dirty=true on the old allocator (whose m_sb is already destroyed) +// - cp_flush writes to the destroyed m_sb -> crash / undefined behavior +TEST_F(BlkDataServiceAppendTest, TestFreeAfterResetBeforeCpFlushSafe) { + vdev_info vinfo; + auto data_vdev = inst().open_vdev(vinfo, true); + auto chunks = data_vdev->get_chunks(); + + auto it = std::find_if(chunks.begin(), chunks.end(), [](const auto& pair) { + return pair.second && pair.second->blk_allocator() && + pair.second->blk_allocator()->get_name().find("append_chunk_") == 0; + }); + ASSERT_NE(it, chunks.end()) << "No AppendBlkAllocator chunks found"; + auto chunk = it->second; + auto chunk_id = chunk->chunk_id(); + + LOGINFO("Scenario 4: Testing chunk_id={}", chunk_id); + + // Simulate cp_flush holding a reference to the old allocator before reset begins. + LOGINFO("Scenario 4: Simulate cp_flush acquiring shared_ptr to old allocator before reset"); + auto old_alloc = chunk->blk_allocator_shared(); + + // Simulate GC's purge_reserved_chunk calling reset_block_allocator(). + LOGINFO("Scenario 4: Simulate GC calling reset_block_allocator on the chunk"); + chunk->reset_block_allocator(); + auto alloc_after_reset = chunk->blk_allocator_shared(); + ASSERT_FALSE(old_alloc.owner_before(alloc_after_reset) || alloc_after_reset.owner_before(old_alloc)); + + // Simulate gc_repl_reqs calling free() on the old allocator reference after reset + LOGINFO("Scenario 4: Simulate gc_repl_reqs calling free() on old allocator reference after reset"); + BlkId fake_bid{0, 1, chunk_id}; + old_alloc->free(fake_bid); + + // Simulate cp_flush running on the old allocator reference. + // With fix: safe - m_sb is still valid, writes commit_offset=0 correctly. + // Without fix: UB - m_sb was destroyed in reset(), writing to it crashes. + LOGINFO("Scenario 4: Simulate cp_flush running on old allocator reference after reset and free"); + old_alloc->cp_flush(nullptr); + + LOGINFO("Scenario 4: PASSED - free() + cp_flush on old allocator after reset is safe"); +} + #endif // _PRERELEASE // Stream related test From 55ed28e9d05d246970d76d4d35183ff06b4bb222 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Sun, 10 May 2026 00:47:09 +0800 Subject: [PATCH 11/12] Enable ChainBuild with dynamic default branch detection Signed-off-by: Xiaoxi Chen --- .github/workflows/merge_build.yml | 47 ++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/.github/workflows/merge_build.yml b/.github/workflows/merge_build.yml index 5f4fc008f..681bf6b1d 100644 --- a/.github/workflows/merge_build.yml +++ b/.github/workflows/merge_build.yml @@ -36,16 +36,37 @@ jobs: malloc-impl: ${{ matrix.malloc-impl }} prerelease: ${{ matrix.prerelease }} tooling: ${{ matrix.tooling }} - # ChainBuild: - # runs-on: "ubuntu-22.04" - # steps: - # - name: Start HomeObject Build - # run: | - # curl -L \ - # -X POST \ - # -H "Accept: application/vnd.github+json" \ - # -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}"\ - # -H "X-GitHub-Api-Version: 2022-11-28" \ - # https://api.github.com/repos/eBay/homeobject/actions/workflows/conan_build.yml/dispatches \ - # -d '{"ref":"main","inputs":{}}' - # if: ${{ github.ref == 'refs/heads/master' }} + ChainBuild: + needs: Build + runs-on: "ubuntu-24.04" + steps: + - name: Get HomeObject default branch + id: get-branch + run: | + set -euo pipefail + + DEFAULT_BRANCH=$(curl --fail --show-error --silent --location \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/eBay/homeobject \ + | jq -r '.default_branch') + + if [ -z "$DEFAULT_BRANCH" ] || [ "$DEFAULT_BRANCH" = "null" ]; then + echo "Failed to determine HomeObject default branch from GitHub API response" >&2 + exit 1 + fi + + echo "default_branch=$DEFAULT_BRANCH" >> "$GITHUB_OUTPUT" + echo "HomeObject default branch: $DEFAULT_BRANCH" + + - name: Start HomeObject Build + run: | + curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/eBay/homeobject/actions/workflows/conan_build.yml/dispatches \ + -d '{"ref":"${{ steps.get-branch.outputs.default_branch }}","inputs":{}}' + if: ${{ github.ref == 'refs/heads/stable/v7.x' }} From ba36650db19b35b2cb51c4cbb1be402c9e186431 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Tue, 24 Feb 2026 15:02:28 +0800 Subject: [PATCH 12/12] Fix superblock IO error handling and add footer validation This change improves superblock error handling and integrity checking: - Fail fast on IO errors during superblock read instead of treating them as fresh/unformatted disks - Add footer superblock validation for HDD devices to detect corruption - Check header and footer write errors independently to prevent silent failures - Add comprehensive unit tests covering all error scenarios Changes: - read_first_block(): Now throws std::system_error on IO errors instead of returning garbage data - write_super_block(): Separately validates header and footer writes with independent error checking - sanity_check(): New method that validates footer consistency on HDD devices by comparing header and footer superblocks using full memcmp - Added 6 unit tests covering IO errors, corruption detection, and footer validation scenarios Use release assert instead of exceptions for superblock IO errors Per review feedback, replace exception throws with HS_REL_ASSERT for all superblock IO errors to ensure immediate crash on failure: - read_first_block(): Use HS_REL_ASSERT instead of throwing std::system_error - sanity_check(): Use HS_REL_ASSERT for header/footer read errors and mismatch - Update tests from ASSERT_THROW to ASSERT_DEATH to verify crash behavior --- conanfile.py | 2 +- src/lib/device/physical_dev.cpp | 57 +++++++- src/lib/device/physical_dev.hpp | 1 + src/tests/test_pdev.cpp | 221 ++++++++++++++++++++++++++++++++ 4 files changed, 274 insertions(+), 7 deletions(-) diff --git a/conanfile.py b/conanfile.py index 0d20a477d..b5db85ddd 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.9" + version = "7.5.10" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/device/physical_dev.cpp b/src/lib/device/physical_dev.cpp index ba52ba2f2..b151c308c 100644 --- a/src/lib/device/physical_dev.cpp +++ b/src/lib/device/physical_dev.cpp @@ -66,8 +66,11 @@ first_block PhysicalDev::read_first_block(const std::string& devname, int oflags first_block ret; auto buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, 512); - iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size, - hs_super_blk::first_block_offset()); + auto err = iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size, + hs_super_blk::first_block_offset()); + + HS_REL_ASSERT(!err, "IO error reading first block from device={}, error={}, homestore will go down", devname, + err.message()); ret = *(r_cast< first_block* >(buf)); hs_utils::iobuf_free(buf, sisl::buftag::superblk); @@ -114,20 +117,25 @@ PhysicalDev::PhysicalDev(const dev_info& dinfo, int oflags, const pdev_info_head m_streams.emplace_back(i); } m_super_blk_in_footer = m_pdev_info.mirror_super_block; + + // Validate footer superblock consistency if mirroring is enabled + sanity_check(); } PhysicalDev::~PhysicalDev() { close_device(); } void PhysicalDev::write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset) { auto err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, offset); + HS_REL_ASSERT(!err_c, "Super block write to header failed on dev={} at size={} offset={}, homestore will go down", + m_devname, sb_size, offset); if (m_super_blk_in_footer) { auto t_offset = data_end_offset() + offset; - err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset); + auto footer_err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset); + HS_REL_ASSERT(!footer_err_c, + "Super block write to footer failed on dev={} at size={} offset={}, homestore will go down", + m_devname, sb_size, t_offset); } - - HS_REL_ASSERT(!err_c, "Super block write failed on dev={} at size={} offset={}, homestore will go down", m_devname, - sb_size, offset); } std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset) { @@ -136,6 +144,43 @@ std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, ui void PhysicalDev::close_device() { close_and_uncache_dev(m_devname, m_iodev); } +void PhysicalDev::sanity_check() { + // Only validate footer if mirroring is enabled (HDD devices) + if (!m_super_blk_in_footer) { return; } + + HS_LOG(INFO, device, "Validating footer superblock consistency on device={}", m_devname); + + // Read header first block + auto header_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, + m_pdev_info.dev_attr.align_size); + auto header_err = read_super_block(header_buf, first_block::s_io_fb_size, hs_super_blk::first_block_offset()); + HS_REL_ASSERT(!header_err, + "IO error reading header first block during sanity check on device={}, error={}, homestore will go down", + m_devname, header_err.message()); + + // Read footer first block using the same offset calculation as write_super_block() + auto footer_offset = data_end_offset() + hs_super_blk::first_block_offset(); + auto footer_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, + m_pdev_info.dev_attr.align_size); + auto footer_err = read_super_block(footer_buf, first_block::s_io_fb_size, footer_offset); + HS_REL_ASSERT( + !footer_err, + "IO error reading footer first block during sanity check on device={}, offset={}, error={}, homestore will go down", + m_devname, footer_offset, footer_err.message()); + + // Compare header and footer + auto header_blk = r_cast< first_block* >(header_buf); + auto footer_blk = r_cast< first_block* >(footer_buf); + HS_REL_ASSERT(std::memcmp(header_blk, footer_blk, first_block::s_atomic_fb_size) == 0, + "Footer first block mismatch with header on device={}, header=[{}], footer=[{}], this indicates " + "corruption, homestore will go down", + m_devname, header_blk->to_string(), footer_blk->to_string()); + + hs_utils::iobuf_free(header_buf, sisl::buftag::superblk); + hs_utils::iobuf_free(footer_buf, sisl::buftag::superblk); + HS_LOG(INFO, device, "Footer superblock validated successfully on device={}", m_devname); +} + folly::Future< std::error_code > PhysicalDev::async_write(const char* data, uint32_t size, uint64_t offset, bool part_of_batch) { auto const start_time = get_current_time(); diff --git a/src/lib/device/physical_dev.hpp b/src/lib/device/physical_dev.hpp index f68ae14e6..0709e27d5 100644 --- a/src/lib/device/physical_dev.hpp +++ b/src/lib/device/physical_dev.hpp @@ -154,6 +154,7 @@ class PhysicalDev { std::error_code read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset); void write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset); + void sanity_check(); void close_device(); //////////////////////////// Chunk Creation/Load related methods ///////////////////////////////////////// diff --git a/src/tests/test_pdev.cpp b/src/tests/test_pdev.cpp index 4447c500b..e8003a445 100644 --- a/src/tests/test_pdev.cpp +++ b/src/tests/test_pdev.cpp @@ -262,6 +262,227 @@ TEST_F(PDevTest, RandomChunkOpsWithRestart) { num_removed, available_size); } +// Test fixture for superblock error handling tests +class SuperblockErrorTest : public ::testing::Test { +protected: + std::string m_test_file; + uint64_t m_dev_size{100 * 1024 * 1024}; // 100MB + + void SetUp() override { + m_test_file = "/tmp/test_superblock_error"; + init_file(m_test_file, m_dev_size); + + auto const is_spdk = SISL_OPTIONS["spdk"].as< bool >(); + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = is_spdk}); + } + + void TearDown() override { + iomanager.stop(); + if (std::filesystem::exists(m_test_file)) { + std::filesystem::remove(m_test_file); + } + } + + // Helper to corrupt a file at specific offset + void corrupt_file_at_offset(uint64_t offset, uint64_t size) { + std::fstream file(m_test_file, std::ios::binary | std::ios::in | std::ios::out); + ASSERT_TRUE(file.is_open()); + file.seekp(offset); + std::vector garbage(size, 0xAA); // Fill with garbage + file.write(reinterpret_cast(garbage.data()), size); + file.close(); + } + + // Helper to truncate file to simulate IO errors + void truncate_file(uint64_t new_size) { + std::filesystem::resize_file(m_test_file, new_size); + } +}; + +TEST_F(SuperblockErrorTest, ReadFirstBlockIOError) { + LOGINFO("Test: read_first_block should crash on IO error"); + + // Truncate the file to be too small to contain first block + truncate_file(512); // Less than first_block::s_io_fb_size (4096) + + // Attempt to read first block should crash with HS_REL_ASSERT + ASSERT_DEATH({ + PhysicalDev::read_first_block(m_test_file, O_RDWR); + }, "IO error reading first block"); +} + +TEST_F(SuperblockErrorTest, ReadFirstBlockCorruptedData) { + LOGINFO("Test: read_first_block should return invalid first_block on corrupted data"); + + // Fill the first block area with garbage + corrupt_file_at_offset(0, 4096); + + // Reading should succeed but return invalid first_block + ASSERT_NO_THROW({ + auto fblk = PhysicalDev::read_first_block(m_test_file, O_RDWR); + ASSERT_FALSE(fblk.is_valid()) << "Corrupted first block should be invalid"; + LOGINFO("Successfully read corrupted first block, is_valid={}", fblk.is_valid()); + }); +} + +TEST_F(SuperblockErrorTest, FooterValidationHDDDevice) { + LOGINFO("Test: Footer validation should detect header/footer mismatch on HDD"); + + // First, create a properly formatted device + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + // Get the pdev to check if it has footer mirroring + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + auto pdev = pdevs[0]; + + // For HDD devices (with footer mirroring), test footer validation + if (pdev->atomic_page_size() > 0) { + LOGINFO("Device has footer mirroring enabled, testing footer corruption detection"); + + dmgr.reset(); + iomanager.stop(); + + // Calculate footer offset: data_end_offset = devsize - data_offset + // Footer first block is at: data_end_offset + first_block_offset (0) + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto footer_offset = m_dev_size - data_offset; + + LOGINFO("Corrupting footer at offset={}", footer_offset); + corrupt_file_at_offset(footer_offset, 512); + + // Restart should crash because footer doesn't match header + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + }, "Footer first block mismatch"); + } else { + LOGINFO("Device does not have footer mirroring, skipping footer corruption test"); + } +} + +TEST_F(SuperblockErrorTest, FooterIOError) { + LOGINFO("Test: Footer read IO error should be caught during sanity_check"); + + // First, create a properly formatted device + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + auto pdev = pdevs[0]; + + // For HDD devices, test footer IO error + if (pdev->atomic_page_size() > 0) { + LOGINFO("Device has footer mirroring enabled, testing footer IO error"); + + dmgr.reset(); + iomanager.stop(); + + // Truncate file to cut off the footer area + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto truncate_size = data_offset + 1024; // Cut off before footer + + LOGINFO("Truncating file to size={} to cause footer IO error", truncate_size); + truncate_file(truncate_size); + + // Restart should crash because footer cannot be read + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + }, "IO error reading footer first block"); + } else { + LOGINFO("Device does not have footer mirroring, skipping footer IO error test"); + } +} + +TEST_F(SuperblockErrorTest, NonHDDDeviceSkipsFooterValidation) { + LOGINFO("Test: Non-HDD devices should skip footer validation"); + + // Create device as Fast type (SSD), which typically doesn't have footer mirroring + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Fast); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Fast); + ASSERT_GT(pdevs.size(), 0); + + // Should restart successfully even if we corrupt the footer area + dmgr.reset(); + iomanager.stop(); + + // Corrupt what would be the footer area + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto footer_offset = m_dev_size - data_offset; + corrupt_file_at_offset(footer_offset, 4096); + + // Should succeed because SSD doesn't validate footer + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_NO_THROW({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + LOGINFO("Successfully loaded device without footer validation"); + }); +} + +TEST_F(SuperblockErrorTest, ValidFooterMatchesHeader) { + LOGINFO("Test: Valid footer should match header on HDD device"); + + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + + // Restart should succeed with matching header and footer + dmgr.reset(); + iomanager.stop(); + + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_NO_THROW({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + LOGINFO("Successfully validated matching header and footer"); + }); +} + int main(int argc, char* argv[]) { SISL_OPTIONS_LOAD(argc, argv, logging, test_pdev, iomgr); ::testing::InitGoogleTest(&argc, argv);