diff --git a/.github/workflows/build_commit.yml b/.github/workflows/build_commit.yml index 17d159d56..3cf237356 100644 --- a/.github/workflows/build_commit.yml +++ b/.github/workflows/build_commit.yml @@ -21,9 +21,9 @@ on: jobs: SislDeps: - uses: eBay/sisl/.github/workflows/build_dependencies.yml@master + uses: eBay/sisl/.github/workflows/build_dependencies.yml@stable/v13.x with: - branch: master + branch: stable/v13.x platform: ${{ inputs.platform }} build-type: ${{ inputs.build-type }} malloc-impl: ${{ inputs.malloc-impl }} @@ -32,9 +32,9 @@ jobs: NuraftMesgDeps: needs: SislDeps - uses: eBay/nuraft_mesg/.github/workflows/build_dependencies.yml@main + uses: eBay/nuraft_mesg/.github/workflows/build_dependencies.yml@stable/v4.x with: - branch: main + branch: stable/v4.x platform: ${{ inputs.platform }} build-type: ${{ inputs.build-type }} malloc-impl: ${{ inputs.malloc-impl }} @@ -43,9 +43,9 @@ jobs: IOMgrDeps: needs: SislDeps - uses: eBay/iomanager/.github/workflows/build_dependencies.yml@master + uses: eBay/iomanager/.github/workflows/build_dependencies.yml@stable/v12.x with: - branch: master + branch: stable/v12.x platform: ${{ inputs.platform }} build-type: ${{ inputs.build-type }} malloc-impl: ${{ inputs.malloc-impl }} diff --git a/.github/workflows/build_dependencies.yml b/.github/workflows/build_dependencies.yml index 2c2876b03..c3070cb72 100644 --- a/.github/workflows/build_dependencies.yml +++ b/.github/workflows/build_dependencies.yml @@ -83,56 +83,56 @@ jobs: timeout-minutes: 1440 steps: - name: Retrieve Code - uses: actions/checkout@main + uses: actions/checkout@v4 with: ref: ${{ inputs.branch }} if: ${{ inputs.testing == 'True' }} - name: Retrieve Recipe - uses: actions/checkout@main + uses: actions/checkout@v4 with: repository: eBay/Homestore ref: ${{ inputs.branch }} if: ${{ inputs.testing == 'False' }} - name: Setup Conan - uses: ebay/sisl/.github/actions/setup_conan2@master + uses: ebay/sisl/.github/actions/setup_conan2@stable/v13.x with: platform: ${{ inputs.platform }} if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Load Homestore Cache id: restore-cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: ${{ inputs.testing }} key_prefix: HomestoreDeps-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }}-${{ inputs.prerelease }} - name: Load Sisl Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: load_any: 'True' key_prefix: SislDeps13-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }} if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Retrieve Dependencies - uses: actions/checkout@main + uses: actions/checkout@v4 with: repository: eBay/iomanager path: import/iomgr - ref: master + ref: stable/v12.x if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Retrieve Dependencies - uses: actions/checkout@main + uses: actions/checkout@v4 with: repository: eBay/nuraft_mesg path: import/nuraft_mesg - ref: main + ref: stable/v4.x if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Load IOMgr Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/iomgr @@ -141,7 +141,7 @@ jobs: if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Load NuraftMesg Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/nuraft_mesg @@ -160,8 +160,8 @@ jobs: sudo rm -rf /usr/lib/google-cloud-sdk python -m pip install pyelftools - conan export --user oss --channel master import/iomgr - conan export --user oss --channel main import/nuraft_mesg + conan export import/iomgr + conan export import/nuraft_mesg if: ${{ inputs.testing == 'True' || steps.restore-cache.outputs.cache-hit != 'true' }} - name: Build Cache @@ -182,20 +182,20 @@ jobs: if: ${{ steps.restore-cache.outputs.cache-hit != 'true' }} - name: Save Conan Cache - uses: eBay/sisl/.github/actions/store_conan2@master + uses: eBay/sisl/.github/actions/store_conan2@stable/v13.x with: key_prefix: HomestoreDeps-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }}-${{ inputs.prerelease }} if: ${{ github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }} - name: Reload Sisl Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: load_any: 'True' key_prefix: SislDeps13-${{ inputs.platform }}-${{ inputs.build-type }}-${{ inputs.malloc-impl }} if: ${{ inputs.testing == 'True' && github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }} - name: Reload IOMgr Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/iomgr @@ -204,7 +204,7 @@ jobs: if: ${{ inputs.testing == 'True' && github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }} - name: Reload NuraftMesg Cache - uses: eBay/sisl/.github/actions/load_conan2@master + uses: eBay/sisl/.github/actions/load_conan2@stable/v13.x with: testing: 'False' path: import/nuraft_mesg diff --git a/.github/workflows/merge_build.yml b/.github/workflows/merge_build.yml index 0c0fe0c12..681bf6b1d 100644 --- a/.github/workflows/merge_build.yml +++ b/.github/workflows/merge_build.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: push: branches: - - master + - stable/v7.x jobs: Build: @@ -36,16 +36,37 @@ jobs: malloc-impl: ${{ matrix.malloc-impl }} prerelease: ${{ matrix.prerelease }} tooling: ${{ matrix.tooling }} - # ChainBuild: - # runs-on: "ubuntu-22.04" - # steps: - # - name: Start HomeObject Build - # run: | - # curl -L \ - # -X POST \ - # -H "Accept: application/vnd.github+json" \ - # -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}"\ - # -H "X-GitHub-Api-Version: 2022-11-28" \ - # https://api.github.com/repos/eBay/homeobject/actions/workflows/conan_build.yml/dispatches \ - # -d '{"ref":"main","inputs":{}}' - # if: ${{ github.ref == 'refs/heads/master' }} + ChainBuild: + needs: Build + runs-on: "ubuntu-24.04" + steps: + - name: Get HomeObject default branch + id: get-branch + run: | + set -euo pipefail + + DEFAULT_BRANCH=$(curl --fail --show-error --silent --location \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/eBay/homeobject \ + | jq -r '.default_branch') + + if [ -z "$DEFAULT_BRANCH" ] || [ "$DEFAULT_BRANCH" = "null" ]; then + echo "Failed to determine HomeObject default branch from GitHub API response" >&2 + exit 1 + fi + + echo "default_branch=$DEFAULT_BRANCH" >> "$GITHUB_OUTPUT" + echo "HomeObject default branch: $DEFAULT_BRANCH" + + - name: Start HomeObject Build + run: | + curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.CHAIN_BUILD_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/eBay/homeobject/actions/workflows/conan_build.yml/dispatches \ + -d '{"ref":"${{ steps.get-branch.outputs.default_branch }}","inputs":{}}' + if: ${{ github.ref == 'refs/heads/stable/v7.x' }} diff --git a/.github/workflows/merge_build_3x.yml b/.github/workflows/merge_build_3x.yml deleted file mode 100644 index 2744dd33d..000000000 --- a/.github/workflows/merge_build_3x.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Homestore 3.x Build - -on: - workflow_dispatch: - push: - branches: - - stable/v3.x - -jobs: - Build: - strategy: - fail-fast: false - matrix: - platform: ["ubuntu-22.04", "ubuntu-20.04"] - build-type: ["Debug", "Release"] - malloc-impl: ["libc", "tcmalloc"] - prerelease: ["True", "False"] - exclude: - - build-type: Debug - platform: ubuntu-20.04 - - build-type: Debug - malloc-impl: tcmalloc - - malloc-impl: tcmalloc - platform: ubuntu-20.04 - - malloc-impl: libc - build-type: Release - platform: ubuntu-22.04 - - prerelease: "True" - platform: ubuntu-20.04 - uses: ./.github/workflows/build_commit.yml - with: - platform: ${{ matrix.platform }} - build-type: ${{ matrix.build-type }} - malloc-impl: ${{ matrix.malloc-impl }} - prerelease: ${{ matrix.prerelease }} diff --git a/.github/workflows/merge_build_6x.yml b/.github/workflows/merge_build_6x.yml deleted file mode 100644 index a2a9ea4af..000000000 --- a/.github/workflows/merge_build_6x.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Homestore 6.x Build - -on: - workflow_dispatch: - push: - branches: - - stable/v6.x - -jobs: - Build: - strategy: - fail-fast: false - matrix: - platform: ["ubuntu-22.04"] - build-type: ["Debug", "Release"] - malloc-impl: ["libc", "tcmalloc"] - prerelease: ["True", "False"] - tooling: ["Sanitize", "Coverage", "None"] - exclude: - - build-type: Debug - prerelease: "False" - - build-type: Debug - tooling: None - - build-type: Debug - malloc-impl: tcmalloc - - build-type: Release - malloc-impl: libc - - build-type: Release - tooling: Sanitize - - build-type: Release - tooling: Coverage - uses: eBay/homestore/.github/workflows/build_commit.yml@stable/v6.x - with: - platform: ${{ matrix.platform }} - build-type: ${{ matrix.build-type }} - malloc-impl: ${{ matrix.malloc-impl }} - prerelease: ${{ matrix.prerelease }} - tooling: ${{ matrix.tooling }} diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index e9b5896b6..2c77e78ea 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: pull_request: branches: - - master + - stable/v7.x jobs: Build: diff --git a/.github/workflows/version_change_check.yml b/.github/workflows/version_change_check.yml index 1e8552f6f..c007a8545 100644 --- a/.github/workflows/version_change_check.yml +++ b/.github/workflows/version_change_check.yml @@ -10,12 +10,18 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@main + uses: actions/checkout@v4 with: fetch-depth: 2 - - name: Check if file is modified + - name: Check if version bump is required run: | - if git diff -r HEAD^1 HEAD -- conanfile.py | egrep "[ ]+version = "; then + changed=$(git diff --name-only HEAD^1 HEAD) + non_test=$(echo "$changed" | grep -Ev '^src/tests/|CMakeLists\.txt$|^\.github/|^\.jenkins|\.md$' || true) + if [ -z "$non_test" ]; then + echo "Only test files changed — version bump not required" + exit 0 + fi + if git diff -r HEAD^1 HEAD -- conanfile.py | grep -E "[ ]+version = "; then echo "Version is updated with this PR, OK" else echo "Conan version is not updated with this PR. Please update that to allow PR merge" diff --git a/.gitignore b/.gitignore index 78b8a1f38..47363d6a8 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,6 @@ CMakeUserPresets.json # claude .claude/** + +# worktrees +.worktrees/ diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index 6e1879f96..beaad8e98 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -56,6 +56,7 @@ pipeline { steps { script { TAG = "${VER}@" + CONAN_FLAGS="--name ${PROJECT}" } } } @@ -64,7 +65,6 @@ pipeline { steps { sh "hostname ; \ echo $NODE_NAME ; \ - conan create ${BUILD_MISSING} -s:h build_type=Debug -o ${PROJECT}/*:sanitize=True ${CONAN_FLAGS} . ; \ conan create ${BUILD_MISSING} -s:h build_type=Debug ${CONAN_FLAGS} . ; \ conan create ${BUILD_MISSING} -s:h build_type=RelWithDebInfo -o sisl/*:malloc_impl=tcmalloc ${CONAN_FLAGS} . ; \ conan create ${BUILD_MISSING} -s:h build_type=RelWithDebInfo -o sisl/*:prerelease=True -o sisl/*:malloc_impl=tcmalloc ${CONAN_FLAGS} . ; \ diff --git a/conanfile.py b/conanfile.py index 448753f7f..b5db85ddd 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.6" + version = "7.5.10" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" @@ -48,17 +48,17 @@ def configure(self): raise ConanInvalidConfiguration("Coverage/Sanitizer requires Testing!") def build_requirements(self): - self.test_requires("benchmark/1.9.4") - self.test_requires("gtest/1.17.0") + self.test_requires("benchmark/[^1.9]") + self.test_requires("gtest/[^1.17]") def requirements(self): - self.requires("iomgr/[^12]@oss/master", transitive_headers=True) - self.requires("sisl/[^13.2.4]@oss/master", transitive_headers=True) - self.requires("nuraft_mesg/[^4]@oss/main", transitive_headers=True) + self.requires("iomgr/[^12.0]", transitive_headers=True) + self.requires("sisl/[^13.2]", transitive_headers=True) + self.requires("nuraft_mesg/[^4.0]", transitive_headers=True) self.requires("farmhash/cci.20190513@", transitive_headers=True) if self.settings.arch in ['x86', 'x86_64']: - self.requires("isa-l/2.30.0", transitive_headers=True) + self.requires("isa-l/[^2.30]", transitive_headers=True) # Tests require OpenSSL 3.x self.requires("openssl/[^3.1]", override=True) diff --git a/src/include/homestore/index/index_table.hpp b/src/include/homestore/index/index_table.hpp index 3075f00ab..b79381962 100644 --- a/src/include/homestore/index/index_table.hpp +++ b/src/include/homestore/index/index_table.hpp @@ -140,6 +140,13 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { auto cpg = cp_mgr().cp_guard(); Btree< K, V >::destroy_btree(cpg.context(cp_consumer_t::INDEX_SVC)); } + // Flush the current CP to completion before removing the superblock. + // This closes the journal–table mismatch window: any txn_journal entries + // for this table's ordinal are written and the CP is marked done on disk. + // If the process crashes after this point, recovery will not replay the + // old journal (CP is complete), so repair_index_node will never be called + // for an ordinal whose superblock no longer exists. + cp_mgr().trigger_cp_flush(true /* force */).get(); m_sb.destroy(); m_sb_buffer->m_valid = false; decr_pending_request_num(); diff --git a/src/lib/blkalloc/append_blk_allocator.cpp b/src/lib/blkalloc/append_blk_allocator.cpp index 50b6e81cb..40a718b92 100644 --- a/src/lib/blkalloc/append_blk_allocator.cpp +++ b/src/lib/blkalloc/append_blk_allocator.cpp @@ -155,17 +155,30 @@ bool AppendBlkAllocator::is_blk_alloced(const BlkId& in_bid, bool) const { } void AppendBlkAllocator::reset() { + // Reset in-place: restore all in-memory counters to the initial empty state and mark dirty + // so the next cp_flush persists commit_offset=0 to the existing superblock. + // + // We intentionally do NOT destroy the superblock or deregister the handler here, unlike + // BitmapBlkAllocator::reset(). AppendBlkAllocator is managed by the upper layer (HomeObject + // GC), which reuses the same chunk across multiple GC cycles. Keeping the same allocator + // instance alive avoids the race described in https://github.com/eBay/HomeObject/issues/401: + // + // T1: cp_flush holds a shared_ptr to the old allocator + // T2: reset() destroys m_sb [old behavior] + // T3: gc_repl_reqs calls free() on old allocator -> m_is_dirty=true + // T4: cp_flush writes to the destroyed m_sb -> crash + // + // With in-place reset the superblock is always valid, so T4 is safe regardless of T3. std::lock_guard lg(m_sb_mtx); - m_is_dirty.store(false); - m_sb.destroy(); - meta_service().deregister_handler(get_name()); + m_last_append_offset.store(0); + m_freeable_nblks.store(0); + m_commit_offset.store(0); + m_is_dirty.store(true); } bool AppendBlkAllocator::is_blk_alloced_on_disk(BlkId const& bid, bool use_lock) const { std::lock_guard lg(m_sb_mtx); - if (!m_sb.is_empty()) { return bid.blk_num() < m_sb->commit_offset; } - // if allocator is reset, the sb will be destroyed, and all the blks will be treated as free; - return false; + return bid.blk_num() < m_sb->commit_offset; } std::string AppendBlkAllocator::get_name() const { return "AppendBlkAlloc_chunk_" + std::to_string(m_chunk_id); } diff --git a/src/lib/common/homestore_assert.hpp b/src/lib/common/homestore_assert.hpp index 9ffa46273..690021f1a 100644 --- a/src/lib/common/homestore_assert.hpp +++ b/src/lib/common/homestore_assert.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -143,21 +144,63 @@ fmt::make_format_args(detail_name, detail_val)))) \ (); \ fmt::vformat_to(fmt::appender{buf}, fmt::string_view{msgcb}, fmt::make_format_args(args...)); \ - const auto count{check_logged_already(buf)}; \ - if (count % freq == 0) { \ - if (count) { \ - fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Repeated {} times in this thread"}, \ - fmt::make_format_args(freq)); \ - } \ - return true; \ - } \ - return false; \ + return check_and_format_log(buf, freq, 0); \ }), \ msg, ##__VA_ARGS__); \ } #define HS_LOG_EVERY_N(level, mod, freq, msg, ...) HS_DETAILED_LOG_EVERY_N(level, mod, freq, , , , , msg, ##__VA_ARGS__) +#define HS_DETAILED_LOG_EVERY_N_SEC(level, mod, interval_sec, submod_name, submod_val, detail_name, detail_val, msg, \ + ...) \ + { \ + LOG##level##MOD_FMT( \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(mod), base, mod), \ + ([&](fmt::memory_buffer& buf, const char* const msgcb, auto&&... args) -> bool { \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}:{}] "}, \ + fmt::make_format_args(unmove(file_name(__FILE__)), unmove(__LINE__))); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(submod_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(submod_name, submod_val)))) \ + (); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(detail_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(detail_name, detail_val)))) \ + (); \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{msgcb}, fmt::make_format_args(args...)); \ + return check_and_format_log(buf, 0, interval_sec); \ + }), \ + msg, ##__VA_ARGS__); \ + } + +#define HS_LOG_EVERY_N_SEC(level, mod, interval_sec, msg, ...) \ + HS_DETAILED_LOG_EVERY_N_SEC(level, mod, interval_sec, , , , , msg, ##__VA_ARGS__) + +#define HS_DETAILED_LOG_EVERY_N_OR_SEC(level, mod, freq, interval_sec, submod_name, submod_val, detail_name, \ + detail_val, msg, ...) \ + { \ + LOG##level##MOD_FMT( \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(mod), base, mod), \ + ([&](fmt::memory_buffer& buf, const char* const msgcb, auto&&... args) -> bool { \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}:{}] "}, \ + fmt::make_format_args(unmove(file_name(__FILE__)), unmove(__LINE__))); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(submod_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(submod_name, submod_val)))) \ + (); \ + BOOST_PP_IF(BOOST_VMD_IS_EMPTY(detail_name), BOOST_PP_EMPTY, \ + BOOST_PP_IDENTITY(fmt::vformat_to(fmt::appender{buf}, fmt::string_view{"[{}={}] "}, \ + fmt::make_format_args(detail_name, detail_val)))) \ + (); \ + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{msgcb}, fmt::make_format_args(args...)); \ + return check_and_format_log(buf, freq, interval_sec); \ + }), \ + msg, ##__VA_ARGS__); \ + } + +#define HS_LOG_EVERY_N_OR_SEC(level, mod, freq, interval_sec, msg, ...) \ + HS_DETAILED_LOG_EVERY_N_OR_SEC(level, mod, freq, interval_sec, , , , , msg, ##__VA_ARGS__) + #define HS_SUBMOD_LOG(level, mod, req, submod_name, submod_val, msg, ...) \ HS_DETAILED_LOG(level, mod, req, submod_name, submod_val, , , msg, ##__VA_ARGS__) #define HS_REQ_LOG(level, mod, req, msg, ...) HS_SUBMOD_LOG(level, mod, req, , , msg, ##__VA_ARGS__) @@ -310,16 +353,86 @@ #define HS_REL_ASSERT_NULL(val, ...) HS_ASSERT_NULL(RELEASE_ASSERT_CMP, val, ##__VA_ARGS__) #define HS_REL_ASSERT_NOTNULL(val, ...) HS_ASSERT_NOTNULL(RELEASE_ASSERT_CMP, val, ##__VA_ARGS__) -[[maybe_unused]] static uint64_t check_logged_already(const fmt::memory_buffer& buf) { +/** + * Rate-limited logging helper with count-based and time-based controls. + * + * State is cleaned up every 300 seconds to prevent unbounded memory growth. + * This means time-based rate limiting works correctly only when interval_sec < 300. + * If interval_sec >= 300, the behavior effectively becomes "log first occurrence after each 5min reset." + */ +[[maybe_unused]] static bool check_and_format_log(fmt::memory_buffer& buf, uint64_t freq = 0, + uint64_t interval_sec = 0) { static constexpr uint64_t COUNTER_RESET_SEC{300}; // Reset every 5 minutes - static thread_local std::unordered_map< std::string, std::pair< Clock::time_point, uint64_t > > log_map{}; - - const std::string_view msg{buf.data()}; - auto [it, happened] = log_map.emplace(msg, std::make_pair(Clock::now(), 0)); - HS_REL_ASSERT(it != std::cend(log_map), "Expected entry to be either present or new insertion to succeed"); - auto& [tm, count] = it->second; - auto result = count; - count = (get_elapsed_time_sec(tm) > COUNTER_RESET_SEC) ? static_cast< decltype(count) >(0) : count + 1; - tm = Clock::now(); - return result; + static thread_local Clock::time_point last_cleanup{Clock::now()}; + static thread_local std::unordered_map< size_t, std::pair< uint32_t, uint64_t > > log_map{}; + // hash -> (last_log_ms, count) + + // Warn once if interval_sec exceeds cleanup period + if (interval_sec > COUNTER_RESET_SEC) { + static thread_local bool warned = false; + if (!warned) { + LOGWARN("interval_sec={} exceeds cleanup period ({}s) - time-based rate limiting may not work as expected", + interval_sec, COUNTER_RESET_SEC); + warned = true; + } + } + + const auto now = Clock::now(); + if (get_elapsed_time_sec(last_cleanup) > COUNTER_RESET_SEC) { + std::unordered_map< size_t, std::pair< uint32_t, uint64_t > >().swap(log_map); // Actually release memory + last_cleanup = now; + } + + // Hash the buffer content BEFORE appending any suffix + const std::string_view msg{buf.data(), buf.size()}; + const size_t msg_hash = std::hash< std::string_view >{}(msg); + + // Milliseconds since last cleanup (max ~49 days with uint32_t) + const uint32_t now_ms = + std::chrono::duration_cast< std::chrono::milliseconds >(now - last_cleanup).count(); + + auto [it, happened] = log_map.emplace(msg_hash, std::make_pair(now_ms, 0)); + uint32_t elapsed_ms = 0; + uint64_t count = 0; + + if (!happened) { + // Entry exists - increment count and calculate elapsed time + elapsed_ms = now_ms - it->second.first; // Time since last log emission + it->second.second++; // Increment count + count = it->second.second; + } + + // Decide if we should log + bool should_log = false; + if (happened) { + // Always log first occurrence (new entry) + should_log = true; + } else if (freq == 0 && interval_sec == 0) { + // Both rate limiters disabled: always log (fallback behavior) + should_log = true; + } else if (freq > 0 && count % freq == 0) { + // Count-based: log every Nth occurrence + should_log = true; + } else if (interval_sec > 0 && elapsed_ms >= static_cast< uint32_t >(interval_sec) * 1000) { + // Time-based: log if enough time has passed since last emission + should_log = true; + } + + // If logging, update timestamp and append suffix + if (should_log) { + // Append formatted suffix if not first occurrence + if (!happened && count > 0) { + // Always show elapsed time and count since last log + fmt::vformat_to(fmt::appender{buf}, fmt::string_view{" ...Last logged {}ms ago, {} occurrences"}, + fmt::make_format_args(elapsed_ms, count)); + } + + // Update state after logging (so next log shows "since this log") + if (!happened) { + it->second.first = now_ms; // Update timestamp + it->second.second = 0; // Reset count (next occurrence will be "1 since this log") + } + } + + return should_log; } diff --git a/src/lib/device/physical_dev.cpp b/src/lib/device/physical_dev.cpp index ba52ba2f2..b151c308c 100644 --- a/src/lib/device/physical_dev.cpp +++ b/src/lib/device/physical_dev.cpp @@ -66,8 +66,11 @@ first_block PhysicalDev::read_first_block(const std::string& devname, int oflags first_block ret; auto buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, 512); - iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size, - hs_super_blk::first_block_offset()); + auto err = iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size, + hs_super_blk::first_block_offset()); + + HS_REL_ASSERT(!err, "IO error reading first block from device={}, error={}, homestore will go down", devname, + err.message()); ret = *(r_cast< first_block* >(buf)); hs_utils::iobuf_free(buf, sisl::buftag::superblk); @@ -114,20 +117,25 @@ PhysicalDev::PhysicalDev(const dev_info& dinfo, int oflags, const pdev_info_head m_streams.emplace_back(i); } m_super_blk_in_footer = m_pdev_info.mirror_super_block; + + // Validate footer superblock consistency if mirroring is enabled + sanity_check(); } PhysicalDev::~PhysicalDev() { close_device(); } void PhysicalDev::write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset) { auto err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, offset); + HS_REL_ASSERT(!err_c, "Super block write to header failed on dev={} at size={} offset={}, homestore will go down", + m_devname, sb_size, offset); if (m_super_blk_in_footer) { auto t_offset = data_end_offset() + offset; - err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset); + auto footer_err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset); + HS_REL_ASSERT(!footer_err_c, + "Super block write to footer failed on dev={} at size={} offset={}, homestore will go down", + m_devname, sb_size, t_offset); } - - HS_REL_ASSERT(!err_c, "Super block write failed on dev={} at size={} offset={}, homestore will go down", m_devname, - sb_size, offset); } std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset) { @@ -136,6 +144,43 @@ std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, ui void PhysicalDev::close_device() { close_and_uncache_dev(m_devname, m_iodev); } +void PhysicalDev::sanity_check() { + // Only validate footer if mirroring is enabled (HDD devices) + if (!m_super_blk_in_footer) { return; } + + HS_LOG(INFO, device, "Validating footer superblock consistency on device={}", m_devname); + + // Read header first block + auto header_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, + m_pdev_info.dev_attr.align_size); + auto header_err = read_super_block(header_buf, first_block::s_io_fb_size, hs_super_blk::first_block_offset()); + HS_REL_ASSERT(!header_err, + "IO error reading header first block during sanity check on device={}, error={}, homestore will go down", + m_devname, header_err.message()); + + // Read footer first block using the same offset calculation as write_super_block() + auto footer_offset = data_end_offset() + hs_super_blk::first_block_offset(); + auto footer_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, + m_pdev_info.dev_attr.align_size); + auto footer_err = read_super_block(footer_buf, first_block::s_io_fb_size, footer_offset); + HS_REL_ASSERT( + !footer_err, + "IO error reading footer first block during sanity check on device={}, offset={}, error={}, homestore will go down", + m_devname, footer_offset, footer_err.message()); + + // Compare header and footer + auto header_blk = r_cast< first_block* >(header_buf); + auto footer_blk = r_cast< first_block* >(footer_buf); + HS_REL_ASSERT(std::memcmp(header_blk, footer_blk, first_block::s_atomic_fb_size) == 0, + "Footer first block mismatch with header on device={}, header=[{}], footer=[{}], this indicates " + "corruption, homestore will go down", + m_devname, header_blk->to_string(), footer_blk->to_string()); + + hs_utils::iobuf_free(header_buf, sisl::buftag::superblk); + hs_utils::iobuf_free(footer_buf, sisl::buftag::superblk); + HS_LOG(INFO, device, "Footer superblock validated successfully on device={}", m_devname); +} + folly::Future< std::error_code > PhysicalDev::async_write(const char* data, uint32_t size, uint64_t offset, bool part_of_batch) { auto const start_time = get_current_time(); diff --git a/src/lib/device/physical_dev.hpp b/src/lib/device/physical_dev.hpp index f68ae14e6..0709e27d5 100644 --- a/src/lib/device/physical_dev.hpp +++ b/src/lib/device/physical_dev.hpp @@ -154,6 +154,7 @@ class PhysicalDev { std::error_code read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset); void write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset); + void sanity_check(); void close_device(); //////////////////////////// Chunk Creation/Load related methods ///////////////////////////////////////// diff --git a/src/lib/device/virtual_dev.cpp b/src/lib/device/virtual_dev.cpp index 73d39fc55..b3c4bb3ac 100644 --- a/src/lib/device/virtual_dev.cpp +++ b/src/lib/device/virtual_dev.cpp @@ -129,6 +129,16 @@ void VirtualDev::add_chunk(cshared< Chunk >& chunk, bool is_fresh_chunk) { } void VirtualDev::reset_chunk_blk_allocator(Chunk* chunk) { + if (m_allocator_type == blk_allocator_type_t::append) { + // AppendBlkAllocator resets in-place via its reset() method and no new allocator instance is created. + // Creating a new instance would require deregistering the old handler and registering a + // new one, which introduces a window where a concurrent cp_flush holding the old + // allocator's shared_ptr could write to an already-destroyed superblock. + // See https://github.com/eBay/HomeObject/issues/401. + + LOGINFO("AppendBlkAllocator resets in-place, skip creating new instance for chunk {}", chunk->chunk_id()); + return; + } auto ba = create_blk_allocator(m_allocator_type, block_size(), chunk->physical_dev()->optimal_page_size(), chunk->physical_dev()->align_size(), chunk->size(), m_auto_recovery, chunk->chunk_id(), true /* is_init */, m_use_slab_in_blk_allocator); @@ -743,8 +753,6 @@ void VirtualDev::cp_flush(VDevCPContext* v_cp_ctx) { // pass down cp so that underlying components can get their customized CP context if needed; m_chunk_selector->foreach_chunks([this, cp](cshared< Chunk >& chunk) { HS_LOG(TRACE, device, "Flushing chunk: {}, vdev: {}", chunk->chunk_id(), m_vdev_info.name); - // Hold shared_ptr to prevent allocator from being reset during cp_flush. - // This fixes race with GC's purge_reserved_chunk() which calls reset_block_allocator(). auto alloc_ptr = chunk->blk_allocator_shared(); #ifdef _PRERELEASE diff --git a/src/tests/test_data_service.cpp b/src/tests/test_data_service.cpp index 53b5fe19d..8efad5ba6 100644 --- a/src/tests/test_data_service.cpp +++ b/src/tests/test_data_service.cpp @@ -1123,8 +1123,8 @@ TEST_F(BlkDataServiceAppendTest, TestResetCompletedBeforeCpFlush) { LOGINFO("Scenario 1: PASSED - Order: get_ptr -> reset_block_allocator -> cp_flush"); } -// Scenario 2: Order: cp_flush holds shared_ptr -> old allocator reset -> cp_flush on old -> create new allocator -// Test that cp_flush executes after old allocator reset, then new allocator is created +// Scenario 2: Order: cp_flush holds shared_ptr -> allocator reset (in-place) -> cp_flush completes on same allocator +// Test that cp_flush completes safely after in-place reset of the same allocator TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { vdev_info vinfo; auto data_vdev = inst().open_vdev(vinfo, true); @@ -1185,12 +1185,12 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { LOGINFO("Scenario 2: Old allocator reset() completed"); old_allocator_reset.store(true); - // Wait for cp_flush to complete on old allocator before creating new one + // Wait for cp_flush to complete before reset thread continues for (int i = 0; i < 200 && !cp_flush_completed.load(); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } - RELEASE_ASSERT(cp_flush_completed.load(), "cp_flush should complete before creating new allocator"); - LOGINFO("Scenario 2: cp_flush completed, proceeding to create new allocator"); + RELEASE_ASSERT(cp_flush_completed.load(), "cp_flush should complete before reset thread continues"); + LOGINFO("Scenario 2: cp_flush completed, reset thread proceeding"); })); std::thread flush_thread([&]() { @@ -1205,7 +1205,7 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { std::thread reset_thread([&]() { chunk->reset_block_allocator(); reset_completed.store(true); - LOGINFO("Scenario 2: reset_block_allocator completed (new allocator created)"); + LOGINFO("Scenario 2: reset_block_allocator completed (in-place reset)"); }); flush_thread.join(); @@ -1219,7 +1219,7 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) { ASSERT_TRUE(cp_flush_completed.load()); ASSERT_TRUE(reset_completed.load()); ASSERT_TRUE(old_allocator_reset.load()); - LOGINFO("Scenario 2: PASSED - Order: get_ptr -> old_allocator_reset -> cp_flush -> create new allocator"); + LOGINFO("Scenario 2: PASSED - Order: get_ptr -> allocator_reset (in-place) -> cp_flush on same allocator"); } // Scenario 3: Order: cp_flush acquires mutex first -> reset_block_allocator blocks on mutex @@ -1299,6 +1299,52 @@ TEST_F(BlkDataServiceAppendTest, TestCpFlushBlocksResetWithMutex) { LOGINFO("Scenario 3: PASSED - mutex protected m_sb from concurrent reset_block_allocator during cp_flush"); } +// Scenario 4: Order: cp_flush holds shared_ptr -> reset() completes -> free() re-sets dirty on old allocator +// -> cp_flush runs on old allocator +// Test the race from https://github.com/eBay/HomeObject/issues/401: +// with the old AppendBlkAllocator::reset() (destroy + recreate): +// - reset() destroys m_sb of the old allocator +// - free() sets m_is_dirty=true on the old allocator (whose m_sb is already destroyed) +// - cp_flush writes to the destroyed m_sb -> crash / undefined behavior +TEST_F(BlkDataServiceAppendTest, TestFreeAfterResetBeforeCpFlushSafe) { + vdev_info vinfo; + auto data_vdev = inst().open_vdev(vinfo, true); + auto chunks = data_vdev->get_chunks(); + + auto it = std::find_if(chunks.begin(), chunks.end(), [](const auto& pair) { + return pair.second && pair.second->blk_allocator() && + pair.second->blk_allocator()->get_name().find("append_chunk_") == 0; + }); + ASSERT_NE(it, chunks.end()) << "No AppendBlkAllocator chunks found"; + auto chunk = it->second; + auto chunk_id = chunk->chunk_id(); + + LOGINFO("Scenario 4: Testing chunk_id={}", chunk_id); + + // Simulate cp_flush holding a reference to the old allocator before reset begins. + LOGINFO("Scenario 4: Simulate cp_flush acquiring shared_ptr to old allocator before reset"); + auto old_alloc = chunk->blk_allocator_shared(); + + // Simulate GC's purge_reserved_chunk calling reset_block_allocator(). + LOGINFO("Scenario 4: Simulate GC calling reset_block_allocator on the chunk"); + chunk->reset_block_allocator(); + auto alloc_after_reset = chunk->blk_allocator_shared(); + ASSERT_FALSE(old_alloc.owner_before(alloc_after_reset) || alloc_after_reset.owner_before(old_alloc)); + + // Simulate gc_repl_reqs calling free() on the old allocator reference after reset + LOGINFO("Scenario 4: Simulate gc_repl_reqs calling free() on old allocator reference after reset"); + BlkId fake_bid{0, 1, chunk_id}; + old_alloc->free(fake_bid); + + // Simulate cp_flush running on the old allocator reference. + // With fix: safe - m_sb is still valid, writes commit_offset=0 correctly. + // Without fix: UB - m_sb was destroyed in reset(), writing to it crashes. + LOGINFO("Scenario 4: Simulate cp_flush running on old allocator reference after reset and free"); + old_alloc->cp_flush(nullptr); + + LOGINFO("Scenario 4: PASSED - free() + cp_flush on old allocator after reset is safe"); +} + #endif // _PRERELEASE // Stream related test diff --git a/src/tests/test_index_crash_recovery.cpp b/src/tests/test_index_crash_recovery.cpp index 32dc3141a..0d90cfa9d 100644 --- a/src/tests/test_index_crash_recovery.cpp +++ b/src/tests/test_index_crash_recovery.cpp @@ -377,6 +377,18 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT LOGINFO("destroy btree - erase shadow map {}", m_shadow_filename); } + // Install a brand-new empty btree (without destroying the old one first). + // Used after recovery to replace a stale/destroyed m_bt so TearDown() is safe. + void install_fresh_btree(uint32_t erase_up_to_key = 0) { + auto uuid = boost::uuids::random_generator()(); + auto parent_uuid = boost::uuids::random_generator()(); + this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); + hs()->index_service().add_index_table(this->m_bt); + if (erase_up_to_key > 0) { this->m_shadow_map.range_erase(0, erase_up_to_key - 1); } + this->m_shadow_map.save(m_shadow_filename); + LOGINFO("Installed fresh btree with uuid {}", boost::uuids::to_string(uuid)); + } + void restart_homestore(uint32_t shutdown_delay_sec = 3) override { this->params(HS_SERVICE::INDEX).index_svc_cbs = new TestIndexServiceCallbacks(this); LOGINFO("\n\n\n\n\n\n shutdown homestore for index service Test\n\n\n\n\n"); @@ -1001,6 +1013,82 @@ TYPED_TEST(IndexCrashTest, MergeRemoveBasic) { } } +// Regression test for the bug: when an index table is destroyed concurrently with a CP flush, +// the txn_journal (written at the START of the flush) can contain entries for the destroyed +// table's ordinal while the table's meta superblock is already gone. On recovery the journal +// is replayed but the table cannot be found in m_ordinal_index_map, causing +// repair_index_node / sanity_check to assert. +// +// Sequence that triggers the bug: +// 1. Build a multi-level tree and flush a baseline CP. +// 2. Set a crash flip so the next CP crashes mid-flush (after writing the journal). +// 3. Insert more keys → splits happen → transact_bufs is called → txn_journal entry added +// for this table's ordinal AND m_crash_flag_on is set on the split parent buf. +// 4. Destroy the table: meta superblock removed from disk, but the dirty split bufs +// (with m_crash_flag_on) remain in the CP dirty list. +// 5. Trigger CP → journal written to disk → do_flush_one_buf hits the flagged buf → crash. +// 6. Recovery: journal has entries for the destroyed table → without fix: assert/abort. +// With fix: skip gracefully. +TYPED_TEST(IndexCrashTest, DestroyTableWithPendingCpCrash) { + // Use a small fixed preload to avoid triggering unrelated block-allocator issues + // that surface when inserting hundreds of consecutive right-edge keys into a + // fully-flushed large tree. With max_keys_in_node=20 and 100 preloaded keys + // we get 5 full leaf nodes; inserting 20 more triggers 1-2 leaf splits which is + // enough to (a) populate a txn_journal entry and (b) mark a parent buffer for crash. + static constexpr uint32_t preload_size = 100; + static constexpr uint32_t extra_keys = 20; + + // Step 1: Build a small tree and flush a clean baseline CP. + LOGINFO("Step 1: Preload {} keys and flush baseline CP", preload_size); + for (auto k = 0u; k < preload_size; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + test_common::HSTestHelper::trigger_cp(true); + this->m_shadow_map.save(this->m_shadow_filename); + + // Step 2: Set the crash flip BEFORE the operations that will cause splits. + // When a split occurs, transact_bufs checks this flip and sets m_crash_flag_on on + // the parent buf. The actual crash fires later when the CP flushes that buf. + LOGINFO("Step 2: Set crash flip on split-at-parent"); + this->set_basic_flip("crash_flush_on_split_at_parent"); + + // Step 3: Insert extra keys to cause splits and populate the txn_journal. + // The rightmost leaf is full after the preload, so the first new key triggers a + // split → transact_bufs is called → crash flip fires once → m_crash_flag_on set + // on the parent buf; subsequent splits (if any) accumulate more journal entries + // but no additional crash flags (flip fires only once, count=1). + LOGINFO("Step 3: Insert {} extra keys to cause splits (txn_journal populated)", extra_keys); + for (auto k = preload_size; k < preload_size + extra_keys; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + + // Step 4: Destroy the table. m_sb.destroy() removes the meta superblock from disk. + // The dirty split bufs (including the one with m_crash_flag_on) remain in the CP + // dirty list because free_buf only marks m_node_freed — it does not touch the list. + LOGINFO("Step 4: Destroy index table — meta superblock removed from disk"); + hs()->index_service().remove_index_table(this->m_bt); + this->m_bt->destroy(); + + // Step 5: Trigger the CP (do not wait). async_cp_flush will: + // (a) write the txn_journal to disk (entries include the destroyed table's ordinal), + // (b) start flushing nodes, hit the buf with m_crash_flag_on → simulated crash. + LOGINFO("Step 5: Trigger CP (crash expected after journal is written)"); + test_common::HSTestHelper::trigger_cp(false /* no wait */); + + // Step 6: Wait for the simulated crash and homestore recovery. + // Without fix : repair_index_node / sanity_check asserts → process aborts (SIGABRT). + // With fix : missing table is skipped gracefully and recovery completes normally. + LOGINFO("Step 6: Waiting for crash-recovery"); + this->wait_for_crash_recovery(true /* check_will_crash */); + LOGINFO("Step 6: Recovery succeeded without aborting — destroyed table was handled gracefully"); + + // Step 7: Install a fresh empty btree so that the shared TearDown() can safely call + // root_node_id() and tree_key_count() without hitting the stale destroyed btree. + LOGINFO("Step 7: Installing a fresh empty btree for teardown"); + this->install_fresh_btree(preload_size + extra_keys); + test_common::HSTestHelper::trigger_cp(true); +} + TYPED_TEST(IndexCrashTest, MetricsTest) { const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >(); std::vector< uint32_t > vec(num_entries); diff --git a/src/tests/test_pdev.cpp b/src/tests/test_pdev.cpp index 4447c500b..e8003a445 100644 --- a/src/tests/test_pdev.cpp +++ b/src/tests/test_pdev.cpp @@ -262,6 +262,227 @@ TEST_F(PDevTest, RandomChunkOpsWithRestart) { num_removed, available_size); } +// Test fixture for superblock error handling tests +class SuperblockErrorTest : public ::testing::Test { +protected: + std::string m_test_file; + uint64_t m_dev_size{100 * 1024 * 1024}; // 100MB + + void SetUp() override { + m_test_file = "/tmp/test_superblock_error"; + init_file(m_test_file, m_dev_size); + + auto const is_spdk = SISL_OPTIONS["spdk"].as< bool >(); + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = is_spdk}); + } + + void TearDown() override { + iomanager.stop(); + if (std::filesystem::exists(m_test_file)) { + std::filesystem::remove(m_test_file); + } + } + + // Helper to corrupt a file at specific offset + void corrupt_file_at_offset(uint64_t offset, uint64_t size) { + std::fstream file(m_test_file, std::ios::binary | std::ios::in | std::ios::out); + ASSERT_TRUE(file.is_open()); + file.seekp(offset); + std::vector garbage(size, 0xAA); // Fill with garbage + file.write(reinterpret_cast(garbage.data()), size); + file.close(); + } + + // Helper to truncate file to simulate IO errors + void truncate_file(uint64_t new_size) { + std::filesystem::resize_file(m_test_file, new_size); + } +}; + +TEST_F(SuperblockErrorTest, ReadFirstBlockIOError) { + LOGINFO("Test: read_first_block should crash on IO error"); + + // Truncate the file to be too small to contain first block + truncate_file(512); // Less than first_block::s_io_fb_size (4096) + + // Attempt to read first block should crash with HS_REL_ASSERT + ASSERT_DEATH({ + PhysicalDev::read_first_block(m_test_file, O_RDWR); + }, "IO error reading first block"); +} + +TEST_F(SuperblockErrorTest, ReadFirstBlockCorruptedData) { + LOGINFO("Test: read_first_block should return invalid first_block on corrupted data"); + + // Fill the first block area with garbage + corrupt_file_at_offset(0, 4096); + + // Reading should succeed but return invalid first_block + ASSERT_NO_THROW({ + auto fblk = PhysicalDev::read_first_block(m_test_file, O_RDWR); + ASSERT_FALSE(fblk.is_valid()) << "Corrupted first block should be invalid"; + LOGINFO("Successfully read corrupted first block, is_valid={}", fblk.is_valid()); + }); +} + +TEST_F(SuperblockErrorTest, FooterValidationHDDDevice) { + LOGINFO("Test: Footer validation should detect header/footer mismatch on HDD"); + + // First, create a properly formatted device + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + // Get the pdev to check if it has footer mirroring + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + auto pdev = pdevs[0]; + + // For HDD devices (with footer mirroring), test footer validation + if (pdev->atomic_page_size() > 0) { + LOGINFO("Device has footer mirroring enabled, testing footer corruption detection"); + + dmgr.reset(); + iomanager.stop(); + + // Calculate footer offset: data_end_offset = devsize - data_offset + // Footer first block is at: data_end_offset + first_block_offset (0) + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto footer_offset = m_dev_size - data_offset; + + LOGINFO("Corrupting footer at offset={}", footer_offset); + corrupt_file_at_offset(footer_offset, 512); + + // Restart should crash because footer doesn't match header + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + }, "Footer first block mismatch"); + } else { + LOGINFO("Device does not have footer mirroring, skipping footer corruption test"); + } +} + +TEST_F(SuperblockErrorTest, FooterIOError) { + LOGINFO("Test: Footer read IO error should be caught during sanity_check"); + + // First, create a properly formatted device + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + auto pdev = pdevs[0]; + + // For HDD devices, test footer IO error + if (pdev->atomic_page_size() > 0) { + LOGINFO("Device has footer mirroring enabled, testing footer IO error"); + + dmgr.reset(); + iomanager.stop(); + + // Truncate file to cut off the footer area + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto truncate_size = data_offset + 1024; // Cut off before footer + + LOGINFO("Truncating file to size={} to cause footer IO error", truncate_size); + truncate_file(truncate_size); + + // Restart should crash because footer cannot be read + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + }, "IO error reading footer first block"); + } else { + LOGINFO("Device does not have footer mirroring, skipping footer IO error test"); + } +} + +TEST_F(SuperblockErrorTest, NonHDDDeviceSkipsFooterValidation) { + LOGINFO("Test: Non-HDD devices should skip footer validation"); + + // Create device as Fast type (SSD), which typically doesn't have footer mirroring + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Fast); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Fast); + ASSERT_GT(pdevs.size(), 0); + + // Should restart successfully even if we corrupt the footer area + dmgr.reset(); + iomanager.stop(); + + // Corrupt what would be the footer area + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto footer_offset = m_dev_size - data_offset; + corrupt_file_at_offset(footer_offset, 4096); + + // Should succeed because SSD doesn't validate footer + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_NO_THROW({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + LOGINFO("Successfully loaded device without footer validation"); + }); +} + +TEST_F(SuperblockErrorTest, ValidFooterMatchesHeader) { + LOGINFO("Test: Valid footer should match header on HDD device"); + + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + + // Restart should succeed with matching header and footer + dmgr.reset(); + iomanager.stop(); + + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_NO_THROW({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + LOGINFO("Successfully validated matching header and footer"); + }); +} + int main(int argc, char* argv[]) { SISL_OPTIONS_LOAD(argc, argv, logging, test_pdev, iomgr); ::testing::InitGoogleTest(&argc, argv);