From 9804de7fa10bd56c082736b73a0d04726c5362c1 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Fri, 10 Oct 2025 00:36:45 +0300 Subject: [PATCH 01/33] stub for leveled_bookie:status/1 --- src/leveled_bookie.erl | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 65611021..8ee9bed8 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -75,7 +75,8 @@ book_loglevel/2, book_addlogs/2, book_removelogs/2, - book_headstatus/1 + book_headstatus/1, + book_status/1 ]). %% folding API @@ -1316,6 +1317,24 @@ book_removelogs(Pid, ForcedLogs) -> book_headstatus(Pid) -> gen_server:call(Pid, head_status, infinity). +-spec book_status(pid()) -> proplists:proplist(). +%% @doc +%% Return a proplist containing the following items: +%% * current size of the ledger cache; +%% * number of active journal files; +%% * average compaction score for the journal; +%% * current distribution of files across the ledger (e.g. count of files by level); +%% * current size of the penciller in-memory cache; +%% * penciller work backlog status; +%% * last merge time (penciller); +%% * last compaction time (journal); +%% * last compaction result (journal) e.g. files compacted and compaction score; +%% * ratio of metadata to object size (recent PUTs); +%% * PUT/GET/HEAD recent time/count metrics; +%% * mean level for recent fetches. +book_status(Pid) -> + gen_server:call(Pid, status, infinity). 
+ %%%============================================================================ %%% gen_server callbacks %%%============================================================================ @@ -1740,6 +1759,8 @@ handle_call(return_actors, _From, State) -> {reply, {ok, State#state.inker, State#state.penciller}, State}; handle_call(head_status, _From, State) -> {reply, {State#state.head_only, State#state.head_lookup}, State}; +handle_call(status, _From, State) -> + {reply, status(State), State}; handle_call(Msg, _From, State) -> {reply, {unsupported_message, element(1, Msg)}, State}. @@ -3048,6 +3069,23 @@ maybelog_snap_timing({Pid, _StatsFreq}, BookieTime, PCLTime) when maybelog_snap_timing(_Monitor, _, _) -> ok. + +status(#state{penciller = _Penciller, + ledger_cache = _LedgerCache}) -> + [{ledger_cache_size, -1}, + {n_active_journal_files, -1}, + {avg_compaction_score, -1.0}, + {level_files_count, []}, + {penciller_inmem_cache_size, -1}, + {penciller_work_backlog_status, void}, + {penciller_last_merge_time, os:system_time(millisecond)}, + {journal_last_compaction_time, os:system_time(millisecond)}, + {journal_last_compaction_result, {-1, -1}}, + {metadata_objsize_ratio, -0.1}, + {recent_putgethead_counts, []}, + {recent_fetch_mean_level, -1}]. 
+ + %%%============================================================================ %%% Test %%%============================================================================ From 779bd05bf834a77b83a3943e9cc47f8439f6f799 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Sun, 12 Oct 2025 00:44:32 +0300 Subject: [PATCH 02/33] bookie_status: ledger_cache_size --- src/leveled_bookie.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 8ee9bed8..704c45f5 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -3071,8 +3071,10 @@ maybelog_snap_timing(_Monitor, _, _) -> status(#state{penciller = _Penciller, - ledger_cache = _LedgerCache}) -> - [{ledger_cache_size, -1}, + ledger_cache = #ledger_cache{mem = Mem}}) -> + PP = ets:info(Mem), + [{ledger_cache_size, #{size => proplists:get_value(size, PP), + memory => proplists:get_value(memory, PP)}}, {n_active_journal_files, -1}, {avg_compaction_score, -1.0}, {level_files_count, []}, From ddb2587dcb65944526c72843775a97186f726887 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 21 Oct 2025 00:15:04 +0300 Subject: [PATCH 03/33] bookie_status: redoing using leveled_monitor WIP --- src/leveled_bookie.erl | 34 +++++++++++++-------------------- src/leveled_monitor.erl | 42 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 704c45f5..ecea2124 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -1494,7 +1494,8 @@ handle_call( State#state.cache_size, State#state.cache_multiple, Cache0, - State#state.penciller + State#state.penciller, + State#state.monitor ) of {ok, Cache} -> @@ -1528,7 +1529,8 @@ handle_call({mput, ObjectSpecs, TTL}, From, State) when State#state.cache_size, State#state.cache_multiple, Cache0, - State#state.penciller + State#state.penciller, + State#state.monitor ) of {ok, Cache} -> @@ -1705,7 +1707,8 
@@ handle_call({compact_journal, Timeout}, From, State) when State#state.cache_size, State#state.cache_multiple, State#state.ledger_cache, - State#state.penciller + State#state.penciller, + State#state.monitor ) of {_, NewCache} -> @@ -2898,7 +2901,7 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> end. -spec maybepush_ledgercache( - pos_integer(), pos_integer(), ledger_cache(), pid() + pos_integer(), pos_integer(), ledger_cache(), pid(), pid() ) -> {ok | returned, ledger_cache()}. %% @doc @@ -2911,9 +2914,10 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> %% in the reply. Try again later when it isn't busy (and also potentially %% implement a slow_offer state to slow down the pace at which PUTs are being %% received) -maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller) -> +maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller, Monitor) -> Tab = Cache#ledger_cache.mem, CacheSize = ets:info(Tab, size), + leveled_monitor:add_stat(Monitor, {ledger_cache_size_update, CacheSize}), TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize, MaxCacheMult), if TimeToPush -> @@ -3070,22 +3074,10 @@ maybelog_snap_timing(_Monitor, _, _) -> ok. -status(#state{penciller = _Penciller, - ledger_cache = #ledger_cache{mem = Mem}}) -> - PP = ets:info(Mem), - [{ledger_cache_size, #{size => proplists:get_value(size, PP), - memory => proplists:get_value(memory, PP)}}, - {n_active_journal_files, -1}, - {avg_compaction_score, -1.0}, - {level_files_count, []}, - {penciller_inmem_cache_size, -1}, - {penciller_work_backlog_status, void}, - {penciller_last_merge_time, os:system_time(millisecond)}, - {journal_last_compaction_time, os:system_time(millisecond)}, - {journal_last_compaction_result, {-1, -1}}, - {metadata_objsize_ratio, -0.1}, - {recent_putgethead_counts, []}, - {recent_fetch_mean_level, -1}]. +status(#state{monitor = {no_monitor, 0}}) -> + #{}; +status(#state{monitor = {Monitor, _}}) -> + leveled_monitor:get_bookie_status(Monitor). 
%%%============================================================================ diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index ae38e4f3..b9f500d1 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -38,7 +38,8 @@ log_level/2, log_add/2, log_remove/2, - get_defaults/0 + get_defaults/0, + get_bookie_status/1 ]). -define(LOG_LIST, [ @@ -128,6 +129,21 @@ sample_start_time = os:timestamp() :: erlang:timestamp() }). +-type bookie_status() :: #{ + ledger_cache_size := undefined | pos_integer(), + n_active_journal_files_update := undefined | pos_integer(), + avg_compaction_score_update := undefined | pos_integer(), + level_files_count_update := undefined | pos_integer(), + penciller_inmem_cache_size_update := undefined | pos_integer(), + penciller_work_backlog_status_update := undefined | {[non_neg_integer()], boolean(), boolean()}, + penciller_last_merge_time_update := undefined | pos_integer(), + journal_last_compaction_time_update := undefined | pos_integer(), + journal_last_compaction_result_update := undefined | {float(), non_neg_integer()}, + metadata_objsize_ratio_update := not_implemented, + recent_putgethead_counts := undefined | {non_neg_integer(), non_neg_integer(), non_neg_integer()}, + recent_fetch_mean_level := undefined | [{pos_integer(), non_neg_integer()}] +}. + -record(state, { bookie_get_timings = #bookie_get_timings{} :: bookie_get_timings(), bookie_head_timings = #bookie_head_timings{} :: bookie_head_timings(), @@ -137,7 +153,8 @@ sst_fetch_timings = [] :: list(sst_fetch_timings()), cdb_get_timings = #cdb_get_timings{} :: cdb_get_timings(), log_frequency = ?LOG_FREQUENCY_SECONDS :: pos_integer(), - log_order = [] :: list(log_type()) + log_order = [] :: list(log_type()), + bookie_status = #{} :: bookie_status() }). -type bookie_get_timings() :: #bookie_get_timings{}. @@ -179,6 +196,17 @@ microsecs()}. -type cdb_get_update() :: {cdb_get_update, pos_integer(), microsecs(), microsecs()}. 
+-type bookie_status_update() :: + {ledger_cache_size_update, pos_integer()} + | {n_active_journal_files_update, pos_integer()} + | {avg_compaction_score_update, pos_integer()} + | {level_files_count_update, pos_integer()} + | {penciller_inmem_cache_size_update, pos_integer()} + | {penciller_work_backlog_status_update, {non_neg_integer(), boolean(), boolean()}} + | {penciller_last_merge_time_update, pos_integer()} + | {journal_last_compaction_time_update, pos_integer()} + | {journal_last_compaction_result_update, {float(), non_neg_integer()}} + | {metadata_objsize_ratio_update, not_implemented}. -type statistic() :: bookie_get_update() | bookie_head_update() @@ -186,7 +214,8 @@ | bookie_snap_update() | pcl_fetch_update() | sst_fetch_update() - | cdb_get_update(). + | cdb_get_update() + | bookie_status_update(). -export_type([monitor/0, timing/0, sst_fetch_type/0, log_type/0]). @@ -228,6 +257,10 @@ log_add(Pid, ForcedLogs) -> log_remove(Pid, ForcedLogs) -> gen_server:cast(Pid, {log_remove, ForcedLogs}). +-spec get_bookie_status(pid()) -> bookie_status(). +get_bookie_status(Pid) -> + gen_server:call(Pid, get_bookie_status). + -spec maybe_time(monitor()) -> erlang:timestamp() | no_timing. maybe_time({_Pid, TimingProbability}) -> case rand:uniform(100) of @@ -272,6 +305,9 @@ init([LogOpts, LogFrequency, LogOrder]) -> erlang:send_after(InitialJitter, self(), report_next_stats), {ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder}}. +handle_call(get_bookie_status, _From, State = #state{bookie_status = A}) -> + {reply, A, State}; + handle_call(close, _From, State) -> {stop, normal, ok, State}. 
From 93a854667b71a12b39f3f63d38cf32ca0edffb6f Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 23 Oct 2025 20:15:39 +0300 Subject: [PATCH 04/33] bookie_status: redoing using leveled_monitor WIP --- src/leveled_bookie.erl | 2 +- src/leveled_monitor.erl | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index ecea2124..9ee47265 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -2914,7 +2914,7 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> %% in the reply. Try again later when it isn't busy (and also potentially %% implement a slow_offer state to slow down the pace at which PUTs are being %% received) -maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller, Monitor) -> +maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller, {Monitor, _}) -> Tab = Cache#ledger_cache.mem, CacheSize = ets:info(Tab, size), leveled_monitor:add_stat(Monitor, {ledger_cache_size_update, CacheSize}), diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index b9f500d1..53813454 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -231,7 +231,9 @@ monitor_start(LogFreq, LogOrder) -> ), {ok, Monitor}. --spec add_stat(pid(), statistic()) -> ok. +-spec add_stat(no_monitor | pid(), statistic()) -> ok. +add_stat(no_monitor, _Statistic) -> + ok; add_stat(Watcher, Statistic) -> gen_server:cast(Watcher, Statistic). @@ -667,7 +669,10 @@ handle_cast({log_add, ForcedLogs}, State) -> {noreply, State}; handle_cast({log_remove, ForcedLogs}, State) -> ok = leveled_log:remove_forcedlogs(ForcedLogs), - {noreply, State}. + {noreply, State}; +handle_cast({ledger_cache_size_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{ledger_cache_size => A}}}. 
+ handle_info(report_next_stats, State) -> erlang:send_after( From fc9d1fb127e0dd4e315888e1754bba1d199d91cd Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Fri, 24 Oct 2025 00:33:26 +0300 Subject: [PATCH 05/33] bookie_status: redoing using leveled_monitor WIP * avg/best_compaction_score * level_files_count * penciller_inmem_cache_size --- src/leveled_cdb.erl | 4 +++ src/leveled_iclerk.erl | 27 ++++++++++------ src/leveled_monitor.erl | 67 +++++++++++++++++++++++++++++++-------- src/leveled_pclerk.erl | 10 ++++-- src/leveled_penciller.erl | 5 ++- src/leveled_sst.erl | 7 ++++ 6 files changed, 93 insertions(+), 27 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 57db6bee..9317a66f 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -493,6 +493,7 @@ starting({call, From}, {open_writer, Filename}, State) -> {next_state, writer, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename}, State) -> leveled_log:save(State#state.log_options), + leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, +1}), leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, false), State0 = State#state{ @@ -504,6 +505,7 @@ starting({call, From}, {open_reader, Filename}, State) -> {next_state, reader, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename, LastKey}, State) -> leveled_log:save(State#state.log_options), + leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, +1}), leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), State0 = State#state{ @@ -880,6 +882,7 @@ delete_pending( ) when ?IS_DEF(FN), ?IS_DEF(IO) -> + leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, -1}), leveled_log:log(cdb04, [FN, State#state.delete_point]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal}; @@ -906,6 +909,7 @@ delete_pending( 
), {keep_state_and_data, [?DELETE_TIMEOUT]}; false -> + leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, -1}), leveled_log:log(cdb04, [FN, ManSQN]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal} diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index f891531d..6611da37 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -354,7 +354,7 @@ handle_cast( {noreply, State#state{scored_files = [], scoring_state = ScoringState}}; handle_cast( {score_filelist, [Entry | Tail]}, - State = #state{scoring_state = ScoringState} + State = #state{scoring_state = ScoringState, cdb_options = CDBOpts} ) when ?IS_DEF(ScoringState) -> @@ -379,7 +379,8 @@ handle_cast( ScoringState#scoring_state.max_sqn, ?SAMPLE_SIZE, ?BATCH_SIZE, - State#state.reload_strategy + State#state.reload_strategy, + CDBOpts#cdb_options.monitor ); {CachedScore, true, _ScoreOneIn} -> % If caches are used roll the score towards the current score @@ -394,7 +395,8 @@ handle_cast( ScoringState#scoring_state.max_sqn, ?SAMPLE_SIZE, ?BATCH_SIZE, - State#state.reload_strategy + State#state.reload_strategy, + CDBOpts#cdb_options.monitor ), (NewScore + CachedScore) / 2; {CachedScore, false, _ScoreOneIn} -> @@ -427,6 +429,8 @@ handle_cast( {MaxRunLength, State#state.maxrunlength_compactionperc, State#state.singlefile_compactionperc}, {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), + {Monitor, _} = CDBopts#cdb_options.monitor, + leveled_monitor:add_stat(Monitor, {best_compaction_score_update, Score}), leveled_log:log_timer(ic003, [Score, length(BestRun0)], SW), case Score > 0.0 of true -> @@ -594,7 +598,8 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> leveled_codec:sqn(), non_neg_integer(), non_neg_integer(), - leveled_codec:compaction_strategy() + leveled_codec:compaction_strategy(), + leveled_monitor:monitor() ) -> float(). 
%% @doc @@ -615,7 +620,8 @@ check_single_file( MaxSQN, SampleSize, BatchSize, - ReloadStrategy + ReloadStrategy, + {Monitor, _} ) -> FN = leveled_cdb:cdb_filename(CDB), SW = os:timestamp(), @@ -629,6 +635,7 @@ check_single_file( MaxSQN, ReloadStrategy ), + leveled_monitor:add_stat(Monitor, {avg_compaction_score_update, Score}), safely_log_filescore(PositionList, FN, Score, SW), Score. @@ -1265,14 +1272,14 @@ check_single_file_test() -> replaced end end, - Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), + Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0}), ?assertMatch(37.5, Score1), LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> current end, - Score2 = check_single_file(CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS), + Score2 = check_single_file(CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0}), ?assertMatch(100.0, Score2), - Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS), + Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS, {no_monitor, 0}), ?assertMatch(37.5, Score3), - Score4 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS), + Score4 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS, {no_monitor, 0}), ?assertMatch(75.0, Score4), ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). @@ -1417,7 +1424,7 @@ compact_empty_file_test() -> {3, {o, "Bucket", "Key3", null}} ], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, - Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), + Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0}), ?assert((+0.0 =:= Score1) orelse (-0.0 =:= Score1)), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). 
diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 53813454..a1f4fde0 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -131,19 +131,22 @@ -type bookie_status() :: #{ ledger_cache_size := undefined | pos_integer(), - n_active_journal_files_update := undefined | pos_integer(), - avg_compaction_score_update := undefined | pos_integer(), - level_files_count_update := undefined | pos_integer(), - penciller_inmem_cache_size_update := undefined | pos_integer(), - penciller_work_backlog_status_update := undefined | {[non_neg_integer()], boolean(), boolean()}, - penciller_last_merge_time_update := undefined | pos_integer(), - journal_last_compaction_time_update := undefined | pos_integer(), - journal_last_compaction_result_update := undefined | {float(), non_neg_integer()}, - metadata_objsize_ratio_update := not_implemented, + n_active_journal_files := undefined | pos_integer(), + avg_compaction_score_sample := undefined | [pos_integer()], + best_compaction_score := undefined | pos_integer(), + level_files_count := undefined | #{non_neg_integer() := non_neg_integer()}, + penciller_inmem_cache_size := undefined | pos_integer(), + penciller_work_backlog_status := undefined | {non_neg_integer(), boolean(), boolean()}, + penciller_last_merge_time := undefined | pos_integer(), + journal_last_compaction_time := undefined | pos_integer(), + journal_last_compaction_result := undefined | {float(), non_neg_integer()}, + metadata_objsize_ratio := not_implemented, recent_putgethead_counts := undefined | {non_neg_integer(), non_neg_integer(), non_neg_integer()}, recent_fetch_mean_level := undefined | [{pos_integer(), non_neg_integer()}] }. +-define(AVG_COMPACTION_SCORE_OVER_MAX, 50). + -record(state, { bookie_get_timings = #bookie_get_timings{} :: bookie_get_timings(), bookie_head_timings = #bookie_head_timings{} :: bookie_head_timings(), @@ -198,9 +201,10 @@ {cdb_get_update, pos_integer(), microsecs(), microsecs()}. 
-type bookie_status_update() :: {ledger_cache_size_update, pos_integer()} - | {n_active_journal_files_update, pos_integer()} - | {avg_compaction_score_update, pos_integer()} - | {level_files_count_update, pos_integer()} + | {n_active_journal_files_update, integer()} + | {avg_compaction_score_update, [pos_integer()]} + | {best_compaction_score_update, pos_integer()} + | {level_files_count_update, #{non_neg_integer() => pos_integer()}} | {penciller_inmem_cache_size_update, pos_integer()} | {penciller_work_backlog_status_update, {non_neg_integer(), boolean(), boolean()}} | {penciller_last_merge_time_update, pos_integer()} @@ -233,6 +237,7 @@ monitor_start(LogFreq, LogOrder) -> -spec add_stat(no_monitor | pid(), statistic()) -> ok. add_stat(no_monitor, _Statistic) -> + logger:notice("not updating statistic ~p because no_monitor", [_Statistic]), ok; add_stat(Watcher, Statistic) -> gen_server:cast(Watcher, Statistic). @@ -670,8 +675,44 @@ handle_cast({log_add, ForcedLogs}, State) -> handle_cast({log_remove, ForcedLogs}, State) -> ok = leveled_log:remove_forcedlogs(ForcedLogs), {noreply, State}; + handle_cast({ledger_cache_size_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{ledger_cache_size => A}}}. 
+ {noreply, State#state{bookie_status = BS#{ledger_cache_size => A}}}; + +handle_cast({n_active_journal_files_update, Delta}, State = #state{bookie_status = BS0}) -> + A = maps:get(n_active_journal_files, BS0, 0), + BS = maps:put(n_active_journal_files, A + Delta, BS0), + {noreply, State#state{bookie_status = BS}}; + +handle_cast({avg_compaction_score_update, A}, State = #state{bookie_status = BS}) -> + NewSample = + case [A | maps:get(avg_compaction_score_sample, BS, [])] of + L when length(L) > ?AVG_COMPACTION_SCORE_OVER_MAX -> + lists:sublist(L, ?AVG_COMPACTION_SCORE_OVER_MAX); + L -> + L + end, + {noreply, State#state{bookie_status = BS#{avg_compaction_score_sample => NewSample}}}; + +handle_cast({best_compaction_score_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{best_compaction_score => A}}}; + +handle_cast({level_files_count_update, U}, State = #state{bookie_status = BS0}) -> + A = maps:get(level_files_count, BS0, #{}), + BS = maps:put(level_files_count, maps:merge(A, U), BS0), + {noreply, State#state{bookie_status = BS}}; + +handle_cast({penciller_inmem_cache_size_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{penciller_inmem_cache_size => A}}}; +handle_cast({penciller_work_backlog_status_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{penciller_work_backlog_status => A}}}; +handle_cast({penciller_last_merge_time_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{penciller_last_merge_time => A}}}; +handle_cast({journal_last_compaction_result_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{journal_last_compaction_result => A}}}; +handle_cast({metadata_objsize_ratio_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{metadata_objsize_ratio => A}}}. 
+ handle_info(report_next_stats, State) -> diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 5e0af8df..16d14b08 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -388,15 +388,19 @@ merge_limit(SrcLevel, SinkListLength, MMB) when is_integer(MMB) -> list(leveled_sst:sst_pointer()) }. do_merge( - [], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, _Opts, Additions, _Max + [], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, SSTOpts, Additions, _Max ) -> + {Monitor, _} = SSTOpts#sst_options.monitor, + leveled_monitor:add_stat(Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}}), leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), full]), {lists:reverse(Additions), [], []}; do_merge( - KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions, Max + KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, SSTOpts, Additions, Max ) when length(Additions) >= Max -> + {Monitor, _} = SSTOpts#sst_options.monitor, + leveled_monitor:add_stat(Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}}), leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), partial]), FNSrc = leveled_penciller:sst_filename( @@ -409,7 +413,7 @@ do_merge( {ExpandedKL1, []} = split_unexpanded_files(KL1), {ExpandedKL2, L2FilePointersRem} = split_unexpanded_files(KL2), TS1 = os:timestamp(), - InfOpts = OptsSST#sst_options{max_sstslots = infinity}, + InfOpts = SSTOpts#sst_options{max_sstslots = infinity}, % Need to be careful to make sure all the remainder goes in one file, % could be situations whereby the max_sstslots has been changed between % restarts - and so there is too much data for one file in the diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index b2ee31a5..a03a71e7 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -793,6 +793,8 @@ handle_call( State#state.levelzero_index, length(State#state.levelzero_cache) + 1 ), + {Monitor, _} = State#state.monitor, + 
leveled_monitor:add_stat(Monitor, {penciller_inmem_cache_size_update, NewL0Size}), leveled_log:log_randomtimer( p0031, [NewL0Size, true, true, MinSQN, MaxSQN], @@ -1260,7 +1262,7 @@ handle_cast( }}; handle_cast( work_for_clerk, - State = #state{manifest = Man, levelzero_cache = L0Cache, clerk = Clerk} + State = #state{manifest = Man, levelzero_cache = L0Cache, clerk = Clerk, monitor = Monitor} ) when ?IS_DEF(Man), ?IS_DEF(L0Cache), ?IS_DEF(Clerk) -> @@ -1320,6 +1322,7 @@ handle_cast( % L0 work to do, or because the backlog has grown beyond % tolerance Backlog = WC >= ?WORKQUEUE_BACKLOG_TOLERANCE, + leveled_monitor:add_stat(Monitor, {penciller_work_backlog_status_update, {WC, Backlog, L0Full}}), leveled_log:log(p0024, [WC, Backlog, L0Full]), [TL | _Tail] = WL, ok = leveled_pclerk:clerk_push(Clerk, {TL, Man}), diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 93663572..3218d2b9 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -645,6 +645,12 @@ starting( Level ), Summary = UpdState#state.summary, + + if Level == 0 -> + leveled_monitor:add_stat(element(1, Monitor), {penciller_inmem_cache_size_update, 0}); + el/=se -> + noop + end, leveled_log:log_timer( sst08, [ActualFilename, Level, Summary#summary.max_sqn], SW ), @@ -745,6 +751,7 @@ starting(cast, complete_l0startup, State) -> Summary = UpdState#state.summary, Time4 = timer:now_diff(os:timestamp(), SW4), + leveled_monitor:add_stat(element(1, Monitor), {penciller_inmem_cache_size_update, 0}), leveled_log:log_timer( sst08, [ActualFilename, 0, Summary#summary.max_sqn], SW0 ), From b7fd8870e66e0bc42f658dcabe7f20994df7b2e2 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 28 Oct 2025 01:39:56 +0200 Subject: [PATCH 06/33] fixups --- src/leveled_cdb.erl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 9317a66f..6ffbe1cf 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -493,7 +493,8 @@ starting({call, 
From}, {open_writer, Filename}, State) -> {next_state, writer, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename}, State) -> leveled_log:save(State#state.log_options), - leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, +1}), + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, false), State0 = State#state{ @@ -505,7 +506,8 @@ starting({call, From}, {open_reader, Filename}, State) -> {next_state, reader, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename, LastKey}, State) -> leveled_log:save(State#state.log_options), - leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, +1}), + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), State0 = State#state{ @@ -882,7 +884,8 @@ delete_pending( ) when ?IS_DEF(FN), ?IS_DEF(IO) -> - leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, -1}), + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, -1}), leveled_log:log(cdb04, [FN, State#state.delete_point]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal}; @@ -909,7 +912,8 @@ delete_pending( ), {keep_state_and_data, [?DELETE_TIMEOUT]}; false -> - leveled_monitor:add_stat(State#state.monitor, {n_active_journal_files_update, -1}), + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, -1}), leveled_log:log(cdb04, [FN, ManSQN]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal} From da36264ed2c5bc5ab2e631a8d7b406f9ae25048c Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 28 Oct 2025 
18:12:44 +0200 Subject: [PATCH 07/33] bookie_status: redoing using leveled_monitor WIP penciller_last_merge_time --- src/leveled_monitor.erl | 15 ++++++++------- src/leveled_pclerk.erl | 6 ++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index a1f4fde0..f319dd6e 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -204,10 +204,10 @@ | {n_active_journal_files_update, integer()} | {avg_compaction_score_update, [pos_integer()]} | {best_compaction_score_update, pos_integer()} - | {level_files_count_update, #{non_neg_integer() => pos_integer()}} + | {level_files_count_update, #{non_neg_integer() => pos_integer()}, TS::non_neg_integer()} | {penciller_inmem_cache_size_update, pos_integer()} | {penciller_work_backlog_status_update, {non_neg_integer(), boolean(), boolean()}} - | {penciller_last_merge_time_update, pos_integer()} + %% | {penciller_last_merge_time_update, pos_integer()} via level_files_count_update | {journal_last_compaction_time_update, pos_integer()} | {journal_last_compaction_result_update, {float(), non_neg_integer()}} | {metadata_objsize_ratio_update, not_implemented}. 
@@ -697,17 +697,18 @@ handle_cast({avg_compaction_score_update, A}, State = #state{bookie_status = BS} handle_cast({best_compaction_score_update, A}, State = #state{bookie_status = BS}) -> {noreply, State#state{bookie_status = BS#{best_compaction_score => A}}}; -handle_cast({level_files_count_update, U}, State = #state{bookie_status = BS0}) -> +handle_cast({level_files_count_update, U, TS}, State = #state{bookie_status = BS0}) -> A = maps:get(level_files_count, BS0, #{}), - BS = maps:put(level_files_count, maps:merge(A, U), BS0), - {noreply, State#state{bookie_status = BS}}; + BS1 = maps:put(level_files_count, maps:merge(A, U), BS0), + BS2 = maps:put(penciller_last_merge_time, TS, BS1), + {noreply, State#state{bookie_status = BS2}}; handle_cast({penciller_inmem_cache_size_update, A}, State = #state{bookie_status = BS}) -> {noreply, State#state{bookie_status = BS#{penciller_inmem_cache_size => A}}}; + handle_cast({penciller_work_backlog_status_update, A}, State = #state{bookie_status = BS}) -> {noreply, State#state{bookie_status = BS#{penciller_work_backlog_status => A}}}; -handle_cast({penciller_last_merge_time_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{penciller_last_merge_time => A}}}; + handle_cast({journal_last_compaction_result_update, A}, State = #state{bookie_status = BS}) -> {noreply, State#state{bookie_status = BS#{journal_last_compaction_result => A}}}; handle_cast({metadata_objsize_ratio_update, A}, State = #state{bookie_status = BS}) -> diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 16d14b08..7a89e96c 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -391,7 +391,8 @@ do_merge( [], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, SSTOpts, Additions, _Max ) -> {Monitor, _} = SSTOpts#sst_options.monitor, - leveled_monitor:add_stat(Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}}), + leveled_monitor:add_stat( + Monitor, {level_files_count_update, 
#{SinkLevel => length(Additions)}, os:system_time(millisecond)}), leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), full]), {lists:reverse(Additions), [], []}; do_merge( @@ -400,7 +401,8 @@ do_merge( length(Additions) >= Max -> {Monitor, _} = SSTOpts#sst_options.monitor, - leveled_monitor:add_stat(Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}}), + leveled_monitor:add_stat( + Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}, os:system_time(millisecond)}), leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), partial]), FNSrc = leveled_penciller:sst_filename( From b93116ec0f99933478813c9a58598f44c85bbdae Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 28 Oct 2025 18:16:21 +0200 Subject: [PATCH 08/33] bookie_status: redoing using leveled_monitor WIP journal_last_compaction_time --- src/leveled_iclerk.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 6611da37..63582c84 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -476,6 +476,9 @@ handle_cast( -> FilesToDelete = leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), + CDBopts = State#state.cdb_options, + {Monitor, _} = CDBopts#cdb_options.monitor, + leveled_monitor:add_stat(Monitor, {journal_last_compaction_time_update, os:system_time(millisecond)}), leveled_log:log(ic007, []), ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete), {noreply, State}; From 4600df2baac7aa54404d173bbcf0328c1120c6eb Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Wed, 29 Oct 2025 01:15:44 +0200 Subject: [PATCH 09/33] bookie_status: redoing using leveled_monitor mostly done --- src/leveled_bookie.erl | 2 +- src/leveled_iclerk.erl | 5 +++- src/leveled_monitor.erl | 63 ++++++++++++++++++++++----------------- src/leveled_penciller.erl | 5 +++- 4 files changed, 45 insertions(+), 30 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 
9ee47265..0fb3bfd0 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -2901,7 +2901,7 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> end. -spec maybepush_ledgercache( - pos_integer(), pos_integer(), ledger_cache(), pid(), pid() + pos_integer(), pos_integer(), ledger_cache(), pid(), leveled_monitor:monitor() ) -> {ok | returned, ledger_cache()}. %% @doc diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 63582c84..e9d41107 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -430,7 +430,10 @@ handle_cast( State#state.singlefile_compactionperc}, {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), {Monitor, _} = CDBopts#cdb_options.monitor, - leveled_monitor:add_stat(Monitor, {best_compaction_score_update, Score}), + leveled_monitor:add_stat( + Monitor, {journal_last_compaction_result_update, + {length(BestRun0), + Score}}), leveled_log:log_timer(ic003, [Score, length(BestRun0)], SW), case Score > 0.0 of true -> diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index f319dd6e..46e3805b 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -130,19 +130,17 @@ }). 
-type bookie_status() :: #{ - ledger_cache_size := undefined | pos_integer(), - n_active_journal_files := undefined | pos_integer(), - avg_compaction_score_sample := undefined | [pos_integer()], - best_compaction_score := undefined | pos_integer(), - level_files_count := undefined | #{non_neg_integer() := non_neg_integer()}, - penciller_inmem_cache_size := undefined | pos_integer(), - penciller_work_backlog_status := undefined | {non_neg_integer(), boolean(), boolean()}, - penciller_last_merge_time := undefined | pos_integer(), - journal_last_compaction_time := undefined | pos_integer(), - journal_last_compaction_result := undefined | {float(), non_neg_integer()}, - metadata_objsize_ratio := not_implemented, - recent_putgethead_counts := undefined | {non_neg_integer(), non_neg_integer(), non_neg_integer()}, - recent_fetch_mean_level := undefined | [{pos_integer(), non_neg_integer()}] + ledger_cache_size => non_neg_integer(), + n_active_journal_files => pos_integer(), + avg_compaction_score_sample => [float()], + level_files_count => #{non_neg_integer() => non_neg_integer()}, + penciller_inmem_cache_size => pos_integer(), + penciller_work_backlog_status => {non_neg_integer(), boolean(), boolean()}, + penciller_last_merge_time => integer(), + journal_last_compaction_time => integer(), + journal_last_compaction_result => {non_neg_integer(), float()}, + recent_putgethead_counts => {non_neg_integer(), non_neg_integer(), non_neg_integer()}, + recent_fetch_mean_level => [{pos_integer(), non_neg_integer()}] }. -define(AVG_COMPACTION_SCORE_OVER_MAX, 50). 
@@ -202,14 +200,13 @@ -type bookie_status_update() :: {ledger_cache_size_update, pos_integer()} | {n_active_journal_files_update, integer()} - | {avg_compaction_score_update, [pos_integer()]} - | {best_compaction_score_update, pos_integer()} + | {avg_compaction_score_update, float()} | {level_files_count_update, #{non_neg_integer() => pos_integer()}, TS::non_neg_integer()} | {penciller_inmem_cache_size_update, pos_integer()} | {penciller_work_backlog_status_update, {non_neg_integer(), boolean(), boolean()}} %% | {penciller_last_merge_time_update, pos_integer()} via level_files_count_update - | {journal_last_compaction_time_update, pos_integer()} - | {journal_last_compaction_result_update, {float(), non_neg_integer()}} + | {journal_last_compaction_time_update, integer()} + | {journal_last_compaction_result_update, {non_neg_integer(), float()}} | {metadata_objsize_ratio_update, not_implemented}. -type statistic() :: bookie_get_update() @@ -237,7 +234,6 @@ monitor_start(LogFreq, LogOrder) -> -spec add_stat(no_monitor | pid(), statistic()) -> ok. add_stat(no_monitor, _Statistic) -> - logger:notice("not updating statistic ~p because no_monitor", [_Statistic]), ok; add_stat(Watcher, Statistic) -> gen_server:cast(Watcher, Statistic). @@ -310,10 +306,10 @@ init([LogOpts, LogFrequency, LogOrder]) -> ), InitialJitter = rand:uniform(2 * 1000 * LogFrequency), erlang:send_after(InitialJitter, self(), report_next_stats), - {ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder}}. + {ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder, bookie_status = #{}}}. -handle_call(get_bookie_status, _From, State = #state{bookie_status = A}) -> - {reply, A, State}; +handle_call(get_bookie_status, _From, State) -> + {reply, enriched_bookie_status(State), State}; handle_call(close, _From, State) -> {stop, normal, ok, State}. 
@@ -694,9 +690,6 @@ handle_cast({avg_compaction_score_update, A}, State = #state{bookie_status = BS} end, {noreply, State#state{bookie_status = BS#{avg_compaction_score_sample => NewSample}}}; -handle_cast({best_compaction_score_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{best_compaction_score => A}}}; - handle_cast({level_files_count_update, U, TS}, State = #state{bookie_status = BS0}) -> A = maps:get(level_files_count, BS0, #{}), BS1 = maps:put(level_files_count, maps:merge(A, U), BS0), @@ -709,10 +702,11 @@ handle_cast({penciller_inmem_cache_size_update, A}, State = #state{bookie_status handle_cast({penciller_work_backlog_status_update, A}, State = #state{bookie_status = BS}) -> {noreply, State#state{bookie_status = BS#{penciller_work_backlog_status => A}}}; +handle_cast({journal_last_compaction_time_update, A}, State = #state{bookie_status = BS}) -> + {noreply, State#state{bookie_status = BS#{journal_last_compaction_time => A}}}; + handle_cast({journal_last_compaction_result_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{journal_last_compaction_result => A}}}; -handle_cast({metadata_objsize_ratio_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{metadata_objsize_ratio => A}}}. + {noreply, State#state{bookie_status = BS#{journal_last_compaction_result => A}}}. @@ -734,6 +728,21 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. 
+enriched_bookie_status(#state{bookie_status = BS, + bookie_get_timings = GT, + bookie_put_timings = PT, + bookie_head_timings = HT}) -> + BS#{get_sample_count => GT#bookie_get_timings.sample_count, + get_body_time => GT#bookie_get_timings.body_time, + head_sample_count => HT#bookie_head_timings.sample_count, + head_rsp_time => HT#bookie_head_timings.rsp_time, + put_sample_count => PT#bookie_put_timings.sample_count, + put_prep_time => PT#bookie_put_timings.prep_time, + put_ink_time => PT#bookie_put_timings.ink_time, + put_mem_time => PT#bookie_put_timings.mem_time + }. + + %%%============================================================================ %%% Test %%%============================================================================ diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index a03a71e7..f66fc959 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1262,7 +1262,10 @@ handle_cast( }}; handle_cast( work_for_clerk, - State = #state{manifest = Man, levelzero_cache = L0Cache, clerk = Clerk, monitor = Monitor} + State = #state{manifest = Man, + levelzero_cache = L0Cache, + clerk = Clerk, + monitor = {Monitor, _}} ) when ?IS_DEF(Man), ?IS_DEF(L0Cache), ?IS_DEF(Clerk) -> From d576670dc2a29d66a981d02810905daaf72e0c4a Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Wed, 29 Oct 2025 02:17:04 +0200 Subject: [PATCH 10/33] oblige erlfmt --- src/leveled_bookie.erl | 12 ++++-- src/leveled_cdb.erl | 4 +- src/leveled_iclerk.erl | 31 ++++++++++---- src/leveled_monitor.erl | 90 +++++++++++++++++++++++++-------------- src/leveled_pclerk.erl | 10 ++++- src/leveled_penciller.erl | 20 ++++++--- src/leveled_sst.erl | 13 ++++-- 7 files changed, 121 insertions(+), 59 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 0fb3bfd0..4d2d2c48 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -2901,7 +2901,11 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> end. 
-spec maybepush_ledgercache( - pos_integer(), pos_integer(), ledger_cache(), pid(), leveled_monitor:monitor() + pos_integer(), + pos_integer(), + ledger_cache(), + pid(), + leveled_monitor:monitor() ) -> {ok | returned, ledger_cache()}. %% @doc @@ -2914,7 +2918,9 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> %% in the reply. Try again later when it isn't busy (and also potentially %% implement a slow_offer state to slow down the pace at which PUTs are being %% received) -maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller, {Monitor, _}) -> +maybepush_ledgercache( + MaxCacheSize, MaxCacheMult, Cache, Penciller, {Monitor, _} +) -> Tab = Cache#ledger_cache.mem, CacheSize = ets:info(Tab, size), leveled_monitor:add_stat(Monitor, {ledger_cache_size_update, CacheSize}), @@ -3073,13 +3079,11 @@ maybelog_snap_timing({Pid, _StatsFreq}, BookieTime, PCLTime) when maybelog_snap_timing(_Monitor, _, _) -> ok. - status(#state{monitor = {no_monitor, 0}}) -> #{}; status(#state{monitor = {Monitor, _}}) -> leveled_monitor:get_bookie_status(Monitor). 
- %%%============================================================================ %%% Test %%%============================================================================ diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 6ffbe1cf..36feb661 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -913,7 +913,9 @@ delete_pending( {keep_state_and_data, [?DELETE_TIMEOUT]}; false -> {Monitor, _} = State#state.monitor, - leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, -1}), + leveled_monitor:add_stat( + Monitor, {n_active_journal_files_update, -1} + ), leveled_log:log(cdb04, [FN, ManSQN]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal} diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index e9d41107..7ebdf627 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -431,9 +431,9 @@ handle_cast( {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), {Monitor, _} = CDBopts#cdb_options.monitor, leveled_monitor:add_stat( - Monitor, {journal_last_compaction_result_update, - {length(BestRun0), - Score}}), + Monitor, + {journal_last_compaction_result_update, {length(BestRun0), Score}} + ), leveled_log:log_timer(ic003, [Score, length(BestRun0)], SW), case Score > 0.0 of true -> @@ -481,7 +481,10 @@ handle_cast( leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), CDBopts = State#state.cdb_options, {Monitor, _} = CDBopts#cdb_options.monitor, - leveled_monitor:add_stat(Monitor, {journal_last_compaction_time_update, os:system_time(millisecond)}), + leveled_monitor:add_stat( + Monitor, + {journal_last_compaction_time_update, os:system_time(millisecond)} + ), leveled_log:log(ic007, []), ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete), {noreply, State}; @@ -1278,14 +1281,22 @@ check_single_file_test() -> replaced end end, - Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0}), + Score1 = check_single_file( + CDB, LedgerFun1, LedgerSrv1, 
9, 8, 4, RS, {no_monitor, 0} + ), ?assertMatch(37.5, Score1), LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> current end, - Score2 = check_single_file(CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0}), + Score2 = check_single_file( + CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0} + ), ?assertMatch(100.0, Score2), - Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS, {no_monitor, 0}), + Score3 = check_single_file( + CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS, {no_monitor, 0} + ), ?assertMatch(37.5, Score3), - Score4 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS, {no_monitor, 0}), + Score4 = check_single_file( + CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS, {no_monitor, 0} + ), ?assertMatch(75.0, Score4), ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). @@ -1430,7 +1441,9 @@ compact_empty_file_test() -> {3, {o, "Bucket", "Key3", null}} ], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, - Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0}), + Score1 = check_single_file( + CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0} + ), ?assert((+0.0 =:= Score1) orelse (-0.0 =:= Score1)), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 46e3805b..1158646e 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -139,7 +139,9 @@ penciller_last_merge_time => integer(), journal_last_compaction_time => integer(), journal_last_compaction_result => {non_neg_integer(), float()}, - recent_putgethead_counts => {non_neg_integer(), non_neg_integer(), non_neg_integer()}, + recent_putgethead_counts => { + non_neg_integer(), non_neg_integer(), non_neg_integer() + }, recent_fetch_mean_level => [{pos_integer(), non_neg_integer()}] }. 
@@ -201,9 +203,12 @@ {ledger_cache_size_update, pos_integer()} | {n_active_journal_files_update, integer()} | {avg_compaction_score_update, float()} - | {level_files_count_update, #{non_neg_integer() => pos_integer()}, TS::non_neg_integer()} + | {level_files_count_update, #{non_neg_integer() => pos_integer()}, + TS :: non_neg_integer()} | {penciller_inmem_cache_size_update, pos_integer()} - | {penciller_work_backlog_status_update, {non_neg_integer(), boolean(), boolean()}} + | {penciller_work_backlog_status_update, { + non_neg_integer(), boolean(), boolean() + }} %% | {penciller_last_merge_time_update, pos_integer()} via level_files_count_update | {journal_last_compaction_time_update, integer()} | {journal_last_compaction_result_update, {non_neg_integer(), float()}} @@ -306,11 +311,14 @@ init([LogOpts, LogFrequency, LogOrder]) -> ), InitialJitter = rand:uniform(2 * 1000 * LogFrequency), erlang:send_after(InitialJitter, self(), report_next_stats), - {ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder, bookie_status = #{}}}. + {ok, #state{ + log_frequency = LogFrequency, + log_order = RandomLogOrder, + bookie_status = #{} + }}. handle_call(get_bookie_status, _From, State) -> {reply, enriched_bookie_status(State), State}; - handle_call(close, _From, State) -> {stop, normal, ok, State}. 
@@ -671,16 +679,17 @@ handle_cast({log_add, ForcedLogs}, State) -> handle_cast({log_remove, ForcedLogs}, State) -> ok = leveled_log:remove_forcedlogs(ForcedLogs), {noreply, State}; - handle_cast({ledger_cache_size_update, A}, State = #state{bookie_status = BS}) -> {noreply, State#state{bookie_status = BS#{ledger_cache_size => A}}}; - -handle_cast({n_active_journal_files_update, Delta}, State = #state{bookie_status = BS0}) -> +handle_cast( + {n_active_journal_files_update, Delta}, State = #state{bookie_status = BS0} +) -> A = maps:get(n_active_journal_files, BS0, 0), BS = maps:put(n_active_journal_files, A + Delta, BS0), {noreply, State#state{bookie_status = BS}}; - -handle_cast({avg_compaction_score_update, A}, State = #state{bookie_status = BS}) -> +handle_cast( + {avg_compaction_score_update, A}, State = #state{bookie_status = BS} +) -> NewSample = case [A | maps:get(avg_compaction_score_sample, BS, [])] of L when length(L) > ?AVG_COMPACTION_SCORE_OVER_MAX -> @@ -688,27 +697,40 @@ handle_cast({avg_compaction_score_update, A}, State = #state{bookie_status = BS} L -> L end, - {noreply, State#state{bookie_status = BS#{avg_compaction_score_sample => NewSample}}}; - -handle_cast({level_files_count_update, U, TS}, State = #state{bookie_status = BS0}) -> + {noreply, State#state{ + bookie_status = BS#{avg_compaction_score_sample => NewSample} + }}; +handle_cast( + {level_files_count_update, U, TS}, State = #state{bookie_status = BS0} +) -> A = maps:get(level_files_count, BS0, #{}), BS1 = maps:put(level_files_count, maps:merge(A, U), BS0), BS2 = maps:put(penciller_last_merge_time, TS, BS1), {noreply, State#state{bookie_status = BS2}}; - -handle_cast({penciller_inmem_cache_size_update, A}, State = #state{bookie_status = BS}) -> +handle_cast( + {penciller_inmem_cache_size_update, A}, State = #state{bookie_status = BS} +) -> {noreply, State#state{bookie_status = BS#{penciller_inmem_cache_size => A}}}; - -handle_cast({penciller_work_backlog_status_update, A}, State = 
#state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{penciller_work_backlog_status => A}}}; - -handle_cast({journal_last_compaction_time_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{journal_last_compaction_time => A}}}; - -handle_cast({journal_last_compaction_result_update, A}, State = #state{bookie_status = BS}) -> - {noreply, State#state{bookie_status = BS#{journal_last_compaction_result => A}}}. - - +handle_cast( + {penciller_work_backlog_status_update, A}, + State = #state{bookie_status = BS} +) -> + {noreply, State#state{ + bookie_status = BS#{penciller_work_backlog_status => A} + }}; +handle_cast( + {journal_last_compaction_time_update, A}, State = #state{bookie_status = BS} +) -> + {noreply, State#state{ + bookie_status = BS#{journal_last_compaction_time => A} + }}; +handle_cast( + {journal_last_compaction_result_update, A}, + State = #state{bookie_status = BS} +) -> + {noreply, State#state{ + bookie_status = BS#{journal_last_compaction_result => A} + }}. handle_info(report_next_stats, State) -> erlang:send_after( @@ -728,11 +750,14 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -enriched_bookie_status(#state{bookie_status = BS, - bookie_get_timings = GT, - bookie_put_timings = PT, - bookie_head_timings = HT}) -> - BS#{get_sample_count => GT#bookie_get_timings.sample_count, +enriched_bookie_status(#state{ + bookie_status = BS, + bookie_get_timings = GT, + bookie_put_timings = PT, + bookie_head_timings = HT +}) -> + BS#{ + get_sample_count => GT#bookie_get_timings.sample_count, get_body_time => GT#bookie_get_timings.body_time, head_sample_count => HT#bookie_head_timings.sample_count, head_rsp_time => HT#bookie_head_timings.rsp_time, @@ -740,8 +765,7 @@ enriched_bookie_status(#state{bookie_status = BS, put_prep_time => PT#bookie_put_timings.prep_time, put_ink_time => PT#bookie_put_timings.ink_time, put_mem_time => PT#bookie_put_timings.mem_time - }. 
- + }. %%%============================================================================ %%% Test diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 7a89e96c..e67fd2ea 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -392,7 +392,10 @@ do_merge( ) -> {Monitor, _} = SSTOpts#sst_options.monitor, leveled_monitor:add_stat( - Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}, os:system_time(millisecond)}), + Monitor, + {level_files_count_update, #{SinkLevel => length(Additions)}, + os:system_time(millisecond)} + ), leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), full]), {lists:reverse(Additions), [], []}; do_merge( @@ -402,7 +405,10 @@ do_merge( -> {Monitor, _} = SSTOpts#sst_options.monitor, leveled_monitor:add_stat( - Monitor, {level_files_count_update, #{SinkLevel => length(Additions)}, os:system_time(millisecond)}), + Monitor, + {level_files_count_update, #{SinkLevel => length(Additions)}, + os:system_time(millisecond)} + ), leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), partial]), FNSrc = leveled_penciller:sst_filename( diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index f66fc959..10b8404d 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -794,7 +794,9 @@ handle_call( length(State#state.levelzero_cache) + 1 ), {Monitor, _} = State#state.monitor, - leveled_monitor:add_stat(Monitor, {penciller_inmem_cache_size_update, NewL0Size}), + leveled_monitor:add_stat( + Monitor, {penciller_inmem_cache_size_update, NewL0Size} + ), leveled_log:log_randomtimer( p0031, [NewL0Size, true, true, MinSQN, MaxSQN], @@ -1262,10 +1264,12 @@ handle_cast( }}; handle_cast( work_for_clerk, - State = #state{manifest = Man, - levelzero_cache = L0Cache, - clerk = Clerk, - monitor = {Monitor, _}} + State = #state{ + manifest = Man, + levelzero_cache = L0Cache, + clerk = Clerk, + monitor = {Monitor, _} + } ) when ?IS_DEF(Man), ?IS_DEF(L0Cache), ?IS_DEF(Clerk) -> @@ -1325,7 
+1329,11 @@ handle_cast( % L0 work to do, or because the backlog has grown beyond % tolerance Backlog = WC >= ?WORKQUEUE_BACKLOG_TOLERANCE, - leveled_monitor:add_stat(Monitor, {penciller_work_backlog_status_update, {WC, Backlog, L0Full}}), + leveled_monitor:add_stat( + Monitor, + {penciller_work_backlog_status_update, + {WC, Backlog, L0Full}} + ), leveled_log:log(p0024, [WC, Backlog, L0Full]), [TL | _Tail] = WL, ok = leveled_pclerk:clerk_push(Clerk, {TL, Man}), diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 3218d2b9..a7f1feec 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -646,9 +646,12 @@ starting( ), Summary = UpdState#state.summary, - if Level == 0 -> - leveled_monitor:add_stat(element(1, Monitor), {penciller_inmem_cache_size_update, 0}); - el/=se -> + if + Level == 0 -> + leveled_monitor:add_stat( + element(1, Monitor), {penciller_inmem_cache_size_update, 0} + ); + el /= se -> noop end, leveled_log:log_timer( @@ -751,7 +754,9 @@ starting(cast, complete_l0startup, State) -> Summary = UpdState#state.summary, Time4 = timer:now_diff(os:timestamp(), SW4), - leveled_monitor:add_stat(element(1, Monitor), {penciller_inmem_cache_size_update, 0}), + leveled_monitor:add_stat( + element(1, Monitor), {penciller_inmem_cache_size_update, 0} + ), leveled_log:log_timer( sst08, [ActualFilename, 0, Summary#summary.max_sqn], SW0 ), From 12dab0b73e0b8084df97db1839be81f76ca3fc7d Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 30 Oct 2025 00:30:36 +0200 Subject: [PATCH 11/33] bookie_status: fetch_count_by_level --- src/leveled_monitor.erl | 45 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 1158646e..f71f2dfc 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -142,8 +142,15 @@ recent_putgethead_counts => { non_neg_integer(), non_neg_integer(), non_neg_integer() }, - recent_fetch_mean_level => [{pos_integer(), 
non_neg_integer()}] + fetch_count_by_level => #{ + reporting_fetch_level() => #{ + count => non_neg_integer(), + time => non_neg_integer() + } + } }. +-type reporting_fetch_level() :: + not_found | mem | '0' | '1' | '2' | '3' | lower. -define(AVG_COMPACTION_SCORE_OVER_MAX, 50). @@ -754,8 +761,39 @@ enriched_bookie_status(#state{ bookie_status = BS, bookie_get_timings = GT, bookie_put_timings = PT, - bookie_head_timings = HT + bookie_head_timings = HT, + pcl_fetch_timings = PFT }) -> + FCL = #{ + not_found => #{ + count => PFT#pcl_fetch_timings.notfound_count, + time => PFT#pcl_fetch_timings.notfound_time + }, + mem => #{ + count => PFT#pcl_fetch_timings.foundmem_count, + time => PFT#pcl_fetch_timings.foundmem_time + }, + '0' => #{ + count => PFT#pcl_fetch_timings.found0_count, + time => PFT#pcl_fetch_timings.found0_time + }, + '1' => #{ + count => PFT#pcl_fetch_timings.found1_count, + time => PFT#pcl_fetch_timings.found1_time + }, + '2' => #{ + count => PFT#pcl_fetch_timings.found2_count, + time => PFT#pcl_fetch_timings.found2_time + }, + '3' => #{ + count => PFT#pcl_fetch_timings.found3_count, + time => PFT#pcl_fetch_timings.found3_time + }, + lower => #{ + count => PFT#pcl_fetch_timings.foundlower_count, + time => PFT#pcl_fetch_timings.foundlower_time + } + }, BS#{ get_sample_count => GT#bookie_get_timings.sample_count, get_body_time => GT#bookie_get_timings.body_time, @@ -764,7 +802,8 @@ enriched_bookie_status(#state{ put_sample_count => PT#bookie_put_timings.sample_count, put_prep_time => PT#bookie_put_timings.prep_time, put_ink_time => PT#bookie_put_timings.ink_time, - put_mem_time => PT#bookie_put_timings.mem_time + put_mem_time => PT#bookie_put_timings.mem_time, + fetch_count_by_level => FCL }. 
%%%============================================================================ From acd0b57e38d4d01930187944e102d844682f9bd7 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 20 Jan 2026 00:33:07 +0200 Subject: [PATCH 12/33] ct test for bookie status report wip --- test/end_to_end/basic_SUITE.erl | 75 ++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 1b903c20..8db65a15 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -15,25 +15,27 @@ bigsst_littlesst/1, safereaderror_startup/1, remove_journal_test/1, - bigpcl_bucketlist/1 + bigpcl_bucketlist/1, + bookie_status_report/1 ]). all() -> [ - simple_put_fetch_head_delete, - many_put_fetch_head, - journal_compaction, - fetchput_snapshot, - load_and_count, - load_and_count_withdelete, - space_clear_ondelete, - is_empty_test, - many_put_fetch_switchcompression, - bigjournal_littlejournal, - bigsst_littlesst, - safereaderror_startup, - remove_journal_test, - bigpcl_bucketlist + bookie_status_report + %% simple_put_fetch_head_delete, + %% many_put_fetch_head, + %% journal_compaction, + %% fetchput_snapshot, + %% load_and_count, + %% load_and_count_withdelete, + %% space_clear_ondelete, + %% is_empty_test, + %% many_put_fetch_switchcompression, + %% bigjournal_littlejournal, + %% bigsst_littlesst, + %% safereaderror_startup, + %% remove_journal_test, + %% bigpcl_bucketlist ]. init_per_suite(Config) -> @@ -43,6 +45,49 @@ init_per_suite(Config) -> end_per_suite(Config) -> testutil:end_per_suite(Config). 
+bookie_status_report(_Config) -> + RootPath = testutil:reset_filestructure(), + StartOpts = + [ + {root_path, RootPath}, + {sync_strategy, testutil:sync_strategy()}, + {log_level, info}, + {forced_logs, []} + ], + {ok, Bookie} = leveled_bookie:book_start(StartOpts), + + InitialReport = + #{ + fetch_count_by_level => + #{ + not_found => #{count => 0, time => 0}, + mem => #{count => 0, time => 0}, + lower => #{count => 0, time => 0}, + '0' => #{count => 0, time => 0}, + '1' => #{count => 0, time => 0}, + '2' => #{count => 0, time => 0}, + '3' => #{count => 0, time => 0} + }, + get_body_time => 0, + get_sample_count => 0, + head_rsp_time => 0, + head_sample_count => 0, + put_ink_time => 0, + put_mem_time => 0, + put_prep_time => 0, + put_sample_count => 0 + }, + InitialReport = leveled_bookie:book_status(Bookie), + + {TestObject, TestSpec} = testutil:generate_testobject(), + ok = testutil:book_riakput(Bookie, TestObject, TestSpec), + + ReportAfterOnePut = + maps:merge(#{ledger_cache_size => 1}, InitialReport), + ReportAfterOnePut = leveled_bookie:book_status(Bookie), + + ok = leveled_bookie:book_destroy(Bookie). + simple_put_fetch_head_delete(_Config) -> io:format("simple test with info and no forced logs~n"), simple_test_withlog(info, []), From dffe29abca36b4bf51689e0cad119d3cd1208a91 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 22 Jan 2026 04:14:08 +0200 Subject: [PATCH 13/33] avoid funs taking #state{}; assemble enriched_bookie_statue in situ --- src/leveled_monitor.erl | 105 +++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 12726b57..ca16eb86 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -326,8 +326,60 @@ init([LogOpts, LogFrequency, LogOrder]) -> bookie_status = #{} }}. 
-handle_call(get_bookie_status, _From, State) -> - {reply, enriched_bookie_status(State), State}; +handle_call( + get_bookie_status, + _From, + #state{ + bookie_status = BS, + bookie_get_timings = GT, + bookie_put_timings = PT, + bookie_head_timings = HT, + pcl_fetch_timings = PFT + } = State +) -> + FCL = #{ + not_found => #{ + count => PFT#pcl_fetch_timings.notfound_count, + time => PFT#pcl_fetch_timings.notfound_time + }, + mem => #{ + count => PFT#pcl_fetch_timings.foundmem_count, + time => PFT#pcl_fetch_timings.foundmem_time + }, + '0' => #{ + count => PFT#pcl_fetch_timings.found0_count, + time => PFT#pcl_fetch_timings.found0_time + }, + '1' => #{ + count => PFT#pcl_fetch_timings.found1_count, + time => PFT#pcl_fetch_timings.found1_time + }, + '2' => #{ + count => PFT#pcl_fetch_timings.found2_count, + time => PFT#pcl_fetch_timings.found2_time + }, + '3' => #{ + count => PFT#pcl_fetch_timings.found3_count, + time => PFT#pcl_fetch_timings.found3_time + }, + lower => #{ + count => PFT#pcl_fetch_timings.foundlower_count, + time => PFT#pcl_fetch_timings.foundlower_time + } + }, + StatusEnriched = + BS#{ + get_sample_count => GT#bookie_get_timings.sample_count, + get_body_time => GT#bookie_get_timings.body_time, + head_sample_count => HT#bookie_head_timings.sample_count, + head_rsp_time => HT#bookie_head_timings.rsp_time, + put_sample_count => PT#bookie_put_timings.sample_count, + put_prep_time => PT#bookie_put_timings.prep_time, + put_ink_time => PT#bookie_put_timings.ink_time, + put_mem_time => PT#bookie_put_timings.mem_time, + fetch_count_by_level => FCL + }, + {reply, StatusEnriched, State}; handle_call(close, _From, State) -> {stop, normal, ok, State}. @@ -759,55 +811,6 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. 
-enriched_bookie_status(#state{ - bookie_status = BS, - bookie_get_timings = GT, - bookie_put_timings = PT, - bookie_head_timings = HT, - pcl_fetch_timings = PFT -}) -> - FCL = #{ - not_found => #{ - count => PFT#pcl_fetch_timings.notfound_count, - time => PFT#pcl_fetch_timings.notfound_time - }, - mem => #{ - count => PFT#pcl_fetch_timings.foundmem_count, - time => PFT#pcl_fetch_timings.foundmem_time - }, - '0' => #{ - count => PFT#pcl_fetch_timings.found0_count, - time => PFT#pcl_fetch_timings.found0_time - }, - '1' => #{ - count => PFT#pcl_fetch_timings.found1_count, - time => PFT#pcl_fetch_timings.found1_time - }, - '2' => #{ - count => PFT#pcl_fetch_timings.found2_count, - time => PFT#pcl_fetch_timings.found2_time - }, - '3' => #{ - count => PFT#pcl_fetch_timings.found3_count, - time => PFT#pcl_fetch_timings.found3_time - }, - lower => #{ - count => PFT#pcl_fetch_timings.foundlower_count, - time => PFT#pcl_fetch_timings.foundlower_time - } - }, - BS#{ - get_sample_count => GT#bookie_get_timings.sample_count, - get_body_time => GT#bookie_get_timings.body_time, - head_sample_count => HT#bookie_head_timings.sample_count, - head_rsp_time => HT#bookie_head_timings.rsp_time, - put_sample_count => PT#bookie_put_timings.sample_count, - put_prep_time => PT#bookie_put_timings.prep_time, - put_ink_time => PT#bookie_put_timings.ink_time, - put_mem_time => PT#bookie_put_timings.mem_time, - fetch_count_by_level => FCL - }. 
- %%%============================================================================ %%% Test %%%============================================================================ From 4314334af761fd7d08901c90e1f6208b6e93d421 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 22 Jan 2026 04:14:37 +0200 Subject: [PATCH 14/33] spec and typo fixes --- src/leveled_bookie.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index fac37c55..4e6dae7b 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -1317,9 +1317,9 @@ book_removelogs(Pid, ForcedLogs) -> book_headstatus(Pid) -> gen_server:call(Pid, head_status, infinity). --spec book_status(pid()) -> proplists:proplist(). +-spec book_status(pid()) -> map(). %% @doc -%% Return a proplist conteaining the following items: +%% Return a proplist containing the following items: %% * current size of the ledger cache; %% * number of active journal files; %% * average compaction score for the journal; From 11e30b98880e7661fb8aee150eab71882e163338 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 22 Jan 2026 04:16:17 +0200 Subject: [PATCH 15/33] include all keys, with values 'undefined' initially, in bookie status --- src/leveled_bookie.erl | 25 +++++++++++++++++- src/leveled_monitor.erl | 58 +++++++++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 4e6dae7b..f004875e 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -3082,7 +3082,30 @@ maybelog_snap_timing(_Monitor, _, _) -> status(#state{monitor = {no_monitor, 0}}) -> #{}; status(#state{monitor = {Monitor, _}}) -> - leveled_monitor:get_bookie_status(Monitor). 
+ AllZeros = + #{ + ledger_cache_size => undefined, + n_active_journal_files => undefined, + avg_compaction_score => undefined, + level_files_count => undefined, + penciller_inmem_cache_size => undefined, + penciller_work_backlog_status => undefined, + penciller_last_merge_time => undefined, + journal_last_compaction_time => undefined, + journal_last_compaction_result => undefined, + get_sample_count => undefined, + get_body_time => undefined, + head_sample_count => undefined, + head_rsp_time => undefined, + put_sample_count => undefined, + put_prep_time => undefined, + put_ink_time => undefined, + put_mem_time => undefined, + fetch_count_by_level => undefined + }, + maps:merge( + AllZeros, leveled_monitor:get_bookie_status(Monitor) + ). %%%============================================================================ %%% Test diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index ca16eb86..3a681b63 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -132,24 +132,29 @@ }). 
-type bookie_status() :: #{ - ledger_cache_size => non_neg_integer(), - n_active_journal_files => pos_integer(), - avg_compaction_score_sample => [float()], - level_files_count => #{non_neg_integer() => non_neg_integer()}, - penciller_inmem_cache_size => pos_integer(), - penciller_work_backlog_status => {non_neg_integer(), boolean(), boolean()}, - penciller_last_merge_time => integer(), - journal_last_compaction_time => integer(), - journal_last_compaction_result => {non_neg_integer(), float()}, - recent_putgethead_counts => { - non_neg_integer(), non_neg_integer(), non_neg_integer() - }, - fetch_count_by_level => #{ - reporting_fetch_level() => #{ - count => non_neg_integer(), - time => non_neg_integer() + ledger_cache_size => undefined | non_neg_integer(), + n_active_journal_files => undefined | pos_integer(), + avg_compaction_score_sample => undefined | [float()], + level_files_count => undefined | #{non_neg_integer() => non_neg_integer()}, + penciller_inmem_cache_size => undefined | pos_integer(), + penciller_work_backlog_status => + undefined | {non_neg_integer(), boolean(), boolean()}, + penciller_last_merge_time => undefined | integer(), + journal_last_compaction_time => undefined | integer(), + journal_last_compaction_result => undefined | {non_neg_integer(), float()}, + recent_putgethead_counts => + undefined + | { + non_neg_integer(), non_neg_integer(), non_neg_integer() + }, + fetch_count_by_level => + undefined + | #{ + reporting_fetch_level() => #{ + count => non_neg_integer(), + time => non_neg_integer() + } } - } }. -type reporting_fetch_level() :: not_found | mem | '0' | '1' | '2' | '3' | lower. 
@@ -745,14 +750,23 @@ handle_cast({ledger_cache_size_update, A}, State = #state{bookie_status = BS}) - handle_cast( {n_active_journal_files_update, Delta}, State = #state{bookie_status = BS0} ) -> - A = maps:get(n_active_journal_files, BS0, 0), + A = + case maps:get(n_active_journal_files, BS0, undefined) of + undefined -> 0; + Defined -> Defined + end, BS = maps:put(n_active_journal_files, A + Delta, BS0), {noreply, State#state{bookie_status = BS}}; handle_cast( {avg_compaction_score_update, A}, State = #state{bookie_status = BS} ) -> + OldSample = + case maps:get(avg_compaction_score_sample, BS, undefined) of + undefined -> []; + Defined -> Defined + end, NewSample = - case [A | maps:get(avg_compaction_score_sample, BS, [])] of + case [A | OldSample] of L when length(L) > ?AVG_COMPACTION_SCORE_OVER_MAX -> lists:sublist(L, ?AVG_COMPACTION_SCORE_OVER_MAX); L -> @@ -764,7 +778,11 @@ handle_cast( handle_cast( {level_files_count_update, U, TS}, State = #state{bookie_status = BS0} ) -> - A = maps:get(level_files_count, BS0, #{}), + A = + case maps:get(level_files_count, BS0, undefined) of + undefined -> #{}; + Defined -> Defined + end, BS1 = maps:put(level_files_count, maps:merge(A, U), BS0), BS2 = maps:put(penciller_last_merge_time, TS, BS1), {noreply, State#state{bookie_status = BS2}}; From 56d848eca35368b69d16df6203274d5fb9a34a33 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 29 Jan 2026 01:07:26 +0200 Subject: [PATCH 16/33] extend book status report ct case --- test/end_to_end/basic_SUITE.erl | 77 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 8db65a15..46a8aa61 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -21,21 +21,21 @@ all() -> [ - bookie_status_report - %% simple_put_fetch_head_delete, - %% many_put_fetch_head, - %% journal_compaction, - %% fetchput_snapshot, - %% load_and_count, - %% 
load_and_count_withdelete, - %% space_clear_ondelete, - %% is_empty_test, - %% many_put_fetch_switchcompression, - %% bigjournal_littlejournal, - %% bigsst_littlesst, - %% safereaderror_startup, - %% remove_journal_test, - %% bigpcl_bucketlist + bookie_status_report, + simple_put_fetch_head_delete, + many_put_fetch_head, + journal_compaction, + fetchput_snapshot, + load_and_count, + load_and_count_withdelete, + space_clear_ondelete, + is_empty_test, + many_put_fetch_switchcompression, + bigjournal_littlejournal, + bigsst_littlesst, + safereaderror_startup, + remove_journal_test, + bigpcl_bucketlist ]. init_per_suite(Config) -> @@ -52,12 +52,21 @@ bookie_status_report(_Config) -> {root_path, RootPath}, {sync_strategy, testutil:sync_strategy()}, {log_level, info}, + {stats_percentage, 100}, {forced_logs, []} ], {ok, Bookie} = leveled_bookie:book_start(StartOpts), InitialReport = #{ + ledger_cache_size => undefined, + n_active_journal_files => undefined, + level_files_count => undefined, + penciller_inmem_cache_size => undefined, + penciller_work_backlog_status => undefined, + penciller_last_merge_time => undefined, + journal_last_compaction_time => undefined, + journal_last_compaction_result => undefined, fetch_count_by_level => #{ not_found => #{count => 0, time => 0}, @@ -68,26 +77,44 @@ bookie_status_report(_Config) -> '2' => #{count => 0, time => 0}, '3' => #{count => 0, time => 0} }, - get_body_time => 0, get_sample_count => 0, - head_rsp_time => 0, + get_body_time => 0, head_sample_count => 0, + head_rsp_time => 0, + put_sample_count => 0, + put_prep_time => 0, put_ink_time => 0, put_mem_time => 0, - put_prep_time => 0, - put_sample_count => 0 + avg_compaction_score => undefined }, InitialReport = leveled_bookie:book_status(Bookie), - {TestObject, TestSpec} = testutil:generate_testobject(), - ok = testutil:book_riakput(Bookie, TestObject, TestSpec), - - ReportAfterOnePut = - maps:merge(#{ledger_cache_size => 1}, InitialReport), - ReportAfterOnePut = 
leveled_bookie:book_status(Bookie), + {TObj, TSpec} = testutil:generate_testobject(), + ok = testutil:book_riakput(Bookie, TObj, TSpec), + + Rep1 = leveled_bookie:book_status(Bookie), + 1 = maps:get(ledger_cache_size, Rep1), + 1 = maps:get(put_sample_count, Rep1), + GoodPutPrepTime = 10000, + GoodPutInkTime = 10000, + GoodPutMemTime = 100, + within_range(1, GoodPutPrepTime, maps:get(put_prep_time, Rep1)), + within_range(1, GoodPutInkTime, maps:get(put_ink_time, Rep1)), + within_range(1, GoodPutMemTime, maps:get(put_mem_time, Rep1)), + + {r_object, TBkt, TKey, _, _, _, _} = TObj, + {ok, _} = testutil:book_riakget(Bookie, TBkt, TKey), + Rep2 = leveled_bookie:book_status(Bookie), + 1 = maps:get(get_sample_count, Rep2), + GoodGetBodyTime = 500, + within_range(1, GoodGetBodyTime, maps:get(get_body_time, Rep2)), ok = leveled_bookie:book_destroy(Bookie). +within_range(Min, Max, V) -> + true = Min =< V, + true = Max >= V. + simple_put_fetch_head_delete(_Config) -> io:format("simple test with info and no forced logs~n"), simple_test_withlog(info, []), From d9a2a078a114a2f5f0619e0fa95cd2536f0eb7cc Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Mon, 2 Feb 2026 00:00:24 +0200 Subject: [PATCH 17/33] move journal_last_compaction_time_update to where compaction results are updated --- src/leveled_iclerk.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index a20e0162..56091edb 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -431,6 +431,10 @@ handle_cast( Monitor, {journal_last_compaction_result_update, {length(BestRun0), Score}} ), + leveled_monitor:add_stat( + Monitor, + {journal_last_compaction_time_update, os:system_time(millisecond)} + ), ?TMR_LOG(ic003, [Score, length(BestRun0)], SW), case Score > 0.0 of true -> @@ -478,10 +482,6 @@ handle_cast( leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), CDBopts = State#state.cdb_options, {Monitor, _} = 
CDBopts#cdb_options.monitor, - leveled_monitor:add_stat( - Monitor, - {journal_last_compaction_time_update, os:system_time(millisecond)} - ), ?STD_LOG(ic007, []), ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete), {noreply, State}; From 4f1fabb5e4a8585e13946d2563bdb6e0320dd19d Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Mon, 2 Feb 2026 00:04:08 +0200 Subject: [PATCH 18/33] extend bookie status report test (delete+compact again) --- test/end_to_end/basic_SUITE.erl | 66 +++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 46a8aa61..68ff7def 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -101,13 +101,79 @@ bookie_status_report(_Config) -> within_range(1, GoodPutPrepTime, maps:get(put_prep_time, Rep1)), within_range(1, GoodPutInkTime, maps:get(put_ink_time, Rep1)), within_range(1, GoodPutMemTime, maps:get(put_mem_time, Rep1)), + undefined = maps:get(level_files_count, Rep1), {r_object, TBkt, TKey, _, _, _, _} = TObj, {ok, _} = testutil:book_riakget(Bookie, TBkt, TKey), Rep2 = leveled_bookie:book_status(Bookie), + io:format(user, "Rep2: ~p\n", [Rep2]), 1 = maps:get(get_sample_count, Rep2), GoodGetBodyTime = 500, within_range(1, GoodGetBodyTime, maps:get(get_body_time, Rep2)), + undefined = maps:get(level_files_count, Rep2), + + io:format("Prompt journal compaction~n"), + CompactionStarted1 = os:system_time(millisecond), + ok = leveled_bookie:book_compactjournal(Bookie, 30000), + testutil:wait_for_compaction(Bookie), + + Rep3 = leveled_bookie:book_status(Bookie), + io:format(user, "Rep3: ~p\n", [Rep3]), + {0, +0.0} = maps:get(journal_last_compaction_result, Rep3), + within_range( + CompactionStarted1, + os:system_time(millisecond), + maps:get(journal_last_compaction_time, Rep3) + ), + undefined = maps:get(level_files_count, Rep3), + undefined = maps:get(penciller_inmem_cache_size, Rep3), + undefined = 
maps:get(penciller_last_merge_time, Rep3), + + io:format("Load 80K objects and then delete them~n"), + testutil:load_objects( + 20000, + [binary_uuid, binary_uuid, binary_uuid, binary_uuid], + Bookie, + no_check, + fun testutil:generate_compressibleobjects/2 + ), + FoldKeysFun = fun(B, K, Acc) -> [{B, K} | Acc] end, + {async, F1} = + leveled_bookie:book_keylist(Bookie, o_rkv, {FoldKeysFun, []}), + KL1 = F1(), + lists:foreach( + fun({Bucket, Key}) -> + testutil:book_riakdelete(Bookie, Bucket, Key, []) + end, + KL1 + ), + + io:format("Prompt journal compaction again~n"), + CompactionStarted2 = os:system_time(millisecond), + ok = leveled_bookie:book_compactjournal(Bookie, 30000), + testutil:wait_for_compaction(Bookie), + + Rep4 = leveled_bookie:book_status(Bookie), + io:format(user, "Rep4: ~p\n", [Rep4]), + #{1 := 1} = maps:get(level_files_count, Rep4), + within_range( + CompactionStarted2, + os:system_time(millisecond), + maps:get(journal_last_compaction_time, Rep4) + ), + %% penciller last merge actually happens before second compaction, + %% but only gets recorded in book monitor at the time level files + %% count is updated, i.e., earlier than CompactionStarted2, so: + within_range( + CompactionStarted1, + os:system_time(millisecond), + maps:get(penciller_last_merge_time, Rep4) + ), + within_range( + 0, + 20000, + maps:get(penciller_inmem_cache_size, Rep4) + ), ok = leveled_bookie:book_destroy(Bookie). 
From 644b2bf2860a2e260f493b30c6ba883808f17253 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Mon, 2 Feb 2026 23:11:22 +0200 Subject: [PATCH 19/33] fixup for d9a2a078a114 --- src/leveled_iclerk.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 56091edb..6db72558 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -481,7 +481,6 @@ handle_cast( FilesToDelete = leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), CDBopts = State#state.cdb_options, - {Monitor, _} = CDBopts#cdb_options.monitor, ?STD_LOG(ic007, []), ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete), {noreply, State}; From 83032d29995e8642a7b47a0f8403c5bc9d16d6cb Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Mon, 2 Feb 2026 23:11:31 +0200 Subject: [PATCH 20/33] typo --- src/leveled_iclerk.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 6db72558..ed27546b 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -729,7 +729,7 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) -> %% %% Although this requires many loops over the list of the candidate, as the %% file scores have already been calculated the cost per loop should not be -%% a high burden. Reducing the maximum run length, will reduce the cost of +%% a high burden. Reducing the maximum run length, will reduce the cost if %% this exercise should be a problem. 
%% %% The score parameters are used to produce the score of the compaction run, From bd54c7e68306be7c5a2f57e60a4ce3161e115111 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 3 Feb 2026 00:33:14 +0200 Subject: [PATCH 21/33] more typos --- src/leveled_iclerk.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index ed27546b..355b6059 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -771,7 +771,7 @@ assess_candidates(AllCandidates, Params) -> {list(candidate()), float()}. %% @doc %% For a given run length, calculate the scores for all consecutive runs of -%% files, comparing the score with the best run which has beens een so far. +%% files, comparing the score with the best run which has beens seen so far. %% The best is a tuple of the actual run of candidates, along with the score %% achieved for that run assess_for_runlength(RunLength, AllCandidates, Params, Best) -> @@ -790,7 +790,7 @@ assess_for_runlength(RunLength, AllCandidates, Params, Best) -> -spec score_run(list(candidate()), score_parameters()) -> float(). %% @doc %% Score a run. Caluclate the avergae score across all the files in the run, -%% and deduct that from a target score. Good candidate runs for comapction +%% and deduct that from a target score. Good candidate runs for compaction %% have larger (positive) scores. Bad candidate runs for compaction have %% negative scores. 
score_run([], _Params) -> From 526a362e9641f8e4eb4faad2a3c94b0f6a479ee4 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 3 Feb 2026 01:47:04 +0200 Subject: [PATCH 22/33] extend, tweak book status report, now with journal and penciller merges --- test/end_to_end/basic_SUITE.erl | 67 +++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 16 deletions(-) diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 68ff7def..6f9af36d 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -52,7 +52,13 @@ bookie_status_report(_Config) -> {root_path, RootPath}, {sync_strategy, testutil:sync_strategy()}, {log_level, info}, + %% ensure all stats are always collected {stats_percentage, 100}, + %% to trigger penciller merge events sooner + {max_pencillercachesize, 16000}, + %% to create more journal files (exactly 8 for the 80k of + %% keys loaded in the test) + {max_journalobjectcount, 10000}, {forced_logs, []} ], {ok, Bookie} = leveled_bookie:book_start(StartOpts), @@ -88,6 +94,7 @@ bookie_status_report(_Config) -> avg_compaction_score => undefined }, InitialReport = leveled_bookie:book_status(Bookie), + io:format(user, "\nInitial report, before any IO\n~p\n", [InitialReport]), {TObj, TSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie, TObj, TSpec), @@ -106,7 +113,7 @@ bookie_status_report(_Config) -> {r_object, TBkt, TKey, _, _, _, _} = TObj, {ok, _} = testutil:book_riakget(Bookie, TBkt, TKey), Rep2 = leveled_bookie:book_status(Bookie), - io:format(user, "Rep2: ~p\n", [Rep2]), + io:format(user, "\nReport after a single PUT+GET\n~p\n", [Rep2]), 1 = maps:get(get_sample_count, Rep2), GoodGetBodyTime = 500, within_range(1, GoodGetBodyTime, maps:get(get_body_time, Rep2)), @@ -118,7 +125,7 @@ bookie_status_report(_Config) -> testutil:wait_for_compaction(Bookie), Rep3 = leveled_bookie:book_status(Bookie), - io:format(user, "Rep3: ~p\n", [Rep3]), + io:format(user, "\nReport after 
first compaction:\n~p\n", [Rep3]), {0, +0.0} = maps:get(journal_last_compaction_result, Rep3), within_range( CompactionStarted1, @@ -147,33 +154,61 @@ bookie_status_report(_Config) -> end, KL1 ), + Rep4 = leveled_bookie:book_status(Bookie), + io:format(user, "\nReport after loading 80K objects:\n~p\n", [Rep4]), + %% we have reduced max penciller cache size to make it certain a + %% merge occurs after so many PUTs + within_range( + CompactionStarted1, + os:system_time(millisecond), + maps:get(penciller_last_merge_time, Rep4) + ), io:format("Prompt journal compaction again~n"), CompactionStarted2 = os:system_time(millisecond), ok = leveled_bookie:book_compactjournal(Bookie, 30000), testutil:wait_for_compaction(Bookie), - Rep4 = leveled_bookie:book_status(Bookie), - io:format(user, "Rep4: ~p\n", [Rep4]), - #{1 := 1} = maps:get(level_files_count, Rep4), + Rep5 = leveled_bookie:book_status(Bookie), + io:format(user, "\nReport after second compaction:\n~p\n", [Rep5]), + #{1 := 1} = maps:get(level_files_count, Rep5), within_range( CompactionStarted2, os:system_time(millisecond), - maps:get(journal_last_compaction_time, Rep4) - ), - %% penciller last merge actually happens before second compaction, - %% but only gets recorded in book monitor at the time level files - %% count is updated, i.e., earlier than CompactionStarted2, so: - within_range( - CompactionStarted1, - os:system_time(millisecond), - maps:get(penciller_last_merge_time, Rep4) + maps:get(journal_last_compaction_time, Rep5) ), + {8, JLCRScore} = maps:get(journal_last_compaction_result, Rep5), + within_range(0.0, 100.0, JLCRScore), + true = 0 < length(maps:get(avg_compaction_score_sample, Rep5)), + within_range( 0, - 20000, - maps:get(penciller_inmem_cache_size, Rep4) + 40000, + maps:get(penciller_inmem_cache_size, Rep5) + ), + NAJF = maps:get(n_active_journal_files, Rep5), + {ok, FF1a} = file:list_dir( + RootPath ++ "/journal/journal_files/post_compact" + ), + {ok, FF2a} = file:list_dir(RootPath ++ 
"/journal/journal_files/"), + io:format(user, "journal files: ~b, in post_compact: ~b\n", [ + length(FF1a), length(FF2a) + ]), + %% 8 is the number of journal files to accommodate 80k of objects + %% with max_journalobjectcount = 10000 + NAJF = 8, + NAJF = length(FF1a), + + io:format(user, "sleeping 10s to see 8 files are actually deleted\n", []), + timer:sleep(_DELETE_TIMEOUT = 10_000 + 1_000), + {ok, FF1b} = file:list_dir( + RootPath ++ "/journal/journal_files/post_compact" ), + {ok, FF2b} = file:list_dir(RootPath ++ "/journal/journal_files/"), + io:format(user, "journal files: ~b, in post_compact: ~b\n", [ + length(FF1b), length(FF2b) + ]), + NAJF = length(FF2a) - length(FF2b), ok = leveled_bookie:book_destroy(Bookie). From e1faffc1f2b7f4bfff06848df2400164313e2e91 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 3 Feb 2026 22:37:03 +0200 Subject: [PATCH 23/33] move n_active_journal_files_update increment to cdb_roll handler --- src/leveled_cdb.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 11b4f0d6..226bbdeb 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -494,7 +494,6 @@ starting({call, From}, {open_writer, Filename}, State) -> starting({call, From}, {open_reader, Filename}, State) -> leveled_log:save(State#state.log_options), {Monitor, _} = State#state.monitor, - leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), ?STD_LOG(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, false), State0 = State#state{ @@ -507,7 +506,6 @@ starting({call, From}, {open_reader, Filename}, State) -> starting({call, From}, {open_reader, Filename, LastKey}, State) -> leveled_log:save(State#state.log_options), {Monitor, _} = State#state.monitor, - leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), ?STD_LOG(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), State0 = State#state{ @@ -654,6 
+652,8 @@ writer( ) when ?IS_DEF(LP) -> + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), ok = leveled_iclerk:clerk_hashtablecalc( State#state.hashtree, LP, self() From 04d4bd7af2d0de07f4a8d42d084a625f99328e8c Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 3 Feb 2026 22:38:58 +0200 Subject: [PATCH 24/33] better counting and checking of active journal files in bookie_status ct --- test/end_to_end/basic_SUITE.erl | 43 +++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 6f9af36d..bcab1379 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -186,29 +186,29 @@ bookie_status_report(_Config) -> 40000, maps:get(penciller_inmem_cache_size, Rep5) ), - NAJF = maps:get(n_active_journal_files, Rep5), - {ok, FF1a} = file:list_dir( - RootPath ++ "/journal/journal_files/post_compact" + NAJF1 = maps:get(n_active_journal_files, Rep5), + {FF1a, FF1b} = list_journal_files(RootPath), + io:format( + user, "journal files: ~b (reported: ~b), in post_compact: ~b\n", [ + length(FF1a), NAJF1, length(FF1b) + ] ), - {ok, FF2a} = file:list_dir(RootPath ++ "/journal/journal_files/"), - io:format(user, "journal files: ~b, in post_compact: ~b\n", [ - length(FF1a), length(FF2a) - ]), - %% 8 is the number of journal files to accommodate 80k of objects - %% with max_journalobjectcount = 10000 - NAJF = 8, - NAJF = length(FF1a), + NAJF1 = length(FF1a), io:format(user, "sleeping 10s to see 8 files are actually deleted\n", []), timer:sleep(_DELETE_TIMEOUT = 10_000 + 1_000), - {ok, FF1b} = file:list_dir( - RootPath ++ "/journal/journal_files/post_compact" + + Rep6 = leveled_bookie:book_status(Bookie), + NAJF2 = maps:get(n_active_journal_files, Rep6), + {FF2a, FF2b} = list_journal_files(RootPath), + io:format( + user, + "after cleaning up, journal files: ~b (reported: ~b), in 
post_compact: ~b\n", + [ + length(FF2a), NAJF2, length(FF2b) + ] ), - {ok, FF2b} = file:list_dir(RootPath ++ "/journal/journal_files/"), - io:format(user, "journal files: ~b, in post_compact: ~b\n", [ - length(FF1b), length(FF2b) - ]), - NAJF = length(FF2a) - length(FF2b), + NAJF2 = length(FF2a), ok = leveled_bookie:book_destroy(Bookie). @@ -216,6 +216,13 @@ within_range(Min, Max, V) -> true = Min =< V, true = Max >= V. +list_journal_files(RootPath) -> + FFa = filelib:wildcard(RootPath ++ "/journal/journal_files/*.cdb"), + FFb = filelib:wildcard( + RootPath ++ "/journal/journal_files/post_compact/*.cdb" + ), + {FFa, FFb}. + simple_put_fetch_head_delete(_Config) -> io:format("simple test with info and no forced logs~n"), simple_test_withlog(info, []), From 27749c83e0671aaf0f276216bb209a46ea23daaf Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 3 Feb 2026 22:42:44 +0200 Subject: [PATCH 25/33] fix unused vars after removing code that used them --- src/leveled_cdb.erl | 2 -- src/leveled_iclerk.erl | 1 - 2 files changed, 3 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 226bbdeb..76469aae 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -493,7 +493,6 @@ starting({call, From}, {open_writer, Filename}, State) -> {next_state, writer, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename}, State) -> leveled_log:save(State#state.log_options), - {Monitor, _} = State#state.monitor, ?STD_LOG(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, false), State0 = State#state{ @@ -505,7 +504,6 @@ starting({call, From}, {open_reader, Filename}, State) -> {next_state, reader, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename, LastKey}, State) -> leveled_log:save(State#state.log_options), - {Monitor, _} = State#state.monitor, ?STD_LOG(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), State0 = State#state{ diff 
--git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 355b6059..5bb87fec 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -480,7 +480,6 @@ handle_cast( -> FilesToDelete = leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), - CDBopts = State#state.cdb_options, ?STD_LOG(ic007, []), ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete), {noreply, State}; From ddf15c45b0802601ff23d69528e1a4bf2d05dd1d Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Wed, 4 Feb 2026 14:46:29 +0200 Subject: [PATCH 26/33] also incr n_active_journal_files on open_reader --- src/leveled_cdb.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 76469aae..ebbc3a09 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -494,6 +494,8 @@ starting({call, From}, {open_writer, Filename}, State) -> starting({call, From}, {open_reader, Filename}, State) -> leveled_log:save(State#state.log_options), ?STD_LOG(cdb02, [Filename]), + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), {Handle, Index, LastKey} = open_for_readonly(Filename, false), State0 = State#state{ handle = Handle, From 0bf69057e2a91e5893379df513051425b1bd4a35 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 5 Feb 2026 02:36:22 +0200 Subject: [PATCH 27/33] have all items in bookie status map at outset --- src/leveled_bookie.erl | 25 +----------- src/leveled_monitor.erl | 71 +++++++++++++++++++++++---------- test/end_to_end/basic_SUITE.erl | 43 +++++++++++++++----- 3 files changed, 83 insertions(+), 56 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index f004875e..4e6dae7b 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -3082,30 +3082,7 @@ maybelog_snap_timing(_Monitor, _, _) -> status(#state{monitor = {no_monitor, 0}}) -> #{}; status(#state{monitor = {Monitor, _}}) -> - AllZeros = - #{ - ledger_cache_size => 
undefined, - n_active_journal_files => undefined, - avg_compaction_score => undefined, - level_files_count => undefined, - penciller_inmem_cache_size => undefined, - penciller_work_backlog_status => undefined, - penciller_last_merge_time => undefined, - journal_last_compaction_time => undefined, - journal_last_compaction_result => undefined, - get_sample_count => undefined, - get_body_time => undefined, - head_sample_count => undefined, - head_rsp_time => undefined, - put_sample_count => undefined, - put_prep_time => undefined, - put_ink_time => undefined, - put_mem_time => undefined, - fetch_count_by_level => undefined - }, - maps:merge( - AllZeros, leveled_monitor:get_bookie_status(Monitor) - ). + leveled_monitor:get_bookie_status(Monitor). %%%============================================================================ %%% Test diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 3a681b63..0d61dc8f 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -55,6 +55,38 @@ ]). -define(LOG_FREQUENCY_SECONDS, 30). +-define(INITIAL_BOOKIE_STATUS, #{ + avg_compaction_score => undefined, + avg_compaction_score_sample => [], + fetch_count_by_level => + #{ + not_found => #{count => 0, time => 0}, + mem => #{count => 0, time => 0}, + lower => #{count => 0, time => 0}, + '0' => #{count => 0, time => 0}, + '1' => #{count => 0, time => 0}, + '2' => #{count => 0, time => 0}, + '3' => #{count => 0, time => 0} + }, + get_body_time => 0, + get_sample_count => 0, + head_rsp_time => 0, + head_sample_count => 0, + journal_last_compaction_result => undefined, + journal_last_compaction_time => undefined, + ledger_cache_size => undefined, + level_files_count => #{}, + n_active_journal_files => 0, + penciller_inmem_cache_size => undefined, + penciller_last_merge_time => undefined, + penciller_work_backlog_status => undefined, + put_ink_time => 0, + put_mem_time => 0, + put_prep_time => 0, + put_sample_count => 0, + recent_putgethead_counts => undefined +}). 
+ -record(bookie_get_timings, { sample_count = 0 :: non_neg_integer(), head_time = 0 :: non_neg_integer(), @@ -133,9 +165,10 @@ -type bookie_status() :: #{ ledger_cache_size => undefined | non_neg_integer(), - n_active_journal_files => undefined | pos_integer(), - avg_compaction_score_sample => undefined | [float()], - level_files_count => undefined | #{non_neg_integer() => non_neg_integer()}, + n_active_journal_files => non_neg_integer(), + avg_compaction_score => undefined | float(), + avg_compaction_score_sample => [float()], + level_files_count => #{non_neg_integer() => non_neg_integer()}, penciller_inmem_cache_size => undefined | pos_integer(), penciller_work_backlog_status => undefined | {non_neg_integer(), boolean(), boolean()}, @@ -154,7 +187,15 @@ count => non_neg_integer(), time => non_neg_integer() } - } + }, + get_body_time => undefined | non_neg_integer(), + get_sample_count => non_neg_integer(), + head_rsp_time => undefined | non_neg_integer(), + head_sample_count => non_neg_integer(), + put_ink_time => undefined | non_neg_integer(), + put_mem_time => undefined | non_neg_integer(), + put_prep_time => undefined | non_neg_integer(), + put_sample_count => non_neg_integer() }. -type reporting_fetch_level() :: not_found | mem | '0' | '1' | '2' | '3' | lower. @@ -171,7 +212,7 @@ cdb_get_timings = #cdb_get_timings{} :: cdb_get_timings(), log_frequency = ?LOG_FREQUENCY_SECONDS :: pos_integer(), log_order = [] :: list(log_type()), - bookie_status = #{} :: bookie_status() + bookie_status :: bookie_status() }). -type bookie_get_timings() :: #bookie_get_timings{}. @@ -328,7 +369,7 @@ init([LogOpts, LogFrequency, LogOrder]) -> {ok, #state{ log_frequency = LogFrequency, log_order = RandomLogOrder, - bookie_status = #{} + bookie_status = ?INITIAL_BOOKIE_STATUS }}. 
handle_call( @@ -750,21 +791,13 @@ handle_cast({ledger_cache_size_update, A}, State = #state{bookie_status = BS}) - handle_cast( {n_active_journal_files_update, Delta}, State = #state{bookie_status = BS0} ) -> - A = - case maps:get(n_active_journal_files, BS0, undefined) of - undefined -> 0; - Defined -> Defined - end, + A = maps:get(n_active_journal_files, BS0), BS = maps:put(n_active_journal_files, A + Delta, BS0), {noreply, State#state{bookie_status = BS}}; handle_cast( {avg_compaction_score_update, A}, State = #state{bookie_status = BS} ) -> - OldSample = - case maps:get(avg_compaction_score_sample, BS, undefined) of - undefined -> []; - Defined -> Defined - end, + OldSample = maps:get(avg_compaction_score_sample, BS), NewSample = case [A | OldSample] of L when length(L) > ?AVG_COMPACTION_SCORE_OVER_MAX -> @@ -778,11 +811,7 @@ handle_cast( handle_cast( {level_files_count_update, U, TS}, State = #state{bookie_status = BS0} ) -> - A = - case maps:get(level_files_count, BS0, undefined) of - undefined -> #{}; - Defined -> Defined - end, + A = maps:get(level_files_count, BS0, undefined), BS1 = maps:put(level_files_count, maps:merge(A, U), BS0), BS2 = maps:put(penciller_last_merge_time, TS, BS1), {noreply, State#state{bookie_status = BS2}}; diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index bcab1379..feca5f78 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -62,12 +62,13 @@ bookie_status_report(_Config) -> {forced_logs, []} ], {ok, Bookie} = leveled_bookie:book_start(StartOpts), + {FF10, FF10} = list_journal_files(RootPath), InitialReport = #{ ledger_cache_size => undefined, - n_active_journal_files => undefined, - level_files_count => undefined, + n_active_journal_files => 0, + level_files_count => #{}, penciller_inmem_cache_size => undefined, penciller_work_backlog_status => undefined, penciller_last_merge_time => undefined, @@ -83,6 +84,7 @@ bookie_status_report(_Config) -> '2' => #{count => 0, 
time => 0}, '3' => #{count => 0, time => 0} }, + recent_putgethead_counts => undefined, get_sample_count => 0, get_body_time => 0, head_sample_count => 0, @@ -91,10 +93,19 @@ bookie_status_report(_Config) -> put_prep_time => 0, put_ink_time => 0, put_mem_time => 0, - avg_compaction_score => undefined + avg_compaction_score => undefined, + avg_compaction_score_sample => [] }, InitialReport = leveled_bookie:book_status(Bookie), + {FF0a, FF0b} = list_journal_files(RootPath), io:format(user, "\nInitial report, before any IO\n~p\n", [InitialReport]), + io:format( + user, "journal files: ~b (reported: ~b), in post_compact: ~b\n", [ + length(FF0a), + maps:get(n_active_journal_files, InitialReport), + length(FF0b) + ] + ), {TObj, TSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie, TObj, TSpec), @@ -108,7 +119,7 @@ bookie_status_report(_Config) -> within_range(1, GoodPutPrepTime, maps:get(put_prep_time, Rep1)), within_range(1, GoodPutInkTime, maps:get(put_ink_time, Rep1)), within_range(1, GoodPutMemTime, maps:get(put_mem_time, Rep1)), - undefined = maps:get(level_files_count, Rep1), + #{} = maps:get(level_files_count, Rep1), {r_object, TBkt, TKey, _, _, _, _} = TObj, {ok, _} = testutil:book_riakget(Bookie, TBkt, TKey), @@ -117,7 +128,7 @@ bookie_status_report(_Config) -> 1 = maps:get(get_sample_count, Rep2), GoodGetBodyTime = 500, within_range(1, GoodGetBodyTime, maps:get(get_body_time, Rep2)), - undefined = maps:get(level_files_count, Rep2), + #{} = maps:get(level_files_count, Rep2), io:format("Prompt journal compaction~n"), CompactionStarted1 = os:system_time(millisecond), @@ -132,7 +143,7 @@ bookie_status_report(_Config) -> os:system_time(millisecond), maps:get(journal_last_compaction_time, Rep3) ), - undefined = maps:get(level_files_count, Rep3), + #{} = maps:get(level_files_count, Rep3), undefined = maps:get(penciller_inmem_cache_size, Rep3), undefined = maps:get(penciller_last_merge_time, Rep3), @@ -203,14 +214,24 @@ 
bookie_status_report(_Config) -> {FF2a, FF2b} = list_journal_files(RootPath), io:format( user, - "after cleaning up, journal files: ~b (reported: ~b), in post_compact: ~b\n", - [ - length(FF2a), NAJF2, length(FF2b) - ] + "after cleaning up, journal files: " + "~b (reported: ~b), in post_compact: ~b\n", + [length(FF2a), NAJF2, length(FF2b)] ), NAJF2 = length(FF2a), - ok = leveled_bookie:book_destroy(Bookie). + io:format(user, "\nClosing book and reopening\n", []), + ok = leveled_bookie:book_close(Bookie), + {ok, Bookie2} = leveled_bookie:book_start(StartOpts), + Rep7 = leveled_bookie:book_status(Bookie2), + {FF3a, _} = list_journal_files(RootPath), + io:format( + user, + "\nAfter reopening store, ~p files reported, ~b found on disk\n", + [maps:get(n_active_journal_files, Rep7), length(FF3a)] + ), + + ok = leveled_bookie:book_destroy(Bookie2). within_range(Min, Max, V) -> true = Min =< V, From df7954daa3e320b17750553e5471fb23a14cc51f Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Thu, 5 Feb 2026 02:36:22 +0200 Subject: [PATCH 28/33] get right the counting of active journal files --- src/leveled_cdb.erl | 2 ++ src/leveled_monitor.erl | 4 +-- test/end_to_end/basic_SUITE.erl | 60 ++++++++++++--------------------- 3 files changed, 25 insertions(+), 41 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index ebbc3a09..fe94d482 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -507,6 +507,8 @@ starting({call, From}, {open_reader, Filename}, State) -> starting({call, From}, {open_reader, Filename, LastKey}, State) -> leveled_log:save(State#state.log_options), ?STD_LOG(cdb02, [Filename]), + {Monitor, _} = State#state.monitor, + leveled_monitor:add_stat(Monitor, {n_active_journal_files_update, +1}), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), State0 = State#state{ handle = Handle, diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 0d61dc8f..41af02a7 100644 --- a/src/leveled_monitor.erl +++ 
b/src/leveled_monitor.erl @@ -76,7 +76,7 @@ journal_last_compaction_time => undefined, ledger_cache_size => undefined, level_files_count => #{}, - n_active_journal_files => 0, + n_active_journal_files => 1, penciller_inmem_cache_size => undefined, penciller_last_merge_time => undefined, penciller_work_backlog_status => undefined, @@ -165,7 +165,7 @@ -type bookie_status() :: #{ ledger_cache_size => undefined | non_neg_integer(), - n_active_journal_files => non_neg_integer(), + n_active_journal_files => pos_integer(), avg_compaction_score => undefined | float(), avg_compaction_score_sample => [float()], level_files_count => #{non_neg_integer() => non_neg_integer()}, diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index feca5f78..5c45fcea 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -62,12 +62,11 @@ bookie_status_report(_Config) -> {forced_logs, []} ], {ok, Bookie} = leveled_bookie:book_start(StartOpts), - {FF10, FF10} = list_journal_files(RootPath), InitialReport = #{ ledger_cache_size => undefined, - n_active_journal_files => 0, + n_active_journal_files => 1, level_files_count => #{}, penciller_inmem_cache_size => undefined, penciller_work_backlog_status => undefined, @@ -97,15 +96,8 @@ bookie_status_report(_Config) -> avg_compaction_score_sample => [] }, InitialReport = leveled_bookie:book_status(Bookie), - {FF0a, FF0b} = list_journal_files(RootPath), io:format(user, "\nInitial report, before any IO\n~p\n", [InitialReport]), - io:format( - user, "journal files: ~b (reported: ~b), in post_compact: ~b\n", [ - length(FF0a), - maps:get(n_active_journal_files, InitialReport), - length(FF0b) - ] - ), + check_n_journal_files(RootPath, InitialReport), {TObj, TSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie, TObj, TSpec), @@ -120,6 +112,7 @@ bookie_status_report(_Config) -> within_range(1, GoodPutInkTime, maps:get(put_ink_time, Rep1)), within_range(1, GoodPutMemTime, 
maps:get(put_mem_time, Rep1)), #{} = maps:get(level_files_count, Rep1), + check_n_journal_files(RootPath, Rep1), {r_object, TBkt, TKey, _, _, _, _} = TObj, {ok, _} = testutil:book_riakget(Bookie, TBkt, TKey), @@ -129,6 +122,7 @@ bookie_status_report(_Config) -> GoodGetBodyTime = 500, within_range(1, GoodGetBodyTime, maps:get(get_body_time, Rep2)), #{} = maps:get(level_files_count, Rep2), + check_n_journal_files(RootPath, Rep2), io:format("Prompt journal compaction~n"), CompactionStarted1 = os:system_time(millisecond), @@ -146,6 +140,7 @@ bookie_status_report(_Config) -> #{} = maps:get(level_files_count, Rep3), undefined = maps:get(penciller_inmem_cache_size, Rep3), undefined = maps:get(penciller_last_merge_time, Rep3), + check_n_journal_files(RootPath, Rep3), io:format("Load 80K objects and then delete them~n"), testutil:load_objects( @@ -165,6 +160,7 @@ bookie_status_report(_Config) -> end, KL1 ), + Rep4 = leveled_bookie:book_status(Bookie), io:format(user, "\nReport after loading 80K objects:\n~p\n", [Rep4]), %% we have reduced max penciller cache size to make it certain a @@ -174,6 +170,7 @@ bookie_status_report(_Config) -> os:system_time(millisecond), maps:get(penciller_last_merge_time, Rep4) ), + check_n_journal_files(RootPath, Rep4), io:format("Prompt journal compaction again~n"), CompactionStarted2 = os:system_time(millisecond), @@ -191,45 +188,24 @@ bookie_status_report(_Config) -> {8, JLCRScore} = maps:get(journal_last_compaction_result, Rep5), within_range(0.0, 100.0, JLCRScore), true = 0 < length(maps:get(avg_compaction_score_sample, Rep5)), - within_range( 0, 40000, maps:get(penciller_inmem_cache_size, Rep5) ), - NAJF1 = maps:get(n_active_journal_files, Rep5), - {FF1a, FF1b} = list_journal_files(RootPath), - io:format( - user, "journal files: ~b (reported: ~b), in post_compact: ~b\n", [ - length(FF1a), NAJF1, length(FF1b) - ] - ), - NAJF1 = length(FF1a), + check_n_journal_files(RootPath, Rep5), io:format(user, "sleeping 10s to see 8 files are actually 
deleted\n", []), timer:sleep(_DELETE_TIMEOUT = 10_000 + 1_000), Rep6 = leveled_bookie:book_status(Bookie), - NAJF2 = maps:get(n_active_journal_files, Rep6), - {FF2a, FF2b} = list_journal_files(RootPath), - io:format( - user, - "after cleaning up, journal files: " - "~b (reported: ~b), in post_compact: ~b\n", - [length(FF2a), NAJF2, length(FF2b)] - ), - NAJF2 = length(FF2a), + check_n_journal_files(RootPath, Rep6), io:format(user, "\nClosing book and reopening\n", []), ok = leveled_bookie:book_close(Bookie), {ok, Bookie2} = leveled_bookie:book_start(StartOpts), Rep7 = leveled_bookie:book_status(Bookie2), - {FF3a, _} = list_journal_files(RootPath), - io:format( - user, - "\nAfter reopening store, ~p files reported, ~b found on disk\n", - [maps:get(n_active_journal_files, Rep7), length(FF3a)] - ), + check_n_journal_files(RootPath, Rep7), ok = leveled_bookie:book_destroy(Bookie2). @@ -237,12 +213,18 @@ within_range(Min, Max, V) -> true = Min =< V, true = Max >= V. -list_journal_files(RootPath) -> - FFa = filelib:wildcard(RootPath ++ "/journal/journal_files/*.cdb"), - FFb = filelib:wildcard( - RootPath ++ "/journal/journal_files/post_compact/*.cdb" +check_n_journal_files(RootPath, Rep) -> + A = length( + filelib:wildcard(RootPath ++ "/journal/journal_files/*.{cdb,pnd}") + ), + B = length( + filelib:wildcard( + RootPath ++ "/journal/journal_files/post_compact/*.{cdb,pnd}" + ) ), - {FFa, FFb}. + C = maps:get(n_active_journal_files, Rep), + io:format(user, "journal files: ~b (reported: ~b)\n", [A + B, C]), + C = A + B. 
simple_put_fetch_head_delete(_Config) -> io:format("simple test with info and no forced logs~n"), From 74a7b0c81d6ccaea76d414f407618dbc1c632d21 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Tue, 10 Feb 2026 23:47:29 +0200 Subject: [PATCH 29/33] add missing penciller_work_backlog_status_update, provide initial value instead of 'undefined' --- src/leveled_penciller.erl | 10 ++++++++++ test/end_to_end/basic_SUITE.erl | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index a3b74e43..745e05bd 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1290,9 +1290,19 @@ handle_cast( {WL, WC} = leveled_pmanifest:check_for_work(Man), case {WC, (CacheAlreadyFull or CacheOverSize)} of {0, false} -> + leveled_monitor:add_stat( + Monitor, + {penciller_work_backlog_status_update, + {0, false, false}} + ), % No work required {noreply, State#state{work_backlog = false}}; {WC, true} when WC < ?WORKQUEUE_BACKLOG_TOLERANCE -> + leveled_monitor:add_stat( + Monitor, + {penciller_work_backlog_status_update, + {WC, false, true}} + ), % Rolling the memory to create a new Level Zero file % Must not do this if there is a work backlog beyond the % tolerance, as then the backlog may never be addressed. 
diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 5c45fcea..44919fd3 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -69,7 +69,7 @@ bookie_status_report(_Config) -> n_active_journal_files => 1, level_files_count => #{}, penciller_inmem_cache_size => undefined, - penciller_work_backlog_status => undefined, + penciller_work_backlog_status => {0, false, false}, penciller_last_merge_time => undefined, journal_last_compaction_time => undefined, journal_last_compaction_result => undefined, From 3c795a1c78a0ba8c05bb07e53703d71067d7a5c2 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Wed, 11 Feb 2026 02:54:57 +0200 Subject: [PATCH 30/33] recent_putgethead_counts exists as 3 individual stats items, rm it --- src/leveled_monitor.erl | 8 +------- test/end_to_end/basic_SUITE.erl | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 41af02a7..673d7db9 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -83,8 +83,7 @@ put_ink_time => 0, put_mem_time => 0, put_prep_time => 0, - put_sample_count => 0, - recent_putgethead_counts => undefined + put_sample_count => 0 }). 
-record(bookie_get_timings, { @@ -175,11 +174,6 @@ penciller_last_merge_time => undefined | integer(), journal_last_compaction_time => undefined | integer(), journal_last_compaction_result => undefined | {non_neg_integer(), float()}, - recent_putgethead_counts => - undefined - | { - non_neg_integer(), non_neg_integer(), non_neg_integer() - }, fetch_count_by_level => undefined | #{ diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 44919fd3..948da57a 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -83,7 +83,6 @@ bookie_status_report(_Config) -> '2' => #{count => 0, time => 0}, '3' => #{count => 0, time => 0} }, - recent_putgethead_counts => undefined, get_sample_count => 0, get_body_time => 0, head_sample_count => 0, From b2915f50774c6983897494146a6a7f198e0bdef9 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Wed, 11 Feb 2026 04:40:15 +0200 Subject: [PATCH 31/33] drop compaction score sample, report {min,max}_compaction_score instead --- src/leveled_monitor.erl | 26 ++++++++++++++++++-------- test/end_to_end/basic_SUITE.erl | 7 ++++--- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 673d7db9..d179b388 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -56,8 +56,6 @@ -define(LOG_FREQUENCY_SECONDS, 30). 
-define(INITIAL_BOOKIE_STATUS, #{ - avg_compaction_score => undefined, - avg_compaction_score_sample => [], fetch_count_by_level => #{ not_found => #{count => 0, time => 0}, @@ -76,6 +74,8 @@ journal_last_compaction_time => undefined, ledger_cache_size => undefined, level_files_count => #{}, + min_compaction_score => undefined, + max_compaction_score => undefined, n_active_journal_files => 1, penciller_inmem_cache_size => undefined, penciller_last_merge_time => undefined, @@ -83,7 +83,8 @@ put_ink_time => 0, put_mem_time => 0, put_prep_time => 0, - put_sample_count => 0 + put_sample_count => 0, + tmp_compaction_score_sample => [] }). -record(bookie_get_timings, { @@ -165,8 +166,11 @@ -type bookie_status() :: #{ ledger_cache_size => undefined | non_neg_integer(), n_active_journal_files => pos_integer(), - avg_compaction_score => undefined | float(), - avg_compaction_score_sample => [float()], + min_compaction_score => undefined | float(), + max_compaction_score => undefined | float(), + tmp_compaction_score_sample => [float()], + %% this sample is a tmp buffer, only used to produce min_ and max_ + %% items above; will be dropped from final bookie_status level_files_count => #{non_neg_integer() => non_neg_integer()}, penciller_inmem_cache_size => undefined | pos_integer(), penciller_work_backlog_status => @@ -419,7 +423,9 @@ handle_call( put_mem_time => PT#bookie_put_timings.mem_time, fetch_count_by_level => FCL }, - {reply, StatusEnriched, State}; + StatusTrimmed = + maps:remove(tmp_compaction_score_sample, StatusEnriched), + {reply, StatusTrimmed, State}; handle_call(close, _From, State) -> {stop, normal, ok, State}. 
@@ -791,7 +797,7 @@ handle_cast( handle_cast( {avg_compaction_score_update, A}, State = #state{bookie_status = BS} ) -> - OldSample = maps:get(avg_compaction_score_sample, BS), + OldSample = maps:get(tmp_compaction_score_sample, BS), NewSample = case [A | OldSample] of L when length(L) > ?AVG_COMPACTION_SCORE_OVER_MAX -> @@ -800,7 +806,11 @@ handle_cast( L end, {noreply, State#state{ - bookie_status = BS#{avg_compaction_score_sample => NewSample} + bookie_status = BS#{ + tmp_compaction_score_sample => NewSample, + min_compaction_score => lists:min(NewSample), + max_compaction_score => lists:max(NewSample) + } }}; handle_cast( {level_files_count_update, U, TS}, State = #state{bookie_status = BS0} diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 948da57a..42c2a980 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -91,8 +91,8 @@ bookie_status_report(_Config) -> put_prep_time => 0, put_ink_time => 0, put_mem_time => 0, - avg_compaction_score => undefined, - avg_compaction_score_sample => [] + min_compaction_score => undefined, + max_compaction_score => undefined }, InitialReport = leveled_bookie:book_status(Bookie), io:format(user, "\nInitial report, before any IO\n~p\n", [InitialReport]), @@ -186,7 +186,8 @@ bookie_status_report(_Config) -> ), {8, JLCRScore} = maps:get(journal_last_compaction_result, Rep5), within_range(0.0, 100.0, JLCRScore), - true = 0 < length(maps:get(avg_compaction_score_sample, Rep5)), + true = +0.0 =< maps:get(min_compaction_score, Rep5), + true = 100.0 >= maps:get(max_compaction_score, Rep5), within_range( 0, 40000, From 2ca7735d3ff00c6ba00cf723b5243799bf015916 Mon Sep 17 00:00:00 2001 From: Andriy Zavada Date: Wed, 11 Feb 2026 11:21:17 +0200 Subject: [PATCH 32/33] better still report avg_compaction_score instead of min_ --- src/leveled_monitor.erl | 13 +++++++++---- test/end_to_end/basic_SUITE.erl | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git 
a/src/leveled_monitor.erl b/src/leveled_monitor.erl index d179b388..47b38c57 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -74,7 +74,7 @@ journal_last_compaction_time => undefined, ledger_cache_size => undefined, level_files_count => #{}, - min_compaction_score => undefined, + avg_compaction_score => undefined, max_compaction_score => undefined, n_active_journal_files => 1, penciller_inmem_cache_size => undefined, @@ -166,7 +166,7 @@ -type bookie_status() :: #{ ledger_cache_size => undefined | non_neg_integer(), n_active_journal_files => pos_integer(), - min_compaction_score => undefined | float(), + avg_compaction_score => undefined | float(), max_compaction_score => undefined | float(), tmp_compaction_score_sample => [float()], %% this sample is a tmp buffer, only used to produce min_ and max_ @@ -805,11 +805,16 @@ handle_cast( L -> L end, + {Avg, Max} = + case length(NewSample) of + 0 -> {undefined, undefined}; + Length -> {lists:sum(NewSample) / Length, lists:max(NewSample)} + end, {noreply, State#state{ bookie_status = BS#{ tmp_compaction_score_sample => NewSample, - min_compaction_score => lists:min(NewSample), - max_compaction_score => lists:max(NewSample) + avg_compaction_score => Avg, + max_compaction_score => Max } }}; handle_cast( diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 42c2a980..4831f836 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -91,7 +91,7 @@ bookie_status_report(_Config) -> put_prep_time => 0, put_ink_time => 0, put_mem_time => 0, - min_compaction_score => undefined, + avg_compaction_score => undefined, max_compaction_score => undefined }, InitialReport = leveled_bookie:book_status(Bookie), @@ -186,7 +186,7 @@ bookie_status_report(_Config) -> ), {8, JLCRScore} = maps:get(journal_last_compaction_result, Rep5), within_range(0.0, 100.0, JLCRScore), - true = +0.0 =< maps:get(min_compaction_score, Rep5), + true = +0.0 =< 
maps:get(avg_compaction_score, Rep5), true = 100.0 >= maps:get(max_compaction_score, Rep5), within_range( 0, From dba1ab371ffd17e39d01f8feebbfa075d345f24c Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 11 Feb 2026 17:38:13 +0000 Subject: [PATCH 33/33] Journal last compaction stats Gather journal last compaction stats and update in one call. Allows for scoring of all files (not just a sample). Ensure coverage of a status check on a snapshot bookie. Resolve eqwalizer issue with merge of a map with undefined (the level_files_count defaults to an empty map and so will be present and cannot be undefined) --- src/leveled_iclerk.erl | 153 ++++++++++++++++++-------------- src/leveled_monitor.erl | 100 +++++++++------------ test/end_to_end/basic_SUITE.erl | 45 ++++++---- 3 files changed, 155 insertions(+), 143 deletions(-) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 5bb87fec..29989368 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -351,7 +351,7 @@ handle_cast( {noreply, State#state{scored_files = [], scoring_state = ScoringState}}; handle_cast( {score_filelist, [Entry | Tail]}, - State = #state{scoring_state = ScoringState, cdb_options = CDBOpts} + State = #state{scoring_state = ScoringState} ) when ?IS_DEF(ScoringState) -> @@ -376,8 +376,7 @@ handle_cast( ScoringState#scoring_state.max_sqn, ?SAMPLE_SIZE, ?BATCH_SIZE, - State#state.reload_strategy, - CDBOpts#cdb_options.monitor + State#state.reload_strategy ); {CachedScore, true, _ScoreOneIn} -> % If caches are used roll the score towards the current score @@ -392,8 +391,7 @@ handle_cast( ScoringState#scoring_state.max_sqn, ?SAMPLE_SIZE, ?BATCH_SIZE, - State#state.reload_strategy, - CDBOpts#cdb_options.monitor + State#state.reload_strategy ), (NewScore + CachedScore) / 2; {CachedScore, false, _ScoreOneIn} -> @@ -423,55 +421,69 @@ handle_cast( CloseFun = ScoringState#scoring_state.close_fun, SW = ScoringState#scoring_state.start_time, ScoreParams = - {MaxRunLength, 
State#state.maxrunlength_compactionperc, - State#state.singlefile_compactionperc}, + { + MaxRunLength, + State#state.maxrunlength_compactionperc, + State#state.singlefile_compactionperc + }, {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), + ?TMR_LOG(ic003, [Score, length(BestRun0)], SW), + LRL = + case Score > 0.0 of + true -> + BestRun1 = sort_run(BestRun0), + print_compaction_run(BestRun1, ScoreParams), + ManifestSlice = + compact_files( + BestRun1, + CDBopts, + FilterFun, + FilterServer, + MaxSQN, + State#state.reload_strategy, + State#state.compression_method + ), + FilesToDelete = + lists:map( + fun(C) -> + { + C#candidate.low_sqn, + C#candidate.filename, + C#candidate.journal, + undefined + } + end, + BestRun1 + ), + ?STD_LOG(ic002, [length(FilesToDelete)]), + ok = CloseFun(FilterServer), + ok = + leveled_inker:ink_clerkcomplete( + State#state.inker, ManifestSlice, FilesToDelete + ), + length(BestRun0); + false -> + ok = CloseFun(FilterServer), + ok = + leveled_inker:ink_clerkcomplete(State#state.inker, [], []), + 0 + end, {Monitor, _} = CDBopts#cdb_options.monitor, + {MaxScore, MeanScore} = calc_run_stats(Candidates), + {MegaST, SecST, MicroST} = ScoringState#scoring_state.start_time, + StartTimeMilli = (MegaST * 1000000 + SecST) * 1000 + (MicroST div 1000), leveled_monitor:add_stat( Monitor, - {journal_last_compaction_result_update, {length(BestRun0), Score}} - ), - leveled_monitor:add_stat( - Monitor, - {journal_last_compaction_time_update, os:system_time(millisecond)} + { + journal_compaction, + MaxScore, + MeanScore, + Score, + LRL, + os:system_time(millisecond) - StartTimeMilli, + StartTimeMilli + } ), - ?TMR_LOG(ic003, [Score, length(BestRun0)], SW), - case Score > 0.0 of - true -> - BestRun1 = sort_run(BestRun0), - print_compaction_run(BestRun1, ScoreParams), - ManifestSlice = - compact_files( - BestRun1, - CDBopts, - FilterFun, - FilterServer, - MaxSQN, - State#state.reload_strategy, - State#state.compression_method - ), - 
FilesToDelete = - lists:map( - fun(C) -> - { - C#candidate.low_sqn, - C#candidate.filename, - C#candidate.journal, - undefined - } - end, - BestRun1 - ), - ?STD_LOG(ic002, [length(FilesToDelete)]), - ok = CloseFun(FilterServer), - ok = - leveled_inker:ink_clerkcomplete( - State#state.inker, ManifestSlice, FilesToDelete - ); - false -> - ok = CloseFun(FilterServer), - ok = leveled_inker:ink_clerkcomplete(State#state.inker, [], []) - end, {noreply, State#state{scoring_state = undefined}, hibernate}; handle_cast( {trim, PersistedSQN, ManifestAsList}, State = #state{inker = Ink} @@ -595,6 +607,18 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> %%% Internal functions %%%============================================================================ +-spec calc_run_stats(list(candidate())) -> {float(), float()}. +calc_run_stats(Candidates) -> + case lists:map(fun(C) -> C#candidate.compaction_perc end, Candidates) of + L when length(L) > 0 -> + { + lists:max(L), + lists:sum(L) / length(L) + }; + _ -> + {0.0, 0.0} + end. + -spec check_single_file( pid(), leveled_inker:filterfun(), @@ -602,8 +626,7 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> leveled_codec:sqn(), non_neg_integer(), non_neg_integer(), - leveled_codec:compaction_strategy(), - leveled_monitor:monitor() + leveled_codec:compaction_strategy() ) -> float(). %% @doc @@ -624,8 +647,7 @@ check_single_file( MaxSQN, SampleSize, BatchSize, - ReloadStrategy, - {Monitor, _} + ReloadStrategy ) -> FN = leveled_cdb:cdb_filename(CDB), SW = os:timestamp(), @@ -639,7 +661,6 @@ check_single_file( MaxSQN, ReloadStrategy ), - leveled_monitor:add_stat(Monitor, {avg_compaction_score_update, Score}), safely_log_filescore(PositionList, FN, Score, SW), Score. 
@@ -1276,22 +1297,17 @@ check_single_file_test() -> replaced end end, - Score1 = check_single_file( - CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0} - ), + Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), ?assertMatch(37.5, Score1), LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> current end, - Score2 = check_single_file( - CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0} - ), + Score2 = + check_single_file( + CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS + ), ?assertMatch(100.0, Score2), - Score3 = check_single_file( - CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS, {no_monitor, 0} - ), + Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS), ?assertMatch(37.5, Score3), - Score4 = check_single_file( - CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS, {no_monitor, 0} - ), + Score4 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS), ?assertMatch(75.0, Score4), ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). @@ -1436,9 +1452,8 @@ compact_empty_file_test() -> {3, {o, "Bucket", "Key3", null}} ], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, - Score1 = check_single_file( - CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS, {no_monitor, 0} - ), + Score1 = + check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), ?assert((+0.0 =:= Score1) orelse (-0.0 =:= Score1)), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). 
diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 47b38c57..792384d9 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -70,12 +70,14 @@ get_sample_count => 0, head_rsp_time => 0, head_sample_count => 0, - journal_last_compaction_result => undefined, journal_last_compaction_time => undefined, + journal_last_compaction_duration => undefined, + journal_last_compaction_score => undefined, + journal_last_compaction_max => undefined, + journal_last_compaction_mean => undefined, + journal_last_compaction_runlength => undefined, ledger_cache_size => undefined, level_files_count => #{}, - avg_compaction_score => undefined, - max_compaction_score => undefined, n_active_journal_files => 1, penciller_inmem_cache_size => undefined, penciller_last_merge_time => undefined, @@ -83,8 +85,7 @@ put_ink_time => 0, put_mem_time => 0, put_prep_time => 0, - put_sample_count => 0, - tmp_compaction_score_sample => [] + put_sample_count => 0 }). -record(bookie_get_timings, { @@ -99,7 +100,6 @@ sample_count = 0 :: non_neg_integer(), cache_count = 0 :: non_neg_integer(), found_count = 0 :: non_neg_integer(), - cache_hits = 0 :: non_neg_integer(), fetch_ledger_time = 0 :: non_neg_integer(), fetch_ledgercache_time = 0 :: non_neg_integer(), rsp_time = 0 :: non_neg_integer(), @@ -166,18 +166,17 @@ -type bookie_status() :: #{ ledger_cache_size => undefined | non_neg_integer(), n_active_journal_files => pos_integer(), - avg_compaction_score => undefined | float(), - max_compaction_score => undefined | float(), - tmp_compaction_score_sample => [float()], - %% this sample is a tmp buffer, only used to produce min_ and max_ - %% items above; will be dropped from final bookie_status level_files_count => #{non_neg_integer() => non_neg_integer()}, penciller_inmem_cache_size => undefined | pos_integer(), penciller_work_backlog_status => undefined | {non_neg_integer(), boolean(), boolean()}, penciller_last_merge_time => undefined | integer(), - 
journal_last_compaction_time => undefined | integer(), - journal_last_compaction_result => undefined | {non_neg_integer(), float()}, + journal_last_compaction_time => undefined | pos_integer(), + journal_last_compaction_duration => undefined | non_neg_integer(), + journal_last_compaction_score => undefined | float(), + journal_last_compaction_max => undefined | float(), + journal_last_compaction_mean => undefined | float(), + journal_last_compaction_runlength => undefined | non_neg_integer(), fetch_count_by_level => undefined | #{ @@ -198,8 +197,6 @@ -type reporting_fetch_level() :: not_found | mem | '0' | '1' | '2' | '3' | lower. --define(AVG_COMPACTION_SCORE_OVER_MAX, 50). - -record(state, { bookie_get_timings = #bookie_get_timings{} :: bookie_get_timings(), bookie_head_timings = #bookie_head_timings{} :: bookie_head_timings(), @@ -262,9 +259,15 @@ | {penciller_work_backlog_status_update, { non_neg_integer(), boolean(), boolean() }} - %% | {penciller_last_merge_time_update, pos_integer()} via level_files_count_update - | {journal_last_compaction_time_update, integer()} - | {journal_last_compaction_result_update, {non_neg_integer(), float()}} + | { + journal_compaction, + float(), + float(), + float(), + pos_integer(), + non_neg_integer(), + pos_integer() + } | {metadata_objsize_ratio_update, not_implemented}. -type statistic() :: bookie_get_update() @@ -423,9 +426,7 @@ handle_call( put_mem_time => PT#bookie_put_timings.mem_time, fetch_count_by_level => FCL }, - StatusTrimmed = - maps:remove(tmp_compaction_score_sample, StatusEnriched), - {reply, StatusTrimmed, State}; + {reply, StatusEnriched, State}; handle_call(close, _From, State) -> {stop, normal, ok, State}. 
@@ -794,33 +795,10 @@ handle_cast( A = maps:get(n_active_journal_files, BS0), BS = maps:put(n_active_journal_files, A + Delta, BS0), {noreply, State#state{bookie_status = BS}}; -handle_cast( - {avg_compaction_score_update, A}, State = #state{bookie_status = BS} -) -> - OldSample = maps:get(tmp_compaction_score_sample, BS), - NewSample = - case [A | OldSample] of - L when length(L) > ?AVG_COMPACTION_SCORE_OVER_MAX -> - lists:sublist(L, ?AVG_COMPACTION_SCORE_OVER_MAX); - L -> - L - end, - {Avg, Max} = - case length(NewSample) of - 0 -> {undefined, undefined}; - Length -> {lists:sum(NewSample) / Length, lists:max(NewSample)} - end, - {noreply, State#state{ - bookie_status = BS#{ - tmp_compaction_score_sample => NewSample, - avg_compaction_score => Avg, - max_compaction_score => Max - } - }}; handle_cast( {level_files_count_update, U, TS}, State = #state{bookie_status = BS0} ) -> - A = maps:get(level_files_count, BS0, undefined), + A = maps:get(level_files_count, BS0), BS1 = maps:put(level_files_count, maps:merge(A, U), BS0), BS2 = maps:put(penciller_last_merge_time, TS, BS1), {noreply, State#state{bookie_status = BS2}}; @@ -836,18 +814,23 @@ handle_cast( bookie_status = BS#{penciller_work_backlog_status => A} }}; handle_cast( - {journal_last_compaction_time_update, A}, State = #state{bookie_status = BS} -) -> - {noreply, State#state{ - bookie_status = BS#{journal_last_compaction_time => A} - }}; -handle_cast( - {journal_last_compaction_result_update, A}, + {journal_compaction, MaxScore, MeanScore, Score, LRL, Duration, StartTime}, State = #state{bookie_status = BS} ) -> - {noreply, State#state{ - bookie_status = BS#{journal_last_compaction_result => A} - }}. 
+ { + noreply, + State#state{ + bookie_status = + BS#{ + journal_last_compaction_time => StartTime, + journal_last_compaction_duration => Duration, + journal_last_compaction_score => Score, + journal_last_compaction_max => MaxScore, + journal_last_compaction_mean => MeanScore, + journal_last_compaction_runlength => LRL + } + } + }. handle_info(report_next_stats, State) -> erlang:send_after( @@ -878,7 +861,12 @@ code_change(_OldVsn, State, _Extra) -> coverage_cheat_test() -> {ok, M} = monitor_start(1, []), timer:sleep(2000), - {ok, _State1} = code_change(null, #state{}, null), + {ok, _State1} = + code_change( + null, + #state{bookie_status = ?INITIAL_BOOKIE_STATUS}, + null + ), ok = add_stat(M, {pcl_fetch_update, 4, 100}), ok = report_stats(M, pcl_fetch), % Can close, so empty log_order hasn't crashed diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 4831f836..65c3c7f1 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -21,7 +21,6 @@ all() -> [ - bookie_status_report, simple_put_fetch_head_delete, many_put_fetch_head, journal_compaction, @@ -35,7 +34,8 @@ all() -> bigsst_littlesst, safereaderror_startup, remove_journal_test, - bigpcl_bucketlist + bigpcl_bucketlist, + bookie_status_report ]. 
init_per_suite(Config) -> @@ -72,7 +72,11 @@ bookie_status_report(_Config) -> penciller_work_backlog_status => {0, false, false}, penciller_last_merge_time => undefined, journal_last_compaction_time => undefined, - journal_last_compaction_result => undefined, + journal_last_compaction_duration => undefined, + journal_last_compaction_score => undefined, + journal_last_compaction_max => undefined, + journal_last_compaction_mean => undefined, + journal_last_compaction_runlength => undefined, fetch_count_by_level => #{ not_found => #{count => 0, time => 0}, @@ -90,12 +94,10 @@ bookie_status_report(_Config) -> put_sample_count => 0, put_prep_time => 0, put_ink_time => 0, - put_mem_time => 0, - avg_compaction_score => undefined, - max_compaction_score => undefined + put_mem_time => 0 }, InitialReport = leveled_bookie:book_status(Bookie), - io:format(user, "\nInitial report, before any IO\n~p\n", [InitialReport]), + io:format("\nInitial report, before any IO\n~p\n", [InitialReport]), check_n_journal_files(RootPath, InitialReport), {TObj, TSpec} = testutil:generate_testobject(), @@ -116,7 +118,7 @@ bookie_status_report(_Config) -> {r_object, TBkt, TKey, _, _, _, _} = TObj, {ok, _} = testutil:book_riakget(Bookie, TBkt, TKey), Rep2 = leveled_bookie:book_status(Bookie), - io:format(user, "\nReport after a single PUT+GET\n~p\n", [Rep2]), + io:format("\nReport after a single PUT+GET\n~p\n", [Rep2]), 1 = maps:get(get_sample_count, Rep2), GoodGetBodyTime = 500, within_range(1, GoodGetBodyTime, maps:get(get_body_time, Rep2)), @@ -129,8 +131,8 @@ bookie_status_report(_Config) -> testutil:wait_for_compaction(Bookie), Rep3 = leveled_bookie:book_status(Bookie), - io:format(user, "\nReport after first compaction:\n~p\n", [Rep3]), - {0, +0.0} = maps:get(journal_last_compaction_result, Rep3), + io:format("\nReport after first compaction:\n~p\n", [Rep3]), + +0.0 = maps:get(journal_last_compaction_score, Rep3), within_range( CompactionStarted1, os:system_time(millisecond), @@ -161,7 +163,7 
@@ bookie_status_report(_Config) -> ), Rep4 = leveled_bookie:book_status(Bookie), - io:format(user, "\nReport after loading 80K objects:\n~p\n", [Rep4]), + io:format("\nReport after loading 80K objects:\n~p\n", [Rep4]), %% we have reduced max penciller cache size to make it certain a %% merge occurs after so many PUTs within_range( @@ -177,17 +179,19 @@ bookie_status_report(_Config) -> testutil:wait_for_compaction(Bookie), Rep5 = leveled_bookie:book_status(Bookie), - io:format(user, "\nReport after second compaction:\n~p\n", [Rep5]), + io:format("\nReport after second compaction:\n~p\n", [Rep5]), #{1 := 1} = maps:get(level_files_count, Rep5), within_range( CompactionStarted2, os:system_time(millisecond), maps:get(journal_last_compaction_time, Rep5) ), - {8, JLCRScore} = maps:get(journal_last_compaction_result, Rep5), + JLCRScore = maps:get(journal_last_compaction_score, Rep5), within_range(0.0, 100.0, JLCRScore), - true = +0.0 =< maps:get(avg_compaction_score, Rep5), - true = 100.0 >= maps:get(max_compaction_score, Rep5), + within_range(20.0, 90.0, maps:get(journal_last_compaction_mean, Rep5)), + true = 100.0 >= maps:get(journal_last_compaction_max, Rep5), + JLCRRL = maps:get(journal_last_compaction_runlength, Rep5), + within_range(7, 9, JLCRRL), within_range( 0, 40000, @@ -195,13 +199,18 @@ bookie_status_report(_Config) -> ), check_n_journal_files(RootPath, Rep5), - io:format(user, "sleeping 10s to see 8 files are actually deleted\n", []), + io:format("Sleeping 10s to see 8 files are actually deleted\n", []), timer:sleep(_DELETE_TIMEOUT = 10_000 + 1_000), Rep6 = leveled_bookie:book_status(Bookie), check_n_journal_files(RootPath, Rep6), - io:format(user, "\nClosing book and reopening\n", []), + SnapOpts = [{snapshot_bookie, Bookie}], + {ok, BookSnap} = leveled_bookie:book_start(SnapOpts), + #{} = leveled_bookie:book_status(BookSnap), + ok = leveled_bookie:book_close(BookSnap), + + io:format("\nClosing book and reopening\n", []), ok = 
leveled_bookie:book_close(Bookie), {ok, Bookie2} = leveled_bookie:book_start(StartOpts), Rep7 = leveled_bookie:book_status(Bookie2), @@ -223,7 +232,7 @@ check_n_journal_files(RootPath, Rep) -> ) ), C = maps:get(n_active_journal_files, Rep), - io:format(user, "journal files: ~b (reported: ~b)\n", [A + B, C]), + io:format("journal files: ~b (reported: ~b)\n", [A + B, C]), C = A + B. simple_put_fetch_head_delete(_Config) ->