Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,9 @@ DEFINE_Int32(ann_index_result_cache_stale_sweep_time_sec, "1800");

// inverted index
// RAM buffer size, in MB, handed to the CLucene index writer through setRAMBufferSizeMB().
DEFINE_mDouble(inverted_index_ram_buffer_size, "512");
// Cap the CLucene buffered postings for analyzed inverted indexes when RAM directory is disabled.
// Values <= 0 keep inverted_index_ram_buffer_size unchanged.
DEFINE_mDouble(inverted_index_ram_buffer_size_when_ram_dir_disabled, "64");
// Handed to the index writer through setMaxBufferedDocs().
// -1 means this setting does not take effect.
// Normally this should not be changed; it is mainly useful for testing.
DEFINE_mInt32(inverted_index_max_buffered_docs, "-1");
Expand Down
1 change: 1 addition & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,7 @@ DECLARE_Int32(ann_index_result_cache_stale_sweep_time_sec);

// inverted index
// RAM buffer size (MB) for the inverted index writer.
DECLARE_mDouble(inverted_index_ram_buffer_size);
// Upper bound (MB) applied to the RAM buffer size of analyzed indexes written to a
// DorisFSDirectory; values <= 0 leave inverted_index_ram_buffer_size unchanged.
DECLARE_mDouble(inverted_index_ram_buffer_size_when_ram_dir_disabled);
// Passed to the index writer's setMaxBufferedDocs().
DECLARE_mInt32(inverted_index_max_buffered_docs);
// dict path for chinese analyzer
DECLARE_String(inverted_index_dict_path);
Expand Down
58 changes: 54 additions & 4 deletions be/src/storage/index/inverted/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

#include "storage/index/inverted/inverted_index_writer.h"

#include <algorithm>
#include <cstring>

#include "common/cast_set.h"
#include "storage/index/inverted/analyzer/analyzer.h"
#include "storage/index/inverted/inverted_index_common.h"
#include "storage/index/inverted/inverted_index_fs_directory.h"
Expand All @@ -32,6 +36,46 @@ const int32_t MAX_LEAF_COUNT = 1024;
const float MAXMBSortInHeap = 512.0 * 8;
const int DIMS = 1;

namespace {

// Returns the value reported by the index writer's ramSizeInBytes(),
// or 0 when no index writer exists.
int64_t index_writer_memory_size(const std::unique_ptr<lucene::index::IndexWriter>& index_writer) {
    return index_writer ? index_writer->ramSizeInBytes() : 0;
}

// Adds up fileLength() for every file listed by `dir`, but only when `dir`
// reports the object name "DorisRAMFSDirectory". A null directory or any other
// directory kind contributes 0.
int64_t ram_directory_memory_size(const std::shared_ptr<DorisFSDirectory>& dir) {
    if (!dir) {
        return 0;
    }
    const bool is_ram_dir = std::strcmp(dir->getObjectName(), "DorisRAMFSDirectory") == 0;
    if (!is_ram_dir) {
        return 0;
    }

    std::vector<std::string> file_names;
    dir->list(&file_names);

    int64_t total_bytes = 0;
    for (const std::string& name : file_names) {
        total_bytes += dir->fileLength(name.c_str());
    }
    return total_bytes;
}

// True only when `dir` is non-null and its object name is exactly "DorisFSDirectory".
bool is_fs_directory(const std::shared_ptr<DorisFSDirectory>& dir) {
    if (!dir) {
        return false;
    }
    return std::strcmp(dir->getObjectName(), "DorisFSDirectory") == 0;
}

// Chooses the RAM buffer size (the value later passed to setRAMBufferSizeMB) for an
// index writer.
//
// Starts from config::inverted_index_ram_buffer_size. When the index is analyzed
// (`should_analyzer`), `dir` is a plain DorisFSDirectory, and both the base size and
// config::inverted_index_ram_buffer_size_when_ram_dir_disabled are positive, the result
// is the smaller of the two. Otherwise the base size is returned unchanged.
float index_writer_ram_buffer_size(const std::shared_ptr<DorisFSDirectory>& dir,
                                   bool should_analyzer) {
    // Both settings are declared with DEFINE_mDouble (mutable at runtime). Read each one
    // exactly once so the "> 0" checks and std::min below see the same values; re-reading
    // the cap could otherwise let a non-positive value slip through std::min.
    const double base_size = config::inverted_index_ram_buffer_size;
    const double capped_size = config::inverted_index_ram_buffer_size_when_ram_dir_disabled;

    const bool apply_cap =
            should_analyzer && is_fs_directory(dir) && base_size > 0 && capped_size > 0;
    const double ram_buffer_size = apply_cap ? std::min(base_size, capped_size) : base_size;
    return static_cast<float>(ram_buffer_size);
}

} // namespace

template <FieldType field_type>
InvertedIndexColumnWriter<field_type>::InvertedIndexColumnWriter(const std::string& field_name,
IndexFileWriter* index_file_writer,
Expand Down Expand Up @@ -141,7 +185,7 @@ InvertedIndexColumnWriter<field_type>::create_index_writer() {
{ index_writer->setMaxBufferedDocs(1); })
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_index_writer_setMergeFactor_error",
{ index_writer->setMergeFactor(1); })
index_writer->setRAMBufferSizeMB(static_cast<float>(config::inverted_index_ram_buffer_size));
index_writer->setRAMBufferSizeMB(index_writer_ram_buffer_size(_dir, _should_analyzer));
index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs);
index_writer->setMaxFieldLength(MAX_FIELD_LEN);
index_writer->setMergeFactor(MERGE_FACTOR);
Expand Down Expand Up @@ -566,8 +610,14 @@ Status InvertedIndexColumnWriter<field_type>::add_value(const CppType& value) {

// Estimates the memory currently held by this inverted index column writer, in bytes.
//
// The estimate is the sum of:
//   - the null bitmap size (getSizeInBytes(false)),
//   - for slice-type fields that are analyzed, the index writer's RAM usage
//     (index_writer_memory_size), and
//   - the total file length of the directory when it is a RAM directory
//     (ram_directory_memory_size).
//
// The previous placeholder `return 0;` is removed so the accounting below is reached.
template <FieldType field_type>
int64_t InvertedIndexColumnWriter<field_type>::size() const {
    int64_t size = cast_set<int64_t>(_null_bitmap.getSizeInBytes(false));
    if constexpr (field_is_slice_type(field_type)) {
        // Only analyzed slice-type fields add the index writer's buffered RAM.
        if (_should_analyzer) {
            size += index_writer_memory_size(_index_writer);
        }
    }
    size += ram_directory_memory_size(_dir);
    return size;
}

template <FieldType field_type>
Expand Down Expand Up @@ -683,4 +733,4 @@ template class InvertedIndexColumnWriter<FieldType::OLAP_FIELD_TYPE_IPV6>;
template class InvertedIndexColumnWriter<FieldType::OLAP_FIELD_TYPE_FLOAT>;
template class InvertedIndexColumnWriter<FieldType::OLAP_FIELD_TYPE_DOUBLE>;

} // namespace doris::segment_v2
} // namespace doris::segment_v2
21 changes: 18 additions & 3 deletions be/src/storage/segment/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,12 @@ uint64_t ScalarColumnWriter::estimate_buffer_size() {
if (_opts.need_bloom_filter) {
size += _bloom_filter_index_builder->size();
}
if (_opts.need_inverted_index) {
for (const auto& builder : _inverted_index_builders) {
DORIS_CHECK(builder != nullptr);
size += builder->size();
}
}
return size;
}

Expand Down Expand Up @@ -1121,9 +1127,18 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
}

// Estimates the buffered size of this array column writer.
//
// Sums the estimates of the offset writer, the null writer (nullable columns only) and
// the item writer, then adds the inverted index writer's and ANN index writer's size()
// when the corresponding option is enabled. The earlier unconditional `return` of the
// three sub-writer estimates is removed so the index writers are actually counted.
uint64_t ArrayColumnWriter::estimate_buffer_size() {
    uint64_t size = _offset_writer->estimate_buffer_size() +
                    (is_nullable() ? _null_writer->estimate_buffer_size() : 0) +
                    _item_writer->estimate_buffer_size();
    if (_opts.need_inverted_index) {
        // The option implies the writer was created; treat a missing writer as a bug.
        DORIS_CHECK(_inverted_index_writer != nullptr);
        size += _inverted_index_writer->size();
    }
    if (_opts.need_ann_index) {
        DORIS_CHECK(_ann_index_writer != nullptr);
        size += _ann_index_writer->size();
    }
    return size;
}

Status ArrayColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t** ptr,
Expand Down
Loading
Loading