Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions include/paimon/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,20 @@ struct PAIMON_EXPORT Options {
/// "partition.legacy-name" - Whether to generate partition names in the legacy way, which
/// uses `ToString` for all types. If false, values of all types are cast to string instead.
/// Default value is "true".
static const char PARTITION_GENERATE_LEGACY_NAME[];
/// "map-storage-layout" - Suffix of the per-column option that selects the MAP storage
/// layout. This constant is only the suffix; the full option key is
/// `fields.<column>.map-storage-layout`.
/// Values: "default" (standard KV arrays) or "extend" (columnar-extend with column reuse).
/// Default is "default".
/// The column must be of type MAP<STRING, T>. Each column must be configured individually.
/// For example, to enable extend layout for two columns "metrics" and "tags":
///     fields.metrics.map-storage-layout = extend
///     fields.tags.map-storage-layout = extend
static const char MAP_STORAGE_LAYOUT[];
/// "map-extend.max-columns" - Suffix of the per-column option that sets the upper bound
/// (K_max) on the number of columns used by the extend layout. This constant is only the
/// suffix; the full option key is `fields.<column>.map-extend.max-columns`.
/// Only effective when `fields.<column>.map-storage-layout` = extend.
/// Rows with more fields than K_max spill to `__overflow`.
/// Default value is 256. Each column can have its own max-columns setting.
static const char MAP_EXTEND_MAX_COLUMNS[];

/// "blob-as-descriptor" - Whether to read blob fields as blob descriptors rather than as
/// blob bytes. Default value is "false".
static const char BLOB_AS_DESCRIPTOR[];
Expand Down
2 changes: 2 additions & 0 deletions src/paimon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ set(PAIMON_COMMON_SRCS
common/utils/bloom_filter64.cpp
common/utils/crc32c.cpp
common/utils/decimal_utils.cpp
common/utils/extend_map_utils.cpp
common/utils/delta_varint_compressor.cpp
common/utils/fields_comparator.cpp
common/utils/path_util.cpp
Expand Down Expand Up @@ -531,6 +532,7 @@ if(PAIMON_BUILD_TESTS)
common/utils/decimal_utils_test.cpp
common/utils/threadsafe_queue_test.cpp
common/utils/generic_lru_cache_test.cpp
common/utils/extend_map_utils_test.cpp
STATIC_LINK_LIBS
paimon_shared
test_utils_static
Expand Down
2 changes: 2 additions & 0 deletions src/paimon/common/defs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ const char Options::ROW_TRACKING_PARTITION_GROUP_ON_COMMIT[] =
"row-tracking.partition-group-on-commit";
const char Options::DATA_EVOLUTION_ENABLED[] = "data-evolution.enabled";
const char Options::PARTITION_GENERATE_LEGACY_NAME[] = "partition.legacy-name";
// The two MAP layout entries below are key suffixes rather than complete option keys; they
// are used per column as `fields.<column>.<suffix>`.
const char Options::MAP_STORAGE_LAYOUT[] = "map-storage-layout";
const char Options::MAP_EXTEND_MAX_COLUMNS[] = "map-extend.max-columns";
const char Options::BLOB_AS_DESCRIPTOR[] = "blob-as-descriptor";
const char Options::BLOB_FIELD[] = "blob-field";
const char Options::BLOB_DESCRIPTOR_FIELD[] = "blob-descriptor-field";
Expand Down
90 changes: 90 additions & 0 deletions src/paimon/common/utils/extend_map_defs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "arrow/type.h"
#include "fmt/format.h"
namespace paimon {

/// Shared string/integer constants of the columnar-extend MAP storage layout.
///
/// Two groups live here: the keys written into the file footer meta, and the names of the
/// physical sub-columns that make up an extend-layout MAP column.
struct ExtendMapDefine {
    // ======== Footer meta keys ========

    /// Key holding the version of the extend-map meta format.
    static constexpr const char* kVersion = "paimon.map-extend.version";
    /// Meta format version currently in use.
    static constexpr int32_t kCurrentVersion = 1;

    /// Key marking a file as using the extend layout; its value is the layout type string.
    static constexpr const char* kStorageLayout = "paimon.map-extend.storage-layout";
    /// The kStorageLayout value that denotes the extend layout.
    static constexpr const char* kStorageLayoutExtend = "extend";
    /// Key of the JSON-encoded field name <-> field id dictionary (value may be compressed).
    static constexpr const char* kFieldDict = "paimon.map-extend.field-dict";
    /// Key of the original (uncompressed) size of the field_dict value.
    static constexpr const char* kFieldDictOriginalSize =
        "paimon.map-extend.field-dict-original-size";
    /// Key of the JSON-encoded mapping from field_id to its physical column indices.
    static constexpr const char* kFieldColumns = "paimon.map-extend.field-columns";
    /// Key of the JSON-encoded set of field_ids that ever spilled into __overflow.
    static constexpr const char* kOverflowSet = "paimon.map-extend.overflow-set";
    /// Key of the number of physical columns K used in the file.
    static constexpr const char* kNumColumns = "paimon.map-extend.num-columns";
    /// Key of the maximum row width observed in the file.
    static constexpr const char* kMaxRowWidth = "paimon.map-extend.max-row-width";

    // ======== Physical sub-column names ========

    /// Name of the per-row field mapping column.
    static constexpr const char* kFieldMapping = "__field_mapping";
    /// Name of the overflow column.
    static constexpr const char* kOverflow = "__overflow";

    /// Builds the name of the physical column at position `index`.
    ///
    /// @param index Position of the physical column.
    /// @return "__col_<index>", e.g. "__col_0" for index 0 and "__col_1" for index 1.
    static std::string PhysicalColumnName(int32_t index) {
        std::string column_name = fmt::format("__col_{}", index);
        return column_name;
    }
};

/// In-memory form of the file-level meta that describes one columnar-extend MAP column.
struct ExtendMapFileMeta {
    /// Dictionary from field name to field id.
    std::map<std::string, int32_t> name_to_id;
    /// For each field id, the physical column indices it maps to. The indices are held in a
    /// vector, so equality on this member is sensitive to element order.
    std::map<int32_t, std::vector<int32_t>> field_to_columns;
    /// Ids of the fields that spilled into __overflow at least once.
    std::set<int32_t> overflow_field_set;
    /// Number of physical columns (K) in the file.
    int32_t num_columns = 0;
    /// Largest row width observed in the file.
    int32_t max_row_width = 0;

    /// Member-wise equality: returns true only when every member of both metas matches.
    bool operator==(const ExtendMapFileMeta& other) const {
        // Scalars first: a mismatch here avoids walking the containers at all.
        if (num_columns != other.num_columns) {
            return false;
        }
        if (max_row_width != other.max_row_width) {
            return false;
        }
        if (overflow_field_set != other.overflow_field_set) {
            return false;
        }
        if (name_to_id != other.name_to_id) {
            return false;
        }
        return field_to_columns == other.field_to_columns;
    }
};

} // namespace paimon
Loading
Loading