Skip to content

Commit b806ac5

Browse files
authored
Merge pull request #1490 from Altinity/export_filename_pattern_setting
add setting to define filename pattern for part exports
2 parents 7aa5fcb + 4120b57 commit b806ac5

13 files changed

Lines changed: 330 additions & 3 deletions

File tree

docs/en/engines/table-engines/mergetree-family/part_export.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,13 @@ In case a table function is used as the destination, the schema can be omitted a
8484
- **Default**: `true`
8585
- **Description**: If set to true, throws if pending patch parts exists for a given part. Note that by default mutations are applied to all parts, which means that if a mutation in practice would only affetct part/partition x, all the other parts/partition will throw upon export. The exception is when the `IN PARTITION` clause was used in the mutation command. Note the `IN PARTITION` clause is not properly implemented for plain MergeTree tables.
8686

87+
### export_merge_tree_part_filename_pattern
88+
89+
- **Type**: `String`
90+
- **Default**: `{part_name}_{checksum}`
91+
- **Description**: Pattern for the filename of the exported merge tree part. The `part_name` and `checksum` are calculated and replaced on the fly. Additional macros are supported.
92+
93+
8794
## Examples
8895

8996
### Basic Export to S3

docs/en/engines/table-engines/mergetree-family/partition_export.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ TO TABLE [destination_database.]destination_table
8282
- **Default**: `true`
8383
- **Description**: If set to true, throws if pending patch parts exists for a given part. Note that by default mutations are applied to all parts, which means that if a mutation in practice would only affetct part/partition x, all the other parts/partition will throw upon export. The exception is when the `IN PARTITION` clause was used in the mutation command. Note the `IN PARTITION` clause is not properly implemented for plain MergeTree tables.
8484

85+
### export_merge_tree_part_filename_pattern
86+
87+
- **Type**: `String`
88+
- **Default**: `{part_name}_{checksum}`
89+
- **Description**: Pattern for the filename of the exported merge tree part. The `part_name` and `checksum` are calculated and replaced on the fly. Additional macros are supported.
90+
8591
## Examples
8692

8793
### Basic Export to S3

src/Core/Settings.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7398,6 +7398,9 @@ Possible values:
73987398
- `` (empty value) - use server or session timezone
73997399
74007400
Default value is empty.
7401+
)", 0) \
7402+
DECLARE(String, export_merge_tree_part_filename_pattern, "{part_name}_{checksum}", R"(
7403+
Pattern for the filename of the exported merge tree part. The `part_name` and `checksum` are calculated and replaced on the fly. Additional macros are supported.
74017404
)", 0) \
74027405
\
74037406
/* ####################################################### */ \

src/Core/SettingsChangesHistory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
4444
{"iceberg_partition_timezone", "", "", "New setting."},
4545
// {"object_storage_max_nodes", 0, 0, "Antalya: New setting"},
4646
{"s3_propagate_credentials_to_other_storages", false, false, "New setting"},
47+
{"export_merge_tree_part_filename_pattern", "", "{part_name}_{checksum}", "New setting"},
4748
});
4849
addSettingsChanges(settings_changes_history, "26.1",
4950
{

src/Storages/ExportReplicatedMergeTreePartitionManifest.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ struct ExportReplicatedMergeTreePartitionManifest
116116
size_t max_bytes_per_file;
117117
size_t max_rows_per_file;
118118
MergeTreePartExportManifest::FileAlreadyExistsPolicy file_already_exists_policy;
119+
String filename_pattern;
119120
bool lock_inside_the_task; /// todo temporary
120121

121122
std::string toJsonString() const
@@ -139,6 +140,7 @@ struct ExportReplicatedMergeTreePartitionManifest
139140
json.set("max_bytes_per_file", max_bytes_per_file);
140141
json.set("max_rows_per_file", max_rows_per_file);
141142
json.set("file_already_exists_policy", String(magic_enum::enum_name(file_already_exists_policy)));
143+
json.set("filename_pattern", filename_pattern);
142144
json.set("create_time", create_time);
143145
json.set("max_retries", max_retries);
144146
json.set("ttl_seconds", ttl_seconds);
@@ -175,6 +177,7 @@ struct ExportReplicatedMergeTreePartitionManifest
175177
manifest.parquet_parallel_encoding = json->getValue<bool>("parquet_parallel_encoding");
176178
manifest.max_bytes_per_file = json->getValue<size_t>("max_bytes_per_file");
177179
manifest.max_rows_per_file = json->getValue<size_t>("max_rows_per_file");
180+
manifest.filename_pattern = json->getValue<String>("filename_pattern");
178181

179182
if (json->has("file_already_exists_policy"))
180183
{

src/Storages/MergeTree/ExportPartTask.cpp

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
44
#include <Storages/MergeTree/MergeTreeData.h>
55
#include <Interpreters/Context.h>
6-
#include <Interpreters/DatabaseCatalog.h>
76
#include <Interpreters/inplaceBlockConversions.h>
87
#include <Core/Settings.h>
8+
#include <Common/Macros.h>
9+
#include <boost/algorithm/string/replace.hpp>
910
#include <Interpreters/ExpressionActions.h>
1011
#include <Interpreters/ActionsDAG.h>
12+
#include <Interpreters/DatabaseCatalog.h>
1113
#include <Processors/Executors/CompletedPipelineExecutor.h>
1214
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
1315
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
@@ -17,6 +19,7 @@
1719
#include "Common/setThreadName.h"
1820
#include <Common/Exception.h>
1921
#include <Common/ProfileEventsScope.h>
22+
#include <Databases/DatabaseReplicated.h>
2023
#include <Storages/MergeTree/ExportList.h>
2124
#include <Formats/FormatFactory.h>
2225
#include <Databases/enableAllExperimentalSettings.h>
@@ -47,6 +50,7 @@ namespace Setting
4750
extern const SettingsUInt64 export_merge_tree_part_max_bytes_per_file;
4851
extern const SettingsUInt64 export_merge_tree_part_max_rows_per_file;
4952
extern const SettingsBool allow_experimental_analyzer;
53+
extern const SettingsString export_merge_tree_part_filename_pattern;
5054
}
5155

5256
namespace
@@ -93,6 +97,33 @@ namespace
9397
plan_for_part.addStep(std::move(expression_step));
9498
}
9599
}
100+
101+
String buildDestinationFilename(
102+
const MergeTreePartExportManifest & manifest,
103+
const StorageID & storage_id,
104+
const ContextPtr & local_context)
105+
{
106+
auto filename = manifest.settings[Setting::export_merge_tree_part_filename_pattern].value;
107+
108+
boost::replace_all(filename, "{part_name}", manifest.data_part->name);
109+
boost::replace_all(filename, "{checksum}", manifest.data_part->checksums.getTotalChecksumHex());
110+
111+
Macros::MacroExpansionInfo macro_info;
112+
macro_info.table_id = storage_id;
113+
114+
if (auto database = DatabaseCatalog::instance().tryGetDatabase(storage_id.database_name))
115+
{
116+
if (const auto replicated = dynamic_cast<const DatabaseReplicated *>(database.get()))
117+
{
118+
macro_info.shard = replicated->getShardName();
119+
macro_info.replica = replicated->getReplicaName();
120+
}
121+
}
122+
123+
filename = local_context->getMacros()->expand(filename, macro_info);
124+
125+
return filename;
126+
}
96127
}
97128

98129
ExportPartTask::ExportPartTask(MergeTreeData & storage_, const MergeTreePartExportManifest & manifest_)
@@ -154,8 +185,10 @@ bool ExportPartTask::executeStep()
154185

155186
try
156187
{
188+
const auto filename = buildDestinationFilename(manifest, storage.getStorageID(), local_context);
189+
157190
sink = destination_storage->import(
158-
manifest.data_part->name + "_" + manifest.data_part->checksums.getTotalChecksumHex(),
191+
filename,
159192
block_with_partition_values,
160193
new_file_path_callback,
161194
manifest.file_already_exists_policy == MergeTreePartExportManifest::FileAlreadyExistsPolicy::overwrite,

src/Storages/MergeTree/ExportPartitionTaskScheduler.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ namespace
5656
context_copy->setSetting("export_merge_tree_part_throw_on_pending_mutations", false);
5757
context_copy->setSetting("export_merge_tree_part_throw_on_pending_patch_parts", false);
5858

59-
return context_copy;
59+
context_copy->setSetting("export_merge_tree_part_filename_pattern", manifest.filename_pattern);
60+
61+
return context_copy;
6062
}
6163
}
6264

src/Storages/StorageReplicatedMergeTree.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ namespace Setting
217217
extern const SettingsBool export_merge_tree_part_throw_on_pending_mutations;
218218
extern const SettingsBool export_merge_tree_part_throw_on_pending_patch_parts;
219219
extern const SettingsBool export_merge_tree_partition_lock_inside_the_task;
220+
extern const SettingsString export_merge_tree_part_filename_pattern;
220221
}
221222

222223
namespace MergeTreeSetting
@@ -8209,6 +8210,7 @@ void StorageReplicatedMergeTree::exportPartitionToTable(const PartitionCommand &
82098210

82108211

82118212
manifest.file_already_exists_policy = query_context->getSettingsRef()[Setting::export_merge_tree_part_file_already_exists_policy].value;
8213+
manifest.filename_pattern = query_context->getSettingsRef()[Setting::export_merge_tree_part_filename_pattern].value;
82128214

82138215
ops.emplace_back(zkutil::makeCreateRequest(
82148216
fs::path(partition_exports_path) / "metadata.json",
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<clickhouse>
2+
<macros>
3+
<shard>shard1</shard>
4+
<replica>replica1</replica>
5+
</macros>
6+
</clickhouse>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<clickhouse>
2+
<macros>
3+
<shard>shard2</shard>
4+
<replica>replica1</replica>
5+
</macros>
6+
</clickhouse>

0 commit comments

Comments
 (0)