forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathIcebergDataObjectInfo.cpp
More file actions
95 lines (81 loc) · 3.63 KB
/
IcebergDataObjectInfo.cpp
File metadata and controls
95 lines (81 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <Poco/String.h>
#include "config.h"
#if USE_AVRO
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Poco/JSON/Array.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Parser.h>
#include <Core/Types.h>
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Interpreters/Context_fwd.h>
#include <Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h>
#include <Storages/ObjectStorage/DataLakes/Iceberg/PositionDeleteTransform.h>
#include <base/defines.h>
#include <Common/SharedMutex.h>
#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions live in another translation unit.
namespace DB::ErrorCodes
{
/// Thrown below when position deletes are attached to a data file whose format is not Parquet.
extern const int NOT_IMPLEMENTED;
}
using namespace DB::Iceberg;
namespace DB
{
/// Settings referenced in this file. They are only declared here (`extern`);
/// the definitions live in another translation unit.
namespace Setting
{
/// Read in getPositionDeleteTransformer to choose between the bitmap and the streaming
/// position-delete transform.
extern const SettingsBool use_roaring_bitmap_iceberg_positional_deletes;
};
/// Constructs the object info from a data manifest entry.
///
/// The entry's `file_path` is passed to the PathWithMetadata base as its first argument and
/// `file_path_key` as its third (std::nullopt when the key is empty). The key, schema id,
/// file format and added sequence number of the entry are copied into the members.
///
/// Throws NOT_IMPLEMENTED if position deletes are present and the file format is not Parquet.
IcebergDataObjectInfo::IcebergDataObjectInfo(Iceberg::ManifestFileEntry data_manifest_file_entry_)
    : PathWithMetadata(
          data_manifest_file_entry_.file_path,
          std::nullopt,
          data_manifest_file_entry_.file_path_key.empty()
              ? std::nullopt
              : std::make_optional(data_manifest_file_entry_.file_path_key))
    , data_object_file_path_key(data_manifest_file_entry_.file_path_key)
    , underlying_format_read_schema_id(data_manifest_file_entry_.schema_id)
    , file_format(data_manifest_file_entry_.file_format)
    , sequence_number(data_manifest_file_entry_.added_sequence_number)
{
    /// NOTE(review): nothing in the initializer list above populates position_deletes_objects,
    /// so unless the member has a non-empty default initializer this check cannot fire here - confirm.
    if (position_deletes_objects.empty())
        return;

    /// The comparison is case-insensitive: the by-value parameter is upper-cased in place,
    /// which is also why the error message below reports the upper-cased format.
    const auto & upper_cased_format = Poco::toUpperInPlace(data_manifest_file_entry_.file_format);
    if (upper_cased_format == "PARQUET")
        return;

    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Position deletes are only supported for data files of Parquet format in Iceberg, but got {}",
        data_manifest_file_entry_.file_format);
}
/// Constructs the object info from a data manifest entry together with an already resolved
/// storage and key.
///
/// `resolved_key` is passed to the PathWithMetadata base as its first argument, the entry's
/// `file_path` as its third (std::nullopt when the path is empty) and `resolved_storage` as
/// its fourth. The key, schema id, file format and added sequence number of the entry are
/// copied into the members.
///
/// Throws NOT_IMPLEMENTED if position deletes are present and the file format is not Parquet.
IcebergDataObjectInfo::IcebergDataObjectInfo(
    Iceberg::ManifestFileEntry data_manifest_file_entry_,
    ObjectStoragePtr resolved_storage,
    const String & resolved_key)
    : PathWithMetadata(
          resolved_key,
          std::nullopt,
          data_manifest_file_entry_.file_path.empty()
              ? std::nullopt
              : std::make_optional(data_manifest_file_entry_.file_path),
          resolved_storage)
    , data_object_file_path_key(data_manifest_file_entry_.file_path_key)
    , underlying_format_read_schema_id(data_manifest_file_entry_.schema_id)
    , file_format(data_manifest_file_entry_.file_format)
    , sequence_number(data_manifest_file_entry_.added_sequence_number)
{
    /// NOTE(review): nothing in the initializer list above populates position_deletes_objects,
    /// so unless the member has a non-empty default initializer this check cannot fire here - confirm.
    if (position_deletes_objects.empty())
        return;

    /// The comparison is case-insensitive: the by-value parameter is upper-cased in place,
    /// which is also why the error message below reports the upper-cased format.
    const auto & upper_cased_format = Poco::toUpperInPlace(data_manifest_file_entry_.file_format);
    if (upper_cased_format == "PARQUET")
        return;

    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Position deletes are only supported for data files of Parquet format in Iceberg, but got {}",
        data_manifest_file_entry_.file_format);
}
/// Creates the transform that applies position deletes for this data object.
///
/// Returns an IcebergBitmapPositionDeleteTransform when the setting
/// `use_roaring_bitmap_iceberg_positional_deletes` is enabled in `context_`, otherwise an
/// IcebergStreamingPositionDeleteTransform. Both are constructed from the same arguments:
/// the header, a shared pointer to this object, and the remaining parameters forwarded as-is.
std::shared_ptr<ISimpleTransform> IcebergDataObjectInfo::getPositionDeleteTransformer(
    ObjectStoragePtr object_storage,
    const SharedHeader & header,
    const std::optional<FormatSettings> & format_settings,
    ContextPtr context_,
    const String & table_location,
    SecondaryStorages & secondary_storages)
{
    /// Taken before reading the settings, so the order of operations matches the previous code.
    IcebergDataObjectInfoPtr this_object = shared_from_this();

    const bool use_bitmap_transform
        = context_->getSettingsRef()[Setting::use_roaring_bitmap_iceberg_positional_deletes].value;

    if (use_bitmap_transform)
    {
        return std::make_shared<IcebergBitmapPositionDeleteTransform>(
            header, this_object, object_storage, format_settings, context_, table_location, secondary_storages);
    }

    return std::make_shared<IcebergStreamingPositionDeleteTransform>(
        header, this_object, object_storage, format_settings, context_, table_location, secondary_storages);
}
}
#endif