Skip to content

Commit 6d5f57f

Browse files
committed
TypeAwareCompress(tac) for column-wise data compression like (U)INT64 in shuffle
1 parent 9b268b5 commit 6d5f57f

22 files changed

Lines changed: 1601 additions & 40 deletions

File tree

backends-velox/src/main/scala/org/apache/spark/shuffle/ColumnarShuffleWriter.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ class ColumnarShuffleWriter[K, V](
156156
conf.get(SHUFFLE_FILE_BUFFER_SIZE).toInt,
157157
tempDataFile.getAbsolutePath,
158158
localDirs,
159-
GlutenConfig.get.columnarShuffleEnableDictionary
159+
GlutenConfig.get.columnarShuffleEnableDictionary,
160+
GlutenConfig.get.columnarShuffleEnableTypeAwareCompress
160161
)
161162

162163
nativeShuffleWriter = if (isSort) {

cpp/core/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ set(SPARK_COLUMNAR_PLUGIN_SRCS
143143
shuffle/Spill.cc
144144
shuffle/Utils.cc
145145
utils/Compression.cc
146+
utils/tac/FForCodec.cc
147+
utils/tac/TypeAwareCompressCodec.cc
146148
utils/StringUtil.cc
147149
utils/ObjectStore.cc
148150
jni/JniError.cc

cpp/core/jni/JniWrapper.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,8 @@ Java_org_apache_gluten_vectorized_LocalPartitionWriterJniWrapper_createPartition
945945
jint shuffleFileBufferSize,
946946
jstring dataFileJstr,
947947
jstring localDirsJstr,
948-
jboolean enableDictionary) {
948+
jboolean enableDictionary,
949+
jboolean enableTypeAwareCompress) {
949950
JNI_METHOD_START
950951

951952
const auto ctx = getRuntime(env, wrapper);
@@ -960,7 +961,8 @@ Java_org_apache_gluten_vectorized_LocalPartitionWriterJniWrapper_createPartition
960961
mergeBufferSize,
961962
mergeThreshold,
962963
numSubDirs,
963-
enableDictionary);
964+
enableDictionary,
965+
enableTypeAwareCompress);
964966

965967
auto partitionWriter = std::make_shared<LocalPartitionWriter>(
966968
numPartitions,

cpp/core/shuffle/LocalPartitionWriter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ class LocalPartitionWriter : public PartitionWriter {
5454
return arrow::Status::NotImplemented("Invalid code path for local shuffle writer.");
5555
}
5656

57+
bool enableTypeAwareCompress() const override {
58+
// Type-aware compression is not compatible with dictionary encoding
59+
// since it may change the buffer layout and types.
60+
return options_->enableTypeAwareCompress && !options_->enableDictionary;
61+
}
62+
5763
/// The stop function performs several tasks:
5864
/// 1. Opens the final data file.
5965
/// 2. Iterates over each partition ID (pid) to:

cpp/core/shuffle/Options.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ static constexpr int64_t kDefaultReadBufferSize = 1 << 20;
4242
static constexpr int64_t kDefaultDeserializerBufferSize = 1 << 20;
4343
static constexpr int64_t kDefaultShuffleFileBufferSize = 32 << 10;
4444
static constexpr bool kDefaultEnableDictionary = false;
45+
static constexpr bool kDefaultEnableTypeAwareCompress = false;
4546

4647
enum class ShuffleWriterType { kHashShuffle, kSortShuffle, kRssSortShuffle, kGpuHashShuffle };
4748

@@ -175,6 +176,7 @@ struct LocalPartitionWriterOptions {
175176
int32_t numSubDirs = kDefaultNumSubDirs; // spark.diskStore.subDirectories
176177

177178
bool enableDictionary = kDefaultEnableDictionary;
179+
bool enableTypeAwareCompress = kDefaultEnableTypeAwareCompress;
178180

179181
LocalPartitionWriterOptions() = default;
180182

@@ -185,14 +187,16 @@ struct LocalPartitionWriterOptions {
185187
int32_t mergeBufferSize,
186188
double mergeThreshold,
187189
int32_t numSubDirs,
188-
bool enableDictionary)
190+
bool enableDictionary,
191+
bool enableTypeAwareCompress = kDefaultEnableTypeAwareCompress)
189192
: shuffleFileBufferSize(shuffleFileBufferSize),
190193
compressionBufferSize(compressionBufferSize),
191194
compressionThreshold(compressionThreshold),
192195
mergeBufferSize(mergeBufferSize),
193196
mergeThreshold(mergeThreshold),
194197
numSubDirs(numSubDirs),
195-
enableDictionary(enableDictionary) {}
198+
enableDictionary(enableDictionary),
199+
enableTypeAwareCompress(enableTypeAwareCompress) {}
196200
};
197201

198202
struct RssPartitionWriterOptions {

cpp/core/shuffle/PartitionWriter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ class PartitionWriter : public Reclaimable {
7171
return payloadPool_->bytes_allocated();
7272
}
7373

74+
virtual bool enableTypeAwareCompress() const {
75+
return false;
76+
}
77+
7478
protected:
7579
uint32_t numPartitions_;
7680
std::unique_ptr<arrow::util::Codec> codec_;

cpp/core/shuffle/Payload.cc

Lines changed: 111 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "shuffle/Utils.h"
2727
#include "utils/Exception.h"
2828
#include "utils/Timer.h"
29+
#include "utils/tac/TypeAwareCompressCodec.h"
2930

3031
namespace gluten {
3132
namespace {
@@ -36,6 +37,7 @@ static const Payload::Type kUncompressedType = gluten::BlockPayload::kUncompress
3637
static constexpr int64_t kZeroLengthBuffer = 0;
3738
static constexpr int64_t kNullBuffer = -1;
3839
static constexpr int64_t kUncompressedBuffer = -2;
40+
static constexpr int64_t kTypeAwareBuffer = -3;
3941

4042
template <typename T>
4143
void write(uint8_t** dst, T data) {
@@ -86,6 +88,51 @@ arrow::Result<int64_t> compressBuffer(
8688
return kCompressedBufferHeaderLength + compressedLength;
8789
}
8890

91+
// Type-aware buffer compression via TypeAwareCompressCodec.
92+
// Same wire format as compressBuffer:
93+
// kTypeAwareBuffer (int64) | uncompressedLength (int64) | compressedLength (int64) | compressed data
94+
// If compressed size >= uncompressed size, falls back to kUncompressedBuffer (same as standard codec).
95+
arrow::Result<int64_t> compressTypeAwareBuffer(
96+
const std::shared_ptr<arrow::Buffer>& buffer,
97+
uint8_t* output,
98+
int64_t outputLength,
99+
int8_t typeKind) {
100+
auto outputPtr = &output;
101+
if (!buffer) {
102+
write<int64_t>(outputPtr, kNullBuffer);
103+
return sizeof(int64_t);
104+
}
105+
if (buffer->size() == 0) {
106+
write<int64_t>(outputPtr, kZeroLengthBuffer);
107+
return sizeof(int64_t);
108+
}
109+
110+
static const int64_t kHeaderLength = 3 * sizeof(int64_t); // marker + uncompressedLen + compressedLen
111+
if (outputLength < kHeaderLength + buffer->size()) {
112+
return arrow::Status::Invalid("Output buffer too small for type-aware compression.");
113+
}
114+
auto* dataOutput = output + kHeaderLength;
115+
auto availableOutput = outputLength - kHeaderLength;
116+
117+
ARROW_ASSIGN_OR_RAISE(
118+
auto compressedSize,
119+
TypeAwareCompressCodec::compress(buffer->data(), buffer->size(), dataOutput, availableOutput, typeKind));
120+
121+
if (compressedSize >= buffer->size()) {
122+
// Compression didn't help. Fall back to uncompressed, same as compressBuffer.
123+
write<int64_t>(outputPtr, kUncompressedBuffer);
124+
write(outputPtr, static_cast<int64_t>(buffer->size()));
125+
memcpy(*outputPtr, buffer->data(), buffer->size());
126+
return 2 * sizeof(int64_t) + buffer->size();
127+
}
128+
129+
write<int64_t>(outputPtr, kTypeAwareBuffer);
130+
write(outputPtr, static_cast<int64_t>(buffer->size()));
131+
write(outputPtr, static_cast<int64_t>(compressedSize));
132+
// compressed data already written at dataOutput by TypeAwareCompressCodec::compress.
133+
return kHeaderLength + compressedSize;
134+
}
135+
89136
arrow::Status compressAndFlush(
90137
const std::shared_ptr<arrow::Buffer>& buffer,
91138
arrow::io::OutputStream* outputStream,
@@ -146,6 +193,24 @@ arrow::Result<std::shared_ptr<arrow::Buffer>> readCompressedBuffer(
146193

147194
int64_t uncompressedLength;
148195
RETURN_NOT_OK(inputStream->Read(sizeof(int64_t), &uncompressedLength));
196+
197+
if (compressedLength == kTypeAwareBuffer) {
198+
// Type-aware compressed buffer. This marker only appears when compression helped.
199+
// Wire format: compressedLength (int64) already consumed above as kTypeAwareBuffer,
200+
// then uncompressedLength (already read), then actualCompressedLen, then data.
201+
int64_t actualCompressedLen;
202+
RETURN_NOT_OK(inputStream->Read(sizeof(int64_t), &actualCompressedLen));
203+
ARROW_ASSIGN_OR_RAISE(auto compressed, arrow::AllocateResizableBuffer(actualCompressedLen, pool));
204+
RETURN_NOT_OK(inputStream->Read(actualCompressedLen, compressed->mutable_data()));
205+
206+
timer.switchTo(&decompressTime);
207+
ARROW_ASSIGN_OR_RAISE(auto output, arrow::AllocateResizableBuffer(uncompressedLength, pool));
208+
RETURN_NOT_OK(TypeAwareCompressCodec::decompress(
209+
compressed->data(), actualCompressedLen, output->mutable_data(), uncompressedLength)
210+
.status());
211+
return output;
212+
}
213+
149214
if (compressedLength == kUncompressedBuffer) {
150215
ARROW_ASSIGN_OR_RAISE(auto uncompressed, arrow::AllocateResizableBuffer(uncompressedLength, pool));
151216
RETURN_NOT_OK(inputStream->Read(uncompressedLength, uncompressed->mutable_data()));
@@ -185,25 +250,38 @@ arrow::Result<std::unique_ptr<BlockPayload>> BlockPayload::fromBuffers(
185250
std::vector<std::shared_ptr<arrow::Buffer>> buffers,
186251
const std::vector<bool>* isValidityBuffer,
187252
arrow::MemoryPool* pool,
188-
arrow::util::Codec* codec) {
253+
arrow::util::Codec* codec,
254+
const std::vector<int8_t>* bufferTypes) {
189255
const uint32_t numBuffers = buffers.size();
190256

191257
if (payloadType == Payload::Type::kCompressed) {
192258
Timer compressionTime;
193259
compressionTime.start();
194-
// Compress.
195-
auto maxLength = maxCompressedLength(buffers, codec);
196-
std::shared_ptr<arrow::Buffer> compressedBuffer;
197260

261+
// Compute max compressed length, accounting for type-aware compression where applicable.
262+
auto maxLength = maxCompressedLength(buffers, codec, bufferTypes);
263+
264+
std::shared_ptr<arrow::Buffer> compressedBuffer;
198265
ARROW_ASSIGN_OR_RAISE(compressedBuffer, arrow::AllocateResizableBuffer(maxLength, pool));
199266
auto* output = compressedBuffer->mutable_data();
200267

201268
int64_t actualLength = 0;
202269
// Compress buffers one by one.
203-
for (auto& buffer : buffers) {
270+
for (size_t i = 0; i < buffers.size(); ++i) {
204271
auto availableLength = maxLength - actualLength;
205-
// Release buffer after compression.
206-
ARROW_ASSIGN_OR_RAISE(auto compressedSize, compressBuffer(std::move(buffer), output, availableLength, codec));
272+
auto typeKind =
273+
(bufferTypes != nullptr && i < bufferTypes->size()) ? (*bufferTypes)[i] : tac::kUnsupported;
274+
275+
int64_t compressedSize = 0;
276+
if (TypeAwareCompressCodec::support(typeKind)) {
277+
// Use type-aware compression for supported types.
278+
ARROW_ASSIGN_OR_RAISE(
279+
compressedSize, compressTypeAwareBuffer(std::move(buffers[i]), output, availableLength, typeKind));
280+
} else {
281+
// Use standard codec (LZ4/ZSTD) for unsupported types.
282+
ARROW_ASSIGN_OR_RAISE(
283+
compressedSize, compressBuffer(std::move(buffers[i]), output, availableLength, codec));
284+
}
207285
output += compressedSize;
208286
actualLength += compressedSize;
209287
}
@@ -327,16 +405,29 @@ int64_t BlockPayload::rawSize() {
327405

328406
int64_t BlockPayload::maxCompressedLength(
329407
const std::vector<std::shared_ptr<arrow::Buffer>>& buffers,
330-
arrow::util::Codec* codec) {
408+
arrow::util::Codec* codec,
409+
const std::vector<int8_t>* bufferTypes) {
331410
// Compressed buffer layout: | buffer1 compressedLength | buffer1 uncompressedLength | buffer1 | ...
332-
const auto metadataLength = sizeof(int64_t) * 2 * buffers.size();
333-
int64_t totalCompressedLength =
334-
std::accumulate(buffers.begin(), buffers.end(), 0LL, [&](auto sum, const auto& buffer) {
335-
if (!buffer) {
336-
return sum;
337-
}
338-
return sum + codec->MaxCompressedLen(buffer->size(), buffer->data());
339-
});
411+
int64_t metadataLength = sizeof(int64_t) * 2 * buffers.size();
412+
int64_t totalCompressedLength = 0;
413+
for (size_t i = 0; i < buffers.size(); ++i) {
414+
const auto& buffer = buffers[i];
415+
if (!buffer) {
416+
continue;
417+
}
418+
if (bufferTypes != nullptr && i < bufferTypes->size()) {
419+
auto typeKind = (*bufferTypes)[i];
420+
if (TypeAwareCompressCodec::support(typeKind)) {
421+
// Type-aware compressed buffer has an extra int64 marker to indicate type-aware compression.
422+
// buffer layout: | kTypeAwareBuffer (int64) | buffer 1 uncompressedLength | buffer 1 compressedLength | buffer 1 | ...
423+
metadataLength += sizeof(int64_t);
424+
totalCompressedLength += TypeAwareCompressCodec::maxCompressedLen(buffer->size(), typeKind);
425+
continue;
426+
}
427+
}
428+
// Standard codec: compressed data.
429+
totalCompressedLength += codec->MaxCompressedLen(buffer->size(), buffer->data());
430+
}
340431
return metadataLength + totalCompressedLength;
341432
}
342433

@@ -413,12 +504,14 @@ arrow::Result<std::unique_ptr<InMemoryPayload>> InMemoryPayload::merge(
413504
}
414505
}
415506
}
416-
return std::make_unique<InMemoryPayload>(mergedRows, isValidityBuffer, source->schema(), std::move(merged));
507+
return std::make_unique<InMemoryPayload>(
508+
mergedRows, isValidityBuffer, source->schema(), std::move(merged), false, source->bufferTypes_);
417509
}
418510

419511
arrow::Result<std::unique_ptr<BlockPayload>>
420512
InMemoryPayload::toBlockPayload(Payload::Type payloadType, arrow::MemoryPool* pool, arrow::util::Codec* codec) {
421-
return BlockPayload::fromBuffers(payloadType, numRows_, std::move(buffers_), isValidityBuffer_, pool, codec);
513+
return BlockPayload::fromBuffers(
514+
payloadType, numRows_, std::move(buffers_), isValidityBuffer_, pool, codec, bufferTypes_);
422515
}
423516

424517
arrow::Status InMemoryPayload::serialize(arrow::io::OutputStream* outputStream) {

cpp/core/shuffle/Payload.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ class BlockPayload final : public Payload {
7979
std::vector<std::shared_ptr<arrow::Buffer>> buffers,
8080
const std::vector<bool>* isValidityBuffer,
8181
arrow::MemoryPool* pool,
82-
arrow::util::Codec* codec);
82+
arrow::util::Codec* codec,
83+
const std::vector<int8_t>* bufferTypes = nullptr);
8384

8485
static arrow::Result<std::vector<std::shared_ptr<arrow::Buffer>>> deserialize(
8586
arrow::io::InputStream* inputStream,
@@ -91,7 +92,8 @@ class BlockPayload final : public Payload {
9192

9293
static int64_t maxCompressedLength(
9394
const std::vector<std::shared_ptr<arrow::Buffer>>& buffers,
94-
arrow::util::Codec* codec);
95+
arrow::util::Codec* codec,
96+
const std::vector<int8_t>* bufferTypes = nullptr);
9597

9698
arrow::Status serialize(arrow::io::OutputStream* outputStream) override;
9799

@@ -121,11 +123,13 @@ class InMemoryPayload final : public Payload {
121123
const std::vector<bool>* isValidityBuffer,
122124
const std::shared_ptr<arrow::Schema>& schema,
123125
std::vector<std::shared_ptr<arrow::Buffer>> buffers,
124-
bool hasComplexType = false)
126+
bool hasComplexType = false,
127+
const std::vector<int8_t>* bufferTypes = nullptr)
125128
: Payload(Type::kUncompressed, numRows, isValidityBuffer),
126129
schema_(schema),
127130
buffers_(std::move(buffers)),
128-
hasComplexType_(hasComplexType) {}
131+
hasComplexType_(hasComplexType),
132+
bufferTypes_(bufferTypes) {}
129133

130134
static arrow::Result<std::unique_ptr<InMemoryPayload>>
131135
merge(std::unique_ptr<InMemoryPayload> source, std::unique_ptr<InMemoryPayload> append, arrow::MemoryPool* pool);
@@ -155,6 +159,7 @@ class InMemoryPayload final : public Payload {
155159
std::shared_ptr<arrow::Schema> schema_;
156160
std::vector<std::shared_ptr<arrow::Buffer>> buffers_;
157161
bool hasComplexType_;
162+
const std::vector<int8_t>* bufferTypes_;
158163
};
159164

160165
class UncompressedDiskBlockPayload final : public Payload {

cpp/core/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@
1616
add_test_case(round_robin_partitioner_test SOURCES RoundRobinPartitionerTest.cc)
1717
add_test_case(object_store_test SOURCES ObjectStoreTest.cc)
1818
add_test_case(memory_allocator_test SOURCES MemoryAllocatorTest.cc)
19+
add_test_case(ffor_codec_test SOURCES FForCodecTest.cc)

0 commit comments

Comments
 (0)