Skip to content

Commit 09600c7

Browse files
committed
address comments & refine test
1 parent 59ad345 commit 09600c7

File tree

5 files changed

+95
-65
lines changed

5 files changed

+95
-65
lines changed

velox/common/compression/Compression.cpp

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -112,17 +112,6 @@ bool Codec::supportsGetUncompressedLength(CompressionKind kind) {
112112
return false;
113113
}
114114

115-
bool Codec::supportsStreamingCompression(CompressionKind kind) {
116-
switch (kind) {
117-
#ifdef VELOX_ENABLE_COMPRESSION_LZ4
118-
case CompressionKind_LZ4:
119-
return true;
120-
#endif
121-
default:
122-
return false;
123-
}
124-
}
125-
126115
bool Codec::supportsCompressFixedLength(CompressionKind kind) {
127116
// TODO: Return true if it's supported by compression kind.
128117
return false;
@@ -133,9 +122,6 @@ Expected<std::unique_ptr<Codec>> Codec::create(
133122
const CodecOptions& codecOptions) {
134123
if (!isAvailable(kind)) {
135124
auto name = compressionKindToString(kind);
136-
VELOX_RETURN_UNEXPECTED_IF(
137-
folly::StringPiece({name}).startsWith("unknown"),
138-
Status::Invalid("Unrecognized codec: ", name));
139125
return folly::makeUnexpected(Status::Invalid(
140126
"Support for codec '{}' is either not built or not implemented.",
141127
name));
@@ -158,9 +144,10 @@ Expected<std::unique_ptr<Codec>> Codec::create(
158144
codec = makeLz4HadoopCodec();
159145
break;
160146
}
147+
} else {
148+
// By default, create LZ4 Frame codec.
149+
codec = makeLz4FrameCodec(compressionLevel);
161150
}
162-
// By default, create LZ4 Frame codec.
163-
codec = makeLz4FrameCodec(compressionLevel);
164151
break;
165152
#endif
166153
default:
@@ -185,8 +172,6 @@ Expected<std::unique_ptr<Codec>> Codec::create(
185172

186173
bool Codec::isAvailable(CompressionKind kind) {
187174
switch (kind) {
188-
case CompressionKind_NONE:
189-
return true;
190175
#ifdef VELOX_ENABLE_COMPRESSION_LZ4
191176
case CompressionKind_LZ4:
192177
return true;
@@ -196,10 +181,11 @@ bool Codec::isAvailable(CompressionKind kind) {
196181
}
197182
}
198183

199-
std::optional<uint64_t> Codec::getUncompressedLength(
184+
Expected<uint64_t> Codec::getUncompressedLength(
200185
const uint8_t* input,
201186
uint64_t inputLength) const {
202-
return std::nullopt;
187+
return folly::makeUnexpected(Status::Invalid(
188+
"getUncompressedLength is unsupported with {} format.", name()));
203189
}
204190

205191
Expected<uint64_t> Codec::compressFixedLength(
@@ -211,6 +197,10 @@ Expected<uint64_t> Codec::compressFixedLength(
211197
Status::Invalid("'{}' doesn't support fixed-length compression", name()));
212198
}
213199

200+
bool Codec::supportsStreamingCompression() const {
201+
return false;
202+
}
203+
214204
Expected<std::shared_ptr<StreamingCompressor>>
215205
Codec::makeStreamingCompressor() {
216206
return folly::makeUnexpected(Status::Invalid(

velox/common/compression/Compression.h

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class StreamingCompressor {
7373

7474
/// Compress some input.
7575
/// If CompressResult.outputTooSmall is true on return, compress() should be
76-
/// called again with a larger output buffer.
76+
/// called again with a larger output buffer, such as doubling its size.
7777
virtual Expected<CompressResult> compress(
7878
const uint8_t* input,
7979
uint64_t inputLength,
@@ -82,15 +82,16 @@ class StreamingCompressor {
8282

8383
/// Flush part of the compressed output.
8484
/// If FlushResult.outputTooSmall is true on return, flush() should be called
85-
/// again with a larger output buffer.
85+
/// again with a larger output buffer, such as doubling its size.
8686
virtual Expected<FlushResult> flush(
8787
uint8_t* output,
8888
uint64_t outputLength) = 0;
8989

90-
/// End compressing, doing whatever is necessary to end the stream.
90+
/// End compressing, doing whatever is necessary to end the stream, and
91+
/// flushing the compressed output.
9192
/// If EndResult.outputTooSmall is true on return, end() should be called
92-
/// again with a larger output buffer. Otherwise, the StreamingCompressor
93-
/// should not be used anymore. end() will flush the compressed output.
93+
/// again with a larger output buffer, such as doubling its size.
94+
/// Otherwise, the StreamingCompressor should not be used anymore.
9495
virtual Expected<EndResult> end(uint8_t* output, uint64_t outputLength) = 0;
9596
};
9697

@@ -106,7 +107,7 @@ class StreamingDecompressor {
106107

107108
/// Decompress some input.
108109
/// If DecompressResult.outputTooSmall is true on return, decompress() should
109-
/// be called again with a larger output buffer.
110+
/// be called again with a larger output buffer, such as doubling its size.
110111
virtual Expected<DecompressResult> decompress(
111112
const uint8_t* input,
112113
uint64_t inputLength,
@@ -154,9 +155,6 @@ class Codec {
154155
/// compressed length.
155156
static bool supportsCompressFixedLength(CompressionKind kind);
156157

157-
// Return true if indicated kind supports creating streaming de/compressor.
158-
static bool supportsStreamingCompression(CompressionKind kind);
159-
160158
/// Return the smallest supported compression level.
161159
/// If the codec doesn't support compression level,
162160
/// `kUseDefaultCompressionLevel` will be returned.
@@ -214,19 +212,28 @@ class Codec {
214212
// Maximum compressed length of given input length.
215213
virtual uint64_t maxCompressedLength(uint64_t inputLength) = 0;
216214

217-
/// Retrieves the actual uncompressed length of data using the specified
218-
/// compression library.
219-
/// Note: This functionality is not universally supported by all compression
220-
/// libraries. If not supported, `std::nullopt` will be returned.
221-
virtual std::optional<uint64_t> getUncompressedLength(
215+
/// Retrieves the uncompressed length of the given compressed data using the
216+
/// specified compression library.
217+
/// If the input data is corrupted, returns `Unexpected` with
218+
/// `Status::IOError`. Not all compression libraries support this
219+
/// functionality. Use supportsGetUncompressedLength() to check before
220+
/// calling. If unsupported, returns `Unexpected` with `Status::Invalid`.
221+
virtual Expected<uint64_t> getUncompressedLength(
222222
const uint8_t* input,
223223
uint64_t inputLength) const;
224224

225-
// Create a streaming compressor instance.
225+
// Return true if indicated kind supports creating streaming de/compressor.
226+
virtual bool supportsStreamingCompression() const;
227+
228+
/// Create a streaming compressor instance.
229+
/// Use supportsStreamingCompression() to check before calling.
230+
/// If unsupported, returns `Unexpected` with `Status::Invalid`.
226231
virtual Expected<std::shared_ptr<StreamingCompressor>>
227232
makeStreamingCompressor();
228233

229-
// Create a streaming compressor instance.
234+
/// Create a streaming decompressor instance.
235+
/// Use supportsStreamingCompression() to check before calling.
236+
/// If unsupported, returns `Unexpected` with `Status::Invalid`.
230237
virtual Expected<std::shared_ptr<StreamingDecompressor>>
231238
makeStreamingDecompressor();
232239

@@ -237,7 +244,7 @@ class Codec {
237244
virtual int32_t compressionLevel() const;
238245

239246
// The name of this Codec's compression type.
240-
std::string name() const;
247+
virtual std::string name() const;
241248

242249
private:
243250
// Initializes the codec's resources.

velox/common/compression/Lz4Compression.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ Status LZ4Compressor::compressBegin(
230230
return Status::OK();
231231
}
232232

233-
Status common::LZ4Decompressor::init() {
233+
Status LZ4Decompressor::init() {
234234
finished_ = false;
235235
auto ret = LZ4F_createDecompressionContext(&ctx_, LZ4F_VERSION);
236236
VELOX_RETURN_IF(LZ4F_isError(ret), lz4Error("LZ4 init failed: ", ret));
@@ -304,7 +304,7 @@ int32_t Lz4CodecBase::compressionLevel() const {
304304
}
305305

306306
CompressionKind Lz4CodecBase::compressionKind() const {
307-
return CompressionKind::CompressionKind_LZ4;
307+
return CompressionKind_LZ4;
308308
}
309309

310310
Lz4FrameCodec::Lz4FrameCodec(int32_t compressionLevel)
@@ -363,6 +363,10 @@ Expected<uint64_t> Lz4FrameCodec::decompress(
363363
});
364364
}
365365

366+
bool Lz4FrameCodec::supportsStreamingCompression() const {
367+
return true;
368+
}
369+
366370
Expected<std::shared_ptr<StreamingCompressor>>
367371
Lz4FrameCodec::makeStreamingCompressor() {
368372
auto ptr = std::make_shared<LZ4Compressor>(compressionLevel_);
@@ -430,6 +434,10 @@ Expected<uint64_t> Lz4RawCodec::decompress(
430434
return static_cast<uint64_t>(decompressedSize);
431435
}
432436

437+
std::string Lz4RawCodec::name() const {
438+
return "lz4_raw";
439+
}
440+
433441
Lz4HadoopCodec::Lz4HadoopCodec() : Lz4RawCodec(kLz4DefaultCompressionLevel) {}
434442

435443
uint64_t Lz4HadoopCodec::maxCompressedLength(uint64_t inputLength) {
@@ -491,6 +499,10 @@ int32_t Lz4HadoopCodec::defaultCompressionLevel() const {
491499
return kUseDefaultCompressionLevel;
492500
}
493501

502+
std::string Lz4HadoopCodec::name() const {
503+
return "lz4_hadoop";
504+
}
505+
494506
Expected<uint64_t> Lz4HadoopCodec::decompressInternal(
495507
const uint8_t* input,
496508
uint64_t inputLength,

velox/common/compression/Lz4Compression.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ struct Lz4CodecOptions : CodecOptions {
2929
enum Type { kLz4Frame, kLz4Raw, kLz4Hadoop };
3030

3131
Lz4CodecOptions(
32-
Lz4CodecOptions::Type type,
32+
Type type,
3333
int32_t compressionLevel = kUseDefaultCompressionLevel)
3434
: CodecOptions(compressionLevel), type(type) {}
3535

36-
Lz4CodecOptions::Type type;
36+
Type type;
3737
};
3838

3939
class Lz4CodecBase : public Codec {
@@ -72,6 +72,8 @@ class Lz4FrameCodec : public Lz4CodecBase {
7272
uint8_t* output,
7373
uint64_t outputLength) override;
7474

75+
bool supportsStreamingCompression() const override;
76+
7577
Expected<std::shared_ptr<StreamingCompressor>> makeStreamingCompressor()
7678
override;
7779

@@ -99,6 +101,8 @@ class Lz4RawCodec : public Lz4CodecBase {
99101
uint64_t inputLength,
100102
uint8_t* output,
101103
uint64_t outputLength) override;
104+
105+
std::string name() const override;
102106
};
103107

104108
/// The Hadoop Lz4Codec source code can be found here:
@@ -127,6 +131,8 @@ class Lz4HadoopCodec : public Lz4RawCodec, public HadoopCompressionFormat {
127131

128132
int32_t defaultCompressionLevel() const override;
129133

134+
std::string name() const override;
135+
130136
private:
131137
Expected<uint64_t> decompressInternal(
132138
const uint8_t* input,

velox/common/compression/tests/CompressionTest.cpp

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,10 @@ std::vector<TestParams> generateLz4TestParams() {
5757
Lz4CodecOptions::kLz4Frame,
5858
Lz4CodecOptions::kLz4Hadoop}) {
5959
params.emplace_back(
60-
CompressionKind::CompressionKind_LZ4,
61-
std::make_shared<Lz4CodecOptions>(type));
60+
CompressionKind_LZ4, std::make_shared<Lz4CodecOptions>(type));
6261
}
62+
// Add default CodecOptions.
63+
params.emplace_back(CompressionKind_LZ4);
6364
return params;
6465
}
6566

@@ -386,14 +387,8 @@ class CodecTest : public ::testing::TestWithParam<TestParams> {
386387
TEST_P(CodecTest, specifyCompressionLevel) {
387388
std::vector<uint8_t> data = makeRandomData(2000);
388389
const auto kind = getCompressionKind();
389-
if (!Codec::isAvailable(kind)) {
390-
// Support for this codec hasn't been built.
391-
VELOX_ASSERT_THROW(
392-
Codec::create(kind, kUseDefaultCompressionLevel),
393-
"Support for codec '" + compressionKindToString(kind) +
394-
"' is either not built or not implemented.");
395-
return;
396-
}
390+
ASSERT_TRUE(Codec::isAvailable(kind));
391+
397392
auto codecDefault =
398393
Codec::create(kind).thenOrThrow(folly::identity, throwsNotOk);
399394
checkCodecRoundtrip(codecDefault, data);
@@ -421,14 +416,22 @@ TEST_P(CodecTest, getUncompressedLength) {
421416
compressed.resize(compressedLength);
422417

423418
if (Codec::supportsGetUncompressedLength(getCompressionKind())) {
424-
ASSERT_EQ(
425-
codec->getUncompressedLength(compressed.data(), compressedLength),
426-
inputLength);
419+
auto uncompressedLength =
420+
codec->getUncompressedLength(compressed.data(), compressedLength)
421+
.thenOrThrow(folly::identity, throwsNotOk);
422+
ASSERT_EQ(uncompressedLength, inputLength);
427423
} else {
428-
ASSERT_EQ(
429-
codec->getUncompressedLength(compressed.data(), compressedLength),
430-
std::nullopt);
424+
VELOX_ASSERT_ERROR_STATUS(
425+
codec->getUncompressedLength(compressed.data(), compressedLength)
426+
.error(),
427+
StatusCode::kInvalid,
428+
fmt::format(
429+
"getUncompressedLength is unsupported with {} format.",
430+
codec->name()));
431431
}
432+
433+
// TODO: For codecs that support getUncompressedLength(), verify the error
434+
// message for corrupted data.
432435
}
433436

434437
TEST_P(CodecTest, codecRoundtrip) {
@@ -440,47 +443,47 @@ TEST_P(CodecTest, codecRoundtrip) {
440443
}
441444

442445
TEST_P(CodecTest, streamingCompressor) {
443-
if (!Codec::supportsStreamingCompression(getCompressionKind())) {
446+
const auto codec = makeCodec();
447+
if (!codec->supportsStreamingCompression()) {
444448
return;
445449
}
446450

447451
for (auto dataSize : {0, 10, 10000, 100000}) {
448-
auto codec = makeCodec();
449452
checkStreamingCompressor(codec.get(), makeRandomData(dataSize));
450453
checkStreamingCompressor(codec.get(), makeCompressibleData(dataSize));
451454
}
452455
}
453456

454457
TEST_P(CodecTest, streamingDecompressor) {
455-
if (!Codec::supportsStreamingCompression(getCompressionKind())) {
458+
const auto codec = makeCodec();
459+
if (!codec->supportsStreamingCompression()) {
456460
return;
457461
}
458462

459463
for (auto dataSize : {0, 10, 10000, 100000}) {
460-
auto codec = makeCodec();
461464
checkStreamingDecompressor(codec.get(), makeRandomData(dataSize));
462465
checkStreamingDecompressor(codec.get(), makeCompressibleData(dataSize));
463466
}
464467
}
465468

466469
TEST_P(CodecTest, streamingRoundtrip) {
467-
if (!Codec::supportsStreamingCompression(getCompressionKind())) {
470+
const auto codec = makeCodec();
471+
if (!codec->supportsStreamingCompression()) {
468472
return;
469473
}
470474

471475
for (auto dataSize : {0, 10, 10000, 100000}) {
472-
auto codec = makeCodec();
473476
checkStreamingRoundtrip(codec.get(), makeRandomData(dataSize));
474477
checkStreamingRoundtrip(codec.get(), makeCompressibleData(dataSize));
475478
}
476479
}
477480

478481
TEST_P(CodecTest, streamingDecompressorReuse) {
479-
if (!Codec::supportsStreamingCompression(getCompressionKind())) {
482+
const auto codec = makeCodec();
483+
if (!codec->supportsStreamingCompression()) {
480484
return;
481485
}
482486

483-
auto codec = makeCodec();
484487
const auto& decompressor = makeStreamingDecompressor(codec.get());
485488
checkStreamingRoundtrip(
486489
makeStreamingCompressor(codec.get()), decompressor, makeRandomData(100));
@@ -512,4 +515,16 @@ TEST(CodecLZ4HadoopTest, compatibility) {
512515
checkCodecRoundtrip(c1, c2, makeRandomData(dataSize));
513516
}
514517
}
518+
519+
TEST(CodecTestInvalid, invalidKind) {
520+
CompressionKind kind = CompressionKind_NONE;
521+
ASSERT_FALSE(Codec::isAvailable(kind));
522+
523+
VELOX_ASSERT_ERROR_STATUS(
524+
Codec::create(kind, kUseDefaultCompressionLevel).error(),
525+
StatusCode::kInvalid,
526+
fmt::format(
527+
"Support for codec '{}' is either not built or not implemented.",
528+
compressionKindToString(kind)));
529+
}
515530
} // namespace facebook::velox::common

0 commit comments

Comments
 (0)