Skip to content

Commit e02e7e0

Browse files
committed
sstable: always use target buffer for CompressAndChecksum
Currently, `CompressAndChecksum` can alias the original data buffer if we do not compress the data (either because compression is disabled or because the data was not compressible enough). In most cases we then write out the resulting data, which can mangle the buffer. This leads most callers to check whether the block is uncompressed and, if so, make a copy. This change moves the copy into `CompressAndChecksum`: we always use the dst buffer, even if we don't compress. This simplifies the callers and makes things less fragile.
1 parent 310fac7 commit e02e7e0

File tree

4 files changed: 22 additions (+22) and 82 deletions (-82).

sstable/block/compression.go

Lines changed: 19 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -212,18 +212,6 @@ func (b PhysicalBlock) Clone() PhysicalBlock {
212212
return PhysicalBlock{data: data, trailer: b.trailer}
213213
}
214214

215-
// CloneUsingBuf makes a copy of the block data, using the given slice if it has
216-
// enough capacity.
217-
func (b PhysicalBlock) CloneUsingBuf(buf []byte) (_ PhysicalBlock, newBuf []byte) {
218-
newBuf = append(buf[:0], b.data...)
219-
return PhysicalBlock{data: newBuf, trailer: b.trailer}, newBuf
220-
}
221-
222-
// IsCompressed returns true if the block is compressed.
223-
func (b *PhysicalBlock) IsCompressed() bool {
224-
return CompressionIndicator(b.trailer[0]) != NoCompressionIndicator
225-
}
226-
227215
// WriteTo writes the block (including its trailer) to the provided Writable. If
228216
// err == nil, n is the number of bytes successfully written to the Writable.
229217
//
@@ -247,35 +235,37 @@ func (b *PhysicalBlock) WriteTo(w objstorage.Writable) (n int, err error) {
247235
}
248236

249237
// CompressAndChecksum compresses and checksums the provided block, returning
250-
// the compressed block and its trailer. The dst argument is used for the
251-
// compressed payload if it's sufficiently large. If it's not, a new buffer is
252-
// allocated and *dst is updated to point to it.
238+
// the compressed block and its trailer. The result is appended to the dst
239+
// argument.
253240
//
254241
// If the compressed block is not sufficiently smaller than the original block,
255-
// the compressed payload is discarded and the original, uncompressed block is
256-
// used to avoid unnecessary decompression overhead at read time.
242+
// the compressed payload is discarded and the original, uncompressed block data
243+
// is used to avoid unnecessary decompression overhead at read time.
257244
func CompressAndChecksum(
258-
dst *[]byte, block []byte, compression Compression, checksummer *Checksummer,
245+
dst *[]byte, blockData []byte, compression Compression, checksummer *Checksummer,
259246
) PhysicalBlock {
247+
buf := (*dst)[:0]
260248
// Compress the buffer, discarding the result if the improvement isn't at
261249
// least 12.5%.
262250
algo := NoCompressionIndicator
263251
if compression != NoCompression {
264-
var compressed []byte
265-
algo, compressed = compress(compression, block, *dst)
266-
if algo != NoCompressionIndicator && cap(compressed) > cap(*dst) {
267-
*dst = compressed[:cap(compressed)]
268-
}
269-
if len(compressed) < len(block)-len(block)/8 {
270-
block = compressed
271-
} else {
252+
algo, buf = compress(compression, blockData, buf)
253+
if len(buf) >= len(blockData)-len(blockData)/8 {
272254
algo = NoCompressionIndicator
273255
}
274256
}
257+
if algo == NoCompressionIndicator {
258+
// We don't want to use the given blockData buffer directly: typically the
259+
// result will be written to disk and that can mangle the buffer, leading to
260+
// fragile code.
261+
buf = append(buf[:0], blockData...)
262+
}
263+
264+
*dst = buf
275265

276266
// Calculate the checksum.
277-
pb := PhysicalBlock{data: block}
278-
checksum := checksummer.Checksum(block, byte(algo))
267+
pb := PhysicalBlock{data: buf}
268+
checksum := checksummer.Checksum(buf, byte(algo))
279269
pb.trailer = MakeTrailer(byte(algo), checksum)
280270
return pb
281271
}
@@ -375,19 +365,8 @@ func (b *Buffer) CompressAndChecksum() (PhysicalBlock, *BufHandle) {
375365
// Grab a buffer to use as the destination for compression.
376366
compressedBuf := compressedBuffers.Get()
377367
pb := CompressAndChecksum(&compressedBuf.b, b.h.b, b.compression, &b.checksummer)
378-
if pb.IsCompressed() {
379-
// Compression was fruitful, and pb's data points into compressedBuf. We
380-
// can reuse b.Buffer because we've copied the compressed data.
381-
b.h.b = b.h.b[:0]
382-
return pb, compressedBuf
383-
}
384-
// Compression was not fruitful, and pb's data points into b.h. The
385-
// compressedBuf we retrieved from the pool isn't needed, but our b.h is.
386-
// Use the compressedBuf as the new b.h.
387-
pbHandle := b.h
388-
b.h = compressedBuf
389368
b.h.b = b.h.b[:0]
390-
return pb, pbHandle
369+
return pb, compressedBuf
391370
}
392371

393372
// SetCompression changes the compression algorithm used by CompressAndChecksum.

sstable/colblk_writer.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -667,14 +667,6 @@ func (w *RawColumnWriter) enqueueDataBlock(
667667
w.opts.Compression,
668668
&cb.blockBuf.checksummer,
669669
)
670-
if !cb.physical.IsCompressed() {
671-
// If the block isn't compressed, cb.physical's underlying data points
672-
// directly into a buffer owned by w.dataBlock. Clone it before passing
673-
// it to the write queue to be asynchronously written to disk.
674-
// TODO(jackson): Should we try to avoid this clone by tracking the
675-
// lifetime of the DataBlockWriters?
676-
cb.physical, cb.blockBuf.dataBuf = cb.physical.CloneUsingBuf(cb.blockBuf.dataBuf)
677-
}
678670
return w.enqueuePhysicalBlock(cb, separator)
679671
}
680672

@@ -1164,14 +1156,6 @@ func (w *RawColumnWriter) addDataBlock(b, sep []byte, bhp block.HandleWithProper
11641156
w.opts.Compression,
11651157
&cb.blockBuf.checksummer,
11661158
)
1167-
if !cb.physical.IsCompressed() {
1168-
// If the block isn't compressed, cb.physical's underlying data points
1169-
// directly into a buffer owned by w.dataBlock. Clone it before passing
1170-
// it to the write queue to be asynchronously written to disk.
1171-
// TODO(jackson): Should we try to avoid this clone by tracking the
1172-
// lifetime of the DataBlockWriters?
1173-
cb.physical, cb.blockBuf.dataBuf = cb.physical.CloneUsingBuf(cb.blockBuf.dataBuf)
1174-
}
11751159
if err := w.enqueuePhysicalBlock(cb, sep); err != nil {
11761160
return err
11771161
}

sstable/layout.go

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/cockroachdb/pebble/internal/base"
1919
"github.com/cockroachdb/pebble/internal/binfmt"
2020
"github.com/cockroachdb/pebble/internal/bytealloc"
21-
"github.com/cockroachdb/pebble/internal/invariants"
2221
"github.com/cockroachdb/pebble/internal/sstableinternal"
2322
"github.com/cockroachdb/pebble/internal/treeprinter"
2423
"github.com/cockroachdb/pebble/objstorage"
@@ -675,8 +674,7 @@ func (w *layoutWriter) Abort() {
675674
}
676675

677676
// WriteDataBlock constructs a trailer for the provided data block and writes
678-
// the block and trailer to the writer. It returns the block's handle. It can
679-
// mangle b.
677+
// the block and trailer to the writer. It returns the block's handle.
680678
func (w *layoutWriter) WriteDataBlock(b []byte, buf *blockBuf) (block.Handle, error) {
681679
return w.writeBlock(b, w.compression, buf)
682680
}
@@ -692,8 +690,6 @@ func (w *layoutWriter) WritePrecompressedDataBlock(blk block.PhysicalBlock) (blo
692690
// second-level) and writes the block and trailer to the writer. It remembers
693691
// the last-written index block's handle and adds it to the file's meta index
694692
// when the writer is finished.
695-
//
696-
// WriteIndexBlock can mangle b.
697693
func (w *layoutWriter) WriteIndexBlock(b []byte) (block.Handle, error) {
698694
h, err := w.writeBlock(b, w.compression, &w.buf)
699695
if err == nil {
@@ -716,17 +712,13 @@ func (w *layoutWriter) WriteFilterBlock(f filterWriter) (bh block.Handle, err er
716712
// WritePropertiesBlock constructs a trailer for the provided properties block
717713
// and writes the block and trailer to the writer. It automatically adds the
718714
// properties block to the file's meta index when the writer is finished.
719-
//
720-
// WritePropertiesBlock can mangle b.
721715
func (w *layoutWriter) WritePropertiesBlock(b []byte) (block.Handle, error) {
722716
return w.writeNamedBlock(b, metaPropertiesName)
723717
}
724718

725719
// WriteRangeKeyBlock constructs a trailer for the provided range key block and
726720
// writes the block and trailer to the writer. It automatically adds the range
727721
// key block to the file's meta index when the writer is finished.
728-
//
729-
// WriteRangeKeyBlock can mangle the block data.
730722
func (w *layoutWriter) WriteRangeKeyBlock(b []byte) (block.Handle, error) {
731723
return w.writeNamedBlock(b, metaRangeKeyName)
732724
}
@@ -735,13 +727,10 @@ func (w *layoutWriter) WriteRangeKeyBlock(b []byte) (block.Handle, error) {
735727
// block and writes the block and trailer to the writer. It automatically adds
736728
// the range deletion block to the file's meta index when the writer is
737729
// finished.
738-
//
739-
// WriteRangeDeletionBlock can mangle the block data.
740730
func (w *layoutWriter) WriteRangeDeletionBlock(b []byte) (block.Handle, error) {
741731
return w.writeNamedBlock(b, metaRangeDelV2Name)
742732
}
743733

744-
// writeNamedBlock can mangle the block data.
745734
func (w *layoutWriter) writeNamedBlock(b []byte, name string) (bh block.Handle, err error) {
746735
bh, err = w.writeBlock(b, block.NoCompression, &w.buf)
747736
if err == nil {
@@ -770,20 +759,12 @@ func (w *layoutWriter) WriteValueIndexBlock(
770759
return h, nil
771760
}
772761

773-
// writeBlock checksums, compresses, and writes out a block. It can mangle b.
762+
// writeBlock checksums, compresses, and writes out a block.
774763
func (w *layoutWriter) writeBlock(
775764
b []byte, compression block.Compression, buf *blockBuf,
776765
) (block.Handle, error) {
777766
pb := block.CompressAndChecksum(&buf.dataBuf, b, compression, &buf.checksummer)
778767
h, err := w.writePrecompressedBlock(pb)
779-
// This method is allowed to mangle b, but that only happens when the block
780-
// data is not compressible. Mangle it anyway in invariant builds to catch
781-
// callers that don't handle this.
782-
if invariants.Enabled && invariants.Sometimes(1) {
783-
for i := range b {
784-
b[i] = 0xFF
785-
}
786-
}
787768
return h, err
788769
}
789770

sstable/rowblk_writer.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,13 +1968,9 @@ func (w *RawRowWriter) addDataBlock(b, sep []byte, bhp block.HandleWithPropertie
19681968
w.layout.compression,
19691969
&blockBuf.checksummer,
19701970
)
1971-
if !pb.IsCompressed() {
1972-
// If the block isn't compressed, pb's underlying data points
1973-
// directly b. Clone it before writing it, as writing can mangle the buffer.
1974-
pb, blockBuf.dataBuf = pb.CloneUsingBuf(blockBuf.dataBuf)
1975-
}
19761971

19771972
// layout.WriteDataBlock keeps layout.offset up-to-date for us.
1973+
// Note that this can mangle the pb data.
19781974
bh, err := w.layout.writePrecompressedBlock(pb)
19791975
if err != nil {
19801976
return err

0 commit comments

Comments
 (0)