Skip to content

Commit aab405e

Browse files
committed
store metadata in the block
1 parent 3ab3fb0 commit aab405e

File tree

6 files changed

+104
-40
lines changed

6 files changed

+104
-40
lines changed

pkg/experiment/block/compaction.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,10 @@ func newBlockCompaction(
182182
}
183183

184184
func (b *CompactionPlan) Compact(ctx context.Context, dst objstore.Bucket, tmpdir string) (m *metastorev1.BlockMeta, err error) {
185-
w := NewBlockWriter(dst, b.path, tmpdir)
185+
w, err := NewBlockWriter(dst, b.path, tmpdir)
186+
if err != nil {
187+
return nil, err
188+
}
186189
defer func() {
187190
err = multierror.New(err, w.Close()).Err()
188191
}()
@@ -198,7 +201,11 @@ func (b *CompactionPlan) Compact(ctx context.Context, dst objstore.Bucket, tmpdi
198201
return nil, fmt.Errorf("writing tenant index: %w", err)
199202
}
200203
b.meta.StringTable = b.strings.Strings
201-
if err = w.Flush(ctx); err != nil {
204+
b.meta.MetadataOffset = w.Offset()
205+
if err = metadata.Encode(w, b.meta); err != nil {
206+
return nil, fmt.Errorf("writing metadata: %w", err)
207+
}
208+
if err = w.Upload(ctx); err != nil {
202209
return nil, fmt.Errorf("flushing block writer: %w", err)
203210
}
204211
b.meta.Size = w.Offset()

pkg/experiment/block/metadata/metadata.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
package metadata
22

33
import (
4+
"encoding/binary"
5+
"errors"
6+
"fmt"
7+
"hash/crc32"
8+
"io"
49
"sync"
510
"time"
611

@@ -10,6 +15,8 @@ import (
1015
"github.com/grafana/pyroscope/pkg/iter"
1116
)
1217

18+
var ErrMetadataInvalid = errors.New("metadata: invalid metadata")
19+
1320
func Tenant(md *metastorev1.BlockMeta) string {
1421
if md.Tenant <= 0 || int(md.Tenant) >= len(md.StringTable) {
1522
return ""
@@ -148,3 +155,39 @@ func OpenStringTable(src *metastorev1.BlockMeta) *StringTable {
148155
}
149156
return t
150157
}
158+
159+
var castagnoli = crc32.MakeTable(crc32.Castagnoli)
160+
161+
// Encode writes the metadata to the writer in the following format:
162+
//
163+
// raw | protobuf-encoded metadata
164+
// be_uint32 | size of the raw metadata
165+
// be_uint32 | CRC32 of the raw metadata and size
166+
func Encode(w io.Writer, md *metastorev1.BlockMeta) error {
167+
ww := crc32.New(castagnoli)
168+
b, _ := md.MarshalVT()
169+
n, err := w.Write(b)
170+
if err != nil {
171+
return err
172+
}
173+
if err = binary.Write(w, binary.BigEndian, uint32(n)); err != nil {
174+
return err
175+
}
176+
return binary.Write(w, binary.BigEndian, ww.Sum32())
177+
}
178+
179+
// Decode metadata encoded with Encode.
180+
func Decode(b []byte, md *metastorev1.BlockMeta) error {
181+
if len(b) <= 8 {
182+
return fmt.Errorf("%w: invalid size", ErrMetadataInvalid)
183+
}
184+
crc := binary.BigEndian.Uint32(b[len(b)-4:])
185+
size := binary.BigEndian.Uint32(b[len(b)-8 : len(b)-4])
186+
if size != uint32(len(b)-8) {
187+
return fmt.Errorf("%w: invalid size", ErrMetadataInvalid)
188+
}
189+
if crc32.Checksum(b[:len(b)-4], castagnoli) != crc {
190+
return fmt.Errorf("%w: invalid CRC", ErrMetadataInvalid)
191+
}
192+
return md.UnmarshalVT(b[:len(b)-8])
193+
}

pkg/experiment/block/object.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ func (obj *Object) ReadMetadata(ctx context.Context) error {
213213
return fmt.Errorf("reading block metadata %s: %w", obj.path, err)
214214
}
215215
var meta metastorev1.BlockMeta
216-
if err := meta.UnmarshalVT(buf.B); err != nil {
216+
if err := metadata.Decode(buf.B, &meta); err != nil {
217217
return fmt.Errorf("decoding block metadata %s: %w", obj.path, err)
218218
}
219219
obj.meta = &meta

pkg/experiment/block/writer.go

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,92 @@
11
package block
22

33
import (
4+
"bufio"
45
"context"
56
"io"
67
"os"
78
"path/filepath"
89
"strconv"
910

11+
"github.com/grafana/dskit/multierror"
12+
1013
"github.com/grafana/pyroscope/pkg/objstore"
1114
"github.com/grafana/pyroscope/pkg/util/bufferpool"
1215
)
1316

1417
// TODO(kolesnikovae):
15-
// - Avoid staging files where possible.
16-
// - If stage files are required, at least avoid
17-
// recreating them for each tenant dataset.
18-
// - objstore.Bucket should provide object writer.
18+
// * Get rid of the staging files.
19+
// * Pipe upload reader.
1920

2021
type Writer struct {
2122
storage objstore.Bucket
2223
path string
2324
local string
2425
off uint64
25-
w *os.File
26+
w *bufio.Writer
27+
f *os.File
2628

2729
tmp string
2830
n int
2931
cur string
3032

33+
// Used by CopyBuffer when copying
34+
// data from staging files.
3135
buf *bufferpool.Buffer
3236
}
3337

34-
func NewBlockWriter(storage objstore.Bucket, path string, tmp string) *Writer {
35-
b := &Writer{
38+
func NewBlockWriter(storage objstore.Bucket, path string, tmp string) (*Writer, error) {
39+
w := &Writer{
3640
storage: storage,
3741
path: path,
3842
tmp: tmp,
3943
local: filepath.Join(tmp, FileNameDataObject),
4044
buf: bufferpool.GetBuffer(compactionCopyBufferSize),
4145
}
42-
return b
46+
if err := w.open(); err != nil {
47+
return nil, err
48+
}
49+
return w, nil
50+
}
51+
52+
func (b *Writer) open() (err error) {
53+
if b.f, err = os.Create(b.local); err != nil {
54+
return err
55+
}
56+
b.w = bufio.NewWriter(b.f)
57+
return nil
58+
}
59+
60+
func (b *Writer) Close() error {
61+
var merr multierror.MultiError
62+
if b.w != nil {
63+
merr.Add(b.w.Flush())
64+
b.w = nil
65+
}
66+
if b.buf != nil {
67+
bufferpool.Put(b.buf)
68+
b.buf = nil
69+
}
70+
if b.f != nil {
71+
merr.Add(b.f.Close())
72+
b.f = nil
73+
}
74+
return merr.Err()
4375
}
4476

77+
func (b *Writer) Offset() uint64 { return b.off }
78+
4579
// Dir returns path to the new temp directory.
4680
func (b *Writer) Dir() string {
4781
b.n++
4882
b.cur = filepath.Join(b.tmp, strconv.Itoa(b.n))
4983
return b.cur
5084
}
5185

52-
// ReadFromFiles located in the directory Dir.
53-
func (b *Writer) ReadFromFiles(files ...string) (toc []uint64, err error) {
54-
toc = make([]uint64, len(files))
55-
for i := range files {
56-
toc[i] = b.off
57-
if err = b.ReadFromFile(files[i]); err != nil {
58-
break
59-
}
60-
}
61-
return toc, err
62-
}
86+
func (b *Writer) Write(p []byte) (n int, err error) { return b.w.Write(p) }
6387

6488
// ReadFromFile located in the directory Dir.
6589
func (b *Writer) ReadFromFile(file string) (err error) {
66-
if b.w == nil {
67-
if b.w, err = os.Create(b.local); err != nil {
68-
return err
69-
}
70-
}
7190
f, err := os.Open(filepath.Join(b.cur, file))
7291
if err != nil {
7392
return err
@@ -86,10 +105,8 @@ func (b *Writer) ReadFrom(r io.Reader) (n int64, err error) {
86105
return n, err
87106
}
88107

89-
func (b *Writer) Offset() uint64 { return b.off }
90-
91-
func (b *Writer) Flush(ctx context.Context) error {
92-
if err := b.w.Close(); err != nil {
108+
func (b *Writer) Upload(ctx context.Context) error {
109+
if err := b.Close(); err != nil {
93110
return err
94111
}
95112
b.w = nil
@@ -100,13 +117,5 @@ func (b *Writer) Flush(ctx context.Context) error {
100117
defer func() {
101118
_ = f.Close()
102119
}()
103-
return b.storage.Upload(ctx, b.path, f)
104-
}
105-
106-
func (b *Writer) Close() error {
107-
bufferpool.Put(b.buf)
108-
if b.w != nil {
109-
return b.w.Close()
110-
}
111-
return nil
120+
return b.storage.Upload(ctx, b.path, bufio.NewReader(f))
112121
}

pkg/experiment/ingester/segment.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ func (s *segment) flushBlock(stream flushStream) ([]byte, *metastorev1.BlockMeta
281281
}
282282

283283
meta.StringTable = stringTable.Strings
284+
meta.MetadataOffset = uint64(w.offset)
285+
if err := metadata.Encode(w, meta); err != nil {
286+
return nil, nil, fmt.Errorf("failed to encode metadata: %w", err)
287+
}
284288
meta.Size = uint64(w.offset)
285289
s.debuginfo.flushBlockDuration = time.Since(start)
286290
return blockFile.Bytes(), meta, nil

pkg/experiment/metastore/index/query.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ func cloneBlockMetadataForQuery(b *metastorev1.BlockMeta) *metastorev1.BlockMeta
182182

183183
func cloneDatasetMetadataForQuery(ds *metastorev1.Dataset) *metastorev1.Dataset {
184184
ls := ds.Labels
185+
// TODO: Preserve __labels__
185186
ds.Labels = nil
186187
c := ds.CloneVT()
187188
ds.Labels = ls

0 commit comments

Comments
 (0)