Skip to content

Commit

Permalink
db,objstorage: add FileTypeBlob
Browse files Browse the repository at this point in the history
Introduce a new file type for blob files. These files will contain the values
for keys stored in separate sstables. This commit introduces the new file type
and plumbs some simple machinery, including the deletion of obsolete blob
files, which is subject to the same deletion pacing as sstables.

Informs #112.
  • Loading branch information
jbowens committed Jan 30, 2025
1 parent dcd7bef commit 67eb5a2
Show file tree
Hide file tree
Showing 15 changed files with 223 additions and 81 deletions.
4 changes: 2 additions & 2 deletions compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -2320,7 +2320,7 @@ func (d *DB) cleanupVersionEdit(ve *versionEdit) {
// Add this file to zombie tables as well, as the versionSet
// asserts on whether every obsolete file was at one point
// marked zombie.
d.mu.versions.zombieTables[obsoleteFiles[i].DiskFileNum] = tableInfo{
d.mu.versions.zombieTables[obsoleteFiles[i].DiskFileNum] = objectInfo{
fileInfo: fileInfo{
FileNum: obsoleteFiles[i].DiskFileNum,
FileSize: obsoleteFiles[i].Size,
Expand Down Expand Up @@ -2936,7 +2936,7 @@ func (d *DB) runCompaction(
// Add this file to zombie tables as well, as the versionSet
// asserts on whether every obsolete file was at one point
// marked zombie.
d.mu.versions.zombieTables[backing.DiskFileNum] = tableInfo{
d.mu.versions.zombieTables[backing.DiskFileNum] = objectInfo{
fileInfo: fileInfo{
FileNum: backing.DiskFileNum,
FileSize: backing.Size,
Expand Down
2 changes: 1 addition & 1 deletion internal/base/cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ var _ NeedsFileContents = ArchiveCleaner{}
// also write to the secondary. We should consider archiving to the primary.
func (ArchiveCleaner) Clean(fs vfs.FS, fileType FileType, path string) error {
switch fileType {
case FileTypeLog, FileTypeManifest, FileTypeTable:
case FileTypeLog, FileTypeManifest, FileTypeTable, FileTypeBlob:
destDir := fs.PathJoin(fs.PathDir(path), "archive")

if err := fs.MkdirAll(destDir, 0755); err != nil {
Expand Down
41 changes: 40 additions & 1 deletion internal/base/filenames.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,44 @@ const (
FileTypeOptions
FileTypeOldTemp
FileTypeTemp
FileTypeBlob
)

// fileTypeStrings holds the human-readable name of each FileType, indexed by
// the FileType's numeric value. FileTypeFromName searches this table to map a
// name back to its FileType, and FileType.SafeFormat indexes into it when
// formatting; a value beyond the end of the table formats as "unknown".
var fileTypeStrings = [...]string{
FileTypeLog: "log",
FileTypeLock: "lock",
FileTypeTable: "sstable",
FileTypeManifest: "manifest",
FileTypeOptions: "options",
FileTypeOldTemp: "old-temp",
FileTypeTemp: "temp",
FileTypeBlob: "blob",
}

// FileTypeFromName returns the FileType whose string representation (as
// recorded in fileTypeStrings) is exactly name. The first matching entry wins.
//
// It panics with an "unknown file type" message if no entry matches.
func FileTypeFromName(name string) FileType {
	for idx := range fileTypeStrings {
		if fileTypeStrings[idx] != name {
			continue
		}
		// The table is indexed by FileType value, so the matching index is
		// the FileType itself.
		return FileType(idx)
	}
	panic(fmt.Sprintf("unknown file type: %q", name))
}

// SafeFormat implements redact.SafeFormatter. It prints the file type's name
// from fileTypeStrings as a redaction-safe string, or "unknown" when ft falls
// outside the bounds of that table.
func (ft FileType) SafeFormat(w redact.SafePrinter, _ rune) {
	// Start from the fallback and override it only for in-range values.
	name := redact.SafeString("unknown")
	if ft >= 0 && int(ft) < len(fileTypeStrings) {
		name = redact.SafeString(fileTypeStrings[ft])
	}
	w.Print(name)
}

// String implements fmt.Stringer by passing ft to redact.StringWithoutMarkers
// and returning the resulting string.
func (ft FileType) String() string {
	formatted := redact.StringWithoutMarkers(ft)
	return formatted
}

// MakeFilename builds a filename from components.
func MakeFilename(fileType FileType, dfn DiskFileNum) string {
switch fileType {
Expand All @@ -80,6 +116,8 @@ func MakeFilename(fileType FileType, dfn DiskFileNum) string {
return fmt.Sprintf("CURRENT.%s.dbtmp", dfn)
case FileTypeTemp:
return fmt.Sprintf("temporary.%s.dbtmp", dfn)
case FileTypeBlob:
return fmt.Sprintf("%s.blob", dfn)
}
panic("unreachable")
}
Expand Down Expand Up @@ -130,10 +168,11 @@ func ParseFilename(fs vfs.FS, filename string) (fileType FileType, dfn DiskFileN
if !ok {
break
}
// TODO(sumeer): stop handling FileTypeLog in this function.
switch filename[i+1:] {
case "sst":
return FileTypeTable, dfn, true
case "blob":
return FileTypeBlob, dfn, true
}
}
return 0, dfn, false
Expand Down
5 changes: 5 additions & 0 deletions internal/base/filenames_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ func TestParseFilename(t *testing.T) {
"CURRENT.dbtmp": false,
"CURRENT.123456.dbtmp": true,
"temporary.123456.dbtmp": true,
"foo.blob": false,
"000000.blob": true,
"000001.blob": true,
"935203523.blob": true,
}
fs := vfs.NewMem()
for tc, want := range testCases {
Expand All @@ -65,6 +69,7 @@ func TestFilenameRoundTrip(t *testing.T) {
FileTypeOptions: true,
FileTypeOldTemp: true,
FileTypeTemp: true,
FileTypeBlob: true,
// NB: Log filenames are created and parsed elsewhere in the wal/
// package.
// FileTypeLog: true,
Expand Down
10 changes: 5 additions & 5 deletions objstorage/objstorageprovider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,13 +488,13 @@ func (p *provider) Lookup(
if !ok {
return objstorage.ObjectMetadata{}, errors.Wrapf(
os.ErrNotExist,
"file %s (type %d) unknown to the objstorage provider",
fileNum, errors.Safe(fileType),
"file %s (type %s) unknown to the objstorage provider",
fileNum, fileType,
)
}
if meta.FileType != fileType {
return objstorage.ObjectMetadata{}, base.AssertionFailedf(
"file %s type mismatch (known type %d, expected type %d)",
"file %s type mismatch (known type %s, expected type %s)",
fileNum, errors.Safe(meta.FileType), errors.Safe(fileType),
)
}
Expand Down Expand Up @@ -549,8 +549,8 @@ func (p *provider) CheckpointState(
if _, ok := p.mu.knownObjects[fileNums[i]]; !ok {
return errors.Wrapf(
os.ErrNotExist,
"file %s (type %d) unknown to the objstorage provider",
fileNums[i], errors.Safe(fileType),
"file %s (type %s) unknown to the objstorage provider",
fileNums[i], fileType,
)
}
// Prevent this object from deletion, at least for the life of this instance.
Expand Down
38 changes: 28 additions & 10 deletions objstorage/objstorageprovider/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,27 @@ func TestProvider(t *testing.T) {
opts := objstorage.CreateOptions{
SharedCleanupMethod: objstorage.SharedRefTracking,
}
ft := base.FileTypeTable
if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "file-type" {
ft = base.FileTypeFromName(d.CmdArgs[0].FirstVal(t))
d.CmdArgs = d.CmdArgs[1:]
}
if len(d.CmdArgs) == 5 && d.CmdArgs[4].Key == "no-ref-tracking" {
d.CmdArgs = d.CmdArgs[:4]
opts.SharedCleanupMethod = objstorage.SharedNoCleanup
}
var fileNum base.DiskFileNum
var typ string
var salt, size int
scanArgs("<file-num> <local|shared> <salt> <size> [no-ref-tracking]", &fileNum, &typ, &salt, &size)
scanArgs("[file-type=sstable|blob] <file-num> <local|shared> <salt> <size> [no-ref-tracking]", &fileNum, &typ, &salt, &size)
switch typ {
case "local":
case "shared":
opts.PreferSharedStorage = true
default:
d.Fatalf(t, "'%s' should be 'local' or 'shared'", typ)
}
w, _, err := curProvider.Create(ctx, base.FileTypeTable, fileNum, opts)
w, _, err := curProvider.Create(ctx, ft, fileNum, opts)
if err != nil {
return err.Error()
}
Expand All @@ -141,14 +146,19 @@ func TestProvider(t *testing.T) {
opts := objstorage.CreateOptions{
SharedCleanupMethod: objstorage.SharedRefTracking,
}
ft := base.FileTypeTable
if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "file-type" {
ft = base.FileTypeFromName(d.CmdArgs[0].FirstVal(t))
d.CmdArgs = d.CmdArgs[1:]
}
if len(d.CmdArgs) == 5 && d.CmdArgs[4].Key == "no-ref-tracking" {
d.CmdArgs = d.CmdArgs[:4]
opts.SharedCleanupMethod = objstorage.SharedNoCleanup
}
var fileNum base.DiskFileNum
var typ string
var salt, size int
scanArgs("<file-num> <local|shared> <salt> <size> [no-ref-tracking]", &fileNum, &typ, &salt, &size)
scanArgs("[file-type=sstable|blob] <file-num> <local|shared> <salt> <size> [no-ref-tracking]", &fileNum, &typ, &salt, &size)
switch typ {
case "local":
case "shared":
Expand All @@ -168,9 +178,7 @@ func TestProvider(t *testing.T) {
require.NoError(t, err)
require.NoError(t, f.Close())

_, err = curProvider.LinkOrCopyFromLocal(
ctx, fs, tmpFilename, base.FileTypeTable, fileNum, opts,
)
_, err = curProvider.LinkOrCopyFromLocal(ctx, fs, tmpFilename, ft, fileNum, opts)
require.NoError(t, err)
return log.String()

Expand All @@ -195,10 +203,15 @@ func TestProvider(t *testing.T) {
}
}

ft := base.FileTypeTable
if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "file-type" {
ft = base.FileTypeFromName(d.CmdArgs[0].FirstVal(t))
d.CmdArgs = d.CmdArgs[1:]
}
d.CmdArgs = d.CmdArgs[:1]
var fileNum base.DiskFileNum
scanArgs("<file-num> [for-compaction] [readahead|speculative-overhead=off|sys-readahead|fadvise-sequential]", &fileNum)
r, err := curProvider.OpenForReading(ctx, base.FileTypeTable, fileNum, objstorage.OpenOptions{})
scanArgs("[file-type=sstable|blob] <file-num> [for-compaction] [readahead|speculative-overhead=off|sys-readahead|fadvise-sequential]", &fileNum)
r, err := curProvider.OpenForReading(ctx, ft, fileNum, objstorage.OpenOptions{})
if err != nil {
return err.Error()
}
Expand Down Expand Up @@ -231,9 +244,14 @@ func TestProvider(t *testing.T) {
return log.String()

case "remove":
ft := base.FileTypeTable
if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "file-type" {
ft = base.FileTypeFromName(d.CmdArgs[0].FirstVal(t))
d.CmdArgs = d.CmdArgs[1:]
}
var fileNum base.DiskFileNum
scanArgs("<file-num>", &fileNum)
if err := curProvider.Remove(base.FileTypeTable, fileNum); err != nil {
scanArgs("[file-type=sstable|blob] <file-num>", &fileNum)
if err := curProvider.Remove(ft, fileNum); err != nil {
return err.Error()
}
return log.String()
Expand Down
34 changes: 33 additions & 1 deletion objstorage/objstorageprovider/testdata/provider/local
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ list

read 1
----
file 000001 (type 2) unknown to the objstorage provider: file does not exist
file 000001 (type sstable) unknown to the objstorage provider: file does not exist

link-or-copy 3 local 3 100
----
Expand Down Expand Up @@ -94,6 +94,38 @@ size: 1234
0 1234: ok (salt 4)
<local fs> close: p0/000004.sst

create file-type=blob 000005 local 1 4096
----
<local fs> create: p0/000005.blob
<local fs> sync-data: p0/000005.blob
<local fs> close: p0/000005.blob

read file-type=blob 000005
0 1024
2048 1024
----
<local fs> open: p0/000005.blob (options: *vfs.randomReadsOption)
size: 4096
<local fs> read-at(0, 1024): p0/000005.blob
0 1024: ok (salt 1)
<local fs> read-at(2048, 1024): p0/000005.blob
2048 1024: ok (salt 1)
<local fs> close: p0/000005.blob

link-or-copy file-type=blob 000006 shared 6 1234
----
<local fs> create: temp-file-3
<local fs> close: temp-file-3
<local fs> link: temp-file-3 -> p0/000006.blob

list
----
000002 -> p0/000002.sst
000003 -> p0/000003.sst
000004 -> p0/000004.sst
000005 -> p0/000005.blob
000006 -> p0/000006.blob

close
----
<local fs> sync: p0
Expand Down
13 changes: 8 additions & 5 deletions objstorage/objstorageprovider/vfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,15 @@ func (p *provider) vfsInit() error {

for _, filename := range listing {
fileType, fileNum, ok := base.ParseFilename(p.st.FS, filename)
if ok && fileType == base.FileTypeTable {
o := objstorage.ObjectMetadata{
FileType: fileType,
DiskFileNum: fileNum,
if ok {
switch fileType {
case base.FileTypeTable, base.FileTypeBlob:
o := objstorage.ObjectMetadata{
FileType: fileType,
DiskFileNum: fileNum,
}
p.mu.knownObjects[o.DiskFileNum] = o
}
p.mu.knownObjects[o.DiskFileNum] = o
}
}
return nil
Expand Down
Loading

0 comments on commit 67eb5a2

Please sign in to comment.