Skip to content

Commit 310fac7

Browse files
committed
cockroachkvs: always use non-empty index separators
Adapt the cockroachkv Successor implementation to always return non-empty keys with a version length byte. Previously it was possible for Successor to return an empty slice if passed an empty slice, resulting in an empty string index separator. When combined with a synthetic prefix, this produced an invalid key: a non-empty string without a version length byte. Additionally, document that Successor may receive an empty key as an operand. Document and enforce under invariants that Successor must never receive empty keys as operands.
1 parent 7b5e23d commit 310fac7

File tree

6 files changed

+149
-42
lines changed

6 files changed

+149
-42
lines changed

cockroachkvs/cockroachkvs.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ var Comparer = base.Comparer{
7070
FormatKey: FormatKey,
7171

7272
Separator: func(dst, a, b []byte) []byte {
73+
if len(a) == 0 || len(b) == 0 {
74+
panic(errors.AssertionFailedf("cannot separate empty keys"))
75+
}
7376
aKey, ok := getKeyPartFromEngineKey(a)
7477
if !ok {
7578
return append(dst, a...)
@@ -94,6 +97,13 @@ var Comparer = base.Comparer{
9497
},
9598

9699
Successor: func(dst, a []byte) []byte {
100+
// If a is empty, the key consisting of just a single 0x00 byte is the
101+
// smallest non-empty key ≥ a. We don't return an empty key because
102+
// empty keys are problematic when combined with a synthetic prefix
103+
// (they form a key that has no trailing sentinel byte).
104+
if len(a) == 0 {
105+
return append(dst, 0x00)
106+
}
97107
aKey, ok := getKeyPartFromEngineKey(a)
98108
if !ok {
99109
return append(dst, a...)
@@ -1035,6 +1045,10 @@ func FormatKey(key []byte) fmt.Formatter {
10351045
type formatKey []byte
10361046

10371047
func (fk formatKey) Format(f fmt.State, c rune) {
1048+
if len(fk) == 0 {
1049+
fmt.Fprint(f, "<empty>")
1050+
return
1051+
}
10381052
i := Split(fk)
10391053
fmt.Fprintf(f, "%s", []byte(fk)[:i-1])
10401054
if i == len(fk) {
@@ -1083,7 +1097,15 @@ func (fk formatKeySuffix) Format(f fmt.State, c rune) {
10831097
// c. <WALLTIME-SECONDS>,<LOGICAL>
10841098
// d. <WALLTIME-SECONDS>.<WALLTIME-NANOS>,<LOGICAL>
10851099
// 2. A lock table key in the format: <STRENGTH-BYTE>,<TXNUUID>
1100+
// 3. A raw hex-encoded string prefixed with "hex:".
10861101
func ParseFormattedKey(formattedKey string) []byte {
1102+
if strings.HasPrefix(formattedKey, "hex:") {
1103+
decoded, err := hex.DecodeString(formattedKey[4:])
1104+
if err != nil {
1105+
panic(errors.Newf("invalid hex-encoded key: %w", err))
1106+
}
1107+
return decoded
1108+
}
10871109
i := strings.IndexByte(formattedKey, '@')
10881110
if i == -1 {
10891111
// Suffix-less.

cockroachkvs/cockroachkvs_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,46 @@ func TestComparer(t *testing.T) {
7979
}
8080
}
8181

82+
func TestComparerFuncs(t *testing.T) {
83+
defer leaktest.AfterTest(t)()
84+
85+
var buf bytes.Buffer
86+
datadriven.RunTest(t, "testdata/comparer_funcs",
87+
func(t *testing.T, td *datadriven.TestData) string {
88+
buf.Reset()
89+
switch td.Cmd {
90+
case "separator":
91+
var keys [][]byte
92+
var dst []byte
93+
for _, line := range crstrings.Lines(td.Input) {
94+
keys = keys[:0]
95+
for _, formattedKey := range strings.Fields(line) {
96+
k := ParseFormattedKey(formattedKey)
97+
keys = append(keys, k)
98+
}
99+
if len(keys) != 2 {
100+
t.Fatalf("expected two keys, but found %d", len(keys))
101+
}
102+
dst = Comparer.Separator(dst[:0], keys[0], keys[1])
103+
fmt.Fprintf(&buf, "Separator(%q [%x], %q [%x]) = %q [%x]\n",
104+
FormatKey(keys[0]), keys[0], FormatKey(keys[1]), keys[1], FormatKey(dst), dst)
105+
}
106+
return buf.String()
107+
case "successor":
108+
var dst []byte
109+
for _, line := range crstrings.Lines(td.Input) {
110+
k := ParseFormattedKey(line)
111+
dst = Comparer.Successor(dst[:0], k)
112+
fmt.Fprintf(&buf, "Successor(%q [%x]) = %q [%x]\n",
113+
FormatKey(k), k, FormatKey(dst), dst)
114+
}
115+
return buf.String()
116+
default:
117+
panic(fmt.Sprintf("unknown command: %s", td.Cmd))
118+
}
119+
})
120+
}
121+
82122
func TestKeySchema_KeyWriter(t *testing.T) {
83123
defer leaktest.AfterTest(t)()
84124

cockroachkvs/testdata/comparer_funcs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
successor
2+
foo
3+
bar
4+
foo@1000000,0
5+
6+
foo@1000000,8
7+
8+
foo@01,cf379e85-6371-42e6-acc8-ed5259fa177a
9+
----
10+
Successor(foo [666f6f00]) = g [6700]
11+
Successor(bar [62617200]) = c [6300]
12+
Successor(foo@1000000,0 [666f6f0000038d7ea4c6800009]) = g [6700]
13+
Successor([email protected],0 [666f6f0000038d7ea4c7480c09]) = g [6700]
14+
Successor(foo@1000000,8 [666f6f0000038d7ea4c68000000000080d]) = g [6700]
15+
Successor([email protected],7 [666f6f0000038d7ea4c7480c000000070d]) = g [6700]
16+
Successor(foo@01,cf379e85-6371-42e6-acc8-ed5259fa177a [666f6f0001cf379e85637142e6acc8ed5259fa177a12]) = g [6700]
17+
18+
# The successor for an empty string should be a key consisting only of the 0x00
19+
# byte.
20+
successor
21+
hex:
22+
----
23+
Successor(<empty> []) = [00]
24+
25+
separator
26+
bar foo
27+
foo@1000000,0 foo@1000000,0
28+
foo@1000000,1 foo@1000000,0
29+
foo@01,a65a118c-5dcb-452b-bd82-7dd17e121138 foo@01,cf379e85-6371-42e6-acc8-ed5259fa177a
30+
fax@01,a65a118c-5dcb-452b-bd82-7dd17e121138 foo@01,cf379e85-6371-42e6-acc8-ed5259fa177a
31+
----
32+
Separator(bar [62617200], foo [666f6f00]) = c [6300]
33+
Separator(foo@1000000,0 [666f6f0000038d7ea4c6800009], foo@1000000,0 [666f6f0000038d7ea4c6800009]) = foo@1000000,0 [666f6f0000038d7ea4c6800009]
34+
Separator(foo@1000000,1 [666f6f0000038d7ea4c68000000000010d], foo@1000000,0 [666f6f0000038d7ea4c6800009]) = foo@1000000,1 [666f6f0000038d7ea4c68000000000010d]
35+
Separator(foo@01,a65a118c-5dcb-452b-bd82-7dd17e121138 [666f6f0001a65a118c5dcb452bbd827dd17e12113812], foo@01,cf379e85-6371-42e6-acc8-ed5259fa177a [666f6f0001cf379e85637142e6acc8ed5259fa177a12]) = foo@01,a65a118c-5dcb-452b-bd82-7dd17e121138 [666f6f0001a65a118c5dcb452bbd827dd17e12113812]
36+
Separator(fax@01,a65a118c-5dcb-452b-bd82-7dd17e121138 [6661780001a65a118c5dcb452bbd827dd17e12113812], foo@01,cf379e85-6371-42e6-acc8-ed5259fa177a [666f6f0001cf379e85637142e6acc8ed5259fa177a12]) = fb [666200]

internal/base/comparer.go

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"strconv"
1313
"unicode/utf8"
1414

15+
"github.com/cockroachdb/crlib/crbytes"
1516
"github.com/cockroachdb/errors"
1617
)
1718

@@ -124,11 +125,18 @@ type FormatValue func(key, value []byte) fmt.Formatter
124125
//
125126
// For example, if a and b are the []byte equivalents of the strings "black" and
126127
// "blue", then the function may append "blb" to dst.
128+
//
129+
// Callers must guarantee that len(a) > 0 and len(b) > 0.
127130
type Separator func(dst, a, b []byte) []byte
128131

129132
// Successor appends to dst a shortened key k given a key a such that
130133
// Compare(a, k) <= 0. A simple implementation may return a unchanged.
131134
// The appended key k must be valid to pass to Compare.
135+
//
136+
// The parameter a may be an empty slice even if an empty slice was never
137+
// committed to Pebble and is not a valid key representation. If a is the empty
138+
// slice, Successor must return a valid key but otherwise has no other
139+
// constraints.
132140
type Successor func(dst, a []byte) []byte
133141

134142
// ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
@@ -312,14 +320,15 @@ var DefaultComparer = &Comparer{
312320
FormatKey: DefaultFormatter,
313321

314322
Separator: func(dst, a, b []byte) []byte {
315-
i, n := SharedPrefixLen(a, b), len(dst)
323+
if len(a) == 0 || len(b) == 0 {
324+
panic(errors.AssertionFailedf("empty keys"))
325+
}
326+
327+
i := crbytes.CommonPrefix(a, b)
328+
n := len(dst)
316329
dst = append(dst, a...)
317330

318-
min := len(a)
319-
if min > len(b) {
320-
min = len(b)
321-
}
322-
if i >= min {
331+
if i == len(a) || i == len(b) {
323332
// Do not shorten if one string is a prefix of the other.
324333
return dst
325334
}
@@ -366,25 +375,6 @@ var DefaultComparer = &Comparer{
366375
Name: "leveldb.BytewiseComparator",
367376
}
368377

369-
// SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
370-
// This function can be useful in implementing the Comparer interface.
371-
func SharedPrefixLen(a, b []byte) int {
372-
i, n := 0, len(a)
373-
if n > len(b) {
374-
n = len(b)
375-
}
376-
asUint64 := func(c []byte, i int) uint64 {
377-
return binary.LittleEndian.Uint64(c[i:])
378-
}
379-
for i < n-7 && asUint64(a, i) == asUint64(b, i) {
380-
i += 8
381-
}
382-
for i < n && a[i] == b[i] {
383-
i++
384-
}
385-
return i
386-
}
387-
388378
// MinUserKey returns the smaller of two user keys. If one of the keys is nil,
389379
// the other one is returned.
390380
func MinUserKey(cmp Compare, a, b []byte) []byte {
@@ -444,6 +434,9 @@ func MakeAssertComparer(c Comparer) Comparer {
444434
CompareRangeSuffixes: c.CompareRangeSuffixes,
445435
AbbreviatedKey: c.AbbreviatedKey,
446436
Separator: func(dst, a, b []byte) []byte {
437+
if len(a) == 0 || len(b) == 0 {
438+
panic(errors.AssertionFailedf("empty keys"))
439+
}
447440
ret := c.Separator(dst, a, b)
448441
// The Separator func must return a valid key.
449442
c.ValidateKey.MustValidate(ret)

internal/base/comparer_test.go

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,11 @@ import (
1212
"time"
1313
)
1414

15-
func TestDefAppendSeparator(t *testing.T) {
15+
func TestDefaultComparer_Separator(t *testing.T) {
1616
testCases := []struct {
1717
a, b, want string
1818
}{
19-
// Examples from the doc comments.
2019
{"black", "blue", "blb"},
21-
{"green", "", "green"},
22-
// Non-empty b values. The C++ Level-DB code calls these separators.
23-
{"", "2", ""},
2420
{"1", "2", "1"},
2521
{"1", "29", "2"},
2622
{"13", "19", "14"},
@@ -33,22 +29,37 @@ func TestDefAppendSeparator(t *testing.T) {
3329
{"1\xff\xff", "19", "1\xff\xff"},
3430
{"1\xff\xff", "2", "1\xff\xff"},
3531
{"1\xff\xff", "9", "2"},
36-
// Empty b values. The C++ Level-DB code calls these successors.
37-
{"", "", ""},
38-
{"1", "", "1"},
39-
{"11", "", "11"},
40-
{"11\xff", "", "11\xff"},
41-
{"1\xff", "", "1\xff"},
42-
{"1\xff\xff", "", "1\xff\xff"},
43-
{"\xff", "", "\xff"},
44-
{"\xff\xff", "", "\xff\xff"},
45-
{"\xff\xff\xff", "", "\xff\xff\xff"},
4632
}
4733
for _, tc := range testCases {
4834
t.Run("", func(t *testing.T) {
4935
got := string(DefaultComparer.Separator(nil, []byte(tc.a), []byte(tc.b)))
5036
if got != tc.want {
51-
t.Errorf("a, b = %q, %q: got %q, want %q", tc.a, tc.b, got, tc.want)
37+
t.Errorf("DefaultComparer.Separator(nil, %q, %q) = %q, want %q", tc.a, tc.b, got, tc.want)
38+
}
39+
})
40+
}
41+
}
42+
43+
func TestDefaultComparer_Successor(t *testing.T) {
44+
testCases := []struct {
45+
a, want string
46+
}{
47+
{"green", "h"},
48+
{"", ""},
49+
{"1", "2"},
50+
{"11", "2"},
51+
{"11\xff", "2"},
52+
{"1\xff", "2"},
53+
{"1\xff\xff", "2"},
54+
{"\xff", "\xff"},
55+
{"\xff\xff", "\xff\xff"},
56+
{"\xff\xff\xff", "\xff\xff\xff"},
57+
}
58+
for _, tc := range testCases {
59+
t.Run("", func(t *testing.T) {
60+
got := string(DefaultComparer.Successor(nil, []byte(tc.a)))
61+
if got != tc.want {
62+
t.Errorf("DefaultComparer.Successor(nil, %q) = %q, want %q", tc.a, got, tc.want)
5263
}
5364
})
5465
}

internal/base/internal.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"strings"
1313
"sync/atomic"
1414

15+
"github.com/cockroachdb/errors"
16+
"github.com/cockroachdb/pebble/internal/invariants"
1517
"github.com/cockroachdb/redact"
1618
)
1719

@@ -398,6 +400,9 @@ func (k InternalKey) EncodeTrailer() [8]byte {
398400
func (k InternalKey) Separator(
399401
cmp Compare, sep Separator, buf []byte, other InternalKey,
400402
) InternalKey {
403+
if invariants.Enabled && (len(k.UserKey) == 0 || len(other.UserKey) == 0) {
404+
panic(errors.AssertionFailedf("empty keys passed to Separator: %s, %s", k, other))
405+
}
401406
buf = sep(buf, k.UserKey, other.UserKey)
402407
if len(buf) <= len(k.UserKey) && cmp(k.UserKey, buf) < 0 {
403408
// The separator user key is physically shorter than k.UserKey (if it is
@@ -416,7 +421,7 @@ func (k InternalKey) Separator(
416421
// InternalKey.UserKey, though it is valid to pass a nil.
417422
func (k InternalKey) Successor(cmp Compare, succ Successor, buf []byte) InternalKey {
418423
buf = succ(buf, k.UserKey)
419-
if len(buf) <= len(k.UserKey) && cmp(k.UserKey, buf) < 0 {
424+
if (len(k.UserKey) == 0 || len(buf) <= len(k.UserKey)) && cmp(k.UserKey, buf) < 0 {
420425
// The successor user key is physically shorter that k.UserKey (if it is
421426
// longer, we'll continue to use "k"), but logically after. Tack on the max
422427
// sequence number to the shortened user key. Note that we could tack on

0 commit comments

Comments
 (0)