Skip to content

Commit 0d9d1b1

Browse files
committed
Switch to using a hashmap instead of a BTree in some (not all yet) places where ordering is not important.
1 parent 0e78a9a commit 0d9d1b1

File tree

6 files changed

+164
-57
lines changed

6 files changed

+164
-57
lines changed

aggregates/distinct.go

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package aggregates
22

33
import (
4-
"github.com/tidwall/btree"
4+
"hash/fnv"
55

6+
"github.com/zyedidia/generic/hashmap"
7+
8+
"github.com/cube2222/octosql/execution"
69
"github.com/cube2222/octosql/execution/nodes"
710
"github.com/cube2222/octosql/octosql"
811
"github.com/cube2222/octosql/physical"
@@ -22,33 +25,36 @@ func DistinctAggregateOverloads(overloads []physical.AggregateDescriptor) []phys
2225
}
2326

2427
type Distinct struct {
25-
items *btree.Generic[*distinctKey]
28+
items *hashmap.Map[octosql.Value, *distinctKey]
2629
wrapped nodes.Aggregate
2730
}
2831

2932
func NewDistinctPrototype(wrapped func() nodes.Aggregate) func() nodes.Aggregate {
3033
return func() nodes.Aggregate {
3134
return &Distinct{
32-
items: btree.NewGenericOptions(func(key, than *distinctKey) bool {
33-
return key.value.Compare(than.value) == -1
34-
}, btree.Options{NoLocks: true}),
35+
items: hashmap.New[octosql.Value, *distinctKey](
36+
execution.BTreeDefaultDegree,
37+
func(a, b octosql.Value) bool {
38+
return a.Compare(b) == 0
39+
}, func(v octosql.Value) uint64 {
40+
hash := fnv.New64()
41+
v.Hash(hash)
42+
return hash.Sum64()
43+
}),
3544
wrapped: wrapped(),
3645
}
3746
}
3847
}
3948

4049
type distinctKey struct {
41-
value octosql.Value
4250
count int
4351
}
4452

4553
func (c *Distinct) Add(retraction bool, value octosql.Value) bool {
46-
var hint btree.PathHint
47-
48-
item, ok := c.items.GetHint(&distinctKey{value: value}, &hint)
54+
item, ok := c.items.Get(value)
4955
if !ok {
50-
item = &distinctKey{value: value, count: 0}
51-
c.items.SetHint(item, &hint)
56+
item = &distinctKey{count: 0}
57+
c.items.Put(value, item)
5258
}
5359
if !retraction {
5460
item.count++
@@ -58,10 +64,10 @@ func (c *Distinct) Add(retraction bool, value octosql.Value) bool {
5864
if item.count == 1 && !retraction {
5965
c.wrapped.Add(false, value)
6066
} else if item.count == 0 {
61-
c.items.DeleteHint(item, &hint)
67+
c.items.Remove(value)
6268
c.wrapped.Add(true, value)
6369
}
64-
return c.items.Len() == 0
70+
return c.items.Size() == 0
6571
}
6672

6773
func (c *Distinct) Trigger() octosql.Value {

execution/nodes/distinct.go

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ package nodes
22

33
import (
44
"fmt"
5+
"hash/fnv"
56

6-
"github.com/tidwall/btree"
7+
"github.com/zyedidia/generic/hashmap"
78

89
. "github.com/cube2222/octosql/execution"
910
"github.com/cube2222/octosql/octosql"
@@ -20,30 +21,33 @@ func NewDistinct(source Node) *Distinct {
2021
}
2122

2223
type distinctItem struct {
23-
Values []octosql.Value
24-
Count int
24+
Count int
2525
}
2626

2727
func (o *Distinct) Run(execCtx ExecutionContext, produce ProduceFn, metaSend MetaSendFn) error {
28-
recordCounts := btree.NewGenericOptions(func(item, than *distinctItem) bool {
29-
for i := 0; i < len(item.Values); i++ {
30-
if comp := item.Values[i].Compare(than.Values[i]); comp != 0 {
31-
return comp == -1
28+
recordCounts := hashmap.New[[]octosql.Value, *distinctItem](
29+
BTreeDefaultDegree,
30+
func(a, b []octosql.Value) bool {
31+
for i := range a {
32+
if a[i].Compare(b[i]) != 0 {
33+
return false
34+
}
3235
}
33-
}
34-
35-
return false
36-
}, btree.Options{
37-
NoLocks: true,
38-
})
36+
return true
37+
}, func(k []octosql.Value) uint64 {
38+
hash := fnv.New64()
39+
for _, v := range k {
40+
v.Hash(hash)
41+
}
42+
return hash.Sum64()
43+
})
3944
o.source.Run(
4045
execCtx,
4146
func(ctx ProduceContext, record Record) error {
42-
item, ok := recordCounts.Get(&distinctItem{Values: record.Values})
47+
item, ok := recordCounts.Get(record.Values)
4348
if !ok {
4449
item = &distinctItem{
45-
Values: record.Values,
46-
Count: 0,
50+
Count: 0,
4751
}
4852
}
4953
if !record.Retraction {
@@ -52,18 +56,18 @@ func (o *Distinct) Run(execCtx ExecutionContext, produce ProduceFn, metaSend Met
5256
item.Count--
5357
}
5458
if item.Count > 0 {
55-
// New record.
5659
if !record.Retraction && item.Count == 1 {
60+
// New record.
5761
if err := produce(ctx, record); err != nil {
5862
return fmt.Errorf("couldn't produce new record: %w", err)
5963
}
60-
recordCounts.Set(item)
64+
recordCounts.Put(record.Values, item)
6165
}
6266
} else {
6367
if err := produce(ctx, record); err != nil {
6468
return fmt.Errorf("couldn't retract record record: %w", err)
6569
}
66-
recordCounts.Delete(item)
70+
recordCounts.Remove(record.Values)
6771
}
6872
return nil
6973
},

execution/nodes/simple_group_by.go

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ package nodes
22

33
import (
44
"fmt"
5+
"hash/fnv"
56
"time"
67

7-
"github.com/google/btree"
8+
"github.com/zyedidia/generic/hashmap"
89

910
. "github.com/cube2222/octosql/execution"
1011
"github.com/cube2222/octosql/octosql"
@@ -32,9 +33,30 @@ func NewSimpleGroupBy(
3233
}
3334
}
3435

36+
// hashmapAggregatesItem is the per-group state stored as the value in the
// hashmap built by SimpleGroupBy.Run. The group key is the map key and is
// therefore not kept as a field of this struct.
type hashmapAggregatesItem struct {
37+
// Aggregates holds one aggregate instance per aggregate prototype.
Aggregates []Aggregate
38+
39+
// AggregatedSetSize omits NULL inputs.
40+
AggregatedSetSize []int
41+
42+
// OverallRecordCount counts all records minus retractions.
43+
OverallRecordCount int
44+
}
45+
3546
func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend MetaSendFn) error {
36-
aggregates := btree.NewG[*aggregatesItem](BTreeDefaultDegree, func(a, b *aggregatesItem) bool {
37-
return CompareValueSlices(a.GroupKey, b.GroupKey)
47+
aggregates := hashmap.New[GroupKey, *hashmapAggregatesItem](BTreeDefaultDegree, func(a, b GroupKey) bool {
48+
for i := range a {
49+
if a[i].Compare(b[i]) != 0 {
50+
return false
51+
}
52+
}
53+
return true
54+
}, func(k GroupKey) uint64 {
55+
hash := fnv.New64()
56+
for _, v := range k {
57+
v.Hash(hash)
58+
}
59+
return hash.Sum64()
3860
})
3961

4062
if err := g.source.Run(ctx, func(produceCtx ProduceContext, record Record) error {
@@ -50,16 +72,16 @@ func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend Me
5072
}
5173

5274
{
53-
itemTyped, ok := aggregates.Get(&aggregatesItem{GroupKey: key})
75+
itemTyped, ok := aggregates.Get(key)
5476

5577
if !ok {
5678
newAggregates := make([]Aggregate, len(g.aggregatePrototypes))
5779
for i := range g.aggregatePrototypes {
5880
newAggregates[i] = g.aggregatePrototypes[i]()
5981
}
6082

61-
itemTyped = &aggregatesItem{GroupKey: key, Aggregates: newAggregates, AggregatedSetSize: make([]int, len(g.aggregatePrototypes))}
62-
aggregates.ReplaceOrInsert(itemTyped)
83+
itemTyped = &hashmapAggregatesItem{Aggregates: newAggregates, AggregatedSetSize: make([]int, len(g.aggregatePrototypes))}
84+
aggregates.Put(key, itemTyped)
6385
}
6486

6587
if !record.Retraction {
@@ -84,7 +106,7 @@ func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend Me
84106
}
85107

86108
if itemTyped.OverallRecordCount == 0 {
87-
aggregates.Delete(itemTyped)
109+
aggregates.Remove(key)
88110
}
89111
}
90112

@@ -96,26 +118,35 @@ func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend Me
96118
}
97119

98120
var err error
99-
aggregates.Ascend(func(itemTyped *aggregatesItem) bool {
100-
key := itemTyped.GroupKey
101-
102-
outputValues := make([]octosql.Value, len(key)+len(g.aggregateExprs))
103-
copy(outputValues, key)
104-
105-
for i := range itemTyped.Aggregates {
106-
if itemTyped.AggregatedSetSize[i] > 0 {
107-
outputValues[len(key)+i] = itemTyped.Aggregates[i].Trigger()
108-
} else {
109-
outputValues[len(key)+i] = octosql.NewNull()
121+
func() {
122+
type stopEach struct{}
123+
defer func() {
124+
msg := recover()
125+
if msg == nil {
126+
return
127+
}
128+
if _, ok := msg.(stopEach); ok {
129+
return
130+
}
131+
panic(msg)
132+
}()
133+
aggregates.Each(func(key GroupKey, itemTyped *hashmapAggregatesItem) {
134+
outputValues := make([]octosql.Value, len(key)+len(g.aggregateExprs))
135+
copy(outputValues, key)
136+
137+
for i := range itemTyped.Aggregates {
138+
if itemTyped.AggregatedSetSize[i] > 0 {
139+
outputValues[len(key)+i] = itemTyped.Aggregates[i].Trigger()
140+
} else {
141+
outputValues[len(key)+i] = octosql.NewNull()
142+
}
110143
}
111-
}
112-
113-
if err = produce(ProduceFromExecutionContext(ctx), NewRecord(outputValues, false, time.Time{})); err != nil {
114-
return false
115-
}
116144

117-
return true
118-
})
145+
if err = produce(ProduceFromExecutionContext(ctx), NewRecord(outputValues, false, time.Time{})); err != nil {
146+
panic(stopEach{})
147+
}
148+
})
149+
}()
119150

120151
return err
121152
}

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ require (
2626
github.com/stretchr/testify v1.7.0
2727
github.com/tidwall/btree v1.3.1
2828
github.com/valyala/fastjson v1.6.3
29+
github.com/zyedidia/generic v1.1.0
2930
golang.org/x/exp v0.0.0-20220414153411-bcd21879b8fd
3031
google.golang.org/grpc v1.42.0
3132
google.golang.org/protobuf v1.27.1
@@ -58,6 +59,7 @@ require (
5859
github.com/pkg/term v1.2.0-beta.2 // indirect
5960
github.com/rivo/uniseg v0.2.0 // indirect
6061
github.com/segmentio/encoding v0.3.5 // indirect
62+
github.com/segmentio/fasthash v1.0.3 // indirect
6163
github.com/shopspring/decimal v1.2.0 // indirect
6264
github.com/spf13/pflag v1.0.5 // indirect
6365
github.com/ulikunitz/xz v0.5.10 // indirect

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
159159
github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg=
160160
github.com/segmentio/encoding v0.3.5 h1:UZEiaZ55nlXGDL92scoVuw00RmiRCazIEmvPSbSvt8Y=
161161
github.com/segmentio/encoding v0.3.5/go.mod h1:n0JeuIqEQrQoPDGsjo8UNd1iA0U8d8+oHAA4E3G3OxM=
162+
github.com/segmentio/fasthash v1.0.3 h1:EI9+KE1EwvMLBWwjpRDc+fEM+prwxDYbslddQGtrmhM=
163+
github.com/segmentio/fasthash v1.0.3/go.mod h1:waKX8l2N8yckOgmSsXJi7x1ZfdKZ4x7KRMzBtS3oedY=
162164
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
163165
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
164166
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 h1:JIAuq3EEf9cgbU6AtGPK4CTG3Zf6CKMNqf0MHTggAUA=
@@ -183,6 +185,8 @@ github.com/valyala/fastjson v1.6.3 h1:tAKFnnwmeMGPbwJ7IwxcTPCNr3uIzoIj3/Fh90ra4x
183185
github.com/valyala/fastjson v1.6.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
184186
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
185187
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
188+
github.com/zyedidia/generic v1.1.0 h1:G9kbhNFCZhf2d9SC53RkHQdmMoPwImguLOGx9DW2ADM=
189+
github.com/zyedidia/generic v1.1.0/go.mod h1:ly2RBz4mnz1yeuVbQA/VFwGjK3mnHGRj1JuoG336Bis=
186190
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
187191
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
188192
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=

octosql/values.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package octosql
22

33
import (
4+
"encoding/binary"
45
"fmt"
6+
"hash"
7+
"math"
58
"strings"
69
"time"
710
)
@@ -236,6 +239,63 @@ func (value Value) Compare(other Value) int {
236239
}
237240
}
238241

242+
func (value Value) Hash(hash hash.Hash64) {
243+
switch value.TypeID {
244+
case TypeIDNull:
245+
hash.Write([]byte{0})
246+
247+
case TypeIDInt:
248+
var data [8]byte
249+
binary.BigEndian.PutUint64(data[:], uint64(value.Int))
250+
hash.Write(data[:])
251+
252+
case TypeIDFloat:
253+
var data [8]byte
254+
binary.BigEndian.PutUint64(data[:], math.Float64bits(value.Float))
255+
hash.Write(data[:])
256+
257+
case TypeIDBoolean:
258+
if value.Boolean {
259+
hash.Write([]byte{1})
260+
} else {
261+
hash.Write([]byte{0})
262+
}
263+
264+
case TypeIDString:
265+
hash.Write([]byte(value.Str))
266+
267+
case TypeIDTime:
268+
var data [8]byte
269+
binary.BigEndian.PutUint64(data[:], uint64(value.Time.UnixNano()))
270+
hash.Write(data[:])
271+
272+
case TypeIDDuration:
273+
var data [8]byte
274+
binary.BigEndian.PutUint64(data[:], uint64(value.Duration))
275+
hash.Write(data[:])
276+
277+
case TypeIDList:
278+
for i := range value.List {
279+
value.List[i].Hash(hash)
280+
}
281+
282+
case TypeIDStruct:
283+
for i := range value.List {
284+
value.Struct[i].Hash(hash)
285+
}
286+
287+
case TypeIDTuple:
288+
for i := range value.List {
289+
value.Tuple[i].Hash(hash)
290+
}
291+
292+
case TypeIDUnion:
293+
panic("can't have union type as concrete value instance")
294+
default:
295+
panic("impossible, type switch bug")
296+
}
297+
}
298+
239299
func (value Value) Equal(other Value) bool {
240300
if value.TypeID == TypeIDNull && other.TypeID == TypeIDNull {
241301
return false

0 commit comments

Comments
 (0)