Skip to content

Commit 7ac8799

Browse files
committed
Hashing optimizations to reduce unnecessary allocations.
1 parent 0d9d1b1 commit 7ac8799

File tree

5 files changed

+32
-40
lines changed

5 files changed

+32
-40
lines changed

aggregates/distinct.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package aggregates
22

33
import (
4-
"hash/fnv"
5-
64
"github.com/zyedidia/generic/hashmap"
75

86
"github.com/cube2222/octosql/execution"
@@ -37,9 +35,7 @@ func NewDistinctPrototype(wrapped func() nodes.Aggregate) func() nodes.Aggregate
3735
func(a, b octosql.Value) bool {
3836
return a.Compare(b) == 0
3937
}, func(v octosql.Value) uint64 {
40-
hash := fnv.New64()
41-
v.Hash(hash)
42-
return hash.Sum64()
38+
return v.Hash()
4339
}),
4440
wrapped: wrapped(),
4541
}

execution/nodes/distinct.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package nodes
22

33
import (
44
"fmt"
5-
"hash/fnv"
65

76
"github.com/zyedidia/generic/hashmap"
87

@@ -35,11 +34,7 @@ func (o *Distinct) Run(execCtx ExecutionContext, produce ProduceFn, metaSend Met
3534
}
3635
return true
3736
}, func(k []octosql.Value) uint64 {
38-
hash := fnv.New64()
39-
for _, v := range k {
40-
v.Hash(hash)
41-
}
42-
return hash.Sum64()
37+
return octosql.HashManyValues(k)
4338
})
4439
o.source.Run(
4540
execCtx,

execution/nodes/simple_group_by.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package nodes
22

33
import (
44
"fmt"
5-
"hash/fnv"
65
"time"
76

87
"github.com/zyedidia/generic/hashmap"
@@ -52,11 +51,7 @@ func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend Me
5251
}
5352
return true
5453
}, func(k GroupKey) uint64 {
55-
hash := fnv.New64()
56-
for _, v := range k {
57-
v.Hash(hash)
58-
}
59-
return hash.Sum64()
54+
return octosql.HashManyValues(k)
6055
})
6156

6257
if err := g.source.Run(ctx, func(produceCtx ProduceContext, record Record) error {

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ require (
2020
github.com/pkg/errors v0.9.1
2121
github.com/pkg/profile v1.6.0
2222
github.com/pmezard/go-difflib v1.0.0
23+
github.com/segmentio/fasthash v1.0.3
2324
github.com/segmentio/parquet-go v0.0.0-20220421002521-93f8e5ed3407
2425
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
2526
github.com/spf13/cobra v1.4.0
@@ -59,7 +60,6 @@ require (
5960
github.com/pkg/term v1.2.0-beta.2 // indirect
6061
github.com/rivo/uniseg v0.2.0 // indirect
6162
github.com/segmentio/encoding v0.3.5 // indirect
62-
github.com/segmentio/fasthash v1.0.3 // indirect
6363
github.com/shopspring/decimal v1.2.0 // indirect
6464
github.com/spf13/pflag v1.0.5 // indirect
6565
github.com/ulikunitz/xz v0.5.10 // indirect

octosql/values.go

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package octosql
22

33
import (
4-
"encoding/binary"
54
"fmt"
6-
"hash"
75
"math"
86
"strings"
97
"time"
8+
9+
"github.com/segmentio/fasthash/fnv1a"
1010
)
1111

1212
var ZeroValue = Value{}
@@ -239,61 +239,67 @@ func (value Value) Compare(other Value) int {
239239
}
240240
}
241241

242-
func (value Value) Hash(hash hash.Hash64) {
242+
func (value Value) Hash() uint64 {
243+
return value.hash(fnv1a.Init64)
244+
}
245+
246+
func HashManyValues(values []Value) uint64 {
247+
hash := fnv1a.Init64
248+
for _, v := range values {
249+
hash = v.hash(hash)
250+
}
251+
return hash
252+
}
253+
254+
func (value Value) hash(hash uint64) uint64 {
243255
switch value.TypeID {
244256
case TypeIDNull:
245-
hash.Write([]byte{0})
257+
hash = fnv1a.AddUint64(hash, 0)
246258

247259
case TypeIDInt:
248-
var data [8]byte
249-
binary.BigEndian.PutUint64(data[:], uint64(value.Int))
250-
hash.Write(data[:])
260+
hash = fnv1a.AddUint64(hash, uint64(value.Int))
251261

252262
case TypeIDFloat:
253-
var data [8]byte
254-
binary.BigEndian.PutUint64(data[:], math.Float64bits(value.Float))
255-
hash.Write(data[:])
263+
hash = fnv1a.AddUint64(hash, math.Float64bits(value.Float))
256264

257265
case TypeIDBoolean:
258266
if value.Boolean {
259-
hash.Write([]byte{1})
267+
hash = fnv1a.AddUint64(hash, 1)
260268
} else {
261-
hash.Write([]byte{0})
269+
hash = fnv1a.AddUint64(hash, 0)
262270
}
263271

264272
case TypeIDString:
265-
hash.Write([]byte(value.Str))
273+
hash = fnv1a.AddString64(hash, value.Str)
266274

267275
case TypeIDTime:
268-
var data [8]byte
269-
binary.BigEndian.PutUint64(data[:], uint64(value.Time.UnixNano()))
270-
hash.Write(data[:])
276+
hash = fnv1a.AddUint64(hash, uint64(value.Time.UnixNano()))
271277

272278
case TypeIDDuration:
273-
var data [8]byte
274-
binary.BigEndian.PutUint64(data[:], uint64(value.Duration))
275-
hash.Write(data[:])
279+
hash = fnv1a.AddUint64(hash, uint64(value.Duration))
276280

277281
case TypeIDList:
278282
for i := range value.List {
279-
value.List[i].Hash(hash)
283+
hash = value.List[i].hash(hash)
280284
}
281285

282286
case TypeIDStruct:
283287
for i := range value.List {
284-
value.Struct[i].Hash(hash)
288+
hash = value.Struct[i].hash(hash)
285289
}
286290

287291
case TypeIDTuple:
288292
for i := range value.List {
289-
value.Tuple[i].Hash(hash)
293+
hash = value.Tuple[i].hash(hash)
290294
}
291295

292296
case TypeIDUnion:
293297
panic("can't have union type as concrete value instance")
294298
default:
295299
panic("impossible, type switch bug")
296300
}
301+
302+
return hash
297303
}
298304

299305
func (value Value) Equal(other Value) bool {

0 commit comments

Comments
 (0)