From 7ac87990d15f9ada641140a295f3c2b9f56de8af Mon Sep 17 00:00:00 2001 From: Jakub Martin Date: Mon, 10 Oct 2022 01:03:35 +0200 Subject: [PATCH] Hashing optimizations to reduce unnecessary allocations. --- aggregates/distinct.go | 6 +--- execution/nodes/distinct.go | 7 +---- execution/nodes/simple_group_by.go | 7 +---- go.mod | 2 +- octosql/values.go | 50 +++++++++++++++++------------- 5 files changed, 32 insertions(+), 40 deletions(-) diff --git a/aggregates/distinct.go b/aggregates/distinct.go index 47a25e84..03ae0929 100644 --- a/aggregates/distinct.go +++ b/aggregates/distinct.go @@ -1,8 +1,6 @@ package aggregates import ( - "hash/fnv" - "github.com/zyedidia/generic/hashmap" "github.com/cube2222/octosql/execution" @@ -37,9 +35,7 @@ func NewDistinctPrototype(wrapped func() nodes.Aggregate) func() nodes.Aggregate func(a, b octosql.Value) bool { return a.Compare(b) == 0 }, func(v octosql.Value) uint64 { - hash := fnv.New64() - v.Hash(hash) - return hash.Sum64() + return v.Hash() }), wrapped: wrapped(), } diff --git a/execution/nodes/distinct.go b/execution/nodes/distinct.go index bde6ebad..6a3588c5 100644 --- a/execution/nodes/distinct.go +++ b/execution/nodes/distinct.go @@ -2,7 +2,6 @@ package nodes import ( "fmt" - "hash/fnv" "github.com/zyedidia/generic/hashmap" @@ -35,11 +34,7 @@ func (o *Distinct) Run(execCtx ExecutionContext, produce ProduceFn, metaSend Met } return true }, func(k []octosql.Value) uint64 { - hash := fnv.New64() - for _, v := range k { - v.Hash(hash) - } - return hash.Sum64() + return octosql.HashManyValues(k) }) o.source.Run( execCtx, diff --git a/execution/nodes/simple_group_by.go b/execution/nodes/simple_group_by.go index 24328736..8f0cc809 100644 --- a/execution/nodes/simple_group_by.go +++ b/execution/nodes/simple_group_by.go @@ -2,7 +2,6 @@ package nodes import ( "fmt" - "hash/fnv" "time" "github.com/zyedidia/generic/hashmap" @@ -52,11 +51,7 @@ func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend Me } return true }, func(k GroupKey) uint64 { - hash := fnv.New64() - for _, v := range k { - v.Hash(hash) - } - return hash.Sum64() + return octosql.HashManyValues(k) }) if err := g.source.Run(ctx, func(produceCtx ProduceContext, record Record) error { diff --git a/go.mod b/go.mod index 638b1214..06390d9f 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/pkg/profile v1.6.0 github.com/pmezard/go-difflib v1.0.0 + github.com/segmentio/fasthash v1.0.3 github.com/segmentio/parquet-go v0.0.0-20220421002521-93f8e5ed3407 github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 github.com/spf13/cobra v1.4.0 @@ -59,7 +60,6 @@ require ( github.com/pkg/term v1.2.0-beta.2 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/segmentio/encoding v0.3.5 // indirect - github.com/segmentio/fasthash v1.0.3 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/ulikunitz/xz v0.5.10 // indirect diff --git a/octosql/values.go b/octosql/values.go index 31700165..1e54c31d 100644 --- a/octosql/values.go +++ b/octosql/values.go @@ -1,12 +1,12 @@ package octosql import ( - "encoding/binary" "fmt" - "hash" "math" "strings" "time" + + "github.com/segmentio/fasthash/fnv1a" ) var ZeroValue = Value{} @@ -239,54 +239,58 @@ func (value Value) Compare(other Value) int { } } -func (value Value) Hash(hash hash.Hash64) { +func (value Value) Hash() uint64 { + return value.hash(fnv1a.Init64) +} + +func HashManyValues(values []Value) uint64 { + hash := fnv1a.Init64 + for _, v := range values { + hash = v.hash(hash) + } + return hash +} + +func (value Value) hash(hash uint64) uint64 { switch value.TypeID { case TypeIDNull: - hash.Write([]byte{0}) + hash = fnv1a.AddUint64(hash, 0) case TypeIDInt: - var data [8]byte - binary.BigEndian.PutUint64(data[:], uint64(value.Int)) - hash.Write(data[:]) + hash = fnv1a.AddUint64(hash, uint64(value.Int)) case TypeIDFloat: - var data [8]byte - binary.BigEndian.PutUint64(data[:], math.Float64bits(value.Float)) - hash.Write(data[:]) + hash = fnv1a.AddUint64(hash, math.Float64bits(value.Float)) case TypeIDBoolean: if value.Boolean { - hash.Write([]byte{1}) + hash = fnv1a.AddUint64(hash, 1) } else { - hash.Write([]byte{0}) + hash = fnv1a.AddUint64(hash, 0) } case TypeIDString: - hash.Write([]byte(value.Str)) + hash = fnv1a.AddString64(hash, value.Str) case TypeIDTime: - var data [8]byte - binary.BigEndian.PutUint64(data[:], uint64(value.Time.UnixNano())) - hash.Write(data[:]) + hash = fnv1a.AddUint64(hash, uint64(value.Time.UnixNano())) case TypeIDDuration: - var data [8]byte - binary.BigEndian.PutUint64(data[:], uint64(value.Duration)) - hash.Write(data[:]) + hash = fnv1a.AddUint64(hash, uint64(value.Duration)) case TypeIDList: for i := range value.List { - value.List[i].Hash(hash) + hash = value.List[i].hash(hash) } case TypeIDStruct: for i := range value.List { - value.Struct[i].Hash(hash) + hash = value.Struct[i].hash(hash) } case TypeIDTuple: for i := range value.List { - value.Tuple[i].Hash(hash) + hash = value.Tuple[i].hash(hash) } case TypeIDUnion: @@ -294,6 +298,8 @@ func (value Value) Hash(hash hash.Hash64) { default: panic("impossible, type switch bug") } + + return hash } func (value Value) Equal(other Value) bool {