Skip to content

Commit

Permalink
Hashing optimizations to reduce unnecessary allocations.
Browse files Browse the repository at this point in the history
  • Loading branch information
cube2222 committed Oct 9, 2022
1 parent 0d9d1b1 commit 7ac8799
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 40 deletions.
6 changes: 1 addition & 5 deletions aggregates/distinct.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package aggregates

import (
"hash/fnv"

"github.com/zyedidia/generic/hashmap"

"github.com/cube2222/octosql/execution"
Expand Down Expand Up @@ -37,9 +35,7 @@ func NewDistinctPrototype(wrapped func() nodes.Aggregate) func() nodes.Aggregate
func(a, b octosql.Value) bool {
return a.Compare(b) == 0
}, func(v octosql.Value) uint64 {
hash := fnv.New64()
v.Hash(hash)
return hash.Sum64()
return v.Hash()
}),
wrapped: wrapped(),
}
Expand Down
7 changes: 1 addition & 6 deletions execution/nodes/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package nodes

import (
"fmt"
"hash/fnv"

"github.com/zyedidia/generic/hashmap"

Expand Down Expand Up @@ -35,11 +34,7 @@ func (o *Distinct) Run(execCtx ExecutionContext, produce ProduceFn, metaSend Met
}
return true
}, func(k []octosql.Value) uint64 {
hash := fnv.New64()
for _, v := range k {
v.Hash(hash)
}
return hash.Sum64()
return octosql.HashManyValues(k)
})
o.source.Run(
execCtx,
Expand Down
7 changes: 1 addition & 6 deletions execution/nodes/simple_group_by.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package nodes

import (
"fmt"
"hash/fnv"
"time"

"github.com/zyedidia/generic/hashmap"
Expand Down Expand Up @@ -52,11 +51,7 @@ func (g *SimpleGroupBy) Run(ctx ExecutionContext, produce ProduceFn, metaSend Me
}
return true
}, func(k GroupKey) uint64 {
hash := fnv.New64()
for _, v := range k {
v.Hash(hash)
}
return hash.Sum64()
return octosql.HashManyValues(k)
})

if err := g.source.Run(ctx, func(produceCtx ProduceContext, record Record) error {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/pkg/profile v1.6.0
github.com/pmezard/go-difflib v1.0.0
github.com/segmentio/fasthash v1.0.3
github.com/segmentio/parquet-go v0.0.0-20220421002521-93f8e5ed3407
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
github.com/spf13/cobra v1.4.0
Expand Down Expand Up @@ -59,7 +60,6 @@ require (
github.com/pkg/term v1.2.0-beta.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/segmentio/encoding v0.3.5 // indirect
github.com/segmentio/fasthash v1.0.3 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
Expand Down
50 changes: 28 additions & 22 deletions octosql/values.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package octosql

import (
"encoding/binary"
"fmt"
"hash"
"math"
"strings"
"time"

"github.com/segmentio/fasthash/fnv1a"
)

var ZeroValue = Value{}
Expand Down Expand Up @@ -239,61 +239,67 @@ func (value Value) Compare(other Value) int {
}
}

func (value Value) Hash(hash hash.Hash64) {
// Hash returns a 64-bit hash of value. It seeds the computation with
// fnv1a.Init64 and delegates the actual mixing to the unexported hash method.
func (value Value) Hash() uint64 {
	seed := fnv1a.Init64
	return value.hash(seed)
}

// HashManyValues returns a single 64-bit hash for a sequence of values.
// It starts from fnv1a.Init64 and threads the running state through every
// element in order, so an empty slice yields fnv1a.Init64 itself.
func HashManyValues(values []Value) uint64 {
	acc := fnv1a.Init64
	for i := range values {
		acc = values[i].hash(acc)
	}
	return acc
}

// hash mixes value into the running FNV-1a state passed in as hash and
// returns the updated state, so callers can chain several values into one
// hash without allocating a hasher.
//
// Scalars are folded in as a single uint64 (strings via AddString64).
// List, Struct and Tuple values are hashed by folding in each element in
// order. Different values may produce the same hash (for example Null,
// false and the integer 0 all add a zero word); callers that need exact
// matching must still compare the values themselves.
//
// It panics for TypeIDUnion, which cannot be a concrete value, and for any
// unknown TypeID.
func (value Value) hash(hash uint64) uint64 {
	switch value.TypeID {
	case TypeIDNull:
		hash = fnv1a.AddUint64(hash, 0)

	case TypeIDInt:
		hash = fnv1a.AddUint64(hash, uint64(value.Int))

	case TypeIDFloat:
		hash = fnv1a.AddUint64(hash, math.Float64bits(value.Float))

	case TypeIDBoolean:
		if value.Boolean {
			hash = fnv1a.AddUint64(hash, 1)
		} else {
			hash = fnv1a.AddUint64(hash, 0)
		}

	case TypeIDString:
		hash = fnv1a.AddString64(hash, value.Str)

	case TypeIDTime:
		hash = fnv1a.AddUint64(hash, uint64(value.Time.UnixNano()))

	case TypeIDDuration:
		hash = fnv1a.AddUint64(hash, uint64(value.Duration))

	case TypeIDList:
		for i := range value.List {
			hash = value.List[i].hash(hash)
		}

	case TypeIDStruct:
		// Range over the slice that is actually indexed. Ranging over
		// value.List here skipped every field when List was empty and could
		// index out of range when List was longer than Struct.
		for i := range value.Struct {
			hash = value.Struct[i].hash(hash)
		}

	case TypeIDTuple:
		// Same as above: iterate the tuple's own elements, not value.List.
		for i := range value.Tuple {
			hash = value.Tuple[i].hash(hash)
		}

	case TypeIDUnion:
		panic("can't have union type as concrete value instance")
	default:
		panic("impossible, type switch bug")
	}

	return hash
}

func (value Value) Equal(other Value) bool {
Expand Down

0 comments on commit 7ac8799

Please sign in to comment.