Skip to content

Commit

Permalink
impr: indexing & filtering - to reduce sizes of indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
marino39 committed Feb 3, 2025
1 parent 4308db3 commit 8cb04f1
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 169 deletions.
163 changes: 98 additions & 65 deletions filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,29 @@ import (
"context"
"fmt"
"path"
"reflect"

"github.com/0xsequence/ethwal/storage"
"github.com/0xsequence/ethwal/storage/local"
"github.com/RoaringBitmap/roaring/v2/roaring64"
)

type Filter interface {
Eval(ctx context.Context) FilterIterator
}
// Filter is an interface that defines the methods to filter blocks
// based on the index data.
type Filter[T any] interface {
// Filter returns the block with its inner data reduced to the entries that match the filter criteria.
Filter(block Block[T]) Block[T]

// IndexIterator returns the iterator for the filter.
IndexIterator(ctx context.Context) *IndexIterator

type FilterIterator interface {
HasNext() bool
Next() (uint64, uint16)
Peek() (uint64, uint16)
Bitmap() *roaring64.Bitmap
bitmap(block Block[T]) *roaring64.Bitmap
}

type FilterBuilder interface {
And(filters ...Filter) Filter
Or(filters ...Filter) Filter
Eq(index string, key string) Filter
type FilterBuilder[T any] interface {
And(filters ...Filter[T]) Filter[T]
Or(filters ...Filter[T]) Filter[T]
Eq(index string, key string) Filter[T]
}

type FilterBuilderOptions[T any] struct {
Expand All @@ -47,7 +49,7 @@ type filterBuilder[T any] struct {
fs storage.FS
}

func NewFilterBuilder[T any](opt FilterBuilderOptions[T]) (FilterBuilder, error) {
func NewFilterBuilder[T any](opt FilterBuilderOptions[T]) (FilterBuilder[T], error) {
// apply default options on uninitialized fields
opt = opt.WithDefaults()

Expand All @@ -60,29 +62,54 @@ func NewFilterBuilder[T any](opt FilterBuilderOptions[T]) (FilterBuilder, error)
}, nil
}

type filter struct {
resultSet func(ctx context.Context) *roaring64.Bitmap
type filter[T any] struct {
blockBitmap func(ctx context.Context) *roaring64.Bitmap
dataIndexBitmapFunc func(block Block[T]) *roaring64.Bitmap
}

func (c *filter) Eval(ctx context.Context) FilterIterator {
if c.resultSet == nil {
c.resultSet = func(ctx context.Context) *roaring64.Bitmap {
func (c *filter[T]) IndexIterator(ctx context.Context) *IndexIterator {
if c.blockBitmap == nil {
c.blockBitmap = func(ctx context.Context) *roaring64.Bitmap {
return roaring64.New()
}
}
return newFilterIterator(c.resultSet(ctx))
return NewIndexIterator(c.blockBitmap(ctx))
}

// Filter returns block with its Data narrowed to the elements selected by the
// filter's data-index bitmap.
//
// The block is returned unchanged when the bitmap contains the
// IndexAllDataIndexes sentinel, or when Data is not a slice (including a nil
// interface value and fixed-length arrays, which cannot be shortened).
// Otherwise block.Data is replaced by a new slice holding only the selected
// elements in ascending index order. A filter without a data-index function
// selects nothing.
func (c *filter[T]) Filter(block Block[T]) Block[T] {
	// Go through bitmap() rather than the raw field: it tolerates a nil
	// dataIndexBitmapFunc, which would otherwise panic here.
	selected := c.bitmap(block)
	if selected == nil {
		// And/Or built from zero conditions produce a nil bitmap; treat it as
		// "nothing selected" instead of dereferencing nil.
		selected = roaring64.New()
	}

	// The sentinel stands for "every element". It can appear next to concrete
	// indexes (e.g. after an Or), in which case the whole block still matches
	// and indexing Data with the sentinel value would be out of range.
	if selected.Contains(uint64(IndexAllDataIndexes)) {
		return block
	}

	data := reflect.ValueOf(block.Data)
	// IsValid is false when Data is a nil interface; reflect.Append only
	// accepts slices, so arrays are left untouched as well.
	if !data.IsValid() || data.Kind() != reflect.Slice {
		return block
	}

	// Start from the zero (nil) slice so that an empty selection yields the
	// same nil slice as before.
	filtered := reflect.New(data.Type()).Elem()
	length := uint64(data.Len())
	for _, dataIndex := range selected.ToArray() {
		if dataIndex >= length {
			// The index does not exist in this block's data; skip it rather
			// than panic on an out-of-range access.
			continue
		}
		filtered = reflect.Append(filtered, data.Index(int(dataIndex)))
	}
	block.Data = filtered.Interface().(T)
	return block
}

// bitmap returns the data indexes of block that this filter selects.
//
// The result is never nil: a filter without a data-index function, or one
// whose function yields nil (as And/Or do for zero conditions), reports an
// empty bitmap so callers can Clone, And, Or or iterate it safely.
func (c *filter[T]) bitmap(block Block[T]) *roaring64.Bitmap {
	if c.dataIndexBitmapFunc == nil {
		return roaring64.New()
	}
	if selected := c.dataIndexBitmapFunc(block); selected != nil {
		return selected
	}
	return roaring64.New()
}

func (c *filterBuilder[T]) And(filters ...Filter) Filter {
return &filter{
resultSet: func(ctx context.Context) *roaring64.Bitmap {
func (c *filterBuilder[T]) And(conds ...Filter[T]) Filter[T] {
return &filter[T]{
blockBitmap: func(ctx context.Context) *roaring64.Bitmap {
var bmap *roaring64.Bitmap
for _, filter := range filters {
if filter == nil {
for _, cond := range conds {
if cond == nil {
continue
}

iter := filter.Eval(ctx)
iter := cond.IndexIterator(ctx)
if bmap == nil {
bmap = iter.Bitmap().Clone()
} else {
Expand All @@ -91,19 +118,31 @@ func (c *filterBuilder[T]) And(filters ...Filter) Filter {
}
return bmap
},
// dataIndexBitmapFunc intersects the data-index bitmaps of all non-nil
// conditions for the given block. Nil conditions are skipped, mirroring
// blockBitmap, and an empty (never nil) bitmap is returned when no
// condition contributes.
// NOTE(review): a condition that matches the whole block appears to report
// only the IndexAllDataIndexes sentinel, so intersecting it with concrete
// indexes yields an empty set — confirm whether that is intended.
dataIndexBitmapFunc: func(block Block[T]) *roaring64.Bitmap {
	var bmap *roaring64.Bitmap
	for _, cond := range conds {
		if cond == nil {
			continue
		}

		condBitmap := cond.bitmap(block)
		if condBitmap == nil {
			// A nil bitmap selects nothing.
			condBitmap = roaring64.New()
		}
		if bmap == nil {
			// Clone so the condition's own bitmap is never mutated.
			bmap = condBitmap.Clone()
		} else {
			bmap.And(condBitmap)
		}
	}
	if bmap == nil {
		return roaring64.New()
	}
	return bmap
},
}
}

func (c *filterBuilder[T]) Or(filters ...Filter) Filter {
return &filter{
resultSet: func(ctx context.Context) *roaring64.Bitmap {
func (c *filterBuilder[T]) Or(conds ...Filter[T]) Filter[T] {
return &filter[T]{
blockBitmap: func(ctx context.Context) *roaring64.Bitmap {
var bmap *roaring64.Bitmap
for _, filter := range filters {
if filter == nil {
for _, cond := range conds {
if cond == nil {
continue
}

iter := filter.Eval(ctx)
iter := cond.IndexIterator(ctx)
if bmap == nil {
bmap = iter.Bitmap().Clone()
} else {
Expand All @@ -112,13 +151,24 @@ func (c *filterBuilder[T]) Or(filters ...Filter) Filter {
}
return bmap
},
// dataIndexBitmapFunc unions the data-index bitmaps of all non-nil
// conditions for the given block. Nil conditions are skipped, mirroring
// blockBitmap, and an empty (never nil) bitmap is returned when no
// condition contributes.
dataIndexBitmapFunc: func(block Block[T]) *roaring64.Bitmap {
	var bmap *roaring64.Bitmap
	for _, cond := range conds {
		if cond == nil {
			continue
		}

		condBitmap := cond.bitmap(block)
		if condBitmap == nil {
			// A nil bitmap adds nothing to the union.
			continue
		}
		if bmap == nil {
			// Clone so the condition's own bitmap is never mutated.
			bmap = condBitmap.Clone()
		} else {
			bmap.Or(condBitmap)
		}
	}
	if bmap == nil {
		return roaring64.New()
	}
	return bmap
},
}
}

func (c *filterBuilder[T]) Eq(index string, key string) Filter {

return &filter{
resultSet: func(ctx context.Context) *roaring64.Bitmap {
func (c *filterBuilder[T]) Eq(index string, key string) Filter[T] {
return &filter[T]{
blockBitmap: func(ctx context.Context) *roaring64.Bitmap {
// fetch the index file and include it in the result set
index_ := IndexName(index).Normalize()
idx, ok := c.indexes[index_]
Expand All @@ -132,36 +182,19 @@ func (c *filterBuilder[T]) Eq(index string, key string) Filter {
}
return bitmap
},
}
}

type filterIterator struct {
iter roaring64.IntPeekable64
bitmap *roaring64.Bitmap
}
dataIndexBitmapFunc: func(block Block[T]) *roaring64.Bitmap {
index_ := IndexName(index).Normalize()
idx, ok := c.indexes[index_]
if !ok {
return roaring64.New()
}

func newFilterIterator(bmap *roaring64.Bitmap) FilterIterator {
return &filterIterator{
iter: bmap.Iterator(),
bitmap: bmap,
indexUpdate, _ := idx.IndexBlock(context.Background(), nil, block)
bitmap, ok := indexUpdate.DataIndexBitmap[IndexedValue(key)]
if !ok {
return roaring64.New()
}
return bitmap
},
}
}

// HasNext reports whether the underlying bitmap iterator has another value.
func (f *filterIterator) HasNext() bool {
return f.iter.HasNext()
}

// Next advances the underlying iterator and returns the two parts of the
// compound ID it yields, as produced by IndexCompoundID.Split (presumably the
// block number and the data index — confirm against Split).
//
// TODO: the behavior when the iterator is exhausted is still undecided;
// callers should check HasNext first.
func (f *filterIterator) Next() (uint64, uint16) {
	return IndexCompoundID(f.iter.Next()).Split()
}

// Peek returns the two parts of the upcoming compound ID, as produced by
// IndexCompoundID.Split, using the iterator's PeekNext rather than Next.
func (f *filterIterator) Peek() (uint64, uint16) {
	return IndexCompoundID(f.iter.PeekNext()).Split()
}

// Bitmap returns the bitmap this iterator was created from. The pointer is
// returned as stored, without copying.
func (f *filterIterator) Bitmap() *roaring64.Bitmap {
return f.bitmap
}
45 changes: 18 additions & 27 deletions filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func indexOnlyEvenBlocks(block Block[[]int]) (toIndex bool, indexValueMap map[In
}

if toIndex {
indexValueMap["true"] = []uint16{math.MaxUint16}
indexValueMap["true"] = []uint16{IndexAllDataIndexes}
}

return
Expand All @@ -202,7 +202,7 @@ func indexOnlyOddBlocks(block Block[[]int]) (toIndex bool, indexValueMap map[Ind
}

if toIndex {
indexValueMap["true"] = []uint16{math.MaxUint16}
indexValueMap["true"] = []uint16{IndexAllDataIndexes}
}

return
Expand Down Expand Up @@ -251,12 +251,6 @@ func indexNone(block Block[[]int]) (toIndex bool, indexValueMap map[IndexedValue
return false, nil, nil
}

// TestMaxMagicCompoundID checks that a compound ID built from the block number
// 2^48-1 and the data index math.MaxUint16 reports both parts back unchanged.
func TestMaxMagicCompoundID(t *testing.T) {
	maxBlock := uint64(math.Exp2(48) - 1)

	id := NewIndexCompoundID(maxBlock, math.MaxUint16)

	assert.Equal(t, maxBlock, id.BlockNumber())
	assert.Equal(t, uint16(math.MaxUint16), id.DataIndex())
}

func TestIntMixFiltering(t *testing.T) {
_, indexes, _, cleanup, err := setupMockData(generateMixedIntIndexes, generateMixedIntBlocks)
assert.NoError(t, err)
Expand Down Expand Up @@ -284,7 +278,7 @@ func TestIntMixFiltering(t *testing.T) {
"555",
"111",
}
var numberFilter Filter
var numberFilter Filter[[]int]
for _, number := range numbersIdxs {
if numberFilter == nil {
numberFilter = f.Eq("all", number)
Expand All @@ -293,30 +287,27 @@ func TestIntMixFiltering(t *testing.T) {
}
}

onlyEvenResults := onlyEvenFilter.Eval(context.Background())
onlyEvenResults := onlyEvenFilter.IndexIterator(context.Background())
assert.Len(t, onlyEvenResults.Bitmap().ToArray(), 20)
for _, id := range onlyEvenResults.Bitmap().ToArray() {
block, _ := IndexCompoundID(id).Split()
for _, block := range onlyEvenResults.Bitmap().ToArray() {
assert.True(t, block <= 20)
}

onlyOddResults := onlyOddFilter.Eval(context.Background())
onlyOddResults := onlyOddFilter.IndexIterator(context.Background())
assert.Len(t, onlyOddResults.Bitmap().ToArray(), 20+20)
for _, id := range onlyOddResults.Bitmap().ToArray() {
block, _ := IndexCompoundID(id).Split()
for _, block := range onlyOddResults.Bitmap().ToArray() {
assert.True(t, (block > 20 && block < 41) || (block > 50 && block < 71))
}

numberAllResults := numberFilter.Eval(context.Background())
// 20*20
assert.Len(t, numberAllResults.Bitmap().ToArray(), 400)
for _, id := range numberAllResults.Bitmap().ToArray() {
block, _ := IndexCompoundID(id).Split()
numberAllResults := numberFilter.IndexIterator(context.Background())
// 20
assert.Len(t, numberAllResults.Bitmap().ToArray(), 20)
for _, block := range numberAllResults.Bitmap().ToArray() {
assert.True(t, block > 50 && block < 71)
}

allNumberAndOdd := f.And(numberFilter, oddFilter)
allNumberOddResults := allNumberAndOdd.Eval(context.Background())
allNumberOddResults := allNumberAndOdd.IndexIterator(context.Background())
assert.ElementsMatch(t, numberAllResults.Bitmap().ToArray(), allNumberOddResults.Bitmap().ToArray())
}

Expand All @@ -331,20 +322,20 @@ func TestFiltering(t *testing.T) {
})
assert.NoError(t, err)
assert.NotNil(t, f)
result := f.Or(f.And(f.Eq("all", "1"), f.Eq("all", "2")), f.Eq("all", "3")).Eval(context.Background())
result := f.Or(f.And(f.Eq("all", "1"), f.Eq("all", "2")), f.Eq("all", "3")).IndexIterator(context.Background())
// result should contain block 1, 2, 3
assert.Len(t, result.Bitmap().ToArray(), 3)
block, _ := result.Next()
block := result.Next()
assert.Equal(t, uint64(1), block)
block, _ = result.Next()
block = result.Next()
assert.Equal(t, uint64(2), block)
block, _ = result.Next()
block = result.Next()
assert.Equal(t, uint64(3), block)

result = f.And(f.Eq("all", "1"), f.Eq("all", "2")).Eval(context.Background())
result = f.And(f.Eq("all", "1"), f.Eq("all", "2")).IndexIterator(context.Background())
// result should contain block 1
assert.Len(t, result.Bitmap().ToArray(), 1)
block, _ = result.Next()
block = result.Next()
assert.Equal(t, uint64(1), block)
}

Expand Down
Loading

0 comments on commit 8cb04f1

Please sign in to comment.