Skip to content

Commit b0f4cb4

Browse files
MB-59421: add validation for vector field aliases (#1903)
Vector field aliases (fields of type "vector" that share the same name) must have the same dimensions and the same similarity metric. --------- Co-authored-by: Abhinav Dangeti <[email protected]>
1 parent 631c1bd commit b0f4cb4

File tree

5 files changed

+256
-45
lines changed

5 files changed

+256
-45
lines changed

mapping/document.go

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,21 @@ type DocumentMapping struct {
5050
StructTagKey string `json:"struct_tag_key,omitempty"`
5151
}
5252

53-
func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
53+
func (dm *DocumentMapping) Validate(cache *registry.Cache,
54+
parentName string, fieldAliasCtx map[string]*FieldMapping) error {
5455
var err error
5556
if dm.DefaultAnalyzer != "" {
5657
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
5758
if err != nil {
5859
return err
5960
}
6061
}
61-
for _, property := range dm.Properties {
62-
err = property.Validate(cache)
62+
for propertyName, property := range dm.Properties {
63+
newParent := propertyName
64+
if parentName != "" {
65+
newParent = fmt.Sprintf("%s.%s", parentName, propertyName)
66+
}
67+
err = property.Validate(cache, newParent, fieldAliasCtx)
6368
if err != nil {
6469
return err
6570
}
@@ -78,21 +83,24 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
7883
}
7984
}
8085

81-
err := validateFieldType(field.Type)
86+
err := validateFieldMapping(field, parentName, fieldAliasCtx)
8287
if err != nil {
8388
return err
8489
}
85-
86-
if field.Type == "vector" {
87-
err := validateVectorField(field)
88-
if err != nil {
89-
return err
90-
}
91-
}
9290
}
9391
return nil
9492
}
9593

94+
func validateFieldType(field *FieldMapping) error {
95+
switch field.Type {
96+
case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
97+
return nil
98+
default:
99+
return fmt.Errorf("field: '%s', unknown field type: '%s'",
100+
field.Name, field.Type)
101+
}
102+
}
103+
96104
// analyzerNameForPath attempts to first find the field
97105
// described by this path, then returns the analyzer
98106
// configured for that field

mapping/index.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,12 +174,14 @@ func (im *IndexMappingImpl) Validate() error {
174174
if err != nil {
175175
return err
176176
}
177-
err = im.DefaultMapping.Validate(im.cache)
177+
178+
fieldAliasCtx := make(map[string]*FieldMapping)
179+
err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx)
178180
if err != nil {
179181
return err
180182
}
181183
for _, docMapping := range im.TypeMapping {
182-
err = docMapping.Validate(im.cache)
184+
err = docMapping.Validate(im.cache, "", fieldAliasCtx)
183185
if err != nil {
184186
return err
185187
}

mapping/mapping_no_vectors.go

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package mapping
1919

20-
import "fmt"
21-
2220
func NewVectorFieldMapping() *FieldMapping {
2321
return nil
2422
}
@@ -31,16 +29,7 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
3129
// -----------------------------------------------------------------------------
3230
// document validation functions
3331

34-
func validateVectorField(fieldMapping *FieldMapping) error {
35-
return nil
36-
}
37-
38-
func validateFieldType(fieldType string) error {
39-
switch fieldType {
40-
case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
41-
default:
42-
return fmt.Errorf("unknown field type: '%s'", fieldType)
43-
}
44-
45-
return nil
32+
func validateFieldMapping(field *FieldMapping, parentName string,
33+
fieldAliasCtx map[string]*FieldMapping) error {
34+
return validateFieldType(field)
4635
}

mapping/mapping_vectors.go

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ import (
2626
index "github.com/blevesearch/bleve_index_api"
2727
)
2828

29+
// Minimum and maximum allowed dimensions (both inclusive) for a vector field
30+
const (
31+
MinVectorDims = 1
32+
MaxVectorDims = 2048
33+
)
34+
2935
func NewVectorFieldMapping() *FieldMapping {
3036
return &FieldMapping{
3137
Type: "vector",
@@ -136,12 +142,22 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
136142
// -----------------------------------------------------------------------------
137143
// document validation functions
138144

139-
func validateVectorField(field *FieldMapping) error {
140-
if field.Dims <= 0 || field.Dims > 2048 {
141-
return fmt.Errorf("invalid vector dimension,"+
142-
" value should be in range (%d, %d)", 0, 2048)
145+
func validateFieldMapping(field *FieldMapping, parentName string,
146+
fieldAliasCtx map[string]*FieldMapping) error {
147+
switch field.Type {
148+
case "vector":
149+
return validateVectorFieldAlias(field, parentName, fieldAliasCtx)
150+
default: // non-vector field
151+
return validateFieldType(field)
143152
}
153+
}
144154

155+
func validateVectorFieldAlias(field *FieldMapping, parentName string,
156+
fieldAliasCtx map[string]*FieldMapping) error {
157+
158+
if field.Name == "" {
159+
field.Name = parentName
160+
}
145161
if field.Similarity == "" {
146162
field.Similarity = index.DefaultSimilarityMetric
147163
}
@@ -154,21 +170,40 @@ func validateVectorField(field *FieldMapping) error {
154170
field.DocValues = false
155171
field.SkipFreqNorm = true
156172

173+
// # If a vector field with this name was already validated, this field is an alias of it: its dims and similarity must match that field's
174+
// note: reading from a nil map is safe
175+
if fieldAlias, ok := fieldAliasCtx[field.Name]; ok {
176+
if field.Dims != fieldAlias.Dims {
177+
return fmt.Errorf("field: '%s', invalid alias "+
178+
"(different dimensions %d and %d)", fieldAlias.Name, field.Dims,
179+
fieldAlias.Dims)
180+
}
181+
182+
if field.Similarity != fieldAlias.Similarity {
183+
return fmt.Errorf("field: '%s', invalid alias "+
184+
"(different similarity values %s and %s)", fieldAlias.Name,
185+
field.Similarity, fieldAlias.Similarity)
186+
}
187+
188+
return nil
189+
}
190+
191+
// # Validate field options
192+
193+
if field.Dims < MinVectorDims || field.Dims > MaxVectorDims {
194+
return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+
195+
" value should be in range (%d, %d)", field.Name, field.Dims,
196+
MinVectorDims, MaxVectorDims)
197+
}
198+
157199
if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok {
158-
return fmt.Errorf("invalid similarity metric: '%s', "+
159-
"valid metrics are: %+v", field.Similarity,
200+
return fmt.Errorf("field: '%s', invalid similarity "+
201+
"metric: '%s', valid metrics are: %+v", field.Name, field.Similarity,
160202
reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys())
161203
}
162204

163-
return nil
164-
}
165-
166-
func validateFieldType(fieldType string) error {
167-
switch fieldType {
168-
case "text", "datetime", "number", "boolean", "geopoint", "geoshape",
169-
"IP", "vector":
170-
default:
171-
return fmt.Errorf("unknown field type: '%s'", fieldType)
205+
if fieldAliasCtx != nil { // writing to a nil map is unsafe
206+
fieldAliasCtx[field.Name] = field
172207
}
173208

174209
return nil

mapping/mapping_vectors_test.go

Lines changed: 180 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,186 @@
1717

1818
package mapping
1919

20-
import (
21-
"testing"
22-
)
20+
import "testing"
21+
22+
func TestVectorFieldAliasValidation(t *testing.T) {
23+
tests := []struct {
24+
// input
25+
name string // name of the test
26+
mappingStr string // index mapping json string
27+
28+
// expected output
29+
expValidity bool // validity of the mapping
30+
errMsg string // error message, given expValidity is false
31+
}{
32+
{
33+
name: "test1",
34+
mappingStr: `
35+
{
36+
"default_mapping": {
37+
"properties": {
38+
"cityVec": {
39+
"fields": [
40+
{
41+
"type": "vector",
42+
"dims": 3
43+
},
44+
{
45+
"name": "cityVec",
46+
"type": "vector",
47+
"dims": 4
48+
}
49+
]
50+
}
51+
}
52+
}
53+
}`,
54+
expValidity: false,
55+
errMsg: `field: 'cityVec', invalid alias (different dimensions 4 and 3)`,
56+
},
57+
{
58+
name: "test2",
59+
mappingStr: `
60+
{
61+
"default_mapping": {
62+
"properties": {
63+
"cityVec": {
64+
"fields": [
65+
{
66+
"type": "vector",
67+
"dims": 3,
68+
"similarity": "l2_norm"
69+
},
70+
{
71+
"name": "cityVec",
72+
"type": "vector",
73+
"dims": 3,
74+
"similarity": "dot_product"
75+
}
76+
]
77+
}
78+
}
79+
}
80+
}`,
81+
expValidity: false,
82+
errMsg: `field: 'cityVec', invalid alias (different similarity values dot_product and l2_norm)`,
83+
},
84+
{
85+
name: "test3",
86+
mappingStr: `
87+
{
88+
"default_mapping": {
89+
"properties": {
90+
"cityVec": {
91+
"fields": [
92+
{
93+
"type": "vector",
94+
"dims": 3
95+
},
96+
{
97+
"name": "cityVec",
98+
"type": "vector",
99+
"dims": 3
100+
}
101+
]
102+
}
103+
}
104+
}
105+
}`,
106+
expValidity: true,
107+
errMsg: "",
108+
},
109+
{
110+
name: "test4",
111+
mappingStr: `
112+
{
113+
"default_mapping": {
114+
"properties": {
115+
"cityVec": {
116+
"fields": [
117+
{
118+
"name": "vecData",
119+
"type": "vector",
120+
"dims": 4
121+
}
122+
]
123+
},
124+
"countryVec": {
125+
"fields": [
126+
{
127+
"name": "vecData",
128+
"type": "vector",
129+
"dims": 3
130+
}
131+
]
132+
}
133+
}
134+
}
135+
}`,
136+
expValidity: false,
137+
errMsg: `field: 'vecData', invalid alias (different dimensions 3 and 4)`,
138+
},
139+
{
140+
name: "test5",
141+
mappingStr: `
142+
{
143+
"default_mapping": {
144+
"properties": {
145+
"cityVec": {
146+
"fields": [
147+
{
148+
"name": "vecData",
149+
"type": "vector",
150+
"dims": 3
151+
}
152+
]
153+
}
154+
}
155+
},
156+
"types": {
157+
"type1": {
158+
"properties": {
159+
"cityVec": {
160+
"fields": [
161+
{
162+
"name": "vecData",
163+
"type": "vector",
164+
"dims": 4
165+
}
166+
]
167+
}
168+
}
169+
}
170+
}
171+
}`,
172+
expValidity: false,
173+
errMsg: `field: 'vecData', invalid alias (different dimensions 4 and 3)`,
174+
},
175+
}
176+
177+
for _, test := range tests {
178+
t.Run(test.name, func(t *testing.T) {
179+
im := NewIndexMapping()
180+
err := im.UnmarshalJSON([]byte(test.mappingStr))
181+
if err != nil {
182+
t.Fatalf("failed to unmarshal index mapping: %v", err)
183+
}
184+
185+
err = im.Validate()
186+
isValid := err == nil
187+
if test.expValidity != isValid {
188+
t.Fatalf("validity mismatch, expected: %v, got: %v",
189+
test.expValidity, isValid)
190+
}
191+
192+
if !isValid && err.Error() != test.errMsg {
193+
t.Fatalf("invalid error message, expected: %v, got: %v",
194+
test.errMsg, err.Error())
195+
}
196+
})
197+
}
198+
}
199+
23200

24201
// A test case for processVector function
25202
type vectorTest struct {

0 commit comments

Comments
 (0)