Skip to content

Commit e565e0b

Browse files
authored
feat(vector): support time travel query for vector index (#23464)
1 parent 278d8e5 commit e565e0b

File tree

12 files changed

+375
-237
lines changed

12 files changed

+375
-237
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
statement ok
22
set query_mode = distributed;
33

4-
include vector_nearest.slt.part
4+
include vector_index.slt.part
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
statement ok
2+
create table items (id int primary key, extra string, text string, embedding vector(3)) append only;
3+
4+
statement ok
5+
create materialized view no_index_mv as select * from items;
6+
7+
statement ok
8+
insert into items values (1, 'extra1', 'first', get_embedding('first'));
9+
10+
statement ok
11+
create index i on items using flat (get_embedding(text)) include(text) with (distance_type = 'l2');
12+
13+
statement ok
14+
insert into items values (2, 'extra2', 'second', get_embedding('second')), (3, 'extra3', 'third', '[7, 8, 9]'::vector(3));
15+
16+
statement ok
17+
flush;
18+
19+
query T
20+
select * from items order by id;
21+
----
22+
1 extra1 first [1,2,3]
23+
2 extra2 second [4,5,6]
24+
3 extra3 third [7,8,9]
25+
26+
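# test covering index on functional embedding column: the no_index_mv query gives the index-free baseline, then the same query on items must go through index i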
27+
query T
28+
select id, text, power(distance, 2)::int from (select id, text, get_embedding('query') <-> get_embedding(text) as distance from no_index_mv order by distance limit 2) order by distance;
29+
----
30+
1 first 8
31+
2 second 35
32+
33+
statement ok
34+
create view query_view as select id, text, power(distance, 2)::int as distance from (select id, text, get_embedding('query') <-> get_embedding(text) as distance from items order by distance limit 2);
35+
36+
# ensure that vector index is used
37+
query T
38+
explain(verbose) select * from query_view;
39+
----
40+
<slt:ignore>BatchProject { exprs: [Field(Unnest($1), 1:Int32) as $expr1, Field(Unnest($1), 0:Int32) as $expr2, Pow(Field(Unnest($1), 2:Int32), 2:Float64)::Int32 as $expr3] }
41+
└─BatchProjectSet { select_list: [Unnest($1)] }
42+
└─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true }
43+
└─BatchValues { rows: [[OpenaiEmbedding('{"api_base": "http://127.0.0.1:8088/v1", "model": "model"}':Jsonb, 'query':Varchar)::Vector(3)]] }
44+
45+
46+
query T
47+
select * from query_view order by distance;
48+
----
49+
1 first 8
50+
2 second 35
51+
52+
statement ok
53+
drop view query_view;
54+
55+
# test non-covering index on functional embedding column: `extra` is not in include(...), so the indexed plan must look it up from items
56+
query T
57+
select id, text, extra, power(distance, 2)::int from (select id, text, extra, get_embedding('query') <-> get_embedding(text) as distance from no_index_mv order by distance limit 2) order by distance;
58+
----
59+
1 first extra1 8
60+
2 second extra2 35
61+
62+
statement ok
63+
create view query_view as select id, text, extra, power(distance, 2)::int as distance from (select id, text, extra, get_embedding('query') <-> get_embedding(text) as distance from items order by distance limit 2);
64+
65+
# ensure that vector index is used
66+
query T
67+
explain(verbose) select * from query_view;
68+
----
69+
<slt:ignore>BatchProject { exprs: [$expr2, $expr1, items.extra, Pow($expr3, 2:Float64)::Int32 as $expr4] }
70+
<slt:ignore>└─BatchLookupJoin { type: Inner, predicate: $expr2 IS NOT DISTINCT FROM items.id, output: [$expr2, items.extra, $expr1, $expr3], lookup table: items }
71+
<slt:ignore> └─BatchProject { exprs: [Field(Unnest($1), 0:Int32) as $expr1, Field(Unnest($1), 1:Int32) as $expr2, Field(Unnest($1), 2:Int32) as $expr3] }
72+
<slt:ignore> └─BatchProjectSet { select_list: [Unnest($1)] }
73+
<slt:ignore> └─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true }
74+
<slt:ignore> └─BatchValues { rows: [[OpenaiEmbedding('{"api_base": "http://127.0.0.1:8088/v1", "model": "model"}':Jsonb, 'query':Varchar)::Vector(3)]] }
75+
76+
query T
77+
select * from query_view order by distance;
78+
----
79+
1 first extra1 8
80+
2 second extra2 35
81+
82+
statement ok
83+
drop view query_view;
84+
85+
statement ok
86+
drop index i;
87+
88+
statement ok
89+
drop materialized view no_index_mv;
90+
91+
statement ok
92+
drop table items;
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
statement ok
2+
create table items (id int primary key, extra string, text string, embedding vector(3)) append only;
3+
4+
statement ok
5+
insert into items values (1, 'extra1', 'first', get_embedding('first'));
6+
7+
statement ok
8+
create index i on items using hnsw (embedding) include(text) with (distance_type = 'l2');
9+
10+
statement ok
11+
insert into items values (2, 'extra2', 'second', get_embedding('second')), (3, 'extra3', 'third', '[7, 8, 9]'::vector(3));
12+
13+
statement ok
14+
flush;
15+
16+
query T
17+
select * from items order by id;
18+
----
19+
1 extra1 first [1,2,3]
20+
2 extra2 second [4,5,6]
21+
3 extra3 third [7,8,9]
22+
23+
statement ok
24+
create view query_view as select id, text, power(distance, 2)::int as distance from (select id, text, '[3,2,1]'::vector(3) <-> embedding as distance from items order by distance limit 2);
25+
26+
statement ok
27+
set batch_hnsw_ef_search = default;
28+
29+
# ensure that vector index is used and that the default hnsw_ef_search (40) shows up in the plan
30+
query T
31+
explain(verbose) select * from query_view;
32+
----
33+
<slt:ignore>BatchProject { exprs: [Field(Unnest($1), 1:Int32) as $expr1, Field(Unnest($1), 0:Int32) as $expr2, Pow(Field(Unnest($1), 2:Int32), 2:Float64)::Int32 as $expr3] }
34+
<slt:ignore>└─BatchProjectSet { select_list: [Unnest($1)] }
35+
<slt:ignore> └─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true, hnsw_ef_search: 40 }
36+
<slt:ignore> └─BatchValues { rows: [['[3,2,1]':Vector(3)]] }
37+
38+
statement ok
39+
set batch_hnsw_ef_search = 20;
40+
41+
# ensure that vector index is used and that the session override hnsw_ef_search = 20 shows up in the plan
42+
query T
43+
explain(verbose) select * from query_view;
44+
----
45+
<slt:ignore>BatchProject { exprs: [Field(Unnest($1), 1:Int32) as $expr1, Field(Unnest($1), 0:Int32) as $expr2, Pow(Field(Unnest($1), 2:Int32), 2:Float64)::Int32 as $expr3] }
46+
<slt:ignore>└─BatchProjectSet { select_list: [Unnest($1)] }
47+
<slt:ignore> └─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true, hnsw_ef_search: 20 }
48+
<slt:ignore> └─BatchValues { rows: [['[3,2,1]':Vector(3)]] }
49+
50+
query T
51+
select * from query_view order by distance;
52+
----
53+
1 first 8
54+
2 second 35
55+
56+
statement ok
57+
drop view query_view;
58+
59+
statement ok
60+
drop index i;
61+
62+
statement ok
63+
drop table items;

e2e_test/vector_search/local.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
statement ok
22
set query_mode = local;
33

4-
include vector_nearest.slt.part
4+
include vector_index.slt.part
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
statement ok
2+
create table items (id int primary key, extra string, text string, embedding vector(3)) append only;
3+
4+
statement ok
5+
create materialized view no_index_mv as select * from items;
6+
7+
statement ok
8+
insert into items values (1, 'extra1', 'first', get_embedding('first'));
9+
10+
statement ok
11+
create index i on items using flat (embedding) include(text) with (distance_type = 'l2');
12+
13+
statement ok
14+
insert into items values (2, 'extra2', 'second', get_embedding('second')), (3, 'extra3', 'third', '[7, 8, 9]'::vector(3));
15+
16+
statement ok
17+
flush;
18+
19+
query T
20+
select * from items order by id;
21+
----
22+
1 extra1 first [1,2,3]
23+
2 extra2 second [4,5,6]
24+
3 extra3 third [7,8,9]
25+
26+
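# test covering index on raw embedding column: the no_index_mv query gives the index-free baseline, then the same query on items must go through index i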
27+
query T
28+
select id, text, power(distance, 2)::int from (select id, text, '[3,2,1]'::vector(3) <-> embedding as distance from no_index_mv order by distance limit 2) order by distance;
29+
----
30+
1 first 8
31+
2 second 35
32+
33+
statement ok
34+
create view query_view as select id, text, power(distance, 2)::int as distance from (select id, text, '[3,2,1]'::vector(3) <-> embedding as distance from items order by distance limit 2);
35+
36+
# ensure that vector index is used
37+
query T
38+
explain(verbose) select * from query_view;
39+
----
40+
<slt:ignore>BatchProject { exprs: [Field(Unnest($1), 1:Int32) as $expr1, Field(Unnest($1), 0:Int32) as $expr2, Pow(Field(Unnest($1), 2:Int32), 2:Float64)::Int32 as $expr3] }
41+
└─BatchProjectSet { select_list: [Unnest($1)] }
42+
└─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true }
43+
└─BatchValues { rows: [['[3,2,1]':Vector(3)]] }
44+
45+
46+
query T
47+
select * from query_view order by distance;
48+
----
49+
1 first 8
50+
2 second 35
51+
52+
statement ok
53+
drop view query_view;
54+
55+
# test non-covering index on raw embedding column: `extra` is not in include(...), so the indexed plan must look it up from items
56+
query T
57+
select id, text, extra, power(distance, 2)::int from (select id, text, extra, get_embedding('query') <-> embedding as distance from no_index_mv order by distance limit 2) order by distance;
58+
----
59+
1 first extra1 8
60+
2 second extra2 35
61+
62+
statement ok
63+
create view query_view as select id, text, extra, power(distance, 2)::int as distance from (select id, text, extra, get_embedding('query') <-> embedding as distance from items order by distance limit 2);
64+
65+
# ensure that vector index is used
66+
query T
67+
explain(verbose) select * from query_view;
68+
----
69+
<slt:ignore>BatchProject { exprs: [$expr2, $expr1, items.extra, Pow($expr3, 2:Float64)::Int32 as $expr4] }
70+
<slt:ignore>└─BatchLookupJoin { type: Inner, predicate: $expr2 IS NOT DISTINCT FROM items.id, output: [$expr2, items.extra, $expr1, $expr3], lookup table: items }
71+
<slt:ignore> └─BatchProject { exprs: [Field(Unnest($1), 0:Int32) as $expr1, Field(Unnest($1), 1:Int32) as $expr2, Field(Unnest($1), 2:Int32) as $expr3] }
72+
<slt:ignore> └─BatchProjectSet { select_list: [Unnest($1)] }
73+
<slt:ignore> └─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true }
74+
<slt:ignore> └─BatchValues { rows: [[OpenaiEmbedding('{"api_base": "http://127.0.0.1:8088/v1", "model": "model"}':Jsonb, 'query':Varchar)::Vector(3)]] }
75+
76+
query T
77+
select * from query_view order by distance;
78+
----
79+
1 first extra1 8
80+
2 second extra2 35
81+
82+
statement ok
83+
drop view query_view;
84+
85+
statement ok
86+
drop index i;
87+
88+
statement ok
89+
drop materialized view no_index_mv;
90+
91+
statement ok
92+
drop table items;
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
statement ok
2+
create table items (id int primary key, extra string, text string, embedding vector(3)) append only;
3+
4+
statement ok
5+
insert into items values (2, 'extra2', 'second', get_embedding('second'));
6+
7+
statement ok
8+
create index i on items using flat (embedding) include(text) with (distance_type = 'l2');
9+
10+
statement ok
11+
insert into items values (3, 'extra3', 'third', '[7, 8, 9]'::vector(3));
12+
13+
statement ok
14+
flush;
15+
16+
query T
17+
select * from items order by id;
18+
----
19+
2 extra2 second [4,5,6]
20+
3 extra3 third [7,8,9]
21+
22+
statement ok
23+
create view cover_index_query_view as select id, text, power(distance, 2)::int as distance from (select id, text, '[3,2,1]'::vector(3) <-> embedding as distance from items FOR SYSTEM_TIME AS OF now() - '3' second order by distance limit 2);
24+
25+
# ensure that vector index is used and that the time travel clause is carried into the vector search as as_of
26+
query T
27+
explain(verbose) select * from cover_index_query_view;
28+
----
29+
<slt:ignore>BatchProject { exprs: [Field(Unnest($1), 1:Int32) as $expr1, Field(Unnest($1), 0:Int32) as $expr2, Pow(Field(Unnest($1), 2:Int32), 2:Float64)::Int32 as $expr3] }
30+
└─BatchProjectSet { select_list: [Unnest($1)] }
31+
└─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true, as_of: ProcessTimeWithInterval(("3", Second)) }
32+
└─BatchValues { rows: [['[3,2,1]':Vector(3)]] }
33+
34+
statement ok
35+
create view non_cover_index_query_view as select id, text, extra, power(distance, 2)::int as distance from (select id, text, extra, get_embedding('query') <-> embedding as distance from items FOR SYSTEM_TIME AS OF now() - '3' second order by distance limit 2);
36+
37+
# ensure that vector index is used and that as_of is carried into both the lookup join and the vector search
38+
query T
39+
explain(verbose) select * from non_cover_index_query_view;
40+
----
41+
<slt:ignore>BatchProject { exprs: [$expr2, $expr1, items.extra, Pow($expr3, 2:Float64)::Int32 as $expr4] }
42+
<slt:ignore>└─BatchLookupJoin { type: Inner, predicate: $expr2 IS NOT DISTINCT FROM items.id, output: [$expr2, items.extra, $expr1, $expr3], lookup table: items, as_of: ProcessTimeWithInterval(("3", Second)) }
43+
<slt:ignore> └─BatchProject { exprs: [Field(Unnest($1), 0:Int32) as $expr1, Field(Unnest($1), 1:Int32) as $expr2, Field(Unnest($1), 2:Int32) as $expr3] }
44+
<slt:ignore> └─BatchProjectSet { select_list: [Unnest($1)] }
45+
<slt:ignore> └─BatchVectorSearch { top_n: 2, distance_type: L2Sqr, index_name: "i", vector: query_vector, lookup_output: [("text", Varchar), ("items.id", Int32)], include_distance: true, as_of: ProcessTimeWithInterval(("3", Second)) }
46+
<slt:ignore> └─BatchValues { rows: [[OpenaiEmbedding('{"api_base": "http://127.0.0.1:8088/v1", "model": "model"}':Jsonb, 'query':Varchar)::Vector(3)]] }
47+
48+
sleep 5s
49+
50+
statement ok
51+
insert into items values (1, 'extra1', 'first', get_embedding('first'));
52+
53+
statement ok
54+
flush;
55+
56+
query T
57+
select * from items order by id;
58+
----
59+
1 extra1 first [1,2,3]
60+
2 extra2 second [4,5,6]
61+
3 extra3 third [7,8,9]
62+
63+
query T
64+
select * from cover_index_query_view order by distance;
65+
----
66+
2 second 35
67+
3 third 116
68+
69+
query T
70+
select * from non_cover_index_query_view order by distance;
71+
----
72+
2 second extra2 35
73+
3 third extra3 116
74+
75+
sleep 5s
76+
77+
query T
78+
select * from cover_index_query_view order by distance;
79+
----
80+
1 first 8
81+
2 second 35
82+
83+
query T
84+
select * from non_cover_index_query_view order by distance;
85+
----
86+
1 first extra1 8
87+
2 second extra2 35
88+
89+
statement ok
90+
drop view non_cover_index_query_view;
91+
92+
statement ok
93+
drop view cover_index_query_view;
94+
95+
statement ok
96+
drop index i;
97+
98+
statement ok
99+
drop table items;
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
include create_embedding_udf.slt.part
2+
3+
include raw_column_index.slt.part
4+
include expression_column_index.slt.part
5+
include hnsw_index.slt.part
6+
include time_travel.slt.part
7+
8+
include drop_embedding_udf.slt.part

0 commit comments

Comments
 (0)