Skip to content

Commit 4a6b26e

Browse files
liaoxin01xiaokang
authored and committed
[fix](agg) Aggregating string types with null values may result in incorrect result (#42067)
Aggregating string types with null values may result in incorrect result because using the replace_column_data function can cause incorrect offsets in the column. A reproducible case: ``` CREATE TABLE `test_scan_keys_with_bool_type` ( `col1` tinyint NOT NULL, `col2` int NOT NULL, `col3` tinyint NOT NULL, `col5` boolean REPLACE NOT NULL, `col4` datetime(2) REPLACE NOT NULL, `col6` double REPLACE_IF_NOT_NULL NULL, `col7` varchar(100) REPLACE_IF_NOT_NULL NULL ) ENGINE=OLAP AGGREGATE KEY(`col1`, `col2`, `col3`) DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true" ); insert into test_scan_keys_with_bool_type values ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), ( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "hi" ), ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ); insert into test_scan_keys_with_bool_type values ( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "doris" ); MySQL [test]> select * from test_scan_keys_with_bool_type; +------+------+------+------+------------------------+---------------------+-------+ | col1 | col2 | col3 | col5 | col4 | col6 | col7 | +------+------+------+------+------------------------+---------------------+-------+ | -100 | 0 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hi | | -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | NULL | | -100 | 1 | 1 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hidor | | -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 | NULL | +------+------+------+------+------------------------+---------------------+-------+ 4 rows in set (0.057 sec) MySQL [test]> set skip_storage_engine_merge = true; select * from test_scan_keys_with_bool_type; +------+------+------+------+------------------------+---------------------+-------+ | col1 | col2 | col3 | col5 | col4 | col6 
| col7 | +------+------+------+------+------------------------+---------------------+-------+ | -100 | 0 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hi | | -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | NULL | | -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 | NULL | | -100 | 1 | 1 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | doris | +------+------+------+------+------------------------+---------------------+-------+ 4 rows in set (0.023 sec) ``` #33493 resolved this issue by supporting variant type aggregation, so versions after 2.1 do not have this issue.
1 parent 0f65853 commit 4a6b26e

File tree

3 files changed

+40
-2
lines changed

3 files changed

+40
-2
lines changed

be/src/vec/olap/block_reader.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -470,10 +470,10 @@ size_t BlockReader::_copy_agg_data() {
470470
auto& dst_column = _stored_data_columns[idx];
471471
if (_stored_has_variable_length_tag[idx]) {
472472
//variable length type should replace ordered
473+
dst_column->clear();
473474
for (size_t i = 0; i < copy_size; i++) {
474475
auto& ref = _stored_row_ref[i];
475-
dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
476-
ref.row_pos, i);
476+
dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos);
477477
}
478478
} else {
479479
for (auto& it : _temp_ref_map) {

regression-test/data/data_model_p0/aggregate/test_aggregate_table.out

+6
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,9 @@ datetimev2_value_min_2 datetime(6) Yes false \N MIN
4646
datetimev2_value_replace_2 datetime(6) Yes false \N REPLACE
4747
datetimev2_value_replace_if_not_null_2 datetime(6) Yes false \N REPLACE_IF_NOT_NULL
4848

49+
-- !string_agg_table_with_null --
50+
-100 0 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 hi
51+
-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
52+
-100 1 1 true 2024-02-16T04:37:37 1.399962421904282E9 doris
53+
-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7 NULL
54+

regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy

+32
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,36 @@ suite("test_aggregate_table") {
9999
qt_desc_date_table """desc date_agg"""
100100
sql """DROP TABLE date_agg"""
101101

102+
sql """DROP TABLE IF EXISTS test_string_agg_with_null"""
103+
sql """
104+
CREATE TABLE `test_string_agg_with_null` (
105+
`col1` tinyint NOT NULL,
106+
`col2` int NOT NULL,
107+
`col3` tinyint NOT NULL,
108+
`col5` boolean REPLACE NOT NULL,
109+
`col4` datetime(2) REPLACE NOT NULL,
110+
`col6` double REPLACE_IF_NOT_NULL NULL,
111+
`col7` varchar(100) REPLACE_IF_NOT_NULL NULL
112+
) ENGINE=OLAP
113+
AGGREGATE KEY(`col1`, `col2`, `col3`)
114+
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
115+
PROPERTIES (
116+
"replication_allocation" = "tag.location.default: 1",
117+
"disable_auto_compaction" = "true"
118+
);
119+
"""
120+
121+
sql """ insert into test_string_agg_with_null values
122+
( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
123+
( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "hi" ),
124+
( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , "NULL" );
125+
"""
126+
127+
sql """ insert into test_string_agg_with_null values
128+
( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' , 1399962421.904282 , "doris" );
129+
"""
130+
131+
qt_string_agg_table_with_null """ select * from test_string_agg_with_null """
132+
sql """DROP TABLE test_string_agg_with_null"""
133+
102134
}

0 commit comments

Comments
 (0)