Skip to content

Commit 9a987a1

Browse files
authored
Merge pull request #1248 from zhicwu/main
fix bitmap64 serialization issue
2 parents c6744b7 + 1f3ef08 commit 9a987a1

File tree

4 files changed

+73
-33
lines changed

4 files changed

+73
-33
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* potential endless loop when handling batch update error. [#1233](https://github.com/ClickHouse/clickhouse-java/issues/1233)
66
* exception when deserializing Array(FixedString(2)) from RowBinary. [#1235](https://github.com/ClickHouse/clickhouse-java/issues/1235)
77
* deserialization failure of Nested data type. [#1240](https://github.com/ClickHouse/clickhouse-java/issues/1240)
8+
* fix 64bit bitmap serialization issue. [#641](https://github.com/ClickHouse/clickhouse-java/issues/641), [#874](https://github.com/ClickHouse/clickhouse-java/issues/874), [#1141](https://github.com/ClickHouse/clickhouse-java/issues/1141)
89

910
## 0.4.0, 2023-01-19
1011
### Breaking changes

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,18 @@ Java libraries for connecting to ClickHouse and processing data in various forma
1313
| API | [JDBC](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) | :white_check_mark: | |
1414
| | [R2DBC](https://r2dbc.io/) | :white_check_mark: | supported since 0.4.0 |
1515
| Query Language | SQL | :white_check_mark: | |
16-
| | [PRQL](https://prql-lang.org/) | :x: | |
17-
| | [GraphQL](https://graphql.org/) | :x: | |
16+
| | [PRQL](https://prql-lang.org/) | :x: | will be available in 0.4.2 |
17+
| | [GraphQL](https://graphql.org/) | :x: | will be available in 0.4.2 |
1818
| Protocol | [HTTP](https://clickhouse.com/docs/en/interfaces/http/) | :white_check_mark: | recommended, defaults to `java.net.HttpURLConnection` and it can be changed to `java.net.http.HttpClient`(unstable) or `Apache HTTP Client 5`. Note that the latter was added in 0.4.0 to support custom socket options. |
19-
| | [gRPC](https://clickhouse.com/docs/en/interfaces/grpc/) | :white_check_mark: | still experimental, works with 22.3+, known to has [issue](https://github.com/ClickHouse/ClickHouse/issues/28671#issuecomment-1087049993) when using LZ4 compression |
19+
| | [gRPC](https://clickhouse.com/docs/en/interfaces/grpc/) | :white_check_mark: | :warning: experimental, works with 22.3+, known to has issue with lz4 compression and may cause high memory usage on server |
2020
| | [TCP/Native](https://clickhouse.com/docs/en/interfaces/tcp/) | :white_check_mark: | `clickhouse-cli-client`(wrapper of ClickHouse native command-line client) was added in 0.3.2-patch10, `clickhouse-tcp-client` will be available in 0.5 |
2121
| | [Local/File](https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/) | :x: | `clickhouse-cli-client` will be enhanced to support `clickhouse-local` |
2222
| Compatibility | Server < 20.7 | :x: | use 0.3.1-patch(or 0.2.6 if you're stuck with JDK 7) |
2323
| | Server >= 20.7 | :white_check_mark: | use 0.3.2 or above. All [active releases](https://github.com/ClickHouse/ClickHouse/pulls?q=is%3Aopen+is%3Apr+label%3Arelease) are supported. |
2424
| Compression | [lz4](https://lz4.github.io/lz4/) | :white_check_mark: | default |
2525
| | [zstd](https://facebook.github.io/zstd/) | :white_check_mark: | supported since 0.4.0, works with ClickHouse 22.10+ |
2626
| Data Format | RowBinary | :white_check_mark: | `RowBinaryWithNamesAndTypes` for query and `RowBinary` for insertion |
27-
| | TabSeparated | :white_check_mark: | Does not support as many data types as RowBinary |
27+
| | TabSeparated | :white_check_mark: | :warning: does not support as many data types as RowBinary |
2828
| Data Type | AggregatedFunction | :x: | :warning: limited to `groupBitmap`; 64bit bitmap was NOT working properly before 0.4.1 |
2929
| | Array(\*) | :white_check_mark: | |
3030
| | Bool | :white_check_mark: | |

clickhouse-data/src/main/java/com/clickhouse/data/value/ClickHouseBitmap.java

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -172,18 +172,14 @@ public void serialize(ByteBuffer buffer) {
172172
try (ByteArrayOutputStream bas = new ByteArrayOutputStream(size)) {
173173
DataOutput out = new DataOutputStream(bas);
174174
try {
175-
// https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1105
176-
rb.serialize(out);
175+
// https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1253
176+
rb.serializePortable(out);
177177
} catch (IOException e) {
178178
throw new IllegalArgumentException("Failed to serialize given bitmap", e);
179179
}
180180

181181
byte[] bytes = bas.toByteArray();
182-
for (int i = 4; i > 0; i--) {
183-
buffer.put(bytes[i]);
184-
}
185-
buffer.putInt(0);
186-
buffer.put(bytes, 5, size - 5);
182+
buffer.put(bytes, 0, size);
187183
} catch (IOException e) {
188184
throw new IllegalStateException("Failed to serialize given bitmap", e);
189185
}
@@ -196,6 +192,13 @@ public int serializedSizeInBytes() {
196192

197193
@Override
198194
public long serializedSizeInBytesAsLong() {
195+
// no idea why it's implemented this way...
196+
// https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1371-L1380
197+
// TODO completely drop RoaringBitmap dependency
198+
if (Roaring64NavigableMap.SERIALIZATION_MODE != Roaring64NavigableMap.SERIALIZATION_MODE_PORTABLE) {
199+
throw new IllegalStateException(
200+
"Please change Roaring64NavigableMap.SERIALIZATION_MODE to portable first");
201+
}
199202
return rb.serializedSizeInBytes();
200203
}
201204

@@ -364,20 +367,10 @@ public static ClickHouseBitmap deserialize(DataInputStream in, ClickHouseDataTyp
364367
b.deserialize(flip(newBuffer(len).put(bytes)));
365368
rb = ClickHouseBitmap.wrap(b, innerType);
366369
} else {
367-
// TODO implement a wrapper of DataInput to get rid of byte array here
368-
bytes[0] = (byte) 0; // always unsigned
369-
// read map size in big-endian byte order
370-
for (int i = 4; i > 0; i--) {
371-
bytes[i] = in.readByte();
372-
}
373-
if (in.readByte() != 0 || in.readByte() != 0 || in.readByte() != 0 || in.readByte() != 0) { // NOSONAR
374-
throw new IllegalStateException(
375-
"Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!");
376-
}
377-
// read the rest
378-
in.readFully(bytes, 5, len - 8);
370+
in.readFully(bytes, 0, len);
379371
Roaring64NavigableMap b = new Roaring64NavigableMap();
380-
b.deserialize(new DataInputStream(new ByteArrayInputStream(bytes)));
372+
// https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1337
373+
b.deserializePortable(new DataInputStream(new ByteArrayInputStream(bytes)));
381374
rb = ClickHouseBitmap.wrap(b, innerType);
382375
}
383376
}
@@ -564,9 +557,9 @@ public long[] toLongArray() {
564557
public ByteBuffer toByteBuffer() {
565558
ByteBuffer buf;
566559

567-
int cardinality = getCardinality();
568-
if (cardinality <= 32) {
569-
buf = newBuffer(2 + byteLen * cardinality);
560+
long cardinality = getLongCardinality();
561+
if (cardinality <= 32L) {
562+
buf = newBuffer(2 + byteLen * (int) cardinality);
570563
buf.put((byte) 0);
571564
buf.put((byte) cardinality);
572565
if (byteLen == 1) {
@@ -595,12 +588,7 @@ public ByteBuffer toByteBuffer() {
595588
ClickHouseByteUtils.setVarInt(buf, size);
596589
serialize(buf);
597590
} else { // 64
598-
// 1) deduct one to exclude the leading byte - boolean flag, see below:
599-
// https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1107
600-
// 2) add 4 bytes because CRoaring uses long to store count of 32-bit bitmaps,
601-
// while Java uses int - see
602-
// https://github.com/RoaringBitmap/CRoaring/blob/v0.2.66/cpp/roaring64map.hh#L597
603-
long size = serializedSizeInBytesAsLong() - 1 + 4;
591+
long size = serializedSizeInBytesAsLong();
604592
int varIntSize = ClickHouseByteUtils.getVarLongSize(size);
605593
// TODO add serialize(DataOutput) to handle more
606594
int intSize = (int) size;

clickhouse-jdbc/src/test/java/com/clickhouse/jdbc/ClickHouseStatementTest.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,14 @@
3838
import com.clickhouse.client.http.config.ClickHouseHttpOption;
3939
import com.clickhouse.data.ClickHouseDataType;
4040
import com.clickhouse.data.ClickHouseValues;
41+
import com.clickhouse.data.value.ClickHouseBitmap;
4142
import com.clickhouse.data.value.ClickHouseDateTimeValue;
4243
import com.clickhouse.data.value.UnsignedByte;
4344
import com.clickhouse.data.value.UnsignedInteger;
4445
import com.clickhouse.data.value.UnsignedLong;
4546
import com.clickhouse.data.value.UnsignedShort;
4647

48+
import org.roaringbitmap.longlong.Roaring64NavigableMap;
4749
import org.testng.Assert;
4850
import org.testng.SkipException;
4951
import org.testng.annotations.DataProvider;
@@ -65,6 +67,55 @@ private Object[][] getConnectionProperties() {
6567
new Object[] { emptyProps }, new Object[] { sessionProps } };
6668
}
6769

70+
@Test(groups = "integration")
71+
public void testBitmap64() throws SQLException {
72+
Properties props = new Properties();
73+
String sql = "select k,\n"
74+
+ "[ tuple(arraySort(groupUniqArrayIf(n, n > 33)), groupBitmapStateIf(n, n > 33)),\n"
75+
+ " tuple(arraySort(groupUniqArrayIf(n, n < 32)), groupBitmapStateIf(n, n < 32)),\n"
76+
+ " tuple(arraySort(groupUniqArray(n)), groupBitmapState(n)),\n"
77+
+ " tuple(arraySort(groupUniqArray(v)), groupBitmapState(v))\n"
78+
+ "]::Array(Tuple(Array(UInt64), AggregateFunction(groupBitmap, UInt64))) v\n"
79+
+ "from (select 'k' k, (number % 33)::UInt64 as n, (9223372036854775807 + number::Int16)::UInt64 v from numbers(300000))\n"
80+
+ "group by k";
81+
try (ClickHouseConnection conn = newConnection(props);
82+
ClickHouseStatement stmt = conn.createStatement()) {
83+
stmt.execute("drop table if exists test_bitmap64_serde; "
84+
+ "create table test_bitmap64_serde(k String, v Array(Tuple(Array(UInt64), AggregateFunction(groupBitmap, UInt64))))engine=Memory");
85+
try (PreparedStatement ps = conn.prepareStatement("insert into test_bitmap64_serde");
86+
ResultSet rs = stmt.executeQuery(sql)) {
87+
Assert.assertTrue(rs.next());
88+
Assert.assertEquals(rs.getString(1), "k");
89+
ps.setString(1, rs.getString(1));
90+
Object[] values = (Object[]) rs.getObject(2);
91+
ps.setObject(2, values);
92+
Assert.assertEquals(values.length, 4);
93+
for (int i = 0; i < values.length; i++) {
94+
List<?> tuple = (List<?>) values[i];
95+
Assert.assertEquals(tuple.size(), 2);
96+
long[] nums = (long[]) tuple.get(0);
97+
ClickHouseBitmap bitmap = (ClickHouseBitmap) tuple.get(1);
98+
Roaring64NavigableMap bitmap64 = (Roaring64NavigableMap) bitmap.unwrap();
99+
Assert.assertEquals(nums.length, bitmap64.getLongCardinality());
100+
for (int j = 0; j < nums.length; j++) {
101+
Assert.assertTrue(bitmap64.contains(nums[j]), "Bitmap does not contain value: " + nums[j]);
102+
}
103+
}
104+
Assert.assertFalse(rs.next());
105+
106+
// Assert.assertThrows(IllegalStateException.class, () -> ps.executeUpdate());
107+
Roaring64NavigableMap.SERIALIZATION_MODE = Roaring64NavigableMap.SERIALIZATION_MODE_PORTABLE;
108+
ps.executeUpdate();
109+
}
110+
111+
stmt.execute("insert into test_bitmap64_serde\n" + sql);
112+
try (ResultSet rs = stmt.executeQuery("select distinct * from test_bitmap64_serde")) {
113+
Assert.assertTrue(rs.next(), "Should have at least one row");
114+
Assert.assertFalse(rs.next(), "Should have only one unique row");
115+
}
116+
}
117+
}
118+
68119
@Test(groups = "integration")
69120
public void testDialect() throws SQLException {
70121
Properties props = new Properties();

0 commit comments

Comments
 (0)