1
1
package ru .yandex .clickhouse .util ;
2
2
3
+ import java .io .ByteArrayInputStream ;
3
4
import java .io .ByteArrayOutputStream ;
4
5
import java .io .DataInputStream ;
5
6
import java .io .DataOutput ;
9
10
import java .nio .ByteBuffer ;
10
11
import java .nio .ByteOrder ;
11
12
import java .util .Objects ;
12
-
13
13
import org .roaringbitmap .RoaringBitmap ;
14
14
import org .roaringbitmap .buffer .ImmutableRoaringBitmap ;
15
15
import org .roaringbitmap .buffer .MutableRoaringBitmap ;
16
16
import org .roaringbitmap .longlong .Roaring64Bitmap ;
17
17
import org .roaringbitmap .longlong .Roaring64NavigableMap ;
18
-
19
18
import ru .yandex .clickhouse .domain .ClickHouseDataType ;
20
19
21
20
public abstract class ClickHouseBitmap {
@@ -131,16 +130,24 @@ public long getLongCardinality() {
131
130
@ Override
132
131
public void serialize (ByteBuffer buffer ) {
133
132
int size = serializedSizeInBytes ();
133
+ // TODO use custom data output so that we can handle large byte array
134
134
try (ByteArrayOutputStream bas = new ByteArrayOutputStream (size )) {
135
135
DataOutput out = new DataOutputStream (bas );
136
136
try {
137
+ // https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1105
137
138
rb .serialize (out );
138
139
} catch (IOException e ) {
139
140
throw new IllegalArgumentException ("Failed to serialize given bitmap" , e );
140
141
}
141
- buffer .put (bas .toByteArray (), 5 , size - 5 );
142
+
143
+ byte [] bytes = bas .toByteArray ();
144
+ for (int i = 4 ; i > 0 ; i --) {
145
+ buffer .put (bytes [i ]);
146
+ }
147
+ buffer .putInt (0 );
148
+ buffer .put (bytes , 5 , size - 5 );
142
149
} catch (IOException e ) {
143
- throw new IllegalArgumentException ("Failed to serialize given bitmap" , e );
150
+ throw new IllegalStateException ("Failed to serialize given bitmap" , e );
144
151
}
145
152
}
146
153
@@ -253,6 +260,8 @@ public static ClickHouseBitmap wrap(Object bitmap, ClickHouseDataType innerType)
253
260
}
254
261
255
262
public static ClickHouseBitmap deserialize (DataInputStream in , ClickHouseDataType innerType ) throws IOException {
263
+ final ClickHouseBitmap rb ;
264
+
256
265
int byteLen = byteLength (innerType );
257
266
int flag = in .readUnsignedByte ();
258
267
if (flag == 0 ) {
@@ -262,20 +271,36 @@ public static ClickHouseBitmap deserialize(DataInputStream in, ClickHouseDataTyp
262
271
bytes [1 ] = cardinality ;
263
272
in .read (bytes , 2 , bytes .length - 2 );
264
273
265
- return ClickHouseBitmap .deserialize (bytes , innerType );
266
- } else if ( byteLen <= 4 ) {
274
+ rb = ClickHouseBitmap .deserialize (bytes , innerType );
275
+ } else {
267
276
int len = Utils .readVarInt (in );
268
277
byte [] bytes = new byte [len ];
269
- Utils .readFully (in , bytes );
270
- RoaringBitmap b = new RoaringBitmap ();
271
- b .deserialize (flip (newBuffer (len ).put (bytes )));
272
- return ClickHouseBitmap .wrap (b , innerType );
273
- } else {
274
- // why? when serializing Roaring64NavigableMap, the initial 5 bytes were removed
275
- // with 8 unknown bytes appended
276
- throw new UnsupportedOperationException (
277
- "Deserializing Roaring64NavigableMap with cardinality larger than 32 is currently not supported." );
278
+
279
+ if (byteLen <= 4 ) {
280
+ Utils .readFully (in , bytes );
281
+ RoaringBitmap b = new RoaringBitmap ();
282
+ b .deserialize (flip (newBuffer (len ).put (bytes )));
283
+ rb = ClickHouseBitmap .wrap (b , innerType );
284
+ } else {
285
+ // TODO implement a wrapper of DataInput to get rid of byte array here
286
+ bytes [0 ] = (byte ) 0 ; // always unsigned
287
+ // read map size in big-endian byte order
288
+ for (int i = 4 ; i > 0 ; i --) {
289
+ bytes [i ] = in .readByte ();
290
+ }
291
+ if (in .readByte () != 0 || in .readByte () != 0 || in .readByte () != 0 || in .readByte () != 0 ) {
292
+ throw new IllegalStateException (
293
+ "Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!" );
294
+ }
295
+ // read the rest
296
+ Utils .readFully (in , bytes , 5 , len - 5 );
297
+ Roaring64NavigableMap b = new Roaring64NavigableMap ();
298
+ b .deserialize (new DataInputStream (new ByteArrayInputStream (bytes )));
299
+ rb = ClickHouseBitmap .wrap (b , innerType );
300
+ }
278
301
}
302
+
303
+ return rb ;
279
304
}
280
305
281
306
public static ClickHouseBitmap deserialize (byte [] bytes , ClickHouseDataType innerType ) throws IOException {
@@ -287,10 +312,7 @@ public static ClickHouseBitmap deserialize(byte[] bytes, ClickHouseDataType inne
287
312
}
288
313
289
314
int byteLen = byteLength (innerType );
290
- ByteBuffer buffer = ByteBuffer .allocate (bytes .length );
291
- if (buffer .order () != ByteOrder .LITTLE_ENDIAN ) {
292
- buffer = buffer .slice ().order (ByteOrder .LITTLE_ENDIAN );
293
- }
315
+ ByteBuffer buffer = newBuffer (bytes .length );
294
316
buffer = (ByteBuffer ) ((Buffer ) buffer .put (bytes )).flip ();
295
317
296
318
if (buffer .get () == (byte ) 0 ) { // small set
@@ -331,10 +353,29 @@ public static ClickHouseBitmap deserialize(byte[] bytes, ClickHouseDataType inne
331
353
b .deserialize (buffer );
332
354
rb = ClickHouseBitmap .wrap (b , innerType );
333
355
} else {
334
- // why? when serializing Roaring64NavigableMap, the initial 5 bytes were removed
335
- // with 8 unknown bytes appended
336
- throw new UnsupportedOperationException (
337
- "Deserializing Roaring64NavigableMap with cardinality larger than 32 is currently not supported." );
356
+ // consume map size(long in little-endian byte order)
357
+ byte [] bitmaps = new byte [4 ];
358
+ buffer .get (bitmaps );
359
+ if (buffer .get () != 0 || buffer .get () != 0 || buffer .get () != 0 || buffer .get () != 0 ) {
360
+ throw new IllegalStateException (
361
+ "Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!" );
362
+ }
363
+ // replace the last 5 bytes to flag(boolean for signed/unsigned) and map
364
+ // size(integer)
365
+ buffer .position (buffer .position () - 5 );
366
+ // always unsigned due to limit of CRoaring
367
+ buffer .put ((byte ) 0 );
368
+ // big-endian -> little-endian
369
+ for (int i = 3 ; i >= 0 ; i --) {
370
+ buffer .put (bitmaps [i ]);
371
+ }
372
+
373
+ buffer .position (buffer .position () - 5 );
374
+ bitmaps = new byte [buffer .remaining ()];
375
+ buffer .get (bitmaps );
376
+ Roaring64NavigableMap b = new Roaring64NavigableMap ();
377
+ b .deserialize (new DataInputStream (new ByteArrayInputStream (bitmaps )));
378
+ rb = ClickHouseBitmap .wrap (b , innerType );
338
379
}
339
380
}
340
381
@@ -436,15 +477,17 @@ public long[] toLongArray() {
436
477
return longs ;
437
478
}
438
479
480
+ /**
481
+ * Serialize the bitmap into a flipped ByteBuffer.
482
+ *
483
+ * @return flipped byte buffer
484
+ */
439
485
public ByteBuffer toByteBuffer () {
440
486
ByteBuffer buf ;
441
487
442
488
int cardinality = getCardinality ();
443
489
if (cardinality <= 32 ) {
444
- buf = ByteBuffer .allocate (2 + byteLen * cardinality );
445
- if (buf .order () != ByteOrder .LITTLE_ENDIAN ) {
446
- buf = buf .slice ().order (ByteOrder .LITTLE_ENDIAN );
447
- }
490
+ buf = newBuffer (2 + byteLen * cardinality );
448
491
buf .put ((byte ) 0 );
449
492
buf .put ((byte ) cardinality );
450
493
if (byteLen == 1 ) {
@@ -468,28 +511,23 @@ public ByteBuffer toByteBuffer() {
468
511
int size = serializedSizeInBytes ();
469
512
int varIntSize = Utils .getVarIntSize (size );
470
513
471
- buf = ByteBuffer .allocate (1 + varIntSize + size );
472
- if (buf .order () != ByteOrder .LITTLE_ENDIAN ) {
473
- buf = buf .slice ().order (ByteOrder .LITTLE_ENDIAN );
474
- }
514
+ buf = newBuffer (1 + varIntSize + size );
475
515
buf .put ((byte ) 1 );
476
516
Utils .writeVarInt (size , buf );
477
517
serialize (buf );
478
518
} else { // 64
479
- // 1) exclude the leading 5 bytes - boolean flag + map size , see below:
519
+ // 1) deduct one to exclude the leading byte - boolean flag, see below:
480
520
// https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1107
481
- // 2) not sure what's the extra 8 bytes?
482
- long size = serializedSizeInBytesAsLong () - 5 + 8 ;
521
+ // 2) add 4 bytes because CRoaring uses long to store count of 32-bit bitmaps,
522
+ // while Java uses int - see
523
+ // https://github.com/RoaringBitmap/CRoaring/blob/v0.2.66/cpp/roaring64map.hh#L597
524
+ long size = serializedSizeInBytesAsLong () - 1 + 4 ;
483
525
int varIntSize = Utils .getVarLongSize (size );
484
526
// TODO add serialize(DataOutput) to handle more
485
527
int intSize = (int ) size ;
486
- buf = ByteBuffer .allocate (1 + varIntSize + intSize );
487
- if (buf .order () != ByteOrder .LITTLE_ENDIAN ) {
488
- buf = buf .slice ().order (ByteOrder .LITTLE_ENDIAN );
489
- }
528
+ buf = newBuffer (1 + varIntSize + intSize );
490
529
buf .put ((byte ) 1 );
491
530
Utils .writeVarInt (intSize , buf );
492
- buf .putLong (1L ); // what's this?
493
531
serialize (buf );
494
532
}
495
533
0 commit comments