27
27
#include " utilities/ostream.hpp"
28
28
#include " utilities/reverse_bits.hpp"
29
29
30
- jint CompressedReadStream::read_signed_int () {
31
- return UNSIGNED5::decode_sign (read_int ());
32
- }
33
-
34
30
// Compressing floats is simple, because the only common pattern
35
31
// is trailing zeroes. (Compare leading sign bits on ints.)
36
32
// Since floats are left-justified, as opposed to right-justified
@@ -39,76 +35,135 @@ jint CompressedReadStream::read_signed_int() {
39
35
// leading zeroes, effect is better compression of those common
40
36
// 32-bit float values, such as integers or integers divided by
41
37
// powers of two, that have many trailing zeroes.
42
- jfloat CompressedReadStream::read_float () {
43
- int rf = read_int ();
44
- int f = reverse_bits (rf);
38
+
39
+ jfloat CompressedStream::decode_float (juint rf) {
40
+ int f = reverse_bits (rf);
45
41
return jfloat_cast (f);
46
42
}
47
43
44
+ juint CompressedStream::encode_float (jfloat value) {
45
+ juint f = jint_cast (value);
46
+ juint rf = reverse_bits (f);
47
+ assert (f == reverse_bits (rf), " can re-read same bits" );
48
+ return rf;
49
+ }
50
+
48
51
// The treatment of doubles is similar. We could bit-reverse each
49
- // entire 64-bit word, but it is almost as effective to bit-reverse
52
+ // entire 64-bit word, but it is just as effective to bit-reverse
50
53
// the individual halves. Since we are going to encode them
51
54
// separately as 32-bit halves anyway, it seems slightly simpler
52
55
// to reverse after splitting, and when reading reverse each
53
56
// half before joining them together.
54
- jdouble CompressedReadStream::read_double () {
55
- jint rh = read_int ();
56
- jint rl = read_int ();
57
- jint h = reverse_bits (rh);
58
- jint l = reverse_bits (rl);
57
+ //
58
+ // Although exponents have a small amount of sign replication, we do
59
+ // not attempt to do sign conversion. In fact, both (reversed) halves
60
+ // are treated identically, because we do not want to ask which half
61
+ // is which, in the 64-bit double representation. In principle we
62
+ // could attempt to compress the two halves differently, and even to
63
+ // use uint_pair encodings, but the benefit would be small and there
64
+ // would surely be bugs. Our workloads do not use many doubles.
65
+
66
+ jdouble CompressedStream::decode_double (juint rh, juint rl) {
67
+ jint h = reverse_bits (rh);
68
+ jint l = reverse_bits (rl);
59
69
return jdouble_cast (jlong_from (h, l));
60
70
}
61
71
72
+ void CompressedStream::encode_double (jdouble value, juint& rh, juint& rl) {
73
+ juint h = high (jlong_cast (value));
74
+ juint l = low ( jlong_cast (value));
75
+ rh = reverse_bits (h);
76
+ rl = reverse_bits (l);
77
+ assert (h == reverse_bits (rh), " can re-read same bits" );
78
+ assert (l == reverse_bits (rl), " can re-read same bits" );
79
+ }
80
+
62
81
// A 64-bit long is encoded into distinct 32-bit halves. This saves
63
82
// us from having to define a 64-bit encoding and is almost as
64
83
// effective. A modified LEB128 could encode longs into 9 bytes, and
65
84
// this technique maxes out at 10 bytes, so, if we didn't mind the
66
85
// extra complexity of another coding system, we could process 64-bit
67
86
// values as single units. But, the complexity does not seem
68
87
// worthwhile.
69
- jlong CompressedReadStream::read_long () {
70
- jint low = read_signed_int ();
71
- jint high = read_signed_int ();
88
+
89
+ jlong CompressedStream::decode_long (juint ulo, juint uhi) {
90
+ jint low = UNSIGNED5::decode_sign (ulo);
91
+ jint high = UNSIGNED5::decode_sign (uhi);
72
92
return jlong_from (high, low);
73
93
}
74
94
75
- CompressedWriteStream::CompressedWriteStream (int initial_size) : CompressedStream(nullptr , 0 ) {
76
- _buffer = NEW_RESOURCE_ARRAY (u_char, initial_size);
77
- _size = initial_size;
78
- _position = 0 ;
95
+ void CompressedStream::encode_long (jlong value, juint& ulo, juint& uhi) {
96
+ ulo = UNSIGNED5::encode_sign (low (value));
97
+ uhi = UNSIGNED5::encode_sign (high (value));
79
98
}
80
99
81
- void CompressedWriteStream::grow () {
82
- int nsize = _size * 2 ;
83
- const int min_expansion = UNSIGNED5::MAX_LENGTH;
84
- if (nsize < min_expansion*2 ) {
85
- nsize = min_expansion*2 ;
100
+ void CompressedIntReadStream::setup (u_char* buffer,
101
+ size_t limit,
102
+ bool suppress_zeroes) {
103
+ _r.setup (buffer, limit);
104
+ reset ();
105
+ if (!suppress_zeroes) _r.set_passthrough ();
106
+ }
107
+
108
+ void CompressedIntWriteStream::setup (address initial_buffer,
109
+ size_t initial_size,
110
+ bool suppress_zeroes) {
111
+ const size_t MIN_SIZE = UNSIGNED5::MAX_LENGTH; // avoid really small sizes
112
+ if (initial_size < MIN_SIZE) {
113
+ initial_size = MIN_SIZE; initial_buffer = nullptr ;
86
114
}
87
- u_char* _new_buffer = NEW_RESOURCE_ARRAY (u_char, nsize);
88
- memcpy (_new_buffer, _buffer, _position);
89
- _buffer = _new_buffer;
90
- _size = nsize;
115
+ if (initial_buffer == nullptr ) {
116
+ initial_buffer = NEW_RESOURCE_ARRAY (u_char, initial_size);
117
+ }
118
+ _w.grow_array (initial_buffer, initial_size);
119
+ reset ();
120
+ if (!suppress_zeroes) _w.set_passthrough ();
91
121
}
92
122
93
- void CompressedWriteStream::write_float (jfloat value ) {
94
- juint f = jint_cast (value );
95
- juint rf = reverse_bits (f );
96
- assert (f == reverse_bits (rf ), " can re-read same bits " );
97
- write_int (rf) ;
123
+ u_char* CompressedIntWriteStream::data_address_at ( size_t position, size_t length ) {
124
+ assert (_w. limit () != 0 , " " );
125
+ assert ( in_bounds (position, _w. limit (), length == 0 ), " oob " );
126
+ assert (in_bounds (position + length, _w. limit ( ), true ), " oob " );
127
+ return &_w. array ()[position] ;
98
128
}
99
129
100
- void CompressedWriteStream::write_double (jdouble value) {
101
- juint h = high (jlong_cast (value));
102
- juint l = low ( jlong_cast (value));
103
- juint rh = reverse_bits (h);
104
- juint rl = reverse_bits (l);
105
- assert (h == reverse_bits (rh), " can re-read same bits" );
106
- assert (l == reverse_bits (rl), " can re-read same bits" );
107
- write_int (rh);
108
- write_int (rl);
130
+ void CompressedIntWriteStream::grow () {
131
+ size_t nsize = _w.limit () * 2 ;
132
+ const size_t min_expansion = UNSIGNED5::MAX_LENGTH * 7 ;
133
+ if (nsize < min_expansion) {
134
+ nsize = min_expansion;
135
+ }
136
+ u_char* nbuf = NEW_RESOURCE_ARRAY (u_char, nsize);
137
+ _w.grow_array (nbuf, nsize);
138
+ }
139
+
140
+ size_t CompressedIntWriteStream::checkpoint () {
141
+ #ifdef DO_CZ
142
+ assert (_w.is_clean () || _w.is_passthrough (), " " );
143
+ _w_checkpoint = _w.checkpoint ();
144
+ return _w_checkpoint.position ();
145
+ #else
146
+ return _w_checkpoint = _w.position ();
147
+ #endif
148
+ }
149
+
150
+ size_t CompressedIntWriteStream::data_size_after_checkpoint (size_t checkpoint_pos) {
151
+ #ifdef DO_CZ
152
+ assert (_w_checkpoint.position () == checkpoint_pos, " " );
153
+ write_end_byte (); // close off any previous compression state
154
+ return _w.position () - checkpoint_pos;
155
+ #else
156
+ assert (_w_checkpoint == checkpoint_pos, " " );
157
+ return _w.position () - checkpoint_pos;
158
+ #endif
109
159
}
110
160
111
- void CompressedWriteStream::write_long (jlong value) {
112
- write_signed_int (low (value));
113
- write_signed_int (high (value));
161
+ void CompressedIntWriteStream::restore (size_t checkpoint_pos) {
162
+ #ifdef DO_CZ
163
+ assert (_w_checkpoint.position () == checkpoint_pos, " " );
164
+ _w.restore (_w_checkpoint);
165
+ #else
166
+ assert (_w_checkpoint == checkpoint_pos, " " );
167
+ _w.set_position (checkpoint_pos);
168
+ #endif
114
169
}
0 commit comments