19
19
20
20
#include " SpecUtils_config.h"
21
21
22
+ #include < iostream>
23
+
22
24
#include < cmath>
23
25
#include < stack>
24
26
#include < limits>
@@ -45,8 +47,8 @@ using namespace std;
45
47
46
48
namespace
47
49
{
48
- // anaonomous namespace for functions to help parse D3.js HTML files, that wont be
49
- // usefull outside of this file
50
+ // anonymous namespace for functions to help parse D3.js HTML files, that won't be
51
+ // useful outside of this file
50
52
#if ( SpecUtils_D3_SUPPORT_FILE_STATIC )
51
53
const unsigned char * const ns_libJsFiles[] = {
52
54
D3_MIN_JS // D3.js library
@@ -82,19 +84,31 @@ namespace
82
84
83
85
// Taken from the rapidxml.hpp that Wt uses
84
86
template <class Ch >
85
- void copy_check_utf8 (const Ch *& src, Ch *& dest)
87
+ void copy_check_utf8 ( const Ch *& src, Ch *& dest )
86
88
{
87
89
// skip entire UTF-8 encoded characters at once,
88
90
// checking their validity based on
89
91
// http://www.dwheeler.com/secure-programs/Secure-Programs-HOWTO/character-encoding.html (5.9.4 column 3)
90
92
93
+ assert ( src );
94
+ size_t src_len;
95
+ for ( src_len = 0 ; src[src_len] && src_len < 4 ; ++src_len )
96
+ {
97
+ }
98
+ assert ( src_len );
99
+ if ( !src_len )
100
+ return ;
101
+
102
+
91
103
unsigned length = 1 ;
92
104
bool legal = false ;
93
- if ((unsigned char )src[0 ] <= 0x7F ) {
105
+ if ((unsigned char )src[0 ] <= 0x7F )
106
+ {
94
107
unsigned char c = src[0 ];
95
108
if (c == 0x09 || c == 0x0A || c == 0x0D || c >= 0x20 )
96
109
legal = true ;
97
- } else if ((unsigned char )src[0 ] >= 0xF0 ) {
110
+ }else if ( ((unsigned char )src[0 ] >= 0xF0 ) && (src_len >= 4 ) )
111
+ {
98
112
length = 4 ;
99
113
100
114
if ((
@@ -120,7 +134,8 @@ namespace
120
134
))
121
135
legal = true ;
122
136
123
- } else if ((unsigned char )src[0 ] >= 0xE0 ) {
137
+ }else if ( ((unsigned char )src[0 ] >= 0xE0 ) && (src_len >= 3 ) )
138
+ {
124
139
length = 3 ;
125
140
126
141
if ((
@@ -142,7 +157,8 @@ namespace
142
157
))
143
158
legal = true ;
144
159
145
- } else if ((unsigned char )src[0 ] >= 0xC0 ) {
160
+ }else if ( ((unsigned char )src[0 ] >= 0xC0 ) && (src_len >= 2 ) )
161
+ {
146
162
length = 2 ;
147
163
148
164
if (
@@ -155,9 +171,12 @@ namespace
155
171
legal = true ;
156
172
}
157
173
158
- if (legal) {
159
- if (dest) {
160
- if (length == 3 ) {
174
+ if ( legal )
175
+ {
176
+ if ( dest )
177
+ {
178
+ if ( length == 3 )
179
+ {
161
180
/*
162
181
U+2028 and U+2029 may cause problems, they are line
163
182
separators that mess up JavaScript string literals.
@@ -172,44 +191,56 @@ namespace
172
191
} else
173
192
for (unsigned i = 0 ; i < length; ++i)
174
193
*dest++ = *src++;
175
- } else
176
- for (unsigned i = 0 ; i < length; ++i)
194
+ }else
195
+ {
196
+ for ( unsigned i = 0 ; i < length; ++i )
177
197
*dest++ = *src++;
178
- } else
198
+ }
199
+ }else
200
+ {
179
201
src += length;
180
- } else {
181
- if (dest)
182
- if (length >= 3 ) {
202
+ }
203
+ }else // if( legal )
204
+ {
205
+ if ( dest )
206
+ {
207
+ if ( length >= 3 )
208
+ {
183
209
/* insert U+FFFD, the replacement character */
184
210
*dest++ = (Ch)0xef ;
185
211
*dest++ = (Ch)0xbf ;
186
212
*dest++ = (Ch)0xbd ;
187
213
src += length;
188
- } else
189
- for (unsigned i = 0 ; i < length; ++i) {
214
+ }else
215
+ {
216
+ for (unsigned i = 0 ; i < length; ++i)
217
+ {
190
218
*dest++ = ' ?' ;
191
219
src++;
192
220
}
193
- else {
194
- // const Ch *problem_src = src;
195
- src += length;
196
- throw runtime_error ( " Invalid UTF-8 sequence" /* + const_cast<Ch *>(problem_src)*/ );
197
221
}
198
- }
199
- }
200
-
201
-
222
+ }else
223
+ {
224
+ // const Ch *problem_src = src;
225
+ src += length;
226
+ throw runtime_error ( " Invalid UTF-8 sequence" /* + const_cast<Ch *>(problem_src)*/ );
227
+ }// if( dest )
228
+ }// if( legal ) / else
229
+ }// void copy_check_utf8
202
230
203
231
void sanitize_unicode ( stringstream &sout, const std::string& text )
204
232
{
205
233
char buf[4 ];
206
234
207
235
for (const char *c = text.c_str (); *c;) {
236
+ assert ( c <= (text.c_str () + text.size ()) );
208
237
char *b = buf;
209
238
// but copy_check_utf8() does not declare the following ranges illegal:
210
239
// U+D800-U+DFFF
211
240
// U+FFFE-U+FFFF
212
241
copy_check_utf8<char >(c, b);
242
+ assert ( c <= (text.c_str () + text.size ()) );
243
+ assert ( (b - buf) <= 4 );
213
244
for (char *i = buf; i < b; ++i)
214
245
sout << *i;
215
246
}
@@ -264,7 +295,24 @@ namespace
264
295
265
296
namespace D3SpectrumExport
266
297
{
267
-
298
+ #if ( SpecUtils_BUILD_FUZZING_TESTS )
299
+ std::string escape_text_test ( const std::string &input )  // Test-only wrapper, compiled only when SpecUtils_BUILD_FUZZING_TESTS is enabled.
300
+ {
301
+ return escape_text ( input );  // Forwards `input` unchanged to escape_text() and returns its result as-is.
302
+ }
303
+
304
+ void copy_check_utf8_test ( const char *& src, char *& dest )  // Test-only wrapper (SpecUtils_BUILD_FUZZING_TESTS builds) around the copy_check_utf8 template, instantiated for char.
305
+ {
306
+ copy_check_utf8 ( src, dest );  // Both pointers are passed by reference, so any advancement done by copy_check_utf8 is visible to the caller.
307
+ }
308
+
309
+ void sanitize_unicode_test ( std::stringstream &sout, const std::string& text )  // Test-only wrapper (SpecUtils_BUILD_FUZZING_TESTS builds) around sanitize_unicode().
310
+ {
311
+ sanitize_unicode ( sout, text );  // Forwards both arguments unchanged; output is written into the caller's `sout`.
312
+ }
313
+ #endif
314
+
315
+
268
316
#if ( SpecUtils_D3_SUPPORT_FILE_STATIC )
269
317
const unsigned char *d3_js (){ return D3_MIN_JS; }
270
318
const unsigned char *spectrum_chart_d3_js (){ return SPECTRUM_CHART_D3_JS; }
0 commit comments