-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathxml_unpack_base.h
635 lines (569 loc) · 18.9 KB
/
xml_unpack_base.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
/*
@copyright Russell Standish 2000-2013
@author Russell Standish
This file is part of Classdesc
Open source licensed under the MIT license. See LICENSE for details.
*/
/**\file
\brief XML deserialisation descriptor
*/
#ifndef CLASSDESC_XML_UNPACK_BASE_H
#define CLASSDESC_XML_UNPACK_BASE_H
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <stdarg.h>
#include "xml_common.h"
#include "classdesc.h"
#include "classdesc_access.h"
// for xml_unpack_t serialisation support
#include "pack_base.h"
#include "pack_stl.h"
// Forward declarations of the access templates that xml_unpack_t
// declares as friends further down in this header.
namespace classdesc_access
{
template <class T> struct access_pack;
template <class T> struct access_unpack;
}
namespace classdesc
{
namespace
{
  /// Return true if \a s is non-empty and consists solely of whitespace
  /// characters. An empty string yields false.
  inline bool isspace(const std::string& s)
  {
    if (s.empty()) return false;
    for (std::string::size_type i=0; i<s.size(); ++i)
      // std::isspace is undefined for negative arguments other than EOF,
      // so convert through unsigned char before the call
      if (!std::isspace(static_cast<unsigned char>(s[i])))
        return false;
    return true;
  }
}
// Predicate for the remove_if() call in xml_unpack_t::unpack(bool) below.
// The argument is converted through unsigned char because std::isspace
// is undefined for negative values other than EOF.
inline bool Isspace(char c) {return std::isspace(static_cast<unsigned char>(c))!=0;}
// Directives seen only when _CLASSDESC is defined. NOTE(review): presumably
// that is the classdesc code generator pass, and each line asks it not to
// emit the named descriptor for the named type -- confirm against the
// classdesc documentation.
#ifdef _CLASSDESC
#pragma omit pack classdesc::XMLtoken
#pragma omit pack classdesc::xml_pack_error
#pragma omit unpack classdesc::XMLtoken
#pragma omit unpack classdesc::xml_pack_error
#pragma omit xml_pack classdesc::XMLtoken
#pragma omit xml_pack classdesc::xml_pack_error
#pragma omit xml_unpack classdesc::XMLtoken
#pragma omit xml_unpack classdesc::xml_pack_error
#pragma omit json_pack classdesc::xml_pack_error
#pragma omit json_unpack classdesc::xml_pack_error
#pragma omit dump classdesc::xml_pack_error
#endif
/// Exception thrown by the XML tokeniser and by xml_unpack_t in this
/// header. what() returns the supplied text prefixed with "xml_pack:".
class xml_pack_error : public std::exception
{
  std::string msg; ///< complete message, prefix included
public:
  xml_pack_error(const char *s): msg(std::string("xml_pack:")+s) {}
  xml_pack_error(std::string s): msg("xml_pack:"+s) {}
  virtual ~xml_pack_error() throw() {}
  virtual const char* what() const throw() {return msg.c_str();}
};
// Character-level accessors used by XMLtoken. Overloads are supplied
// here for std::istream and FILE*.

/// Read one character from \a i into \a c; true if the stream is still
/// in a good state after the read.
inline bool get(std::istream& i, char& c)
{
  i.get(c);
  return i.good();
}

/// Read one character from \a i into \a c; false once fgetc reports EOF
/// (in which case \a c holds char(EOF)).
inline bool get(FILE*& i, char& c)
{
  const int ch=fgetc(i);
  c=char(ch);
  return ch!=EOF;
}

/// Push \a c back onto the stream \a i.
inline void unget(std::istream& i, char c)
{
  i.putback(c);
}

/// Push \a c back onto the C stream \a i.
inline void unget(FILE*& i, char c)
{
  ungetc(c,i);
}
/**
   Tokeniser that splits a character stream into the string tokens
   consumed by xml_unpack_t::parse.

   \a Stream is any type for which classdesc::get(Stream&,char&) and
   classdesc::unget(Stream&,char) are defined.
*/
template <class Stream>
class XMLtoken
{
  Stream& i;
  /// single-character token held over for the next token() call, '\0' if none
  char nexttok;

  // basic I/O operations
  bool get(char& c) {return classdesc::get(i,c);}
  void unget(char c) {classdesc::unget(i,c);}
  /// throw error on EOF
  char getNoEOF() {
    char r;
    if (!get(r)) throw xml_pack_error("invalid XML");
    return r;
  }
  /// skip to the end of a '<...>' construct, then any following whitespace
  void gobble_comment();
  /// skip whitespace, leaving the first non-whitespace character unread
  void gobble_whitespace() {
    char c;
    bool notEof=get(c);
    // convert through unsigned char: std::isspace is undefined for
    // negative char values
    while (notEof && std::isspace(static_cast<unsigned char>(c)))
      notEof=get(c);
    if (notEof) unget(c);
  }
  /// decode an entity reference, the leading '&' having been consumed
  char parse_entity();
  /// return \a tok if non-empty (deferring \a c), else the token for \a c
  std::string retval(char c, const std::string& tok);
public:
  XMLtoken(Stream& i): i(i), nexttok('\0') {}
  /// next token, or an empty string at end of file
  std::string token();
  /// next token; throws xml_pack_error if none is available
  std::string tokenNoEOF() {
    std::string tok=token();
    if (tok.empty()) throw xml_pack_error("XML token expected");
    else return tok;
  }
  /// handle tags starting with ! - comments and CDATAs
  std::string processBang(std::string& tok,char c);
  /// handle XML tags ('<' case)
  std::string processOpenXMLTag(std::string&);
};
/**
   Discard input up to and including the '>' that balances the tag
   currently being read, then discard any whitespace that follows.
   '<' and '>' are counted to track nesting, and are ignored while
   inside a double-quoted string. Throws (via getNoEOF) if the input
   ends first.
*/
template <class Stream>
void XMLtoken<Stream>::gobble_comment()
{
  int depth=1;       // number of '<' still awaiting their '>'
  bool quoted=false; // true while between a pair of double quotes
  while (depth!=0)
    {
      const char ch=getNoEOF();
      if (ch=='"') quoted=!quoted;
      if (!quoted)
        {
          if (ch=='<')
            ++depth;
          else if (ch=='>')
            --depth;
        }
    }
  gobble_whitespace();
}
/**
   Decode an entity reference. Called once the leading '&' has been
   consumed; reads up to and including the terminating ';'.

   Recognises the predefined entities amp, lt, gt, quot and apos, and
   numeric character references in decimal (&#NN;) or hexadecimal
   (&#xNN;) form.

   \return the decoded character
   \throw xml_pack_error if the input ends before ';' (via getNoEOF), if
   a numeric reference is not made up purely of digits, if its value
   does not fit in a char, or if the entity name is not recognised
*/
template <class Stream>
char XMLtoken<Stream>::parse_entity()
{
  std::string name;
  for (char c=getNoEOF(); c!=';'; c=getNoEOF())
    name+=c;
  if (name=="amp") return '&';
  if (name=="lt") return '<';
  if (name=="gt") return '>';
  if (name=="quot") return '"';
  if (name=="apos") return '\'';
  if (!name.empty() && name[0]=='#') //character code supplied
    {
      const bool hex=name.size()>1 && name[1]=='x';
      const std::string digits=name.substr(hex? 2: 1);
      // strtol would quietly accept an empty string, leading whitespace,
      // a sign or trailing junk, so validate every character first
      if (digits.empty())
        throw xml_pack_error("ill-formed XML numeric character reference");
      for (std::string::size_type j=0; j<digits.size(); ++j)
        {
          const unsigned char d=static_cast<unsigned char>(digits[j]);
          if (hex? !std::isxdigit(d): !std::isdigit(d))
            throw xml_pack_error("ill-formed XML numeric character reference");
        }
      //TODO - should we be doing this all in wide chars?
      // on overflow strtol saturates at LONG_MAX, which the range test rejects
      const long r=std::strtol(digits.c_str(),NULL,hex? 16: 10);
      if (r>std::numeric_limits<char>::max() || r<std::numeric_limits<char>::min())
        throw xml_pack_error("XML numeric character reference out of range");
      return char(r);
    }
  // not sure what to do about user defined entities - throw, or issue a warning
  throw xml_pack_error("Unidentified entity encountered");
}
/**
   Allows a previously accumulated token to be returned when a single
   character token is parsed.

   If \a tok is non-empty it is returned and \a c is remembered in
   nexttok for the following token() call. Otherwise nexttok is cleared
   and the token for \a c itself is returned: '/' stands for "</",
   '\\' stands for "/>", and any other character is returned as a
   one-character string.
*/
template <class Stream>
std::string XMLtoken<Stream>::retval(char c, const std::string& tok)
{
  if (!tok.empty())
    {
      // hand back the pending text first; c is delivered on the next call
      nexttok=c;
      return tok;
    }
  nexttok='\0';
  if (c=='/') return "</";
  if (c=='\\') return "/>";
  return std::string(1,c);
}
/**
   Handle a tag beginning "<!", i.e. a CDATA section or a comment or
   other declaration. Called once "<!" has been consumed.

   For a CDATA section the contents are appended verbatim to \a tok and
   \a tok is returned when the closing "]]>" is read. Anything else is
   skipped and an empty string is returned.

   \param tok token text accumulated so far; CDATA contents are appended
   \param c used as scratch storage for the characters read
   \throw xml_pack_error (via getNoEOF) if the input ends inside the construct
*/
template <class Stream>
std::string XMLtoken<Stream>::processBang(std::string& tok,char c)
{
  // look ahead for [CDATA[
  std::string leadin;
  for (int i=0; i<7; i++)
    {
      if (!get(c) || c=='>') break; // in case of EOF, or end of tag
      leadin+=c;
    }
  if (leadin=="[CDATA[")
    { // CDATA processing, add CDATA contents verbatim
      // leadin now holds the run of at most two ']' that may turn out to
      // be the start of the "]]>" terminator
      leadin.clear();
      while ((c=getNoEOF()))
        {
          if (c==']')
            {
              if (leadin.size()<2)
                leadin+=c;
              else
                // a third or later ']' in a row: the oldest bracket is
                // ordinary data, the latest two may still end the section
                tok+=c;
            }
          else if (c=='>' && leadin=="]]")
            return tok;
          else
            {
              // the held brackets were not a terminator after all
              tok+=leadin+c;
              leadin.clear();
            }
        }
    }
  else if (c!='>')
    gobble_comment();
  return "";
}
/**
   Handle the character following a '<'.

   - '!' : hand over to processBang (comments and CDATA)
   - '?' : skip the construct, returning an empty string
   - '/' : start of an end tag, returned as "</" via retval
   - anything else is put back, and "<" is returned via retval

   \param tok token text accumulated so far (see retval)
   \return the next token, or an empty string if nothing is to be returned yet
*/
template <class Stream>
std::string XMLtoken<Stream>::processOpenXMLTag(std::string& tok)
{
  const char c=getNoEOF();
  if (c=='!')
    return processBang(tok,c);
  if (c=='?')
    {
      // processing instruction or XML declaration: ignored
      gobble_comment();
      return "";
    }
  if (c=='/') //we have begin end tag token
    return retval('/',tok);
  unget(c);
  return retval('<',tok);
}
/**
   Return the next token from the stream.

   Tokens are: a single whitespace character; the markers "<", "</",
   "/>", ">" and "="; a quoted string (quotes removed, entities
   decoded); or a run of other characters with entities decoded.

   \return the token, or an empty string at end of file
   \throw xml_pack_error if the input ends part way through a token
*/
template <class Stream>
std::string XMLtoken<Stream>::token()
{
  std::string tok;
  char c;

  // handle any tokens left over from previous parsing
  if (nexttok)
    return retval(nexttok,tok);

  while (get(c))
    {
      // return white space as a separate token
      // (converted through unsigned char: std::isspace is undefined for
      // negative char values)
      if (std::isspace(static_cast<unsigned char>(c))) return retval(c,tok);

      switch (c)
        {
        case '&':
          tok+=parse_entity();
          continue;
        case '\'':
        case '"': //process string literal as single token
          {
            char term=c;
            while ((c=getNoEOF())!=term)
              if (c=='&')
                tok+=parse_entity();
              else
                tok+=c;
            return tok;
          }
        case '<':
          {
            std::string r=processOpenXMLTag(tok);
            if (r.empty()) continue;
            return r;
          }
        case '/':
          if ((c=getNoEOF())=='>') //we have end empty tag token
            return retval('\\',tok);
          else //TODO is a / in the middle of a token acceptible XML?
            {
              tok+='/';
              unget(c);
              break;
            }
        case '>':
        case '=':
          return retval(c,tok);
        default:
          tok+=c;
        }
    }
  if (tok.empty())
    return tok; //empty token returned on end of file
  else
    throw xml_pack_error("XML file truncated?");
}
/**
   XML deserialisation object

   parse() reads an XML document into a flat map from string keys to
   element content or attribute values; the unpack() methods then look
   values up by key. Keys are normalised with addHashNoughts(), so each
   dot-separated component carries a '#' label (e.g. "a.b" is looked up
   as "a#0.b#0").
*/
class xml_unpack_t
{
public:
  typedef std::map<std::string,std::string> ContentMap;
  CLASSDESC_ACCESS(xml_unpack_t);
private:
  /// key -> element content or attribute value
  ContentMap contentMap;
  /// scope -> number of elements parsed with that scope
  std::map<std::string,unsigned> tokenCount;

  /// throw if missingException is set and \a key is not in the content map
  void checkKey(const std::string& key)
  {
    if (missingException && !contentMap.count(key))
      throw xml_pack_error(key+" is missing in XML data stream");
  }

  /// add "#0" to components if no # label present
  std::string addHashNoughts(const std::string& key)
  {
    std::string r;
    std::string::size_type start=0, end;
    bool hash_read=false;
    // the loop deliberately runs to key.length() inclusive; indexing a
    // const string at length() yields the terminating '\0'
    for (end=0; end<=key.length(); end++)
      if (key[end]=='#')
        hash_read=true;
      else if (key[end]=='.')
        {
          if (hash_read)
            hash_read=false;
          else // no hash read, so insert "#0"
            {
              r+=key.substr(start,end-start)+"#0";
              start=end;
            }
        }
    r+=key.substr(start,end-start);
    if (!hash_read)
      r+="#0";
    return r;
  }

  friend struct classdesc_access::access_pack<xml_unpack_t>;
  friend struct classdesc_access::access_unpack<xml_unpack_t>;
public:
  /** set this to true if you wish an exception to be thrown if data is missing
      from the XML stream */
  bool missingException;
  xml_unpack_t(): missingException(false) {}
  /// parse the file named \a fname
  xml_unpack_t(const char* fname): missingException(false) {std::ifstream i(fname); parse(i);}
  /// parse the stream \a i
  template <class Stream> xml_unpack_t(Stream& i): missingException(false) {parse(i);}
  template <class Stream> void process_attribute(XMLtoken<Stream>& i, const std::string& scope);
  template <class Stream> void parse(Stream& i);
  template <class Stream> void parse(XMLtoken<Stream>& stream, const std::string& scope);

  /// first token starting with \a prefix
  ContentMap::const_iterator firstToken(const std::string& prefix) const {
    return contentMap.lower_bound(prefix);
  }
  /// first token whose key orders strictly after \a prefix
  ContentMap::const_iterator endToken(const std::string& prefix) const {
    return contentMap.upper_bound(prefix);
  }

  ///dump XML contents for debugging
  void printContentMap() const {
    for (std::map<std::string,std::string>::const_iterator i=contentMap.begin();
         i!=contentMap.end(); i++)
      std::cout << "["<<i->first<<"]="<<i->second<<std::endl;
    std::cout << std::endl;
    for (std::map<std::string,unsigned>::const_iterator i=tokenCount.begin();
         i!=tokenCount.end(); i++)
      std::cout << "Count["<<i->first<<"]="<<i->second<<std::endl;
  }

  // specialise floating point processing to handle special values (NaN, Inf etc).
  // fallback to regular iostream processing
  template <class T> void istoT(const std::string& s, T& x)
  {
    std::istringstream is(s);
    is>>x;
  }
#if defined(__cplusplus) && __cplusplus>=201103L
  void stoT(const std::string& s, float& x)
  try {x=std::stof(s);}
  catch(...){istoT(s,x);}
  void stoT(const std::string& s, double& x)
  try {x=std::stod(s);}
  catch(...){istoT(s,x);}
  void stoT(const std::string& s, long double& x)
  try {x=std::stold(s);}
  catch(...){istoT(s,x);}
#endif
  template <class T> void stoT(const std::string& s, T& x)
  {istoT(s,x);}

  ///simple data type deserialisation
  /// \a var is left untouched if \a key is absent
  template <class T> void unpack(std::string key, T& var) {
    key=addHashNoughts(key); checkKey(key);
    std::map<std::string,std::string>::const_iterator it=contentMap.find(key);
    if (it != contentMap.end()) stoT(it->second, var);
  }
  // specialisation to handle boolean values
  // true for 1, t, true, y, yes and on, in any letter case; false otherwise
  void unpack(std::string key, bool& var) {
    key=addHashNoughts(key); checkKey(key);
    std::map<std::string,std::string>::const_iterator it=contentMap.find(key);
    if (it != contentMap.end())
      {
        std::string val=it->second;
        // strip any white space
        val.erase(std::remove_if(val.begin(), val.end(), Isspace), val.end());
        // convert through unsigned char: std::tolower is undefined for
        // negative char values
        for (size_t i=0; i<val.length(); ++i)
          val[i]=char(std::tolower(static_cast<unsigned char>(val[i])));
        var = val=="1" || val=="t" || val=="true"|| val=="y"|| val=="yes" ||
          val=="on";
      }
  }
  /// string deserialisation
  void unpack(std::string key, std::string& var) {
    key=addHashNoughts(key); checkKey(key);
    std::map<std::string,std::string>::const_iterator it=contentMap.find(key);
    if (it != contentMap.end())
      var=it->second;
  }
  /// CDATA is unpacked as the string it converts to
  void unpack(std::string key, CDATA& a)
  {unpack(key,static_cast<std::string&>(a));}
  /// checks for existence of token unpacked from XML stream
  bool exists(const std::string& key) {return count(key)>0;}
  /// returns number of array elements with prefix key
  /// (note: the operator[] lookup adds a zero entry for an unseen key)
  size_t count(std::string key) {
    key=addHashNoughts(key);
    key=key.substr(0,key.rfind('#')); //strip final # marker
    return tokenCount[key];
  }
  /// discard all parsed content
  void clear() {contentMap.clear(); tokenCount.clear();}
};
/**
   Parse the "= value" part of an XML attribute from the stream, the
   attribute name having already been read, and store the value in the
   content map under the key \a scope. Whitespace tokens around the '='
   are skipped.

   \throw xml_pack_error if the next non-whitespace token is not "=",
   or (via tokenNoEOF) if the input ends
*/
template <class Stream>
void xml_unpack_t::process_attribute(XMLtoken<Stream>& stream, const std::string& scope)
{
  std::string tok;
  // first non-whitespace token must be the '='
  do
    tok=stream.tokenNoEOF();
  while (isspace(tok));
  if (tok!="=") throw xml_pack_error("ill-formed attribute");
  // next non-whitespace token is the attribute value
  do
    tok=stream.tokenNoEOF();
  while (isspace(tok));
  contentMap[scope]=tok;
}
/**
parse an input XML file, into the database
\a Stream is either std::istream or a FILE*
*/
template <class Stream>
void xml_unpack_t::parse(Stream& i)
{
XMLtoken<Stream> stream(i);
std::string tok;
while (isspace(tok=stream.token()));
if (tok.empty()) return;
if (tok=="<")
parse(stream,stream.tokenNoEOF());
else
throw xml_pack_error("no root element found");
}
/**
   Parse one element, recursing into its child elements. Called once the
   element's name has been read.

   \param stream token source, positioned just after the element name
   \param scope dotted key of this element, without its own index

   Attributes are stored under "<scope_idx>.<name>", and non-empty
   content under "<scope_idx>", where scope_idx is \a scope with its
   occurrence index appended by idx().

   \throw xml_pack_error if the end tag name is not a suffix of \a scope,
   or (via tokenNoEOF) if the input ends prematurely
*/
template <class Stream>
void xml_unpack_t::parse(XMLtoken<Stream>& stream, const std::string& scope)
{
  //count the number of times this token has been read, and append this to database key
  std::string scope_idx=idx(scope,tokenCount[scope]++);

  std::string tok;
  //parse attributes
  for (tok=stream.tokenNoEOF(); tok!=">" && tok!="/>"; tok=stream.tokenNoEOF())
    if (!isspace(tok)) process_attribute(stream, scope_idx+"."+tok);

  if (tok=="/>") return;

  //parse content. We assume element is either just content, or just has child elements
  std::string content;
  for (tok=stream.tokenNoEOF(); tok!="</"; tok=stream.tokenNoEOF())
    if (tok=="<")
      parse(stream,scope_idx+"."+stream.tokenNoEOF()); //parse child element
    else
      content+=tok;

  if (content.size())
    contentMap[scope_idx]=content; //override content (to handle masked private members)

  // finish parsing end tag
  tok=stream.tokenNoEOF();
  // tok must match the last part of scope. An explicit suffix comparison
  // is used because arithmetic on rfind()'s result wraps around when tok
  // does not occur in scope at all, which let some mismatched tags through
  if (tok.length()>scope.length() ||
      scope.compare(scope.length()-tok.length(),tok.length(),tok)!=0)
    throw xml_pack_error("unexpected end tag");
  for (; tok!=">"; tok=stream.tokenNoEOF()); //skip rest of end tag
}
// Generic deserialisation entry points used by the overloads below.
// Only declared at this point; no definition appears in this part of the header.
template <class T> void xml_unpack(xml_unpack_t&,const string&,T&);
template <class T> xml_unpack_t& operator>>(xml_unpack_t& t, T& a);
/*
base type implementations
*/
/// Forwards to xml_unpack for \a a, with basename<T>() appended to the
/// descriptor \a d.
template <class T>
void xml_unpack_onbase(xml_unpack_t& x,const string& d,T& a)
{xml_unpack(x,d+basename<T>(),a);}
/// Overload enabled when is_fundamental<T> holds: the value is read
/// directly with xml_unpack_t::unpack under the key \a d.
template <class T>
typename enable_if<is_fundamental<T>, void>::T
xml_unpackp(xml_unpack_t& x,const string& d,T& a)
{x.unpack(d,a);}
/**
   Array version of xml_unpack.

   \param x deserialisation object to read from
   \param d descriptor (key prefix) of the array
   \param ia tag selecting this overload
   \param a first element of the array
   \param dims number of dimensions
   \param ncopies extent of the first dimension
   \param ... extents of the remaining dims-1 dimensions, each read as int

   Element n is unpacked from key idx(d+"."+name, n), where name is the
   type name of T with leading namespace and qualifiers removed.
*/
template <class T> void xml_unpack(xml_unpack_t& x,const string& d,is_array ia,
                                   T& a, int dims,size_t ncopies,...)
{
  // total element count is the product of all the extents
  va_list extents;
  va_start(extents,ncopies);
  for (int dim=1; dim<dims; ++dim)
    ncopies*=va_arg(extents,int); //assume that 2 and higher D arrays dimensions are int
  va_end(extents);

  classdesc::string eName=classdesc::typeName<T>().c_str();
  // strip leading namespace and qualifiers: step back from the end of
  // the name to just after the last ' ' or ':'
  size_t start=eName.length();
  while (start!=0 && eName[start-1]!=' ' && eName[start-1]!=':') --start;
  const char* const element=eName.c_str()+start;

  for (size_t n=0; n<ncopies; ++n)
    xml_unpack(x,classdesc::idx(d+"."+element,n),(&a)[n]);
}
//Enum_handles have reference semantics
/// Unpack an enum: the value is read as a string, all whitespace is
/// removed from it, and the result is assigned to \a arg.
template <class T> void xml_unpack(xml_unpack_t& x,const string& d,Enum_handle<T> arg)
{
  std::string tmp;
  xml_unpack(x,d,tmp);
  // remove extraneous white space by compacting tmp in place. Characters
  // are converted through unsigned char because std::isspace is
  // undefined for negative char values
  std::string::size_type kept=0;
  for (std::string::size_type i=0; i<tmp.size(); ++i)
    if (!std::isspace(static_cast<unsigned char>(tmp[i])))
      tmp[kept++]=tmp[i];
  arg=tmp.substr(0,kept);
}
/// Unpack a std::pair: its members are read from the sub-keys
/// \a d + ".first" and \a d + ".second".
template <class T1, class T2>
void xml_unpack(xml_unpack_t& x, const string& d, std::pair<T1,T2>& arg)
{
xml_unpack(x,d+".first",arg.first);
xml_unpack(x,d+".second",arg.second);
}
/**
   Overload enabled when is_sequence<T> holds.

   Elements are read from keys idx(d+"."+name, i), where name is the
   element type's name with template arguments and leading namespace and
   qualifiers removed. \a arg is first resized to zero and then to the
   number of such elements recorded in \a x.
*/
template <class T> typename
enable_if<is_sequence<T>, void>::T
xml_unpackp(xml_unpack_t& x, const string& d, T& arg, dummy<1> dum=0)
{
  string eName=typeName<typename T::value_type>().c_str();
  eName=eName.substr(0,eName.find('<')); //trim off any template args
  // strip leading namespace and qualifiers: step back from the end of
  // the name to just after the last ' ' or ':'
  size_t start=eName.length();
  while (start!=0 && eName[start-1]!=' ' && eName[start-1]!=':') --start;
  const string prefix=d+"."+(eName.c_str()+start);

  const size_t cnt=x.count(prefix);
  resize(arg,0); // clear container if resizable
  resize(arg,cnt);

  // stop at whichever ends first: the recorded count or the container
  typename T::iterator j=arg.begin();
  for (size_t i=0; i<cnt && j!=arg.end(); ++i, ++j)
    xml_unpack(x,idx(prefix,i),*j);
}
/**
   Overload enabled when is_associative_container<T> holds.

   \a arg is cleared, then one value is unpacked and inserted for each
   element recorded in \a x under the prefix. The prefix is the element
   type's name (template arguments and leading namespace and qualifiers
   removed), preceded by \a d and a '.' unless \a d is empty.
*/
template <class T> typename
enable_if<is_associative_container<T>, void>::T
xml_unpackp(xml_unpack_t& x, const string& d, T& arg, dummy<2> dum=0)
{
  string eName=typeName<typename T::value_type>().c_str();
  eName=eName.substr(0,eName.find('<')); //trim off any template args
  // strip leading namespace and qualifiers: step back from the end of
  // the name to just after the last ' ' or ':'
  size_t start=eName.length();
  while (start!=0 && eName[start-1]!=' ' && eName[start-1]!=':') --start;

  arg.clear();
  string prefix(eName.c_str()+start);
  if (!d.empty())
    prefix=d+"."+prefix;

  size_t i=0;
  while (i<x.count(prefix))
    {
      // the key part of value_type has its const removed so it can be filled in
      typename NonConstKeyValueType<typename T::value_type>::T v;
      xml_unpack(x,idx(prefix,i),v);
      arg.insert(v);
      ++i;
    }
}
/// Overload taking an is_const_static tag: the body is empty, so nothing
/// is read from the XML data.
template<class T>
void//typename enable_if<Not<is_pointer<T> >,void>::T
xml_unpack(xml_unpack_t& targ, const string& desc, is_const_static i, T arg)
{}
/// Overload for Exclude<T>: the body is empty, so nothing is read from
/// the XML data.
template<class T>
void xml_unpack(xml_unpack_t& targ, const string& desc,
Exclude<T>&) {}
/// Overload for CDATA: forwards to xml_unpack_t::unpack(key, CDATA&).
/// NOTE(review): T does not appear in the parameter list, so it cannot
/// be deduced and this overload is only reachable with an explicit
/// template argument -- confirm whether the template header is intended.
template<class T>
void xml_unpack(xml_unpack_t& targ, const string& desc,
CDATA& a)
{targ.unpack(desc,a);}
/// Overload taking an is_graphnode tag: always throws, as unpacking
/// arbitrary graphs from XML is not supported.
template<class T>
void xml_unpack(xml_unpack_t& targ, const string& desc, is_graphnode, T&)
{
throw exception("xml_unpack of arbitrary graphs not supported");
}
}
// NOTE(review): the two macros below are defined outside this header
// (presumably in use_mbr_pointers.h / classdesc.h) and are assumed to
// generate further xml_unpack overloads -- confirm against their definitions.
#include "use_mbr_pointers.h"
CLASSDESC_USE_OLDSTYLE_MEMBER_OBJECTS(xml_unpack)
CLASSDESC_FUNCTION_NOP(xml_unpack)
// make the descriptor entry points visible without the classdesc:: qualifier
using classdesc::xml_unpack;
using classdesc::xml_unpack_onbase;
#endif