a Code for the Combination of Indirect and Direct Constraints on High Energy Physics Models Logo
AsciiXmlParser.cpp
Go to the documentation of this file.
1/*
2 * AsciiXmlParser.cpp
3 *
4 * Created on: Jan 23, 2012
5 * Author: Ben O'Leary (benjamin.oleary@gmail.com)
6 *
7 * This file is part of BOLlib, released under the
8 * GNU General Public License. Please see the accompanying
9 * README.BOLlib.txt file for a full list of files, brief documentation
10 * on how to use these classes, and further details on the license.
11 */
12
13#include "AsciiXmlParser.hpp"
14
15namespace BOL
16{
17 char const AsciiXmlParser::markupOpener( '<' );
18 char const AsciiXmlParser::markupCloser( '>' );
19 char const AsciiXmlParser::tagCloser( '/' );
20 std::string const AsciiXmlParser::allowedXmlWhitespaceChars( " \t\r\n" );
21 std::string const AsciiXmlParser::allowedXmlQuoteChars( "\'\"" );
22 std::pair< std::string, std::string > const
24 "-->" );
25 std::pair< std::string, std::string > const
27 "?>" );
28 std::pair< std::string, std::string > const
30 ">" );
31 std::pair< std::string, std::string > const
33 "]]>" );
34
35
// Constructs a parser with no text loaded: every member is given its starting
// value in the initialization list and the body does no further work.
// isVerbose is stored in the member of the same name; the error-reporting
// branches elsewhere in this file print to std::cout only when it is true.
36 AsciiXmlParser::AsciiXmlParser( bool const isVerbose ) :
37 isVerbose( isVerbose ),
38 fileParsingStream(),
39 stringParsingStream(),
40 textStream( &stringParsingStream ), // initially points at stringParsingStream.
41 rootTag( "" ),
42 rootAttributeMap(),
43 rootLineRange( -1, // NOTE(review): -1 presumably marks "no line recorded yet" - confirm.
44 -1 ),
45 elementName( "" ),
46 fullElementContentAsFound( "" ),
47 fullOpeningTagAsFound( "" ),
48 elementAttributeMap(),
49 elementLineRange( -1, // NOTE(review): same -1 placeholder as rootLineRange - confirm.
50 -1 ),
51 markupString( "" ),
52 streamIsGood( false ),
53 currentChar( ' ' ),
54 readNewlines( 0 ),
55 newlinesBeforeMarkup( 0 ),
56 parseStart( 0 ),
57 parseEnd( 0 ),
58 previousLength( 0 ),
59 currentQuoteChar( '\'' ),
60 currentAttribute( "",
61 "" ),
62 closingTag( "" ),
63 closingTagsToFind( 0 ),
64 currentTagName( "" )
65 {
66 // nothing to do here: all members are set by the initialization list above.
67 }
68
70 {
71 if( fileParsingStream.is_open() )
72 {
73 fileParsingStream.close();
74 }
75 }
76
77
78 bool
79 AsciiXmlParser::openRootElementOfFile( std::string const& fileName )
80 /* this loads the file with the given name into the internal ifstream for
81 * parsing, then opens the root element. if there was a problem loading the
82 * file, or no root element could be opened, false is returned. this closes
83 * the previously-loaded file, if it was open.
84 */
85 {
86 closeFile();
87 fileParsingStream.open( fileName.c_str() );
88 if( !(fileParsingStream.good()) )
89 {
90 if( isVerbose )
91 {
92 std::cout
93 << std::endl
94 << "BOL::error! AsciiXmlParser::openRootElementOfFile( " << fileName
95 << " ) could not open the file!";
96 std::cout << std::endl;
97 }
98 return false;
99 }
102 if( !streamIsGood
103 &&
104 isVerbose )
105 {
106 std::cout
107 << std::endl
108 << "BOL::error! AsciiXmlParser::openRootElementOfFile( " << fileName
109 << " ) could not find a root element!";
110 std::cout << std::endl;
111 }
112 return streamIsGood;
113 }
114
115 bool
116 AsciiXmlParser::closeMarkup( size_t const startPosition )
117 /* this records characters from textStream by appending them to markupString,
118 * up to the 1st instance of markupCloser that is not enclosed in quotes,
119 * but only looking for quote characters from startPosition onwards. an
120 * exception is made if the markup was a comment: if markupString begins with
121 * commentDelimiter.first, all characters up to the next found
122 * ( commentDelimiter.first + markupCloser ) are discarded & markupString is
123 * emptied, then true is returned.
124 */
125 {
128 &&
130 if( !streamIsGood )
131 {
132 if( isVerbose )
133 {
134 std::cout
135 << std::endl
136 << "BOL::error! AsciiXmlParser::closeMarkup( " << startPosition
137 << " ) could not find the end of a markup!";
138 std::cout << std::endl;
139 }
140 return false;
141 }
143 startPosition );
144 while( std::string::npos != parseStart )
145 {
148 ( parseStart + 1 ) );
149 while( std::string::npos == parseEnd )
150 {
151 markupString.append( 1,
152 markupCloser );
155 markupCloser );
156 if( !streamIsGood )
157 {
158 if( isVerbose )
159 {
160 std::cout
161 << std::endl
162 << "BOL::error! AsciiXmlParser::closeMarkup( " << startPosition
163 << ") could not find the end of a markup!";
164 std::cout << std::endl;
165 }
166 return false;
167 }
170 }
172 ( parseEnd + 1 ) );
173 }
174 return true;
175 }
176
177 bool
179 std::pair< std::string, std::string > const& delimitingStrings )
180 /* this checks to see if markupString begins with delimitingStrings.first,
181 * & if so, ensures that markupString ends with delimitingStrings.second,
182 * appending to markupString if necessary, then empties it. true is then
183 * returned, unless the end of the text was reached before this could
184 * happen.
185 */
186 {
187 if( !(compareMarkupStart( delimitingStrings.first )) )
188 {
189 return true;
190 }
191 while( !(compareMarkupEnd( delimitingStrings.second )) )
192 {
194 markupCloser );
195 if( !streamIsGood )
196 {
197 if( isVerbose )
198 {
199 std::cout
200 << std::endl
201 << "BOL::error! AsciiXmlParser::ignoreDelimited( \""
202 << delimitingStrings.first << ", " << delimitingStrings.second
203 << "\" ) could not find the ending delimiter!";
204 std::cout << std::endl;
205 }
206 return false;
207 }
208 }
209 markupString.assign( "" );
210 return true;
211 }
212
213 bool
215 std::pair< std::string, std::string > const& delimitingStrings )
216 /* this checks to see if markupString begins with delimitingStrings.first,
217 * & if so, ensures that markupString ends with delimitingStrings.second,
218 * appending to markupString if necessary, then appends
219 * markupOpener + markupString + markupCloser to fullElementContentAsFound,
220 * then empties markupString. if markupString does not begin with
221 * delimitingStrings.first, no change is made to either markupString or
222 * fullElementContentAsFound. true is then returned, unless the end of the
223 * text was reached before this could happen.
224 */
225 {
226 if( !(compareMarkupStart( delimitingStrings.first )) )
227 {
228 return true;
229 }
230 while( !(compareMarkupEnd( delimitingStrings.second )) )
231 {
232 markupString.append( 1,
233 markupCloser );
235 markupCloser );
236 if( !streamIsGood )
237 {
238 if( isVerbose )
239 {
240 std::cout
241 << std::endl
242 << "BOL::error! AsciiXmlParser::ignoreDelimited( \""
243 << delimitingStrings.first << ", " << delimitingStrings.second
244 << "\" ) could not find the ending delimiter!";
245 std::cout << std::endl;
246 }
247 return false;
248 }
249 }
251 markupOpener );
254 markupCloser );
255 markupString.assign( "" );
256 return true;
257 }
258
259 bool
260 AsciiXmlParser::eraseQuotedStringsInMarkup( size_t const startPosition )
261 /* this erases any quoted text in markupString starting from startPosition,
262 * extending markupString from textStream to the next unquoted
263 * markupCloser.
264 */
265 {
266 // at this point, every opening quote in markupString from startPosition is
267 // matched by a closing quote (properly nested).
269 startPosition );
270 while( std::string::npos != parseStart )
271 {
274 ( parseStart + 1 ) );
276 ( parseEnd - parseStart + 1 ) );
277 // we have to erase the closing quote character as well.
279 parseStart );
280 }
281 // at this point, all quoted strings have been removed from markupString,
282 // & now it ends at the 1st unquoted '>' in the text.
283 return true;
284 }
285
286 bool
288 /* this parses any attributes in markupString, assuming that parseEnd is at
289 * the end of the tag's name. false is returned if a malformed attribute is
290 * found.
291 */
292 {
293 elementAttributeMap.clear();
295 parseEnd );
296 while( std::string::npos != parseStart )
297 {
298 if( ( ( markupString.size() - 1 ) == parseStart )
299 &&
301 {
302 // if there is only whitespace left in the markup or the indicator of
303 // an empty element, the parsing is done:
304 return true;
305 }
306 // at this point, we have found a new attribute.
307 parseEnd = markupString.find( '=',
308 parseStart );
309 if( std::string::npos == parseEnd )
310 {
311 if( isVerbose )
312 {
313 std::cout
314 << std::endl
315 << "BOL::error! AsciiXmlParser::parseOpeningTag() found an attribute"
316 << " (\"" << markupString.substr( parseStart )
317 << "\") without a value!";
318 std::cout << std::endl;
319 }
320 return false;
321 }
322 currentAttribute.first.assign( markupString.substr( parseStart,
323 ( parseEnd - parseStart ) ) );
325 // this really should be ' or " for valid XML, but we won't bother
326 // checking...
327 parseStart = ( ++parseEnd );
328 // the attribute's value begins after the '=' & the quote character (by
329 // this point, parseEnd has been incremented twice before parseStart gets
330 // set).
332 parseStart );
333 if( std::string::npos == parseEnd )
334 // this shouldn't ever happen, because such cases should have already
335 // been caught by closeMarkup().
336 {
337 if( isVerbose )
338 {
339 std::cout
340 << std::endl
341 << "BOL::error! AsciiXmlParser::parseOpeningTag() found an attribute"
342 << " (\"" << markupString.substr( parseStart )
343 << "\") without a well-formed value (no closing quote mark)!";
344 std::cout << std::endl;
345 }
346 return false;
347 }
348 currentAttribute.second.assign( markupString.substr( parseStart,
349 ( parseEnd - parseStart ) ) );
352 ( ++parseEnd ) );
353 // parseEnd has to be incremented so that parseStart doesn't just sit on
354 // the closing quote.
355 }
356 return true;
357 }
358
359 bool
361 /* this stores the characters between the opening tag & the corresponding
362 * closing tag in fullElementContentAsFound, returning false if the end of
363 * the text was reached before finding the closing tag.
364 */
365 {
366 fullOpeningTagAsFound.assign( "" );
368 fullElementContentAsFound.assign( "" );
369 // we note which line the opening tag is on:
370 elementLineRange.first = ( readNewlines + 1 );
371 if( tagCloser == markupString[ markupString.size() - 1 ] )
372 {
373 // empty tags need no recording:
374 elementLineRange.second = ( readNewlines + 1 );
375 return true;
376 }
377 closingTag.assign( 1,
378 tagCloser );
379 closingTag.append( elementName );
381 // there could be nested elements of the same name (not necessarily nested
382 // directly).
383 while( 0 < closingTagsToFind )
384 {
386 &&
388 if( !streamIsGood )
389 {
390 return false;
391 }
392 if( 0 == currentTagName.compare( elementName ) )
393 {
394 // if a nested child element of the same name is found, the child
395 // element is recorded too.
397 }
398 else if( 0 == currentTagName.compare( closingTag ) )
399 {
401 }
402 if( 0 < closingTagsToFind )
403 {
404 // unless this is the closing tag of the element, the markup must be
405 // recorded, & here, because textStream has already gone past it.
407 }
408 else
409 {
410 // if this is the closing tag, we note which line it is on:
411 elementLineRange.second = ( readNewlines + 1 );
412 }
413 }
414 return true;
415 }
416
417 bool
419 /* this skips any XML prolog & then opens the root element. the prolog is an
420 * optional XML declaration & an optional document type declaration, with any
421 * number of comments & processing instructions (though not before the XML
422 * declaration if there is one).
423 */
424 {
425 resetContent();
426 // the XML declaration markup is skipped as it is a special case of
427 // processing instruction markup.
428 while( markupString.empty() )
429 {
431 &&
433 &&
435 if( !streamIsGood )
436 {
437 if( isVerbose )
438 {
439 std::cout
440 << std::endl
441 << "BOL::error! AsciiXmlParser::skipPrologAndOpenRootElement() could"
442 << " not find any valid root element tag markup!";
443 std::cout << std::endl;
444 }
445 return false;
446 }
447 }
448 // at this point, the current markup is either the root element's opening
449 // tag or the document type declaration (if there is one), in either case
450 // only up to the 1st '>', regardless of whether that '>' actually is the
451 // end of the markup or not (e.g. if it is within quotes).
452 if( ( doctypeDelimiter.first.size() < ( markupString.size() + 2 ) )
453 &&
455 // if the markup is a document type declaration...
456 {
457 // since we will discard the document type declaration, it doesn't matter
458 // if we mangle markupString in doing so, since it will be over-written by
459 // the opening tag of the root element anyway.
461 if( !streamIsGood )
462 {
463 if( isVerbose )
464 {
465 std::cout
466 << std::endl
467 << "BOL::error! AsciiXmlParser::skipPrologAndOpenRootElement() could"
468 << " not find any valid root element tag markup!";
469 std::cout << std::endl;
470 }
471 return false;
472 }
473 size_t unclosedSubmarkupOpener( markupString.find( markupOpener ) );
474 while( std::string::npos != unclosedSubmarkupOpener )
475 {
479 &&
481 if( !streamIsGood )
482 {
483 if( isVerbose )
484 {
485 std::cout
486 << std::endl
487 << "BOL::error! AsciiXmlParser::skipPrologAndOpenRootElement()"
488 << " could not find any valid root element tag markup!";
489 std::cout << std::endl;
490 }
491 return false;
492 }
493 unclosedSubmarkupOpener = markupString.find( markupOpener,
494 unclosedSubmarkupOpener );
495 }
496 markupString.assign( "" );
497 // at this point, we have read in an unquoted '>' for every '<'. now we
498 // can forget the mangled document type declaration & we only have to
499 // discard any more comments or processing instructions before the
500 // opening tag of the root element.
501 while( markupString.empty() )
502 {
504 &&
506 &&
508 if( !streamIsGood )
509 {
510 if( isVerbose )
511 {
512 std::cout
513 << std::endl
514 << "BOL::error! AsciiXmlParser::skipPrologAndOpenRootElement()"
515 << " could not find any valid root element tag markup!";
516 std::cout << std::endl;
517 }
518 return false;
519 }
520 }
521 }
522 // at this point, the current markup is the root element's opening tag,
523 // though only up to the 1st '>' regardless of if it actually is the end of
524 // the markup or not (e.g. if it is within quotes).
525 rootLineRange.first = ( readNewlines + 1 );
527 &&
528 parseAttributes() );
529 if( !streamIsGood )
530 {
531 if( isVerbose )
532 {
533 std::cout
534 << std::endl
535 << "BOL::error! AsciiXmlParser::skipPrologAndOpenRootElement() could"
536 << " not find any valid root element tag markup!";
537 std::cout << std::endl;
538 }
539 return false;
540 }
542 rootTag.assign( elementName );
543 return true;
544 }
545
546}
bool parseTagName(std::string &nameDestination)
std::map< std::string, std::string > elementAttributeMap
std::string currentTagName
static std::pair< std::string, std::string > const piDelimiter
static std::pair< std::string, std::string > const doctypeDelimiter
bool recordDelimited(std::pair< std::string, std::string > const &delimitingStrings)
bool closeMarkup(size_t const startPosition=0)
bool ignoreDelimited(std::pair< std::string, std::string > const &delimitingStrings)
std::string fullElementContentAsFound
bool compareMarkupEnd(std::string const &comparisonString) const
bool recordTo(std::string &recordingString, char const endChar)
std::ifstream fileParsingStream
static std::pair< std::string, std::string > const cdataDelimiter
static char const markupOpener
std::pair< int, int > rootLineRange
bool eraseQuotedStringsInMarkup(size_t const startPosition)
std::string fullOpeningTagAsFound
bool compareMarkupStart(std::string const &comparisonString) const
static std::string const allowedXmlWhitespaceChars
static char const markupCloser
std::pair< std::string, std::string > currentAttribute
void recordTagTo(std::string &recordingString)
bool openRootElementOfFile(std::string const &fileName)
AsciiXmlParser(bool const isVerbose=false)
static std::string const allowedXmlQuoteChars
static char const tagCloser
std::istream * textStream
std::map< std::string, std::string > rootAttributeMap
std::pair< int, int > elementLineRange
static std::pair< std::string, std::string > const commentDelimiter