a Code for the Combination of Indirect and Direct Constraints on High Energy Physics Models Logo
AsciiXmlParser.hpp
Go to the documentation of this file.
1/*
2 * AsciiXmlParser.hpp
3 *
4 * Created on: Jan 23, 2012
5 * Author: Ben O'Leary (benjamin.oleary@gmail.com)
6 *
7 * This file is part of BOLlib, released under the
8 * GNU General Public License. Please see the accompanying
9 * README.BOLlib.txt file for a full list of files, brief documentation
10 * on how to use these classes, and further details on the license.
11 */
12
13#ifndef ASCIIXMLPARSER_HPP_
14#define ASCIIXMLPARSER_HPP_
15
16#include <iostream>
17#include <fstream>
18#include <sstream>
19#include <string>
20#include <map>
21#include "StringParser.hpp"
22
23namespace BOL
24{
25 /* this parses out blocks of ASCII text from a string between XML opening &
26 * closing tags, & returns the text between the tags (without interpreting
27 * it further as XML).
28 */
30 {
31 public:
32 AsciiXmlParser( bool const isVerbose = false );
34
35 bool
36 openRootElementOfFile( std::string const& fileName );
37 /* this loads the file with the given name into the internal ifstream for
38 * parsing, then opens the root element. if there was a problem loading the
39 * file, or no root element could be opened, false is returned. this closes
40 * the previously-loaded file, if it was open.
41 */
42 bool
43 readAllOfRootElementOfFile( std::string const& fileName );
44 /* this loads the file with the given name into the internal ifstream for
45 * parsing, then reads in all the root element. if there was a problem
46 * loading the file, or no root element could be opened, false is returned.
47 * this closes the previously-loaded file, if it was open.
48 */
49 bool
50 loadString( std::string const stringToParse );
51 /* this loads stringToParse into the internal stringstream for parsing. if
52 * there was a problem loading the string, false is returned. if there was a
53 * file open, it is now closed.
54 */
55 void
56 closeFile();
57 // this closes fileParsingStream, if it was open.
58 void
60 // this sets whichever stream textStream is pointing at to start reading
61 // again from its start.
62 bool
64 /* this reads in the entire next XML element. false is returned if the end
65 * of the text was reached before a new XML element could be found (or if
66 * no end for the next element could be found).
67 */
68 std::string const&
70 bool
71 currentElementNameMatches( std::string const& comparisonString ) const;
72 std::string const&
74 std::string
76 // this returns a string that is a copy of the current XML element, with
77 // whitespace & newline characters trimmed from the front & back.
78 std::string const&
80 std::map< std::string, std::string > const&
82 std::pair< int, int > const&
84 /* this returns a pair of ints where the 1st is 1 plus the number of
85 * newline characters read in before the '<' of the current element's
86 * opening tag was found, & the 2nd is the 1st plus the number of newline
87 * characters read in before the '>' of the current element's closing tag
88 * was found.
89 */
90
91
92 protected:
93 static char const markupOpener;
94 static char const markupCloser;
95 static char const tagCloser;
96 static std::string const allowedXmlWhitespaceChars;
97 static std::string const allowedXmlQuoteChars;
98 static std::pair< std::string, std::string > const commentDelimiter;
99 static std::pair< std::string, std::string > const piDelimiter;
100 static std::pair< std::string, std::string > const doctypeDelimiter;
101 static std::pair< std::string, std::string > const cdataDelimiter;
102
103 bool const isVerbose;
104 std::ifstream fileParsingStream;
105 std::istringstream stringParsingStream;
106 std::istream* textStream;
107 std::string rootTag;
108 std::map< std::string, std::string > rootAttributeMap;
109 std::pair< int, int > rootLineRange;
110 std::string elementName;
112 // this includes any whitespace & child elements (as text); this does not
113 // include the opening or closing tags (& so does not include attributes).
115 // this includes any attributes; this does not include '<' or '>'.
116 std::map< std::string, std::string > elementAttributeMap;
117 std::pair< int, int > elementLineRange;
118
119 std::string markupString;
125 size_t parseEnd;
128 std::pair< std::string, std::string > currentAttribute;
129 std::string closingTag;
131 std::string currentTagName;
132
133 void
134 resetContent();
135 // this sets the various recording data to the values they should have
136 // before reading in some text.
137 bool
139 /* this reads the next character from textStream into currentChar &
140 * increments readNewlines if currentChar was '\n', returning
141 * textStream->good().
142 */
143 bool
144 recordTo( std::string& recordingString,
145 char const endChar );
146 /* this records characters from textStream by appending them to
147 * recordingString, up to the 1st instance of endChar. it returns false if
148 * no further instances of endChar were found in textStream.
149 */
150 bool
151 closeMarkup( size_t const startPosition = 0 );
152 /* this records characters from textStream by appending them to
153 * markupString, up to the 1st instance of markupCloser that is not
154 * enclosed in quotes, but only looking for quote characters from
155 * startPosition onwards. an exception is made if the markup was a comment:
156 * if markupString begins with commentDelimiter.first, all characters up to
157 * the next found ( commentDelimiter.first + markupCloser ) are discarded
158 * & markupString is emptied, then true is returned.
159 */
160 bool
162 /* this skips characters up to the next XML markup opening character, then
163 * the characters up to the next markup closing character are stored in
164 * markupString. this also records the number of newline characters up to
165 * this markup.
166 */
167 bool
169 /* this calls discardToNextMarkup() & ignoreDelimited() on the non-tag
170 * markups until a markup corresponding to a tag is found. this also
171 * records the number of newline characters up to this tag.
172 */
173 bool
175 /* this appends characters from textStream to fullElementContentAsFound up
176 * to the next XML markup opening character, then the characters up to the
177 * next markup closing character are stored in markupString.
178 */
179 bool
181 /* this calls recordToNextMarkup() & recordDelimited() on the non-tag
182 * markups (ignoring comments however) until a markup corresponding to a
183 * tag is found.
184 */
185 bool
186 compareMarkupStart( std::string const& comparisonString ) const;
187 // this returns true if the start of "<" + markupString matches all of
188 // comparisonString.
189 bool
190 compareMarkupEnd( std::string const& comparisonString ) const;
191 // this returns true if the end of markupString + ">" matches all of
192 // comparisonString.
193 bool
195 std::pair< std::string, std::string > const& delimitingStrings );
196 /* this checks to see if markupString begins with delimitingStrings.first,
197 * & if so, ensures that markupString ends with delimitingStrings.second,
198 * appending to markupString if necessary, then empties it. true is then
199 * returned, unless the end of the text was reached before this could
200 * happen.
201 */
202 bool
204 std::pair< std::string, std::string > const& delimitingStrings );
205 /* this checks to see if markupString begins with delimitingStrings.first,
206 * & if so, ensures that markupString ends with delimitingStrings.second,
207 * appending to markupString if necessary, then appends
208 * markupOpener + markupString + markupCloser to fullElementContentAsFound,
209 * then empties markupString. true is then returned, unless the end of the
210 * text was reached before this could happen.
211 */
212 bool
213 eraseQuotedStringsInMarkup( size_t const startPosition );
214 /* this erases any quoted text in markupString starting from startPosition,
215 * extending markupString from textStream to the next unquoted
216 * markupCloser.
217 */
218 void
219 recordTagTo( std::string& recordingString );
220 // this appends markupOpener + markupString + markupCloser to
221 // recordingString.
222 bool
223 parseTagName( std::string& nameDestination );
224 // this parses markupString as the tag of an XML element, leaving parseEnd
225 // as the end of the name string.
226 bool
228 /* this parses any attributes in markupString, assuming that parseEnd is at
229 * the end of the tag's name. false is returned if a malformed attribute is
230 * found.
231 */
232 bool
234 /* this parses & stores any attributes & then stores the characters between
235 * the opening tag & the corresponding closing tag in
236 * fullElementContentAsFound, returning false if the end of the text was
237 * reached before finding the closing tag.
238 */
239 bool
241 };
242
243
244
245
246
247 inline bool
248 AsciiXmlParser::readAllOfRootElementOfFile( std::string const& fileName )
249 /* this loads the file with the given name into the internal ifstream for
250 * parsing, then reads in all the root element. if there was a problem
251 * loading the file, or no root element could be opened, false is returned.
252 * this closes the previously-loaded file, if it was open.
253 */
254 {
255 streamIsGood = ( openRootElementOfFile( fileName )
256 &&
258 if( !streamIsGood
259 &&
260 isVerbose )
261 {
262 std::cout
263 << std::endl
264 << "BOL::error! AsciiXmlParser::readAllOfRootElementOfFile( " << fileName
265 << " ) could not find a well-formed root element!";
266 std::cout << std::endl;
267 }
269 return streamIsGood;
270 }
271
272 inline bool
273 AsciiXmlParser::loadString( std::string const stringToParse )
274 /* this loads stringToParse into the internal stringstream for parsing. if
275 * there was a problem loading the file, false is returned. if there was a
276 * file open, it is now closed.
277 */
278 {
279 resetContent();
280 stringParsingStream.clear();
281 stringParsingStream.str( stringToParse );
283 return stringParsingStream.good();
284 }
285
286 inline void
288 // this closes fileParsingStream, if it was open.
289 {
290 if( fileParsingStream.is_open() )
291 {
292 fileParsingStream.close();
293 }
294 fileParsingStream.clear();
296 }
297
298 inline void
300 // this sets whichever stream textStream is pointing at to start reading
301 // again from its start.
302 {
303 resetContent();
304 textStream->clear();
305 textStream->seekg( std::ios::beg );
306 }
307
308 inline bool
310 /* this reads in the entire next XML element. false is returned if the end
311 * of the text was reached before a new XML element could be found (or if
312 * no end for the next element could be found).
313 */
314 {
315 return ( discardToNextTag()
316 &&
318 &&
320 &&
322 }
323
324 inline std::string const&
326 {
327 return elementName;
328 }
329
330 inline bool
332 std::string const& comparisonString ) const
333 {
334 return ( 0 == elementName.compare( comparisonString ) );
335 }
336
337 inline std::string const&
339 {
341 }
342
343 inline std::string
345 {
348 }
349
350 inline std::string const&
352 {
354 }
355
356 inline std::map< std::string, std::string > const&
358 {
359 return elementAttributeMap;
360 }
361
362 inline std::pair< int, int > const&
364 /* this returns a pair of ints where the 1st is 1 plus the number of
365 * newline characters read in before the '<' of the current element's
366 * opening tag was found, & the 2nd is the 1st plus the number of newline
367 * characters read in before the '>' of the current element's closing tag
368 * was found.
369 */
370 {
371 return elementLineRange;
372 }
373
374 inline void
376 // this sets the various recording data to the values they should have
377 // before reading in some text.
378 {
379 rootTag.assign( "" );
380 rootAttributeMap.clear();
381 rootLineRange.first = -1;
382 rootLineRange.second = -1;
383 elementName.assign( "" );
384 fullElementContentAsFound.assign( "" );
385 fullOpeningTagAsFound.assign( "" );
386 elementAttributeMap.clear();
387 elementLineRange.first = -1;
388 elementLineRange.second = -1;
389 markupString.assign( "" );
390 readNewlines = 0;
391 }
392
393 inline bool
395 /* this reads the next character from textStream into currentChar &
396 * increments readNewlines if currentChar was '\n', returning
397 * textStream->good().
398 */
399 {
400 streamIsGood = textStream->get( currentChar ).good();
401 if( streamIsGood
402 &&
403 ( '\n' == currentChar ) )
404 {
405 ++readNewlines;
406 }
407 return streamIsGood;
408 }
409
410 inline bool
411 AsciiXmlParser::recordTo( std::string& recordingString,
412 char const endChar )
413 /* this records characters from textStream by appending them to
414 * recordingStream, up to the 1st instance of endChar. it returns false if
415 * no further instances of endChar were found in textStream.
416 */
417 {
418 while( readCharacter()
419 &&
420 endChar != currentChar )
421 {
422 recordingString.append( 1,
423 currentChar );
424 }
425 return streamIsGood;
426 }
427
428 inline bool
430 /* this skips characters up to the next XML markup opening character, then
431 * the characters up to the next markup closing character are stored in
432 * markupString. this also records the number of newline characters up to
433 * this markup.
434 */
435 {
436 while( readCharacter()
437 &&
439 {
440 // the conditional does the work of reading up to the next tag.
441 }
443 return ( streamIsGood
444 &&
445 closeMarkup() );
446 }
447
448 inline bool
450 // this calls discardToNextMarkup() & ignoreDelimited() on the non-tag
451 // markups until a markup corresponding to a tag is found.
452 {
453 markupString.assign( "" );
454 while( markupString.empty() )
455 {
457 &&
459 &&
461 &&
463 if( !streamIsGood )
464 {
465 return false;
466 }
467 }
468 return true;
469 }
470
471 inline bool
473 /* this appends characters from textStream to fullElementContentAsFound up
474 * to the next XML markup opening character, then the characters up to the
475 * next markup closing character are stored in markupString.
476 */
477 {
480 &&
481 closeMarkup() );
482 }
483
484 inline bool
486 /* this calls recordToNextMarkup() & recordDelimited() on the non-tag
487 * markups (ignoring comments however) until a markup corresponding to a tag
488 * is found.
489 */
490 {
491 markupString.assign( "" );
492 while( markupString.empty() )
493 {
495 &&
497 &&
499 if( !streamIsGood )
500 {
501 return false;
502 }
503 }
504 return true;
505 }
506
507 inline bool
509 std::string const& comparisonString ) const
510 // this returns true if the start of "<" + markupString matches all of
511 // comparisonString.
512 {
513 return ( 0 == comparisonString.compare( 1,
514 ( comparisonString.size() - 1 ),
515 markupString,
516 0,
517 ( comparisonString.size() - 1 ) ) );
518 }
519
520 inline bool
521 AsciiXmlParser::compareMarkupEnd( std::string const& comparisonString ) const
522 // this returns true if the end of markupString + ">" matches all of
523 // comparisonString.
524 {
525 return ( 0 == comparisonString.compare( 0,
526 ( comparisonString.size() - 1 ),
527 markupString,
528 ( markupString.size() - comparisonString.size() + 1 ),
529 ( comparisonString.size() - 1 ) ) );
530 }
531
532 inline void
533 AsciiXmlParser::recordTagTo( std::string& recordingString )
534 // this appends markupOpener + markupString + markupCloser to
535 // recordingString.
536 {
537 recordingString.append( 1,
538 markupOpener );
539 recordingString.append( markupString );
540 recordingString.append( 1,
541 markupCloser );
542 }
543
544 inline bool
545 AsciiXmlParser::parseTagName( std::string& nameDestination )
546 // this parses markupString as the tag of an XML element, leaving parseEnd
547 // as the end of the name string.
548 {
550 if( std::string::npos == parseStart )
551 {
552 // empty markup is a sign of malformed XML:
553 if( isVerbose )
554 {
555 std::cout
556 << std::endl
557 << "BOL::error! AsciiXmlParser::parseOpeningTag(...) found empty"
558 << " markup!";
559 std::cout << std::endl;
560 }
561 return false;
562 }
564 parseStart );
565 nameDestination.assign( markupString.substr( parseStart,
566 ( parseEnd - parseStart ) ) );
567 return true;
568 }
569
570}
571
572#endif /* ASCIIXMLPARSER_HPP_ */
bool parseTagName(std::string &nameDestination)
std::map< std::string, std::string > elementAttributeMap
std::string currentTagName
static std::pair< std::string, std::string > const piDelimiter
static std::pair< std::string, std::string > const doctypeDelimiter
bool recordDelimited(std::pair< std::string, std::string > const &delimitingStrings)
bool closeMarkup(size_t const startPosition=0)
bool ignoreDelimited(std::pair< std::string, std::string > const &delimitingStrings)
std::string fullElementContentAsFound
std::pair< int, int > const & getLineRangeOfCurrentElement() const
bool compareMarkupEnd(std::string const &comparisonString) const
bool recordTo(std::string &recordingString, char const endChar)
std::ifstream fileParsingStream
static std::pair< std::string, std::string > const cdataDelimiter
static char const markupOpener
std::map< std::string, std::string > const & getCurrentElementAttributes()
std::string const & getCurrentOpeningTagAsFound() const
std::pair< int, int > rootLineRange
bool eraseQuotedStringsInMarkup(size_t const startPosition)
std::string fullOpeningTagAsFound
bool compareMarkupStart(std::string const &comparisonString) const
static std::string const allowedXmlWhitespaceChars
bool loadString(std::string const stringToParse)
static char const markupCloser
std::pair< std::string, std::string > currentAttribute
std::string const & getCurrentElementName() const
void recordTagTo(std::string &recordingString)
std::string const & getCurrentElementContent() const
bool currentElementNameMatches(std::string const &comparisonString) const
std::string getTrimmedCurrentElementContent() const
bool openRootElementOfFile(std::string const &fileName)
bool readAllOfRootElementOfFile(std::string const &fileName)
AsciiXmlParser(bool const isVerbose=false)
static std::string const allowedXmlQuoteChars
static char const tagCloser
std::istream * textStream
std::istringstream stringParsingStream
std::map< std::string, std::string > rootAttributeMap
std::pair< int, int > elementLineRange
static std::pair< std::string, std::string > const commentDelimiter
static std::string trimFromFrontAndBack(std::string const &stringToTrim, std::string const &charsToTrim=whitespaceAndNewlineChars)