a Code for the Combination of Indirect and Direct Constraints on High Energy Physics Models Logo
StringParser.cpp
Go to the documentation of this file.
1/*
2 * StringParser.cpp
3 *
4 * Created on: Jan 6, 2012
5 * Author: Ben O'Leary (benjamin.oleary@gmail.com)
6 *
7 * This file is part of BOLlib, released under the
8 * GNU General Public License. Please see the accompanying
9 * README.BOLlib.txt file for a full list of files, brief documentation
10 * on how to use these classes, and further details on the license.
11 */
12
13#include "StringParser.hpp"
14
15namespace BOL
16{
17 std::string const StringParser::whitespaceChars( " \t" );
18 std::string const StringParser::newlineChars( "\n\r" );
19 std::string const StringParser::whitespaceAndNewlineChars( " \t\n\r" );
20 std::string const
21 StringParser::lowercaseAlphabetChars( "abcdefghijklmnopqrstuvwxyz" );
22 std::string const
23 StringParser::uppercaseAlphabetChars( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
24 std::string const StringParser::digitChars( "0123456789" );
25
26 char const StringParser::lowercaseMinusUppercase( 'a' - 'A' );
27
28
// Formats inputInt as base-10 text: a sign prefix, then any padding needed to
// reach minimumNumberOfDigits digit characters, then the digits themselves.
// NOTE(review): this listing skips source lines 30 and 50, so the line that
// opens the parameter list (the one declaring inputInt) and one line of the
// warning output are not shown here - compare with the original file.
29 std::string
31 int const minimumNumberOfDigits,
32 std::string const prefixForPositiveNumbers,
33 std::string const prefixForNegativeNumbers,
34 char const paddingChar )
35 /* this returns a std::string that is the ASCII version of an int in base
36 * 10, prefixed with prefixForPositiveNumbers or prefixForNegativeNumbers
37 * depending on whether it is positive or negative. it makes returnString
38 * have at least minimumNumberOfDigits digit characters, filling it out
39 * with paddingChars after
40 * prefixForPositiveNumbers/prefixForNegativeNumbers
41 * (e.g. intToString( 23, 4, "+", "-" ) returns "+0023").
42 */
43 {
// a non-positive digit count is refused: a warning is printed and a
// placeholder string is returned instead of a number.
// NOTE(review): the message speaks of the integer not fitting, but this branch
// is taken only because minimumNumberOfDigits is not positive.
44 if( 0 >= minimumNumberOfDigits )
45 {
46 std::cout
47 << std::endl
48 << "BOL::warning! StringParser::intToString( "
49 << inputInt << ", " << minimumNumberOfDigits << ", "
51 << " ) could not fit the integer into the given size!";
52 // report a warning message.
53 return std::string( "please_give_a_positive_number_of_digits" );
54 }
// start from the positive prefix & swap it for the negative one if needed.
55 std::string returnString( prefixForPositiveNumbers );
56 if( 0 > inputInt )
57 {
58 returnString.assign( prefixForNegativeNumbers );
// NOTE(review): negating the most negative int overflows - confirm that
// callers never pass it.
59 inputInt = -inputInt;
60 }
61 // now the '+' or '-' or whatever is substituting has been inserted &
62 // inputInt is positive semi-definite.
63 std::string unpaddedIntAsString( positiveIntToString( inputInt ) );
64 int numberOfZeroesToInsert( minimumNumberOfDigits
65 - (int)(unpaddedIntAsString.size()) );
66 // if numberOfZeroesToInsert is negative, then the number was longer than
67 // the minimum output string length specified.
// the padding (paddingChar, not necessarily '0') goes between the prefix &
// the digits.
68 if( 0 < numberOfZeroesToInsert )
69 {
70 returnString.append( (size_t)numberOfZeroesToInsert,
71 paddingChar );
72 }
73 returnString.append( unpaddedIntAsString );
74 return returnString;
75 }
76
// Formats inputDouble in scientific notation with numberOfMantissaDigits
// mantissa digits and at least numberOfExponentDigits exponent digits, using
// the given strings for the signs and for the exponent marker.
// NOTE(review): this listing skips source lines 111 to 113, so part of the
// warning output below is not shown here - compare with the original file.
77 std::string
78 StringParser::doubleToString( double inputDouble,
79 int const numberOfMantissaDigits,
80 int const numberOfExponentDigits,
81 std::string const prefixForPositiveNumbers,
82 std::string const prefixForNegativeNumbers,
83 std::string const positiveExponentPrefix,
84 std::string const negativeExponentPrefix,
85 std::string const exponentCharacter )
86 /* this returns a std::string that is the ASCII version of a double in base
87 * 10, in the form specified thusly:
88 * 1st character: either "-" for negative numbers, or a "+" for
89 * positive numbers (or a string to replace this character),
90 * 2nd character: the 1st digit,
91 * 3rd character: the decimal point,
92 * then ( numberOfMantissaDigits - 1 ) digits following the
93 * decimal point (so that the mantissa is numberOfMantissaDigits
94 * digits plus a decimal point)
95 * then "E" (or a string to replace this character)
96 * then "+" or "-", depending on the sign of the exponent (or a string to
97 * replace this character),
98 * then the absolute value of the exponent, with preceding 0s to fill to
99 * numberOfExponentDigits digit characters.
100 * NaNs are returned as "NaN".
101 */
102 {
// both digit counts have to be positive; otherwise a warning is printed and a
// placeholder string is returned instead of a number.
103 if( ( 0 >= numberOfMantissaDigits )
104 ||
105 ( 0 >= numberOfExponentDigits ) )
106 {
107 std::cout
108 << std::endl
109 << "BOL::warning! StringParser::doubleToString( "
110 << inputDouble << ", " << numberOfMantissaDigits << ", "
114 << " ) could not fit the double into the given size!";
115 // report a warning message.
116 return std::string( "please_give_a_positive_number_of_digits" );
117 }
// formattedMantissa holds the absolute value of the input from here on.
118 std::string returnString( prefixForPositiveNumbers );
119 double formattedMantissa( inputDouble );
120 if( 0.0 > inputDouble )
121 {
122 returnString.assign( prefixForNegativeNumbers );
123 formattedMantissa = -inputDouble;
124 }
// zero is written directly: "0." followed by zeroes, then a positive-signed
// exponent consisting only of zeroes.
125 if( 0.0 == formattedMantissa )
126 {
127 returnString.append( "0." );
128 returnString.append( ( numberOfMantissaDigits - 1 ),
129 '0' );
130 returnString.append( exponentCharacter );
131 returnString.append( positiveExponentPrefix );
132 returnString.append( numberOfExponentDigits,
133 '0' );
134 }
135 else if( 0.0 < formattedMantissa )
136 // at this point any negative numbers will now be positive, so any
137 // that fail this comparison should be NaN.
138 {
// build 10^( numberOfMantissaDigits - 1 ) as an int.
// NOTE(review): this, the cast of the scaled mantissa & the multiplication by
// 10 further down all assume that 10^numberOfMantissaDigits fits in an int -
// confirm that callers keep numberOfMantissaDigits small.
139 int tenToDigitsMinusOneAsInt( 1 );
140 for( int mantissaDigitCount( 1 );
141 numberOfMantissaDigits > mantissaDigitCount;
142 ++mantissaDigitCount )
143 {
144 tenToDigitsMinusOneAsInt *= 10;
145 }
146 double tenToDigitsMinusOneAsDouble( (double)tenToDigitsMinusOneAsInt );
147 double tenToDigits( 10.0 * tenToDigitsMinusOneAsDouble );
148 int formattedExponent( 0 );
// scale by powers of ten until the value lies in
// [ tenToDigitsMinusOneAsDouble, tenToDigits ), tracking the exponent.
// NOTE(review): an infinite input stays infinite when multiplied by 0.1, so
// the first loop would never end for it - confirm that infinities cannot
// reach this function.
149 while( tenToDigits <= formattedMantissa )
150 {
151 formattedMantissa *= 0.1;
152 ++formattedExponent;
153 }
154 while( tenToDigitsMinusOneAsDouble > formattedMantissa )
155 {
156 formattedMantissa *= 10.0;
157 --formattedExponent;
158 }
159 /* now formattedMantissa is between tenToDigitsMinusOneAsDouble &
160 * tenToDigits, & hence has numberOfMantissaDigits digits before the
161 * decimal point. however, now we have to round correctly:
162 */
// the cast truncates, so a discarded fraction of at least 0.5 rounds up.
163 int mantissaTimesTenToSomePowerAsInt( (int)formattedMantissa );
164 if( 0.5 <= ( formattedMantissa
165 - (double)mantissaTimesTenToSomePowerAsInt ) )
166 {
167 ++mantissaTimesTenToSomePowerAsInt;
168 }
169 if( mantissaTimesTenToSomePowerAsInt
170 >= ( 10 * tenToDigitsMinusOneAsInt) )
171 // if rounding pushed mantissaTimesTenToSomePowerAsInt into having too
172 // many digits...
173 {
174 mantissaTimesTenToSomePowerAsInt
175 = ( mantissaTimesTenToSomePowerAsInt / 10 );
176 ++formattedExponent;
177 }
// write the 1st digit, the decimal point, then the remaining digits.
178 std::string mantissaTimesTenToSomePowerAsString( positiveIntToString(
179 mantissaTimesTenToSomePowerAsInt ) );
180 returnString.append( 1,
181 mantissaTimesTenToSomePowerAsString[ 0 ] );
182 returnString.append( 1,
183 '.' );
184 returnString.append( mantissaTimesTenToSomePowerAsString,
185 1,
186 ( mantissaTimesTenToSomePowerAsString.size() - 1 ) );
187 formattedExponent += ( numberOfMantissaDigits - 1 );
188 // this accounts for all the multiplication to get the mantissa as an
189 // int of the appropriate length.
190 returnString.append( exponentCharacter );
191 if( 0 > formattedExponent )
192 {
193 returnString.append( negativeExponentPrefix );
194 formattedExponent = -formattedExponent;
195 }
196 else
197 {
198 returnString.append( positiveExponentPrefix );
199 }
// the exponent is padded with '0' up to numberOfExponentDigits; a longer
// exponent is written in full.
200 std::string
201 exponentIntAsString( positiveIntToString( formattedExponent ) );
202 int exponentZeroesToPrepend( numberOfExponentDigits
203 - (int)(exponentIntAsString.size()) );
204 if( 0 < exponentZeroesToPrepend )
205 {
206 returnString.append( exponentZeroesToPrepend,
207 '0' );
208 }
209 returnString.append( exponentIntAsString );
210 }
211 else
212 // if it failed the comparison, it should be a NaN.
213 {
// assign (not append), so no sign prefix precedes the NaN text.
214 returnString.assign( UsefulStuff::nanString );
215 }
216 return returnString;
217 }
218
// Compares two strings character by character, treating 'A' to 'Z' and
// 'a' to 'z' as equal when they differ only in case.
// NOTE(review): this listing skips source lines 245 and 253, which close the
// two case-offset comparisons below (presumably using
// lowercaseMinusUppercase - confirm against the original file); as shown here
// the condition is incomplete.
219 bool
220 StringParser::stringsMatchIgnoringCase( std::string const& firstString,
221 std::string const& secondString )
222 // this returns true if both strings would be identical if all their
223 // uppercase chars were converted to lowercase.
224 {
225 if( firstString.size() != secondString.size() )
226 // if the strings don't match in size, they obviously do not match.
227 {
228 return false;
229 }
// the comparison runs from the last character back to the first.
// NOTE(review): for empty strings this relies on ( size() - 1 ) becoming a
// negative int so that the loop body is never entered - confirm.
230 for( int charCounter( firstString.size() - 1 );
231 0 <= charCounter ;
232 --charCounter )
233 // go through each character in the string:
234 {
235 // if the strings do not match at this char, check to see if they are
236 // letters that just differ in case:
237 if( ( secondString[ charCounter ] != firstString[ charCounter ] )
238 &&
239 !( ( firstString[ charCounter ] >= 'A' )
240 &&
241 ( firstString[ charCounter ] <= 'Z' )
242 &&
243 ( secondString[ charCounter ]
244 == ( firstString[ charCounter ]
246 &&
247 !( ( firstString[ charCounter ] >= 'a' )
248 &&
249 ( firstString[ charCounter ] <= 'z' )
250 &&
251 ( secondString[ charCounter ]
252 == ( firstString[ charCounter ]
254 {
255 return false;
256 }
257 }
258 // if this point is reached, all the characters matched:
259 return true;
260 }
261
// Scans stringToInterpret for the shape [sign] digits [. digits]
// [e|E [sign] digits] followed only by whitespace or newline characters; on
// success the conversion itself is delegated to stringToDouble.
// NOTE(review): this listing skips source line 271, which carries the argument
// of the first find_first_not_of call (presumably the set of leading
// characters to skip - confirm against the original file).
262 bool
263 StringParser::stringIsDouble( std::string const& stringToInterpret,
264 double& doubleToSet )
265 /* this returns true if stringToInterpret is a floating-point number in
266 * scientific E notation (allowing 'E' or 'e'), and sets doubleToSet
267 * accordingly if so.
268 */
269 {
270 size_t charPosition( stringToInterpret.find_first_not_of(
// nothing but skipped characters: not a number.
272 if( charPosition == std::string::npos )
273 {
274 return false;
275 }
// an optional sign, which must not be the last character of the string.
276 if( ( stringToInterpret[ charPosition ] == '+' )
277 ||
278 ( stringToInterpret[ charPosition ] == '-' ) )
279 {
280 if( charPosition == ( stringToInterpret.size() - 1 ) )
281 {
282 return false;
283 }
284 ++charPosition;
285 }
// skip the digits before an optional decimal point, then those after it.
// NOTE(review): nothing here demands at least one digit, so input such as "."
// looks as if it would be accepted - confirm.
286 charPosition = stringToInterpret.find_first_not_of( digitChars,
287 charPosition );
288 if( ( charPosition != std::string::npos )
289 &&
290 ( stringToInterpret[ charPosition ] == '.' ) )
291 {
292 charPosition = stringToInterpret.find_first_not_of( digitChars,
293 ( charPosition + 1 ) );
294 }
// an optional exponent: 'e' or 'E', an optional sign, then digits.
// NOTE(review): ( size() - 2 ) is unsigned, so it wraps around for strings
// shorter than 2 characters - confirm that this is harmless.
295 if( ( charPosition < ( stringToInterpret.size() - 2 ) )
296 &&
297 ( ( stringToInterpret[ charPosition ] == 'e' )
298 ||
299 ( stringToInterpret[ charPosition ] == 'E' ) ) )
300 {
301 ++charPosition;
302 if( ( stringToInterpret[ charPosition ] == '+' )
303 ||
304 ( stringToInterpret[ charPosition ] == '-' ) )
305 {
306 ++charPosition;
307 }
308 charPosition = stringToInterpret.find_first_not_of( digitChars,
309 charPosition );
310 }
// only whitespace or newline characters may follow the number.
311 charPosition
312 = stringToInterpret.find_first_not_of( whitespaceAndNewlineChars,
313 charPosition );
314 if( charPosition == std::string::npos )
315 {
316 doubleToSet = stringToDouble( stringToInterpret );
317 return true;
318 }
// doubleToSet is left untouched when the string is rejected.
319 return false;
320 }
321
// Turns a list of numbers separated by spaces, commas or semicolons into a
// vector of ints. stringToInterpret is taken by value, so the substitutions
// below do not affect the caller's string.
// NOTE(review): this listing skips source line 333, which carries the 2nd
// argument of trimFromFrontAndBack (presumably the set of characters to trim -
// confirm against the original file).
322 std::vector< int >
323 StringParser::stringToIntVector( std::string stringToInterpret )
324 {
// commas & semicolons become spaces so that the stream extraction below
// splits on them.
325 substituteCharacterWith( stringToInterpret,
326 ',',
327 ' ' );
328 substituteCharacterWith( stringToInterpret,
329 ';',
330 ' ' );
331 std::vector< int > returnVector;
332 std::string indicesString( trimFromFrontAndBack( stringToInterpret,
334 if( !(indicesString.empty()) )
335 {
336 std::stringstream streamToParse( indicesString );
// each entry is read as a double & then truncated to an int by the cast.
337 double parsedIntAsDouble;
338 while( streamToParse.good() )
339 {
// NOTE(review): the value is pushed whether or not the extraction succeeded,
// so an entry that is not a number still adds an element - confirm that this
// is intended.
340 streamToParse >> parsedIntAsDouble;
341 returnVector.push_back( (int)parsedIntAsDouble );
342 }
343 }
344 return returnVector;
345 }
346
347 std::string
348 StringParser::substringToFirst( std::string const& stringToParse,
349 VectorlikeArray< std::string > const& delimitersOfSubstring,
350 std::string* const remainderString )
351 /* this returns the substring of stringToParse from its beginning up to the
352 * first instance of any of the strings in delimitersOfSubstring within
353 * stringToParse. if stringToParse does not contain any of those strings as
354 * a substring, the whole of stringToParse is returned, otherwise the
355 * substring up to but not including the first of any found strings from
356 * delimitersOfSubstring is returned. if remainderString is not NULL, the
357 * remainder of stringToParse that is not returned is put into
358 * remainderString.
359 */
360 {
361 size_t
362 delimiterPosition( stringToParse.find( delimitersOfSubstring[ 0 ] ) );
363 size_t comparisonPosition;
364 for( int stringIndex( delimitersOfSubstring.getLastIndex() );
365 0 < stringIndex;
366 --stringIndex )
367 {
368 comparisonPosition
369 = stringToParse.find( delimitersOfSubstring[ stringIndex ] );
370 if( comparisonPosition < delimiterPosition )
371 {
372 delimiterPosition = comparisonPosition;
373 }
374 }
375
376 // now delimiterPosition marks the position of the first of any of the
377 // strings in delimitersOfSubstring which were found in stringToParse.
378 if( std::string::npos == delimiterPosition )
379 {
380 if( NULL != remainderString )
381 {
382 remainderString->assign( "" );
383 }
384 return stringToParse;
385 }
386 else
387 {
388 std::string returnString( stringToParse.begin(),
389 ( stringToParse.begin() + delimiterPosition ) );
390 if( NULL != remainderString )
391 {
392 remainderString->assign( ( stringToParse.begin() + delimiterPosition ),
393 stringToParse.end() );
394 }
395 return returnString;
396 }
397 }
398
399 std::string
400 StringParser::firstWordOf( std::string const& stringToParse,
401 std::string* const remainderString,
402 std::string const& separatorChars )
403 /* this parses the first substring without any of the characters in
404 * separatorChars & returns it, filling remainderString with the rest if
405 * it is not NULL.
406 */
407 {
408 size_t wordStart( stringToParse.find_first_not_of( separatorChars ) );
409 if( std::string::npos == wordStart )
410 {
411 if( NULL != remainderString )
412 {
413 remainderString->assign( "" );
414 }
415 return std::string( "" );
416 }
417 else
418 {
419 size_t wordEnd( stringToParse.find_first_of( separatorChars,
420 wordStart ) );
421 std::string returnString( stringToParse.substr( wordStart,
422 ( wordEnd - wordStart ) ) );
423 if( NULL != remainderString )
424 {
425 wordStart = stringToParse.find_first_not_of( separatorChars,
426 wordEnd );
427 if( std::string::npos != wordStart )
428 {
429 remainderString->assign( stringToParse.substr( wordStart ) );
430 }
431 else
432 {
433 remainderString->assign( "" );
434 }
435 }
436 return returnString;
437 }
438 }
439
440
441 StringParser::StringParser( int const minimumNumberOfDigitsForInts,
442 char const paddingCharForInts,
443 int const numberOfMantissaDigits,
444 int const numberOfExponentDigits,
445 std::string const prefixForPositiveNumbers,
446 std::string const prefixForNegativeNumbers,
447 std::string const positiveExponentPrefix,
448 std::string const negativeExponentPrefix,
449 std::string const exponentCharacter ) :
450 minimumNumberOfDigitsForInts( minimumNumberOfDigitsForInts ),
451 paddingCharForInts( paddingCharForInts ),
452 numberOfMantissaDigits( numberOfMantissaDigits ),
453 numberOfExponentDigits( numberOfExponentDigits ),
454 prefixForPositiveNumbers( prefixForPositiveNumbers ),
455 prefixForNegativeNumbers( prefixForNegativeNumbers ),
456 positiveExponentPrefix( positiveExponentPrefix ),
457 negativeExponentPrefix( negativeExponentPrefix ),
458 exponentCharacter( exponentCharacter )
459 {
460 // just an initialization list.
461 }
462
// NOTE(review): this listing skips source line 463, which should carry the
// signature that this empty body belongs to (presumably the destructor of
// StringParser - confirm against the original file).
464 {
465 // does nothing.
466 }
467
468
469 char
470 StringParser::charForSingleDigit( int const singleDigitAsInt )
471 {
472 switch( singleDigitAsInt )
473 {
474 case 0:
475 return '0';
476 case 1:
477 return '1';
478 case 2:
479 return '2';
480 case 3:
481 return '3';
482 case 4:
483 return '4';
484 case 5:
485 return '5';
486 case 6:
487 return '6';
488 case 7:
489 return '7';
490 case 8:
491 return '8';
492 case 9:
493 return '9';
494 default:
495 return '?';
496 }
497 }
498
499 int
500 StringParser::intForSingleDigit( char const singleDigitAsChar )
501 {
502 switch( singleDigitAsChar )
503 {
504 case '0':
505 return 0;
506 case '1':
507 return 1;
508 case '2':
509 return 2;
510 case '3':
511 return 3;
512 case '4':
513 return 4;
514 case '5':
515 return 5;
516 case '6':
517 return 6;
518 case '7':
519 return 7;
520 case '8':
521 return 8;
522 case '9':
523 return 9;
524 case 'A':
525 return 10;
526 case 'B':
527 return 11;
528 case 'C':
529 return 12;
530 case 'D':
531 return 13;
532 case 'E':
533 return 14;
534 case 'F':
535 return 15;
536 default:
537 return (int)(UsefulStuff::notANumber);
538 }
539 }
540
541 std::string
543 // this puts the digits of positiveInt into digitBuffer in the order of
544 // digit for highest power of 10 1st.
545 {
546 int numberOfDigits( 1 );
547 int tenToNumberOfDigits( 10 );
548 while( positiveInt >= tenToNumberOfDigits )
549 {
550 tenToNumberOfDigits *= 10;
551 ++numberOfDigits;
552 }
553 std::string digitBuffer( "" );
554 int digitInt;
555 while( 0 < positiveInt )
556 {
557 tenToNumberOfDigits = ( tenToNumberOfDigits / 10 );
558 digitInt = 0;
559 while( tenToNumberOfDigits <= positiveInt )
560 {
561 positiveInt -= tenToNumberOfDigits;
562 ++digitInt;
563 }
564 digitBuffer.push_back( charForSingleDigit( digitInt ) );
565 }
566 for( int zeroesToPushBack( numberOfDigits - digitBuffer.size() );
567 0 < zeroesToPushBack;
568 --zeroesToPushBack )
569 {
570 digitBuffer.push_back( '0' );
571 }
572 return digitBuffer;
573 }
574
575}
std::string prefixForPositiveNumbers
StringParser(int const minimumNumberOfDigitsForInts=6, char const paddingCharForInts='0', int const numberOfMantissaDigits=6, int const numberOfExponentDigits=2, std::string const prefixForPositiveNumbers="+", std::string const prefixForNegativeNumbers="-", std::string const positiveExponentPrefix="+", std::string const negativeExponentPrefix="-", std::string const exponentCharacter="E")
static std::string firstWordOf(std::string const &stringToParse, std::string *const remainderString=NULL, std::string const &separatorChars=whitespaceChars)
static std::string const uppercaseAlphabetChars
static std::string const lowercaseAlphabetChars
std::string positiveExponentPrefix
static std::string const whitespaceAndNewlineChars
static bool stringsMatchIgnoringCase(std::string const &firstString, std::string const &secondString)
static std::vector< int > stringToIntVector(std::string stringToInterpret)
static std::string const digitChars
static int intForSingleDigit(char const singleDigitAsChar)
std::string negativeExponentPrefix
static std::string const whitespaceChars
static bool stringIsDouble(std::string const &stringToInterpret, double &doubleToSet)
static std::string const newlineChars
static std::string trimFromFrontAndBack(std::string const &stringToTrim, std::string const &charsToTrim=whitespaceAndNewlineChars)
static double stringToDouble(std::string const &stringToInterpret)
static std::string intToString(int inputInt, int const minimumNumberOfDigits, std::string const prefixForPositiveNumbers="+", std::string const prefixForNegativeNumbers="-", char const paddingChar='0')
static char const lowercaseMinusUppercase
static char charForSingleDigit(int const singleDigitAsInt)
std::string prefixForNegativeNumbers
static std::string positiveIntToString(int positiveInt)
std::string exponentCharacter
static std::string doubleToString(double inputDouble, int const numberOfMantissaDigits, int const numberOfExponentDigits, std::string const prefixForPositiveNumbers="+", std::string const prefixForNegativeNumbers="-", std::string const positiveExponentPrefix="+", std::string const negativeExponentPrefix="-", std::string const exponentCharacter="E")
static void substituteCharacterWith(std::string &stringToTransform, char const charToBeReplaced, char const charToBePutIn)
static std::string substringToFirst(std::string const &stringToParse, VectorlikeArray< std::string > const &delimitersOfSubstring, std::string *const remainderString=NULL)
static double const notANumber
Definition: UsefulStuff.hpp:28
static std::string const nanString
Definition: UsefulStuff.hpp:29