/* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
 *
 * JSON Library, originally from http://jsoncpp.sourceforge.net/
 *
 * Copyright (C) 2011 Stewart Smith
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * * The names of its contributors may not be used to endorse or
 * promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
40
#include <plugin/json_server/json/reader.h>
41
#include <plugin/json_server/json/value.h>
52
// Implementation of class Features
53
// ////////////////////////////////
56
: allowComments_( true )
57
, strictRoot_( false )
70
Features::strictMode()
73
features.allowComments_ = false;
74
features.strictRoot_ = true;
78
// Implementation of class Reader
79
// ////////////////////////////////
83
in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
85
return c == c1 || c == c2 || c == c3 || c == c4;
89
in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
91
return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
96
containsNewLine( Reader::Location begin,
97
Reader::Location end )
99
for ( ;begin < end; ++begin )
100
if ( *begin == '\n' || *begin == '\r' )
/**
 * Encodes a Unicode code point as UTF-8.
 *
 * \param cp Code point to encode.
 * \return The 1 to 4 byte encoding of \a cp, or an empty string when
 *         \a cp is greater than 0x10FFFF.
 */
static std::string codePointToUTF8(unsigned int cp)
{
   std::string result;

   // based on description from http://en.wikipedia.org/wiki/UTF-8

   if (cp <= 0x7f)
   {
      // Single byte: 0xxxxxxx
      result.resize(1);
      result[0] = static_cast<char>(cp);
   }
   else if (cp <= 0x7FF)
   {
      // Two bytes: 110xxxxx 10xxxxxx
      result.resize(2);
      result[1] = static_cast<char>(0x80 | (0x3f & cp));
      result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
   }
   else if (cp <= 0xFFFF)
   {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
   }
   else if (cp <= 0x10FFFF)
   {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      result.resize(4);
      result[3] = static_cast<char>(0x80 | (0x3f & cp));
      result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
      result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
   }

   return result;
}
143
// //////////////////////////////////////////////////////////////////
146
: features_( Features::all() )
151
Reader::Reader( const Features &features )
152
: features_( features )
158
Reader::parse( const std::string &document,
160
bool collectComments )
162
document_ = document;
163
const char *begin = document_.c_str();
164
const char *end = begin + document_.length();
165
return parse( begin, end, root, collectComments );
170
Reader::parse( std::istream& sin,
172
bool collectComments )
174
//std::istream_iterator<char> begin(sin);
175
//std::istream_iterator<char> end;
176
// Those would allow streamed input from a file, if parse() were a
177
// template function.
179
// Since std::string is reference-counted, this at least does not
180
// create an extra copy.
182
std::getline(sin, doc, (char)EOF);
183
return parse( doc, root, collectComments );
187
Reader::parse( const char *beginDoc, const char *endDoc,
189
bool collectComments )
191
if ( !features_.allowComments_ )
193
collectComments = false;
198
collectComments_ = collectComments;
202
commentsBefore_ = "";
204
while ( !nodes_.empty() )
206
nodes_.push( &root );
208
bool successful = readValue();
210
skipCommentTokens( token );
211
if ( collectComments_ && !commentsBefore_.empty() )
212
root.setComment( commentsBefore_, commentAfter );
213
if ( features_.strictRoot_ )
215
if ( !root.isArray() && !root.isObject() )
217
// Set error location to start of doc, ideally should be first token found in doc
218
token.type_ = tokenError;
219
token.start_ = beginDoc;
221
addError( "A valid JSON document must be either an array or an object value.",
234
skipCommentTokens( token );
235
bool successful = true;
237
if ( collectComments_ && !commentsBefore_.empty() )
239
currentValue().setComment( commentsBefore_, commentBefore );
240
commentsBefore_ = "";
244
switch ( token.type_ )
246
case tokenObjectBegin:
247
successful = readObject( token );
249
case tokenArrayBegin:
250
successful = readArray( token );
253
successful = decodeNumber( token );
256
successful = decodeString( token );
259
currentValue() = true;
262
currentValue() = false;
265
currentValue() = Value();
268
return addError( "Syntax error: value, object or array expected.", token );
271
if ( collectComments_ )
273
lastValueEnd_ = current_;
274
lastValue_ = ¤tValue();
282
Reader::skipCommentTokens( Token &token )
284
if ( features_.allowComments_ )
290
while ( token.type_ == tokenComment );
300
Reader::expectToken( TokenType type, Token &token, const char *message )
303
if ( token.type_ != type )
304
return addError( message, token );
310
Reader::readToken( Token &token )
313
token.start_ = current_;
314
Char c = getNextChar();
319
token.type_ = tokenObjectBegin;
322
token.type_ = tokenObjectEnd;
325
token.type_ = tokenArrayBegin;
328
token.type_ = tokenArrayEnd;
331
token.type_ = tokenString;
335
token.type_ = tokenComment;
349
token.type_ = tokenNumber;
353
token.type_ = tokenTrue;
354
ok = match( "rue", 3 );
357
token.type_ = tokenFalse;
358
ok = match( "alse", 4 );
361
token.type_ = tokenNull;
362
ok = match( "ull", 3 );
365
token.type_ = tokenArraySeparator;
368
token.type_ = tokenMemberSeparator;
371
token.type_ = tokenEndOfStream;
378
token.type_ = tokenError;
379
token.end_ = current_;
387
while ( current_ != end_ )
390
if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
399
Reader::match( Location pattern,
402
if ( end_ - current_ < patternLength )
404
int index = patternLength;
406
if ( current_[index] != pattern[index] )
408
current_ += patternLength;
414
Reader::readComment()
416
Location commentBegin = current_ - 1;
417
Char c = getNextChar();
418
bool successful = false;
420
successful = readCStyleComment();
422
successful = readCppStyleComment();
426
if ( collectComments_ )
428
CommentPlacement placement = commentBefore;
429
if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
431
if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
432
placement = commentAfterOnSameLine;
435
addComment( commentBegin, current_, placement );
442
Reader::addComment( Location begin,
444
CommentPlacement placement )
446
assert( collectComments_ );
447
if ( placement == commentAfterOnSameLine )
449
assert( lastValue_ != 0 );
450
lastValue_->setComment( std::string( begin, end ), placement );
454
if ( !commentsBefore_.empty() )
455
commentsBefore_ += "\n";
456
commentsBefore_ += std::string( begin, end );
462
Reader::readCStyleComment()
464
while ( current_ != end_ )
466
Char c = getNextChar();
467
if ( c == '*' && *current_ == '/' )
470
return getNextChar() == '/';
475
Reader::readCppStyleComment()
477
while ( current_ != end_ )
479
Char c = getNextChar();
480
if ( c == '\r' || c == '\n' )
490
while ( current_ != end_ )
492
if ( !(*current_ >= '0' && *current_ <= '9') &&
493
!in( *current_, '.', 'e', 'E', '+', '-' ) )
503
while ( current_ != end_ )
516
Reader::readObject( Token & )
520
currentValue() = Value( objectValue );
521
while ( readToken( tokenName ) )
523
bool initialTokenOk = true;
524
while ( tokenName.type_ == tokenComment && initialTokenOk )
525
initialTokenOk = readToken( tokenName );
526
if ( !initialTokenOk )
528
if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
530
if ( tokenName.type_ != tokenString )
534
if ( !decodeString( tokenName, name ) )
535
return recoverFromError( tokenObjectEnd );
538
if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
540
return addErrorAndRecover( "Missing ':' after object member name",
544
Value &value = currentValue()[ name ];
545
nodes_.push( &value );
546
bool ok = readValue();
548
if ( !ok ) // error already set
549
return recoverFromError( tokenObjectEnd );
552
if ( !readToken( comma )
553
|| ( comma.type_ != tokenObjectEnd &&
554
comma.type_ != tokenArraySeparator &&
555
comma.type_ != tokenComment ) )
557
return addErrorAndRecover( "Missing ',' or '}' in object declaration",
561
bool finalizeTokenOk = true;
562
while ( comma.type_ == tokenComment &&
564
finalizeTokenOk = readToken( comma );
565
if ( comma.type_ == tokenObjectEnd )
568
return addErrorAndRecover( "Missing '}' or object member name",
575
Reader::readArray( Token & )
577
currentValue() = Value( arrayValue );
579
if ( *current_ == ']' ) // empty array
582
readToken( endArray );
588
Value &value = currentValue()[ index++ ];
589
nodes_.push( &value );
590
bool ok = readValue();
592
if ( !ok ) // error already set
593
return recoverFromError( tokenArrayEnd );
596
// Accept Comment after last item in the array.
597
ok = readToken( token );
598
while ( token.type_ == tokenComment && ok )
600
ok = readToken( token );
602
bool badTokenType = ( token.type_ == tokenArraySeparator &&
603
token.type_ == tokenArrayEnd );
604
if ( !ok || badTokenType )
606
return addErrorAndRecover( "Missing ',' or ']' in array declaration",
610
if ( token.type_ == tokenArrayEnd )
618
Reader::decodeNumber( Token &token )
620
bool isDouble = false;
621
for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
624
|| in( *inspect, '.', 'e', 'E', '+' )
625
|| ( *inspect == '-' && inspect != token.start_ );
628
return decodeDouble( token );
629
Location current = token.start_;
630
bool isNegative = *current == '-';
633
Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt)
634
: Value::maxUInt) / 10;
635
Value::UInt value = 0;
636
while ( current < token.end_ )
639
if ( c < '0' || c > '9' )
640
return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
641
if ( value >= threshold )
642
return decodeDouble( token );
643
value = value * 10 + Value::UInt(c - '0');
646
currentValue() = -Value::Int( value );
647
else if ( value <= Value::UInt(Value::maxInt) )
648
currentValue() = Value::Int( value );
650
currentValue() = value;
656
Reader::decodeDouble( Token &token )
659
const int bufferSize = 32;
661
int length = int(token.end_ - token.start_);
662
if ( length <= bufferSize )
664
Char buffer[bufferSize];
665
memcpy( buffer, token.start_, length );
667
count = sscanf( buffer, "%lf", &value );
671
std::string buffer( token.start_, token.end_ );
672
count = sscanf( buffer.c_str(), "%lf", &value );
676
return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
677
currentValue() = value;
683
Reader::decodeString( Token &token )
686
if ( !decodeString( token, decoded ) )
688
currentValue() = decoded;
694
Reader::decodeString( Token &token, std::string &decoded )
696
decoded.reserve( token.end_ - token.start_ - 2 );
697
Location current = token.start_ + 1; // skip '"'
698
Location end = token.end_ - 1; // do not include '"'
699
while ( current != end )
704
else if ( c == '\\' )
706
if ( current == end )
707
return addError( "Empty escape sequence in string", token, current );
708
Char escape = *current++;
711
case '"': decoded += '"'; break;
712
case '/': decoded += '/'; break;
713
case '\\': decoded += '\\'; break;
714
case 'b': decoded += '\b'; break;
715
case 'f': decoded += '\f'; break;
716
case 'n': decoded += '\n'; break;
717
case 'r': decoded += '\r'; break;
718
case 't': decoded += '\t'; break;
721
unsigned int unicode;
722
if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
724
decoded += codePointToUTF8(unicode);
728
return addError( "Bad escape sequence in string", token, current );
740
Reader::decodeUnicodeCodePoint( Token &token,
743
unsigned int &unicode )
746
if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
748
if (unicode >= 0xD800 && unicode <= 0xDBFF)
751
if (end - current < 6)
752
return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
753
unsigned int surrogatePair;
754
if (*(current++) == '\\' && *(current++)== 'u')
756
if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
758
unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
764
return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
770
Reader::decodeUnicodeEscapeSequence( Token &token,
773
unsigned int &unicode )
775
if ( end - current < 4 )
776
return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
778
for ( int index =0; index < 4; ++index )
782
if ( c >= '0' && c <= '9' )
784
else if ( c >= 'a' && c <= 'f' )
785
unicode += c - 'a' + 10;
786
else if ( c >= 'A' && c <= 'F' )
787
unicode += c - 'A' + 10;
789
return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
796
Reader::addError( const std::string &message,
802
info.message_ = message;
804
errors_.push_back( info );
810
Reader::recoverFromError( TokenType skipUntilToken )
812
int errorCount = int(errors_.size());
816
if ( !readToken(skip) )
817
errors_.resize( errorCount ); // discard errors caused by recovery
818
if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
821
errors_.resize( errorCount );
827
Reader::addErrorAndRecover( const std::string &message,
829
TokenType skipUntilToken )
831
addError( message, token );
832
return recoverFromError( skipUntilToken );
837
Reader::currentValue()
839
return *(nodes_.top());
844
Reader::getNextChar()
846
if ( current_ == end_ )
853
Reader::getLocationLineAndColumn( Location location,
857
Location current = begin_;
858
Location lastLineStart = current;
860
while ( current < location && current != end_ )
865
if ( *current == '\n' )
867
lastLineStart = current;
870
else if ( c == '\n' )
872
lastLineStart = current;
876
// column & line start at 1
877
column = int(location - lastLineStart) + 1;
883
Reader::getLocationLineAndColumn( Location location ) const
886
getLocationLineAndColumn( location, line, column );
887
char buffer[18+16+16+1];
888
sprintf( buffer, "Line %d, Column %d", line, column );
894
Reader::getFormatedErrorMessages() const
896
std::string formattedMessage;
897
for ( Errors::const_iterator itError = errors_.begin();
898
itError != errors_.end();
901
const ErrorInfo &error = *itError;
902
formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
903
formattedMessage += " " + error.message_ + "\n";
905
formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
907
return formattedMessage;
911
std::istream& operator>>( std::istream &sin, Value &root )
914
bool ok = reader.parse(sin, root, true);
916
if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages());