File indexing completed on 2025-01-05 04:01:13
0001 /* 0002 * SPDX-FileCopyrightText: 2019-2023 Mattia Basaglia <dev@dragon.best> 0003 * 0004 * SPDX-License-Identifier: GPL-3.0-or-later 0005 */ 0006 #pragma once 0007 0008 #include <unordered_map> 0009 #include <vector> 0010 #include <variant> 0011 #include <memory> 0012 #include <stdexcept> 0013 0014 #include <QByteArray> 0015 #include <QString> 0016 0017 #include "string_decoder.hpp" 0018 0019 namespace glaxnimate::io::aep { 0020 0021 enum class CosTokenType 0022 { 0023 // /foo 0024 Identifier, 0025 // 123 0026 Number, 0027 // (foo) 0028 String, 0029 // <f000> 0030 HexString, 0031 // true 0032 Boolean, 0033 // << 0034 ObjectStart, 0035 // >> 0036 ObjectEnd, 0037 // [ 0038 ArrayStart, 0039 // ] 0040 ArrayEnd, 0041 // null 0042 Null, 0043 // end of file 0044 Eof, 0045 }; 0046 0047 class CosError : public std::runtime_error 0048 { 0049 public: 0050 CosError(QString message) : runtime_error(message.toStdString()), message(std::move(message)) {} 0051 0052 QString message; 0053 }; 0054 0055 0056 struct CosValue 0057 { 0058 enum class Index 0059 { 0060 Null, 0061 Number, 0062 String, 0063 Boolean, 0064 Bytes, 0065 Object, 0066 Array 0067 }; 0068 0069 using Object = std::unique_ptr<std::unordered_map<QString, CosValue>>; 0070 using Array = std::unique_ptr<std::vector<CosValue>>; 0071 0072 template<class T> 0073 CosValue(T&& v) : value(std::forward<T>(v)) {} 0074 CosValue() = default; 0075 CosValue(CosValue& v) = delete; 0076 CosValue(const CosValue& v) = delete; 0077 CosValue(CosValue&& v) = default; 0078 CosValue& operator=(CosValue&& v) = default; 0079 0080 0081 template<Index Ind> 0082 const auto& get() const 0083 { 0084 if ( Ind != type() ) 0085 throw CosError("Invalid COS value type"); 0086 return std::get<int(Ind)>(value); 0087 } 0088 0089 Index type() const { return Index(value.index()); } 0090 0091 std::variant< 0092 std::nullptr_t, double, QString, bool, QByteArray, Object, Array 0093 > value = nullptr; 0094 }; 0095 0096 using CosObject = CosValue::Object; 0097 using CosArray = CosValue::Array; 0098 0099 struct CosToken 0100 { 0101 CosTokenType type = CosTokenType::Eof; 0102 CosValue value = {}; 0103 0104 CosToken() = default; 0105 CosToken(CosToken&&) = default; 0106 CosToken& operator=(CosToken&&) = default; 0107 }; 0108 0109 class CosLexer 0110 { 0111 public: 0112 CosLexer(QByteArray data) : data(std::move(data)) {} 0113 0114 CosToken next_token() 0115 { 0116 int ch; 0117 0118 while ( true ) 0119 { 0120 ch = get_char(); 0121 if ( ch == -1 ) 0122 return CosToken(); 0123 else if ( ch == '%' ) 0124 lex_comment(); 0125 else if ( !std::isspace(ch) ) 0126 break; 0127 } 0128 0129 // << 0130 if ( ch == '<' ) 0131 { 0132 ch = get_char(); 0133 if ( ch == '<' ) 0134 return {CosTokenType::ObjectStart}; 0135 else if ( ch == -1 ) 0136 throw_lex("<"); 0137 else if ( std::isxdigit(ch) ) 0138 return lex_hex_string(ch); 0139 else 0140 throw_lex(QString("<") + QChar(ch)); 0141 } 0142 0143 // >> 0144 if ( ch == '>' ) 0145 { 0146 auto d = get_char(); 0147 if ( d != '>' ) 0148 { 0149 QString tok{QChar(ch)}; 0150 if ( d != -1 ) 0151 tok += QChar(d); 0152 throw_lex(tok, ">>"); 0153 } 0154 return {CosTokenType::ObjectEnd}; 0155 } 0156 0157 // [ 0158 if ( ch == '[' ) 0159 return {CosTokenType::ArrayStart}; 0160 0161 // ] 0162 if ( ch == ']' ) 0163 return {CosTokenType::ArrayEnd}; 0164 0165 // /foo 0166 if ( ch == '/' ) { 0167 return lex_identifier(); 0168 } 0169 0170 // (foo) 0171 if ( ch == '(' ) { 0172 return lex_string(); 0173 } 0174 0175 // Keyword 0176 if ( std::isalpha(ch) ) 0177 return lex_keyword(ch); 0178 0179 // Number 0180 if ( std::isdigit(ch) || ch == '-' || ch == '+' || ch == '.' ) 0181 return lex_number(ch); 0182 0183 throw_lex(QString() + QChar(ch)); 0184 } 0185 0186 [[noreturn]] void throw_lex(const QString& token, const QString& exp = {}) 0187 { 0188 QString msg = "Unknown COS token %1"; 0189 msg = msg.arg(token); 0190 if ( !exp.isEmpty() ) 0191 { 0192 msg += ", expected "; 0193 msg += exp; 0194 } 0195 0196 throw CosError(msg); 0197 } 0198 0199 int get_char() 0200 { 0201 if ( offset >= data.size() ) 0202 return -1; 0203 0204 int ch = std::uint8_t(data[offset]); 0205 offset += 1; 0206 return ch; 0207 } 0208 0209 void unget() 0210 { 0211 offset -= 1; 0212 if ( offset < 0 ) 0213 throw CosError("Buffer underflow"); 0214 } 0215 0216 void lex_comment() 0217 { 0218 while ( true ) 0219 { 0220 auto ch = get_char(); 0221 if ( ch == -1 || ch == '\n' ) 0222 break; 0223 } 0224 } 0225 0226 CosToken lex_number(int ch) 0227 { 0228 if ( ch == '.' ) 0229 return lex_number_fract(QString(QChar(ch))); 0230 else if ( ch == '+' || ch == '-' ) 0231 return lex_number_int(get_char(), QChar(ch)); 0232 else 0233 return lex_number_int(ch, '+'); 0234 } 0235 0236 CosToken lex_number_int(int ch, QChar sign) 0237 { 0238 QString head; 0239 head += sign; 0240 0241 while ( true ) 0242 { 0243 if ( ch == '.' ) 0244 { 0245 return lex_number_fract(head + QChar(ch)); 0246 } 0247 else if ( ch == -1 ) 0248 { 0249 break; 0250 } 0251 else if ( std::isdigit(ch) ) 0252 { 0253 head += QChar(ch); 0254 ch = get_char(); 0255 } 0256 else 0257 { 0258 unget(); 0259 break; 0260 } 0261 } 0262 0263 return {CosTokenType::Number, head.toDouble()}; 0264 } 0265 0266 CosToken lex_number_fract(QString num) 0267 { 0268 while ( true ) 0269 { 0270 int ch = get_char(); 0271 0272 if ( ch == -1 ) 0273 { 0274 break; 0275 } 0276 else if ( std::isdigit(ch) ) 0277 { 0278 num += QChar(ch); 0279 } 0280 else 0281 { 0282 unget(); 0283 break; 0284 } 0285 } 0286 return {CosTokenType::Number, num.toDouble()}; 0287 } 0288 0289 CosToken lex_keyword(char start) 0290 { 0291 QString kw(start); 0292 0293 while ( true ) 0294 { 0295 auto ch = get_char(); 0296 if ( ch == -1 ) 0297 { 0298 break; 0299 } 0300 else if ( std::isalpha(ch) ) 0301 { 0302 kw += QChar(ch); 0303 } 0304 else 0305 { 0306 unget(); 0307 break; 0308 } 0309 } 0310 0311 if ( kw == "true" ) 0312 return {CosTokenType::Boolean, true}; 0313 if ( kw == "false" ) 0314 return {CosTokenType::Boolean, false}; 0315 if ( kw == "null") 0316 return {CosTokenType::Null}; 0317 0318 throw CosError("Unknown keyword " + kw); 0319 } 0320 0321 CosToken lex_string() 0322 { 0323 QByteArray string; 0324 0325 while ( true ) 0326 { 0327 auto ch = lex_string_char(); 0328 if ( ch == -1 ) 0329 break; 0330 0331 string.push_back(ch); 0332 } 0333 0334 return {CosTokenType::String, decode_string(string)}; 0335 } 0336 0337 int lex_string_char() 0338 { 0339 auto ch = get_char(); 0340 if ( ch == -1 ) 0341 throw CosError("Unterminated String"); 0342 0343 if ( ch == ')' ) 0344 return -1; 0345 0346 if ( ch == '\\' ) 0347 return lex_string_escape(); 0348 0349 if ( ch == '\r' ) 0350 { 0351 if ( get_char() != '\n' ) 0352 unget(); 0353 return '\n'; 0354 } 0355 else if ( ch == '\n' ) 0356 { 0357 if ( get_char() != '\r' ) 0358 unget(); 0359 return '\n'; 0360 } 0361 0362 return ch; 0363 } 0364 0365 bool is_octal(char ch) 0366 { 0367 return '0' <= ch && ch <= '7'; 0368 } 0369 0370 char lex_string_escape() 0371 { 0372 auto ch = get_char(); 0373 if ( ch == -1 ) 0374 throw CosError("Unterminated string"); 0375 0376 switch ( ch ) 0377 { 0378 case 'b': 0379 return '\b'; 0380 case 'n': 0381 return '\n'; 0382 case 'f': 0383 return '\f'; 0384 case 'r': 0385 return '\r'; 0386 case '(': 0387 case ')': 0388 case '\\': 0389 return ch; 0390 } 0391 0392 if ( is_octal(ch) ) 0393 { 0394 QString octal{QChar(ch)}; 0395 for ( auto i = 0; i < 2; i++ ) 0396 { 0397 ch = get_char(); 0398 if ( ch == -1 ) 0399 break; 0400 0401 if ( !is_octal(ch) ) 0402 { 0403 unget(); 0404 break; 0405 } 0406 0407 octal += QChar(ch); 0408 } 0409 0410 return octal.toInt(nullptr, 8); 0411 } 0412 0413 throw CosError("Invalid escape sequence"); 0414 } 0415 0416 CosToken lex_hex_string(char head) 0417 { 0418 QByteArray data; 0419 data.push_back(head); 0420 while ( true ) 0421 { 0422 auto ch = get_char(); 0423 if ( ch == -1 ) 0424 { 0425 throw CosError("Unterminated hex string"); 0426 } 0427 else if ( std::isxdigit(ch) ) 0428 { 0429 data.push_back(ch); 0430 } 0431 else if ( ch == '>' ) 0432 { 0433 if ( data.size() % 2 != 0 ) 0434 data.push_back('0'); 0435 break; 0436 } 0437 else if ( !std::isspace(ch) ) 0438 { 0439 throw CosError(QString("Invalid character in hex string: ") + QChar(ch)); 0440 } 0441 } 0442 0443 return {CosTokenType::HexString, QByteArray::fromHex(data)}; 0444 } 0445 0446 CosToken lex_identifier() 0447 { 0448 QString ident = ""; 0449 const QString special = "()[]<>/%"; 0450 while ( true ) 0451 { 0452 auto ch = get_char(); 0453 if ( ch == -1 ) 0454 break; 0455 if ( ch < 0x21 || ch > 0x7e ) 0456 { 0457 unget(); 0458 break; 0459 } 0460 0461 if ( ch == '#' ) 0462 { 0463 QByteArray hexstr; 0464 for ( auto i = 0; i < 2; i++ ) 0465 { 0466 ch = get_char(); 0467 if ( ch == -1 || !std::isxdigit(ch) ) 0468 throw CosError("Invalid Identifier"); 0469 hexstr += std::uint8_t(ch); 0470 } 0471 ident += QChar(hexstr.toInt(nullptr, 16)); 0472 } 0473 else if ( special.indexOf(QChar(ch)) != -1 ) 0474 { 0475 unget(); 0476 break; 0477 } 0478 else 0479 { 0480 ident += QChar(ch); 0481 } 0482 } 0483 0484 return {CosTokenType::Identifier, ident}; 0485 } 0486 0487 private: 0488 QByteArray data; 0489 int offset = 0; 0490 }; 0491 0492 class CosParser 0493 { 0494 public: 0495 CosParser(QByteArray data) : lexer(std::move(data)) {} 0496 0497 CosValue parse() 0498 { 0499 lex(); 0500 if ( lookahead.type == CosTokenType::Identifier ) 0501 return parse_object_content(); 0502 0503 auto val = parse_value(); 0504 if ( lookahead.type == CosTokenType::Eof ) 0505 return val; 0506 0507 CosArray arr = parse_array_content(); 0508 arr->emplace(arr->begin(), std::move(val)); 0509 return arr; 0510 } 0511 0512 0513 private: 0514 CosToken lookahead; 0515 CosLexer lexer; 0516 0517 void lex() 0518 { 0519 lookahead = lexer.next_token(); 0520 } 0521 0522 CosObject parse_object_content() 0523 { 0524 CosObject value = std::make_unique<CosObject::element_type>(); 0525 0526 while ( true ) 0527 { 0528 if ( lookahead.type == CosTokenType::Eof || lookahead.type == CosTokenType::ObjectEnd ) 0529 break; 0530 0531 expect(CosTokenType::Identifier); 0532 auto key = lookahead.value.get<CosValue::Index::String>(); 0533 lex(); 0534 auto val = parse_value(); 0535 value->emplace(key, std::move(val)); 0536 } 0537 0538 return value; 0539 } 0540 0541 void expect(CosTokenType token_type) 0542 { 0543 if ( lookahead.type != token_type ) 0544 throw CosError(QString("Expected token %1, got %2").arg(int(token_type)).arg(int(lookahead.type))); 0545 } 0546 0547 CosArray parse_array_content() 0548 { 0549 CosArray value = std::make_unique<CosArray::element_type>(); 0550 0551 while ( true ) 0552 { 0553 if ( lookahead.type == CosTokenType::Eof || lookahead.type == CosTokenType::ArrayEnd ) 0554 break; 0555 0556 value->push_back(parse_value()); 0557 } 0558 0559 return value; 0560 } 0561 0562 CosValue parse_value() 0563 { 0564 CosValue val; 0565 switch ( lookahead.type ) 0566 { 0567 case CosTokenType::String: 0568 case CosTokenType::HexString: 0569 case CosTokenType::Null: 0570 case CosTokenType::Boolean: 0571 case CosTokenType::Identifier: 0572 case CosTokenType::Number: 0573 val = std::move(lookahead.value); 0574 lex(); 0575 return val; 0576 case CosTokenType::ObjectStart: 0577 lex(); 0578 val = parse_object_content(); 0579 expect(CosTokenType::ObjectEnd); 0580 lex(); 0581 return val; 0582 case CosTokenType::ArrayStart: 0583 lex(); 0584 val = parse_array_content(); 0585 expect(CosTokenType::ArrayEnd); 0586 lex(); 0587 return val; 0588 default: 0589 throw CosError(QString("Expected token COS value, got %1").arg(int(lookahead.type))); 0590 } 0591 } 0592 }; 0593 0594 } // namespace glaxnimate::io::aep