File indexing completed on 2025-01-05 04:01:13

0001 /*
0002  * SPDX-FileCopyrightText: 2019-2023 Mattia Basaglia <dev@dragon.best>
0003  *
0004  * SPDX-License-Identifier: GPL-3.0-or-later
0005  */
0006 #pragma once
0007 
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>

#include <QByteArray>
#include <QString>

#include "string_decoder.hpp"
0018 
0019 namespace glaxnimate::io::aep {
0020 
/**
 * \brief Kinds of token produced by CosLexer
 *
 * The numeric values follow declaration order, starting at 0.
 */
enum class CosTokenType
{
    Identifier,     ///< /foo
    Number,         ///< 123
    String,         ///< (foo)
    HexString,      ///< <f000>
    Boolean,        ///< true
    ObjectStart,    ///< <<
    ObjectEnd,      ///< >>
    ArrayStart,     ///< [
    ArrayEnd,       ///< ]
    Null,           ///< null
    Eof,            ///< end of file
};
0046 
0047 class CosError : public std::runtime_error
0048 {
0049 public:
0050     CosError(QString message) : runtime_error(message.toStdString()), message(std::move(message)) {}
0051 
0052     QString message;
0053 };
0054 
0055 
0056 struct CosValue
0057 {
0058     enum class Index
0059     {
0060         Null,
0061         Number,
0062         String,
0063         Boolean,
0064         Bytes,
0065         Object,
0066         Array
0067     };
0068 
0069     using Object = std::unique_ptr<std::unordered_map<QString, CosValue>>;
0070     using Array = std::unique_ptr<std::vector<CosValue>>;
0071 
0072     template<class T>
0073     CosValue(T&& v) : value(std::forward<T>(v)) {}
0074     CosValue() = default;
0075     CosValue(CosValue& v) = delete;
0076     CosValue(const CosValue& v) = delete;
0077     CosValue(CosValue&& v) = default;
0078     CosValue& operator=(CosValue&& v) = default;
0079 
0080 
0081     template<Index Ind>
0082     const auto& get() const
0083     {
0084         if ( Ind != type() )
0085             throw CosError("Invalid COS value type");
0086         return std::get<int(Ind)>(value);
0087     }
0088 
0089     Index type() const { return Index(value.index()); }
0090 
0091     std::variant<
0092         std::nullptr_t, double, QString, bool, QByteArray, Object, Array
0093     > value = nullptr;
0094 };
0095 
// Namespace-level shorthands for the container alternatives of CosValue:
// CosObject is a unique_ptr to an unordered_map<QString, CosValue>
using CosObject = CosValue::Object;
// CosArray is a unique_ptr to a vector<CosValue>
using CosArray = CosValue::Array;
0098 
0099 struct CosToken
0100 {
0101     CosTokenType type = CosTokenType::Eof;
0102     CosValue value = {};
0103 
0104     CosToken() = default;
0105     CosToken(CosToken&&) = default;
0106     CosToken& operator=(CosToken&&) = default;
0107 };
0108 
0109 class CosLexer
0110 {
0111 public:
0112     CosLexer(QByteArray data) : data(std::move(data)) {}
0113 
0114     CosToken next_token()
0115     {
0116         int ch;
0117 
0118         while ( true )
0119         {
0120             ch = get_char();
0121             if ( ch == -1 )
0122                 return CosToken();
0123             else if ( ch == '%' )
0124                 lex_comment();
0125             else if ( !std::isspace(ch) )
0126                 break;
0127         }
0128 
0129         // <<
0130         if ( ch == '<' )
0131         {
0132             ch = get_char();
0133             if ( ch == '<' )
0134                 return {CosTokenType::ObjectStart};
0135             else if ( ch == -1 )
0136                 throw_lex("<");
0137             else if ( std::isxdigit(ch) )
0138                 return lex_hex_string(ch);
0139             else
0140                 throw_lex(QString("<") + QChar(ch));
0141         }
0142 
0143         // >>
0144         if ( ch == '>' )
0145         {
0146             auto d = get_char();
0147             if ( d != '>' )
0148             {
0149                 QString tok{QChar(ch)};
0150                 if ( d != -1 )
0151                     tok += QChar(d);
0152                 throw_lex(tok, ">>");
0153             }
0154             return {CosTokenType::ObjectEnd};
0155         }
0156 
0157         // [
0158         if ( ch == '[' )
0159             return {CosTokenType::ArrayStart};
0160 
0161         // ]
0162         if ( ch == ']' )
0163             return {CosTokenType::ArrayEnd};
0164 
0165         // /foo
0166         if ( ch == '/' ) {
0167             return lex_identifier();
0168         }
0169 
0170         // (foo)
0171         if ( ch == '(' ) {
0172             return lex_string();
0173         }
0174 
0175         // Keyword
0176         if ( std::isalpha(ch) )
0177             return lex_keyword(ch);
0178 
0179         // Number
0180         if ( std::isdigit(ch) || ch == '-' || ch == '+' || ch == '.' )
0181             return lex_number(ch);
0182 
0183         throw_lex(QString() + QChar(ch));
0184     }
0185 
0186     [[noreturn]] void throw_lex(const QString& token, const QString& exp = {})
0187     {
0188         QString msg = "Unknown COS token %1";
0189         msg = msg.arg(token);
0190         if ( !exp.isEmpty() )
0191         {
0192             msg += ", expected ";
0193             msg += exp;
0194         }
0195 
0196         throw CosError(msg);
0197     }
0198 
0199     int get_char()
0200     {
0201         if ( offset >= data.size() )
0202             return -1;
0203 
0204         int ch = std::uint8_t(data[offset]);
0205         offset += 1;
0206         return ch;
0207     }
0208 
0209     void unget()
0210     {
0211         offset -= 1;
0212         if ( offset < 0 )
0213             throw CosError("Buffer underflow");
0214     }
0215 
0216     void lex_comment()
0217     {
0218         while ( true )
0219         {
0220             auto ch = get_char();
0221             if ( ch == -1 || ch == '\n' )
0222                 break;
0223         }
0224     }
0225 
0226     CosToken lex_number(int ch)
0227     {
0228         if ( ch == '.' )
0229             return lex_number_fract(QString(QChar(ch)));
0230         else if ( ch == '+' || ch == '-' )
0231             return lex_number_int(get_char(), QChar(ch));
0232         else
0233             return lex_number_int(ch, '+');
0234     }
0235 
0236     CosToken lex_number_int(int ch, QChar sign)
0237     {
0238         QString head;
0239         head += sign;
0240 
0241         while ( true )
0242         {
0243             if ( ch == '.' )
0244             {
0245                 return lex_number_fract(head + QChar(ch));
0246             }
0247             else if ( ch == -1 )
0248             {
0249                 break;
0250             }
0251             else if ( std::isdigit(ch) )
0252             {
0253                 head += QChar(ch);
0254                 ch = get_char();
0255             }
0256             else
0257             {
0258                 unget();
0259                 break;
0260             }
0261         }
0262 
0263         return {CosTokenType::Number, head.toDouble()};
0264     }
0265 
0266     CosToken lex_number_fract(QString num)
0267     {
0268         while ( true )
0269         {
0270             int ch = get_char();
0271 
0272             if ( ch == -1 )
0273             {
0274                 break;
0275             }
0276             else if ( std::isdigit(ch) )
0277             {
0278                 num += QChar(ch);
0279             }
0280             else
0281             {
0282                 unget();
0283                 break;
0284             }
0285         }
0286         return {CosTokenType::Number, num.toDouble()};
0287     }
0288 
0289     CosToken lex_keyword(char start)
0290     {
0291         QString kw(start);
0292 
0293         while ( true )
0294         {
0295             auto ch = get_char();
0296             if ( ch == -1 )
0297             {
0298                 break;
0299             }
0300             else if ( std::isalpha(ch) )
0301             {
0302                 kw += QChar(ch);
0303             }
0304             else
0305             {
0306                 unget();
0307                 break;
0308             }
0309         }
0310 
0311         if ( kw == "true" )
0312                 return {CosTokenType::Boolean, true};
0313         if ( kw == "false" )
0314                 return {CosTokenType::Boolean, false};
0315         if ( kw == "null")
0316                 return {CosTokenType::Null};
0317 
0318         throw CosError("Unknown keyword " + kw);
0319     }
0320 
0321     CosToken lex_string()
0322     {
0323         QByteArray string;
0324 
0325         while ( true )
0326         {
0327             auto ch = lex_string_char();
0328             if ( ch == -1 )
0329                 break;
0330 
0331             string.push_back(ch);
0332         }
0333 
0334         return {CosTokenType::String, decode_string(string)};
0335     }
0336 
0337     int lex_string_char()
0338     {
0339         auto ch = get_char();
0340         if ( ch == -1 )
0341             throw CosError("Unterminated String");
0342 
0343         if ( ch == ')' )
0344             return -1;
0345 
0346         if ( ch == '\\' )
0347             return lex_string_escape();
0348 
0349         if ( ch == '\r' )
0350         {
0351             if ( get_char() != '\n' )
0352                 unget();
0353             return '\n';
0354         }
0355         else if ( ch == '\n' )
0356         {
0357             if ( get_char() != '\r' )
0358                 unget();
0359             return '\n';
0360         }
0361 
0362         return ch;
0363     }
0364 
0365     bool is_octal(char ch)
0366     {
0367         return '0' <= ch && ch <= '7';
0368     }
0369 
0370     char lex_string_escape()
0371     {
0372         auto ch = get_char();
0373         if ( ch == -1 )
0374             throw CosError("Unterminated string");
0375 
0376         switch ( ch )
0377         {
0378             case 'b':
0379                 return '\b';
0380             case 'n':
0381                 return '\n';
0382             case 'f':
0383                 return '\f';
0384             case 'r':
0385                 return '\r';
0386             case '(':
0387             case ')':
0388             case '\\':
0389                 return ch;
0390         }
0391 
0392         if ( is_octal(ch) )
0393         {
0394             QString octal{QChar(ch)};
0395             for ( auto i = 0; i < 2; i++ )
0396             {
0397                 ch = get_char();
0398                 if ( ch == -1 )
0399                     break;
0400 
0401                 if ( !is_octal(ch) )
0402                 {
0403                     unget();
0404                     break;
0405                 }
0406 
0407                 octal += QChar(ch);
0408             }
0409 
0410             return octal.toInt(nullptr, 8);
0411         }
0412 
0413         throw CosError("Invalid escape sequence");
0414     }
0415 
0416     CosToken lex_hex_string(char head)
0417     {
0418         QByteArray data;
0419         data.push_back(head);
0420         while ( true )
0421         {
0422             auto ch = get_char();
0423             if ( ch == -1 )
0424             {
0425                 throw CosError("Unterminated hex string");
0426             }
0427             else if ( std::isxdigit(ch) )
0428             {
0429                 data.push_back(ch);
0430             }
0431             else if ( ch == '>' )
0432             {
0433                 if ( data.size() % 2 != 0 )
0434                     data.push_back('0');
0435                 break;
0436             }
0437             else if ( !std::isspace(ch) )
0438             {
0439                 throw CosError(QString("Invalid character in hex string: ") + QChar(ch));
0440             }
0441         }
0442 
0443         return {CosTokenType::HexString, QByteArray::fromHex(data)};
0444     }
0445 
0446     CosToken lex_identifier()
0447     {
0448         QString ident = "";
0449         const QString special = "()[]<>/%";
0450         while ( true )
0451         {
0452             auto ch = get_char();
0453             if ( ch == -1 )
0454                 break;
0455             if ( ch < 0x21 || ch > 0x7e )
0456             {
0457                 unget();
0458                 break;
0459             }
0460 
0461             if ( ch == '#' )
0462             {
0463                 QByteArray hexstr;
0464                 for ( auto i = 0; i < 2; i++ )
0465                 {
0466                     ch = get_char();
0467                     if ( ch == -1 || !std::isxdigit(ch) )
0468                         throw CosError("Invalid Identifier");
0469                     hexstr += std::uint8_t(ch);
0470                 }
0471                 ident += QChar(hexstr.toInt(nullptr, 16));
0472             }
0473             else if ( special.indexOf(QChar(ch)) != -1 )
0474             {
0475                 unget();
0476                 break;
0477             }
0478             else
0479             {
0480                 ident += QChar(ch);
0481             }
0482         }
0483 
0484         return {CosTokenType::Identifier, ident};
0485     }
0486 
0487 private:
0488     QByteArray data;
0489     int offset = 0;
0490 };
0491 
0492 class CosParser
0493 {
0494 public:
0495     CosParser(QByteArray data) : lexer(std::move(data)) {}
0496 
0497     CosValue parse()
0498     {
0499         lex();
0500         if ( lookahead.type == CosTokenType::Identifier )
0501             return parse_object_content();
0502 
0503         auto val = parse_value();
0504         if ( lookahead.type == CosTokenType::Eof )
0505             return val;
0506 
0507         CosArray arr = parse_array_content();
0508         arr->emplace(arr->begin(), std::move(val));
0509         return arr;
0510     }
0511 
0512 
0513 private:
0514     CosToken lookahead;
0515     CosLexer lexer;
0516 
0517     void lex()
0518     {
0519         lookahead = lexer.next_token();
0520     }
0521 
0522     CosObject parse_object_content()
0523     {
0524         CosObject value = std::make_unique<CosObject::element_type>();
0525 
0526         while ( true )
0527         {
0528             if ( lookahead.type == CosTokenType::Eof || lookahead.type == CosTokenType::ObjectEnd )
0529                 break;
0530 
0531             expect(CosTokenType::Identifier);
0532             auto key = lookahead.value.get<CosValue::Index::String>();
0533             lex();
0534             auto val = parse_value();
0535             value->emplace(key, std::move(val));
0536         }
0537 
0538         return value;
0539     }
0540 
0541     void expect(CosTokenType token_type)
0542     {
0543         if ( lookahead.type != token_type )
0544             throw CosError(QString("Expected token %1, got %2").arg(int(token_type)).arg(int(lookahead.type)));
0545     }
0546 
0547     CosArray parse_array_content()
0548     {
0549         CosArray value = std::make_unique<CosArray::element_type>();
0550 
0551         while ( true )
0552         {
0553             if ( lookahead.type == CosTokenType::Eof || lookahead.type == CosTokenType::ArrayEnd )
0554                 break;
0555 
0556             value->push_back(parse_value());
0557         }
0558 
0559         return value;
0560     }
0561 
0562     CosValue parse_value()
0563     {
0564         CosValue val;
0565         switch ( lookahead.type )
0566         {
0567             case CosTokenType::String:
0568             case CosTokenType::HexString:
0569             case CosTokenType::Null:
0570             case CosTokenType::Boolean:
0571             case CosTokenType::Identifier:
0572             case CosTokenType::Number:
0573                 val = std::move(lookahead.value);
0574                 lex();
0575                 return val;
0576             case CosTokenType::ObjectStart:
0577                 lex();
0578                 val = parse_object_content();
0579                 expect(CosTokenType::ObjectEnd);
0580                 lex();
0581                 return val;
0582             case CosTokenType::ArrayStart:
0583                 lex();
0584                 val = parse_array_content();
0585                 expect(CosTokenType::ArrayEnd);
0586                 lex();
0587                 return val;
0588             default:
0589                 throw CosError(QString("Expected token COS value, got %1").arg(int(lookahead.type)));
0590         }
0591     }
0592 };
0593 
0594 } // namespace glaxnimate::io::aep