libmxp/src/cmxpparser.cpp

0001 /***************************************************************************
0002  *   Copyright (C) 2004 by Tomas Mecir                                     *
0003  *   kmuddy@kmuddy.org                                                     *
0004  *                                                                         *
0005  *   This program is free software; you can redistribute it and/or modify  *
0006  *   it under the terms of the GNU Library General Public License as       *
0007  *   published by the Free Software Foundation; either version 2 of the    *
0008  *   License, or (at your option) any later version.                       *
0009  *                                                                         *
0010  *   This program is distributed in the hope that it will be useful,       *
0011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0013  *   GNU Library General Public License for more details.                  *
0014  ***************************************************************************/
0015
0016 #include "cmxpparser.h"
0017
0018 #include "celementmanager.h"
0019 #include "cmxpstate.h"
0020 #include "cresulthandler.h"
0021
0022 cMXPParser::cMXPParser (cMXPState *st, cElementManager *elm, cResultHandler *res)
0023 {
0024   state = st;
0025   elements = elm;
0026   results = res;
0027
0028   pstate = pText;
0029   wasBackslashR = false;
0030 }
0031
0032 cMXPParser::~cMXPParser ()
0033 {
0034 }
0035
0036 void cMXPParser::reset ()
0037 {
0038   str = "";
0039   pstate = pText;
0040   wasBackslashR = false;
0041   chunks.clear();
0042 }
0043
0044 void cMXPParser::parse (const string &text)
0045 {
0046   //WARNING: examine this function only at your own risk!
0047   //it is advised to have a look at the simpleParse() function first - it's similar
0048   //to this one, but much simpler...
0049   if (text.empty())
0050     return;
0051   string::const_iterator it;
0052   for (it = text.begin(); it != text.end(); ++it)
0053   {
0054     char c = *it;
0055
0056 // Looks like number of brain-dead servers that send out \n\r is bigger than the
0057 // number of servers that send out \r alone - the latter maybe don't exist at
0058 // all. Hence, with this commented out, we can't handle the \r-only ones,
0059 // but \n\r works.
0060 /*
0061     //handle \r not followed by \n - treated as a newline
0062     if (wasBackslashR && (c != '\n'))
0063     {
0064       //"str" now certainly is empty, so we needn't care about that
0065       //report new-line
0066       elements->gotNewLine();
0067       state->gotNewLine();
0068     }
0069 */
0070     wasBackslashR = false;
0071
0072     //we need current mode - parsing in LOCKED mode is limited
0073     //mode is retrieved in every iteration to ensure that it's always up-to-date
0074     mxpMode mode = state->getMXPMode();
0075     switch (pstate) {
0076       case pText: {
0077         //tags not recognized in LOCKED mode...
0078         if ((c == '\e') || ((mode != lockedMode) && (c == '<')) || (c == '\n') || (c == '\r'))
0079         {
0080           //end of text - got newline / ANSI seq / start of tag
0081           if (!str.empty())
0082           {
0083             state->gotText (str);
0084             str = "";
0085           }
0086           if (c == '\e')
0087             pstate = pAnsiSeq;
0088           if ((c == '<') && (mode != lockedMode))
0089             pstate = pTag;
0090           if (c == '\n')
0091           {
0092             //report new-line
0093             elements->gotNewLine();
0094             state->gotNewLine();
0095           }
0096           if (c == '\r')
0097             wasBackslashR = true;
0098         }
0099         else
0100           str += c;  //add new character to the text...
0101         break;
0102       };
0103       case pAnsiSeq: {
0104         if ((c == '\e') || (c == '\n') || (c == '\r'))
0105         {
0106           //the same as in pTag section...
0107           results->addToList (results->createError ("Received unfinished ANSI sequence!"));
0108           str = "";
0109           if (c == '\e')
0110             pstate = pAnsiSeq;
0111           if (c == '\n')
0112           {
0113             //report new-line
0114             elements->gotNewLine();
0115             state->gotNewLine();
0116             pstate = pText;
0117           }
0118           if (c == '\r')
0119           {
0120             pstate = pText;
0121             wasBackslashR = true;
0122           }
0123         }
0124         else
0125         if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
0126         {
0127           //ANSI sequence ends...
0128           if (c == 'z')  //line tag
0129           {
0130             if (str.empty())
0131             {
0132               //invalid sequence
0133               str = "\ez";
0134             }
0135             else
0136             {
0137               //process this sequence
0138               int len = str.length();
0139               int num = 0;
0140               for (int i = 1; i < len; i++)  //str[0] is '[', which is SKIPPED
0141               {
0142                 char cc = str[i];
0143                 if (cc == ';')  //this shouldn't happen, but some MUD might want to use it...
0144                 {
0145                   if ((num >= 0) && (num <= 99))  //ensure that number lies in correct range
0146                   {
0147                     state->gotLineTag (num);
0148                     elements->gotLineTag (num);
0149                   }
0150                   else
0151                     results->addToList (results->createError ("Received invalid line tag!"));
0152                   num = 0;
0153                 }
0154                 else
0155                   num = num * 10 + (cc - 48);  //48 is the code of '0'
0156               }
0157               //report last line tag (and usually the only one)
0158               if ((num >= 0) && (num <= 99))  //ensure that number lies in correct range
0159               {
0160                 state->gotLineTag (num);
0161                 elements->gotLineTag (num);
0162               }
0163               else
0164                 results->addToList (results->createError ("Received invalid line tag!"));
0165               str = "";
0166             }
0167           }
0168           else  //something else
0169           {
0170             //'\e' and c are not in the string - add them there
0171             str = '\e' + str + c;
0172           }
0173           pstate = pText;
0174         }
0175         else
0176         if (c == '[')  //this one is valid, but only at the beginning
0177         {
0178           if (str.empty())
0179             str += c;
0180           else
0181           {
0182             //'[' in the middle of ANSI seq => not an ANSI seq...
0183             pstate = pText;
0184             str = '\e' + str + c;
0185           }
0186         }
0187         else
0188         if ((c == ';') || ((c >= '0') && (c <= '9')))  //correct char, unless str is empty
0189           if (!str.empty())
0190             str += c;  //here we go...
0191           else
0192           {
0193             //ANSI seq must start with [ - therefore this is not an ANSI sequence after all
0194             pstate = pText;
0195             str += '\e';
0196             str += c;
0197           }
0198         else
0199         //incorrect character...
0200         {
0201           str = '\e' + str + c;
0202           pstate = pText;
0203         }
0204         break;
0205       };
0206       case pTag: {
0207         if (c == '>')
0208         {
0209           elements->gotTag (str);
0210           str = "";
0211           pstate = pText;
0212         }
0213         else
0214         if ((c == '"') || (c == '\''))
0215         {
0216           pstate = pQuotedParam;
0217           quoteChar = c;
0218           str += c;
0219         }
0220         else if ((c == '\e') || (c == '\n') || (c == '\r'))
0221         {
0222           //handle incorrectly terminated tag and continue parsing...
0223           results->addToList (results->createError ("Received unfinished tag <" + str));
0224           str = "";
0225           if (c == '\e')
0226             pstate = pAnsiSeq;
0227           if (c == '\n')
0228           {
0229             //report new-line
0230             elements->gotNewLine();
0231             state->gotNewLine();
0232             pstate = pText;
0233           }
0234           if (c == '\r')
0235           {
0236             pstate = pText;
0237             wasBackslashR = true;
0238           }
0239         }
0240         else if (str == "!--")  //comment
0241         {
0242           str += c;
0243           pstate = pComment;
0244         }
0245         else
0246           str += c;
0247         break;
0248       };
0249       case pComment: {
0250         if (c == '>')
0251         {
0252           int l = str.length();
0253           if ((str[l-2] == '-') && (str[l-1] == '-')) //okay, comment ends
0254           {
0255             str = "";
0256             pstate = pText;
0257           }
0258           else
0259             str += c;
0260         }
0261         else if ((c == '\e') || (c == '\n') || (c == '\r'))
0262         {
0263           //handle incorrectly terminated comment and continue parsing...
0264           results->addToList (results->createError ("Received an unfinished comment!"));
0265           str = "";
0266           if (c == '\e')
0267             pstate = pAnsiSeq;
0268           if (c == '\n')
0269           {
0270             //report new-line
0271             elements->gotNewLine();
0272             state->gotNewLine();
0273             pstate = pText;
0274           }
0275           if (c == '\r')
0276           {
0277             pstate = pText;
0278             wasBackslashR = true;
0279           }
0280         }
0281         else
0282           str += c;
0283         break;
0284       };
0285       case pQuotedParam: {
0286         if (c == quoteChar)
0287         {
0288           //quoted parameter ends... this simple approach will work correctly for correct
0289           //tags, it may treat incorrect quotes as correct, but element manager will take care
0290           //of that
0291           pstate = pTag;
0292           str += c;
0293         }
0294         else
0295         if ((c == '\e') || (c == '\n') || (c == '\r'))
0296         {
0297           //the same as in pTag section...
0298           results->addToList (results->createError ("Received unfinished tag <" + str));
0299           str = "";
0300           if (c == '\e')
0301             pstate = pAnsiSeq;
0302           if (c == '\n')
0303           {
0304             //report new-line
0305             elements->gotNewLine();
0306             state->gotNewLine();
0307             pstate = pText;
0308           }
0309           if (c == '\r')
0310           {
0311             pstate = pText;
0312             wasBackslashR = true;
0313           }
0314         }
0315         else
0316           str += c;
0317         break;
0318       };
0319     };
0320   }
0321   //report remaining text, if any (needed to improve speed of text displaying and to handle
0322   //prompts correctly)
0323   if ((pstate == pText) && (!str.empty()))
0324   {
0325     state->gotText (str);
0326     str = "";
0327   }
0328 }
0329
0330 void cMXPParser::simpleParse (const string &text)
0331 //simple parsing - only text and tags - no newlines, no ANSI sequences, no line tags
0332 //simpler version of parse() function above
0333 {
0334   if (text.empty())
0335     return;
0336   chunk ch;
0337   string::const_iterator it;
0338   pstate = pText;
0339   str = "";
0340   for (it = text.begin(); it != text.end(); ++it)
0341   {
0342     char c = *it;
0343     switch (pstate) {
0344       case pText: {
0345         if (c == '<')
0346         {
0347           //end of text - got start of tag
0348           if (!str.empty())
0349           {
0350             ch.chk = chunkText;
0351             ch.text = str;
0352             chunks.push_back (ch);
0353             str = "";
0354           }
0355           pstate = pTag;
0356         }
0357         else
0358           str += c;  //add new character to the text...
0359         break;
0360       };
0361       case pTag: {
0362         if (c == '>')
0363         {
0364           ch.chk = chunkTag;
0365           ch.text = str;
0366           chunks.push_back (ch);
0367           str = "";
0368           pstate = pText;
0369         }
0370         else
0371         if ((c == '"') || (c == '\''))
0372         {
0373           pstate = pQuotedParam;
0374           quoteChar = c;
0375           str += c;
0376         }
0377         else
0378           str += c;
0379         break;
0380       };
0381       case pQuotedParam: {
0382         if (c == quoteChar)
0383         {
0384           //quoted parameter ends... this simple approach will work correctly for correct
0385           //tags, it may treat incorrect quotes as correct, but element manager will take care
0386           //of that
0387           pstate = pTag;
0388           str += c;
0389         }
0390         else
0391           str += c;
0392         break;
0393       };
0394     };
0395   }
0396   //unfinished things...
0397   if (pstate == pText)
0398   {
0399     ch.chk = chunkText;
0400     ch.text = str;
0401     chunks.push_back (ch);
0402   }
0403   if ((pstate == pTag) || (pstate == pQuotedParam))
0404   {
0405     ch.chk = chunkError;
0406     ch.text = "Tag definition contains unfinished tag <" + str;
0407     chunks.push_back (ch);
0408   }
0409   str = "";
0410 }
0411
0412 bool cMXPParser::hasNext()
0413 {
0414   return chunks.empty() ? false : true;
0415 }
0416
0417 chunk cMXPParser::getNext()
0418 {
0419   if (!hasNext())
0420   {
0421     chunk nochunk;
0422     nochunk.chk = chunkNone;
0423     return nochunk;
0424   }
0425   chunk ch = chunks.front();
0426   chunks.pop_front();
0427   return ch;
0428 }