// File indexing completed on 2024-04-28 15:35:14
0001 /*************************************************************************** 0002 * Copyright (C) 2004 by Tomas Mecir * 0003 * kmuddy@kmuddy.org * 0004 * * 0005 * This program is free software; you can redistribute it and/or modify * 0006 * it under the terms of the GNU Library General Public License as * 0007 * published by the Free Software Foundation; either version 2 of the * 0008 * License, or (at your option) any later version. * 0009 * * 0010 * This program is distributed in the hope that it will be useful, * 0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0013 * GNU Library General Public License for more details. * 0014 ***************************************************************************/ 0015 0016 #include "cmxpparser.h" 0017 0018 #include "celementmanager.h" 0019 #include "cmxpstate.h" 0020 #include "cresulthandler.h" 0021 0022 cMXPParser::cMXPParser (cMXPState *st, cElementManager *elm, cResultHandler *res) 0023 { 0024 state = st; 0025 elements = elm; 0026 results = res; 0027 0028 pstate = pText; 0029 wasBackslashR = false; 0030 } 0031 0032 cMXPParser::~cMXPParser () 0033 { 0034 } 0035 0036 void cMXPParser::reset () 0037 { 0038 str = ""; 0039 pstate = pText; 0040 wasBackslashR = false; 0041 chunks.clear(); 0042 } 0043 0044 void cMXPParser::parse (const string &text) 0045 { 0046 //WARNING: examine this function only at your own risk! 0047 //it is advised to have a look at the simpleParse() function first - it's similar 0048 //to this one, but much simpler... 0049 if (text.empty()) 0050 return; 0051 string::const_iterator it; 0052 for (it = text.begin(); it != text.end(); ++it) 0053 { 0054 char c = *it; 0055 0056 // Looks like number of brain-dead servers that send out \n\r is bigger than the 0057 // number of servers that send out \r alone - the latter maybe don't exist at 0058 // all. 
Hence, with this commented out, we can't handle the \r-only ones, 0059 // but \n\r works. 0060 /* 0061 //handle \r not followed by \n - treated as a newline 0062 if (wasBackslashR && (c != '\n')) 0063 { 0064 //"str" now certainly is empty, so we needn't care about that 0065 //report new-line 0066 elements->gotNewLine(); 0067 state->gotNewLine(); 0068 } 0069 */ 0070 wasBackslashR = false; 0071 0072 //we need current mode - parsing in LOCKED mode is limited 0073 //mode is retrieved in every iteration to ensure that it's always up-to-date 0074 mxpMode mode = state->getMXPMode(); 0075 switch (pstate) { 0076 case pText: { 0077 //tags not recognized in LOCKED mode... 0078 if ((c == '\e') || ((mode != lockedMode) && (c == '<')) || (c == '\n') || (c == '\r')) 0079 { 0080 //end of text - got newline / ANSI seq / start of tag 0081 if (!str.empty()) 0082 { 0083 state->gotText (str); 0084 str = ""; 0085 } 0086 if (c == '\e') 0087 pstate = pAnsiSeq; 0088 if ((c == '<') && (mode != lockedMode)) 0089 pstate = pTag; 0090 if (c == '\n') 0091 { 0092 //report new-line 0093 elements->gotNewLine(); 0094 state->gotNewLine(); 0095 } 0096 if (c == '\r') 0097 wasBackslashR = true; 0098 } 0099 else 0100 str += c; //add new character to the text... 0101 break; 0102 }; 0103 case pAnsiSeq: { 0104 if ((c == '\e') || (c == '\n') || (c == '\r')) 0105 { 0106 //the same as in pTag section... 0107 results->addToList (results->createError ("Received unfinished ANSI sequence!")); 0108 str = ""; 0109 if (c == '\e') 0110 pstate = pAnsiSeq; 0111 if (c == '\n') 0112 { 0113 //report new-line 0114 elements->gotNewLine(); 0115 state->gotNewLine(); 0116 pstate = pText; 0117 } 0118 if (c == '\r') 0119 { 0120 pstate = pText; 0121 wasBackslashR = true; 0122 } 0123 } 0124 else 0125 if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))) 0126 { 0127 //ANSI sequence ends... 
0128 if (c == 'z') //line tag 0129 { 0130 if (str.empty()) 0131 { 0132 //invalid sequence 0133 str = "\ez"; 0134 } 0135 else 0136 { 0137 //process this sequence 0138 int len = str.length(); 0139 int num = 0; 0140 for (int i = 1; i < len; i++) //str[0] is '[', which is SKIPPED 0141 { 0142 char cc = str[i]; 0143 if (cc == ';') //this shouldn't happen, but some MUD might want to use it... 0144 { 0145 if ((num >= 0) && (num <= 99)) //ensure that number lies in correct range 0146 { 0147 state->gotLineTag (num); 0148 elements->gotLineTag (num); 0149 } 0150 else 0151 results->addToList (results->createError ("Received invalid line tag!")); 0152 num = 0; 0153 } 0154 else 0155 num = num * 10 + (cc - 48); //48 is the code of '0' 0156 } 0157 //report last line tag (and usually the only one) 0158 if ((num >= 0) && (num <= 99)) //ensure that number lies in correct range 0159 { 0160 state->gotLineTag (num); 0161 elements->gotLineTag (num); 0162 } 0163 else 0164 results->addToList (results->createError ("Received invalid line tag!")); 0165 str = ""; 0166 } 0167 } 0168 else //something else 0169 { 0170 //'\e' and c are not in the string - add them there 0171 str = '\e' + str + c; 0172 } 0173 pstate = pText; 0174 } 0175 else 0176 if (c == '[') //this one is valid, but only at the beginning 0177 { 0178 if (str.empty()) 0179 str += c; 0180 else 0181 { 0182 //'[' in the middle of ANSI seq => not an ANSI seq... 0183 pstate = pText; 0184 str = '\e' + str + c; 0185 } 0186 } 0187 else 0188 if ((c == ';') || ((c >= '0') && (c <= '9'))) //correct char, unless str is empty 0189 if (!str.empty()) 0190 str += c; //here we go... 0191 else 0192 { 0193 //ANSI seq must start with [ - therefore this is not an ANSI sequence after all 0194 pstate = pText; 0195 str += '\e'; 0196 str += c; 0197 } 0198 else 0199 //incorrect character... 
0200 { 0201 str = '\e' + str + c; 0202 pstate = pText; 0203 } 0204 break; 0205 }; 0206 case pTag: { 0207 if (c == '>') 0208 { 0209 elements->gotTag (str); 0210 str = ""; 0211 pstate = pText; 0212 } 0213 else 0214 if ((c == '"') || (c == '\'')) 0215 { 0216 pstate = pQuotedParam; 0217 quoteChar = c; 0218 str += c; 0219 } 0220 else if ((c == '\e') || (c == '\n') || (c == '\r')) 0221 { 0222 //handle incorrectly terminated tag and continue parsing... 0223 results->addToList (results->createError ("Received unfinished tag <" + str)); 0224 str = ""; 0225 if (c == '\e') 0226 pstate = pAnsiSeq; 0227 if (c == '\n') 0228 { 0229 //report new-line 0230 elements->gotNewLine(); 0231 state->gotNewLine(); 0232 pstate = pText; 0233 } 0234 if (c == '\r') 0235 { 0236 pstate = pText; 0237 wasBackslashR = true; 0238 } 0239 } 0240 else if (str == "!--") //comment 0241 { 0242 str += c; 0243 pstate = pComment; 0244 } 0245 else 0246 str += c; 0247 break; 0248 }; 0249 case pComment: { 0250 if (c == '>') 0251 { 0252 int l = str.length(); 0253 if ((str[l-2] == '-') && (str[l-1] == '-')) //okay, comment ends 0254 { 0255 str = ""; 0256 pstate = pText; 0257 } 0258 else 0259 str += c; 0260 } 0261 else if ((c == '\e') || (c == '\n') || (c == '\r')) 0262 { 0263 //handle incorrectly terminated comment and continue parsing... 0264 results->addToList (results->createError ("Received an unfinished comment!")); 0265 str = ""; 0266 if (c == '\e') 0267 pstate = pAnsiSeq; 0268 if (c == '\n') 0269 { 0270 //report new-line 0271 elements->gotNewLine(); 0272 state->gotNewLine(); 0273 pstate = pText; 0274 } 0275 if (c == '\r') 0276 { 0277 pstate = pText; 0278 wasBackslashR = true; 0279 } 0280 } 0281 else 0282 str += c; 0283 break; 0284 }; 0285 case pQuotedParam: { 0286 if (c == quoteChar) 0287 { 0288 //quoted parameter ends... 
this simple approach will work correctly for correct 0289 //tags, it may treat incorrect quotes as correct, but element manager will take care 0290 //of that 0291 pstate = pTag; 0292 str += c; 0293 } 0294 else 0295 if ((c == '\e') || (c == '\n') || (c == '\r')) 0296 { 0297 //the same as in pTag section... 0298 results->addToList (results->createError ("Received unfinished tag <" + str)); 0299 str = ""; 0300 if (c == '\e') 0301 pstate = pAnsiSeq; 0302 if (c == '\n') 0303 { 0304 //report new-line 0305 elements->gotNewLine(); 0306 state->gotNewLine(); 0307 pstate = pText; 0308 } 0309 if (c == '\r') 0310 { 0311 pstate = pText; 0312 wasBackslashR = true; 0313 } 0314 } 0315 else 0316 str += c; 0317 break; 0318 }; 0319 }; 0320 } 0321 //report remaining text, if any (needed to improve speed of text displaying and to handle 0322 //prompts correctly) 0323 if ((pstate == pText) && (!str.empty())) 0324 { 0325 state->gotText (str); 0326 str = ""; 0327 } 0328 } 0329 0330 void cMXPParser::simpleParse (const string &text) 0331 //simple parsing - only text and tags - no newlines, no ANSI sequences, no line tags 0332 //simpler version of parse() function above 0333 { 0334 if (text.empty()) 0335 return; 0336 chunk ch; 0337 string::const_iterator it; 0338 pstate = pText; 0339 str = ""; 0340 for (it = text.begin(); it != text.end(); ++it) 0341 { 0342 char c = *it; 0343 switch (pstate) { 0344 case pText: { 0345 if (c == '<') 0346 { 0347 //end of text - got start of tag 0348 if (!str.empty()) 0349 { 0350 ch.chk = chunkText; 0351 ch.text = str; 0352 chunks.push_back (ch); 0353 str = ""; 0354 } 0355 pstate = pTag; 0356 } 0357 else 0358 str += c; //add new character to the text... 
0359 break; 0360 }; 0361 case pTag: { 0362 if (c == '>') 0363 { 0364 ch.chk = chunkTag; 0365 ch.text = str; 0366 chunks.push_back (ch); 0367 str = ""; 0368 pstate = pText; 0369 } 0370 else 0371 if ((c == '"') || (c == '\'')) 0372 { 0373 pstate = pQuotedParam; 0374 quoteChar = c; 0375 str += c; 0376 } 0377 else 0378 str += c; 0379 break; 0380 }; 0381 case pQuotedParam: { 0382 if (c == quoteChar) 0383 { 0384 //quoted parameter ends... this simple approach will work correctly for correct 0385 //tags, it may treat incorrect quotes as correct, but element manager will take care 0386 //of that 0387 pstate = pTag; 0388 str += c; 0389 } 0390 else 0391 str += c; 0392 break; 0393 }; 0394 }; 0395 } 0396 //unfinished things... 0397 if (pstate == pText) 0398 { 0399 ch.chk = chunkText; 0400 ch.text = str; 0401 chunks.push_back (ch); 0402 } 0403 if ((pstate == pTag) || (pstate == pQuotedParam)) 0404 { 0405 ch.chk = chunkError; 0406 ch.text = "Tag definition contains unfinished tag <" + str; 0407 chunks.push_back (ch); 0408 } 0409 str = ""; 0410 } 0411 0412 bool cMXPParser::hasNext() 0413 { 0414 return chunks.empty() ? false : true; 0415 } 0416 0417 chunk cMXPParser::getNext() 0418 { 0419 if (!hasNext()) 0420 { 0421 chunk nochunk; 0422 nochunk.chk = chunkNone; 0423 return nochunk; 0424 } 0425 chunk ch = chunks.front(); 0426 chunks.pop_front(); 0427 return ch; 0428 }