File indexing completed on 2026-06-07 12:42:39

0001 /* This file is part of the KDE project
0002    SPDX-FileCopyrightText: 2002 Dirk Schönberger <dirk.schoenberger@sz-online.de>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
#include "PsCommentLexer.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <QStringList>
0012 
/*
 * Character categories that may appear in the `c` column of the transition
 * table instead of a literal character. They are negative so they cannot be
 * confused with the literal (ASCII) characters stored in the same field, and
 * they all fit into a signed char.
 */
constexpr int CATEGORY_WHITESPACE = -1;
constexpr int CATEGORY_ALPHA = -2;
constexpr int CATEGORY_DIGIT = -3;
constexpr int CATEGORY_SPECIAL = -4;
constexpr int CATEGORY_LETTERHEX = -5;
constexpr int CATEGORY_INTTOOLONG = -6;

/* Wildcard category: matches every input character. */
constexpr int CATEGORY_ANY = -127;

/* CATEGORY_INTTOOLONG matches once the token buffer holds more characters than this. */
constexpr int MAX_INTLEN = 9;
/* Not referenced anywhere in this file. */
constexpr int MIN_HEXCHARS = 6;

/* Value of the `c` column that marks the terminating row of the transition table. */
constexpr int STOP = 0;
0026 
/**
 * Tells whether @p c is one of the four whitespace characters this file
 * recognises: space, line feed, horizontal tab or carriage return.
 *
 * @return 1 if @p c is whitespace, 0 otherwise.
 */
int iswhitespace(char c)
{
    switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
0031 
/**
 * Tells whether @p c belongs to the fixed set of "special" punctuation
 * characters: * _ ? ~ - ^ ` ! . @ & $ =
 *
 * @return 1 if @p c is in the set, 0 otherwise.
 */
int isSpecial(char c)
{
    switch (c) {
    case '*':
    case '_':
    case '?':
    case '~':
    case '-':
    case '^':
    case '`':
    case '!':
    case '.':
    case '@':
    case '&':
    case '$':
    case '=':
        return 1;
    default:
        return 0;
    }
}
0036 
/**
 * Tells whether @p c is an upper-case hexadecimal letter ('A' to 'F').
 * Lower-case letters and decimal digits are not accepted.
 *
 * @return 1 if @p c is between 'A' and 'F' inclusive, 0 otherwise.
 */
int isletterhex(char c)
{
    const bool inRange = ('A' <= c) && (c <= 'F');
    return inRange ? 1 : 0;
}
0041 
0042 const char*statetoa(State state)
0043 {
0044     switch (state) {
0045     case State_Comment : return "comment";
0046     case State_CommentEncodedChar : return "encoded char (comment)";
0047     default : return "unknown";
0048     }
0049 }
0050 
0051 typedef struct {
0052     State oldState;
0053     signed char c;
0054     State newState;
0055     Action action;
0056 } Transition;
0057 
0058 static const Transition transitions[] = {
0059     { State_Comment, '\n', State_Start, Action_Output},
0060     { State_Comment, '\r', State_Start, Action_Output},
0061     { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp},
0062     { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy},
0063     { State_CommentEncodedChar, '\\', State_Comment, Action_Copy},
0064     { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp},
0065     { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget},
0066     { State_Start, '%', State_Comment, Action_Ignore},
0067     { State_Start, CATEGORY_ANY, State_Start, Action_Ignore},
0068     { State_Start, STOP, State_Start, Action_Abort}
0069 };
0070 
0071 PSCommentLexer::PSCommentLexer()
0072 {
0073 }
0074 PSCommentLexer::~PSCommentLexer()
0075 {
0076 }
0077 
0078 bool PSCommentLexer::parse(QIODevice& fin)
0079 {
0080     char c;
0081 
0082     m_buffer.clear();
0083     m_curState = State_Start;
0084 
0085     parsingStarted();
0086 
0087     while (!fin.atEnd()) {
0088         fin.getChar(&c);
0089 
0090 //    qDebug ("got %c", c);
0091 
0092         State newState;
0093         Action action;
0094 
0095         nextStep(c, &newState, &action);
0096 
0097         switch (action) {
0098         case Action_Copy :
0099             m_buffer.append(c);
0100             break;
0101         case Action_CopyOutput :
0102             m_buffer.append(c);
0103             doOutput();
0104             break;
0105         case Action_Output :
0106             doOutput();
0107             break;
0108         case Action_OutputUnget :
0109             doOutput();
0110             fin.ungetChar(c);
0111             break;
0112         case Action_Ignore :
0113             /* ignore */
0114             break;
0115         case Action_Abort :
0116             qWarning("state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c);
0117             parsingAborted();
0118             return false;
0119             break;
0120         case Action_InitTemp :
0121             m_temp.clear();
0122             break;
0123         case Action_CopyTemp :
0124             m_temp.append(c);
0125             break;
0126         case Action_DecodeUnget :
0127             m_buffer.append(decode());
0128             fin.ungetChar(c);
0129             break;
0130         default :
0131             qWarning("unknown action: %d ", action);
0132         }
0133 
0134         m_curState = newState;
0135     }
0136 
0137     parsingFinished();
0138     return true;
0139 }
0140 
0141 void PSCommentLexer::doOutput()
0142 {
0143     if (m_buffer.length() == 0) return;
0144     switch (m_curState) {
0145     case State_Comment :
0146         gotComment(m_buffer.toLatin1());
0147         break;
0148     default:
0149         qWarning("unknown state: %d", m_curState);
0150     }
0151 
0152     m_buffer.clear();
0153 }
0154 
0155 void PSCommentLexer::gotComment(const char *value)
0156 {
0157     qDebug("gotComment: %s ", value);
0158 }
0159 
0160 void PSCommentLexer::parsingStarted()
0161 {
0162     qDebug("parsing started");
0163 }
0164 
0165 void PSCommentLexer::parsingFinished()
0166 {
0167     qDebug("parsing finished");
0168 }
0169 
0170 void PSCommentLexer::parsingAborted()
0171 {
0172     qDebug("parsing aborted");
0173 }
0174 
0175 void PSCommentLexer::nextStep(char c, State *newState, Action *newAction)
0176 {
0177     int i = 0;
0178 
0179     while (true) {
0180         Transition trans = transitions[i];
0181 
0182         if (trans.c == STOP) {
0183             *newState = trans.newState;
0184             *newAction = trans.action;
0185             return;
0186         }
0187 
0188         bool found = false;
0189 
0190         if (trans.oldState == m_curState) {
0191             switch (trans.c) {
0192             case CATEGORY_WHITESPACE : found = isspace(c); break;
0193             case CATEGORY_ALPHA : found = isalpha(c); break;
0194             case CATEGORY_DIGIT : found = isdigit(c); break;
0195             case CATEGORY_SPECIAL : found = isSpecial(c); break;
0196             case CATEGORY_LETTERHEX : found = isletterhex(c); break;
0197             case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break;
0198             case CATEGORY_ANY : found = true; break;
0199             default : found = (trans.c == c);
0200             }
0201 
0202             if (found) {
0203                 *newState = trans.newState;
0204                 *newAction = trans.action;
0205 
0206                 return;
0207             }
0208         }
0209 
0210 
0211         i++;
0212     }
0213 }
0214 
0215 uchar PSCommentLexer::decode()
0216 {
0217     uchar value = m_temp.toString().toShort(nullptr, 8);
0218 //  qDebug ("got encoded char %c",value);
0219     return value;
0220 }
0221 
0222 /* StringBuffer implementation */
0223 
// Number of bytes allocated for a freshly constructed StringBuffer.
constexpr int initialSize = 20;
// Minimum number of bytes by which a StringBuffer grows when it is enlarged.
constexpr int addSize = 10;
0226 
0227 StringBuffer::StringBuffer()
0228 {
0229     m_buffer = (char*)calloc(initialSize, sizeof(char));
0230     m_length = 0;
0231     m_capacity = initialSize;
0232 }
0233 
0234 StringBuffer::~StringBuffer()
0235 {
0236     free(m_buffer);
0237 }
0238 
0239 void StringBuffer::append(char c)
0240 {
0241     ensureCapacity(m_length + 1);
0242     m_buffer[m_length] = c;
0243     m_length++;
0244 }
0245 
0246 void StringBuffer::clear()
0247 {
0248     for (uint i = 0; i < m_length; i++) m_buffer[i] = '\0';
0249     m_length = 0;
0250 }
0251 
0252 QString StringBuffer::toString() const
0253 {
0254     QString ret(m_buffer);
0255     return ret;
0256 }
0257 
0258 void StringBuffer::ensureCapacity(int p_capacity)
0259 {
0260     if (m_capacity >= p_capacity) return;
0261 
0262     int newSize = m_capacity + addSize;
0263     if (p_capacity > newSize) newSize = p_capacity;
0264 
0265     char* oldBuffer = m_buffer;
0266     char *newBuffer = (char*)calloc(newSize, sizeof(char));
0267     strcpy(newBuffer, m_buffer);
0268     free(oldBuffer);
0269     m_buffer = newBuffer;
0270     m_capacity = newSize;
0271 }
0272 
0273 uint StringBuffer::length() const
0274 {
0275     return m_length;
0276 }
0277 
0278 double StringBuffer::toFloat()
0279 {
0280     QString data = toString();
0281     return data.toFloat();
0282 }
0283 
0284 int StringBuffer::toInt()
0285 {
0286     QString data = toString();
0287     return data.toInt();
0288 }
0289 
0290 const char *StringBuffer::toLatin1() const
0291 {
0292     return m_buffer;
0293 }
0294 
0295 QString StringBuffer::mid(uint index, uint len) const
0296 {
0297     QString data = toString();
0298     return data.mid(index, len);
0299 }
0300 
0301 /* BoundingBoxExtractor */
0302 BoundingBoxExtractor:: BoundingBoxExtractor() : m_llx(0), m_lly(0), m_urx(0), m_ury(0) {}
0303 BoundingBoxExtractor::~BoundingBoxExtractor() {}
0304 
0305 void BoundingBoxExtractor::gotComment(const char *value)
0306 {
0307     QString data(value);
0308     if (data.indexOf("%BoundingBox:") == -1) return;
0309 
0310     getRectangle(value, m_llx, m_lly, m_urx, m_ury);
0311 }
0312 
0313 bool BoundingBoxExtractor::getRectangle(const char* input, int &llx, int &lly, int &urx, int &ury)
0314 {
0315     if (input == nullptr) return false;
0316 
0317     QString s(input);
0318     if (s.contains("(atend)")) return false;
0319 
0320     s.remove("%BoundingBox:");
0321     QStringList values = s.split(' ');
0322     qDebug("size is %d", values.size());
0323 //  if (values.size() < 5) return false;
0324     llx = values[0].toInt();
0325     lly = values[1].toInt();
0326     urx = values[2].toInt();
0327     ury = values[3].toInt();
0328 
0329     return true;
0330 }
0331