File indexing completed on 2026-06-07 12:42:39
0001 /* This file is part of the KDE project 0002 SPDX-FileCopyrightText: 2002 Dirk Schönberger <dirk.schoenberger@sz-online.de> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "PsCommentLexer.h" 0008 0009 #include <stdlib.h> 0010 #include <ctype.h> 0011 #include <QStringList> 0012 0013 #define CATEGORY_WHITESPACE -1 0014 #define CATEGORY_ALPHA -2 0015 #define CATEGORY_DIGIT -3 0016 #define CATEGORY_SPECIAL -4 0017 #define CATEGORY_LETTERHEX -5 0018 #define CATEGORY_INTTOOLONG -6 0019 0020 #define CATEGORY_ANY -127 0021 0022 #define MAX_INTLEN 9 0023 #define MIN_HEXCHARS 6 0024 0025 #define STOP 0 0026 0027 int iswhitespace(char c) 0028 { 0029 return (c == ' ') || (c == '\n') || (c == '\t') || (c == '\r'); 0030 } 0031 0032 int isSpecial(char c) 0033 { 0034 return (c == '*') || (c == '_') || (c == '?') || (c == '~') || (c == '-') || (c == '^') || (c == '`') || (c == '!') || (c == '.') || (c == '@') || (c == '&') || (c == '$') || (c == '='); 0035 } 0036 0037 int isletterhex(char c) 0038 { 0039 return (c == 'A') || (c == 'B') || (c == 'C') || (c == 'D') || (c == 'E') || (c == 'F'); 0040 } 0041 0042 const char*statetoa(State state) 0043 { 0044 switch (state) { 0045 case State_Comment : return "comment"; 0046 case State_CommentEncodedChar : return "encoded char (comment)"; 0047 default : return "unknown"; 0048 } 0049 } 0050 0051 typedef struct { 0052 State oldState; 0053 signed char c; 0054 State newState; 0055 Action action; 0056 } Transition; 0057 0058 static const Transition transitions[] = { 0059 { State_Comment, '\n', State_Start, Action_Output}, 0060 { State_Comment, '\r', State_Start, Action_Output}, 0061 { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp}, 0062 { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy}, 0063 { State_CommentEncodedChar, '\\', State_Comment, Action_Copy}, 0064 { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp}, 0065 { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget}, 0066 { State_Start, '%', State_Comment, Action_Ignore}, 0067 { State_Start, CATEGORY_ANY, State_Start, Action_Ignore}, 0068 { State_Start, STOP, State_Start, Action_Abort} 0069 }; 0070 0071 PSCommentLexer::PSCommentLexer() 0072 { 0073 } 0074 PSCommentLexer::~PSCommentLexer() 0075 { 0076 } 0077 0078 bool PSCommentLexer::parse(QIODevice& fin) 0079 { 0080 char c; 0081 0082 m_buffer.clear(); 0083 m_curState = State_Start; 0084 0085 parsingStarted(); 0086 0087 while (!fin.atEnd()) { 0088 fin.getChar(&c); 0089 0090 // qDebug ("got %c", c); 0091 0092 State newState; 0093 Action action; 0094 0095 nextStep(c, &newState, &action); 0096 0097 switch (action) { 0098 case Action_Copy : 0099 m_buffer.append(c); 0100 break; 0101 case Action_CopyOutput : 0102 m_buffer.append(c); 0103 doOutput(); 0104 break; 0105 case Action_Output : 0106 doOutput(); 0107 break; 0108 case Action_OutputUnget : 0109 doOutput(); 0110 fin.ungetChar(c); 0111 break; 0112 case Action_Ignore : 0113 /* ignore */ 0114 break; 0115 case Action_Abort : 0116 qWarning("state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c); 0117 parsingAborted(); 0118 return false; 0119 break; 0120 case Action_InitTemp : 0121 m_temp.clear(); 0122 break; 0123 case Action_CopyTemp : 0124 m_temp.append(c); 0125 break; 0126 case Action_DecodeUnget : 0127 m_buffer.append(decode()); 0128 fin.ungetChar(c); 0129 break; 0130 default : 0131 qWarning("unknown action: %d ", action); 0132 } 0133 0134 m_curState = newState; 0135 } 0136 0137 parsingFinished(); 0138 return true; 0139 } 0140 0141 void PSCommentLexer::doOutput() 0142 { 0143 if (m_buffer.length() == 0) return; 0144 switch (m_curState) { 0145 case State_Comment : 0146 gotComment(m_buffer.toLatin1()); 0147 break; 0148 default: 0149 qWarning("unknown state: %d", m_curState); 0150 } 0151 0152 m_buffer.clear(); 0153 } 0154 0155 void PSCommentLexer::gotComment(const char *value) 0156 { 0157 qDebug("gotComment: %s ", value); 0158 } 0159 0160 void PSCommentLexer::parsingStarted() 0161 { 0162 qDebug("parsing started"); 0163 } 0164 0165 void PSCommentLexer::parsingFinished() 0166 { 0167 qDebug("parsing finished"); 0168 } 0169 0170 void PSCommentLexer::parsingAborted() 0171 { 0172 qDebug("parsing aborted"); 0173 } 0174 0175 void PSCommentLexer::nextStep(char c, State *newState, Action *newAction) 0176 { 0177 int i = 0; 0178 0179 while (true) { 0180 Transition trans = transitions[i]; 0181 0182 if (trans.c == STOP) { 0183 *newState = trans.newState; 0184 *newAction = trans.action; 0185 return; 0186 } 0187 0188 bool found = false; 0189 0190 if (trans.oldState == m_curState) { 0191 switch (trans.c) { 0192 case CATEGORY_WHITESPACE : found = isspace(c); break; 0193 case CATEGORY_ALPHA : found = isalpha(c); break; 0194 case CATEGORY_DIGIT : found = isdigit(c); break; 0195 case CATEGORY_SPECIAL : found = isSpecial(c); break; 0196 case CATEGORY_LETTERHEX : found = isletterhex(c); break; 0197 case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break; 0198 case CATEGORY_ANY : found = true; break; 0199 default : found = (trans.c == c); 0200 } 0201 0202 if (found) { 0203 *newState = trans.newState; 0204 *newAction = trans.action; 0205 0206 return; 0207 } 0208 } 0209 0210 0211 i++; 0212 } 0213 } 0214 0215 uchar PSCommentLexer::decode() 0216 { 0217 uchar value = m_temp.toString().toShort(nullptr, 8); 0218 // qDebug ("got encoded char %c",value); 0219 return value; 0220 } 0221 0222 /* StringBuffer implementation */ 0223 0224 const int initialSize = 20; 0225 const int addSize = 10; 0226 0227 StringBuffer::StringBuffer() 0228 { 0229 m_buffer = (char*)calloc(initialSize, sizeof(char)); 0230 m_length = 0; 0231 m_capacity = initialSize; 0232 } 0233 0234 StringBuffer::~StringBuffer() 0235 { 0236 free(m_buffer); 0237 } 0238 0239 void StringBuffer::append(char c) 0240 { 0241 ensureCapacity(m_length + 1); 0242 m_buffer[m_length] = c; 0243 m_length++; 0244 } 0245 0246 void StringBuffer::clear() 0247 { 0248 for (uint i = 0; i < m_length; i++) m_buffer[i] = '\0'; 0249 m_length = 0; 0250 } 0251 0252 QString StringBuffer::toString() const 0253 { 0254 QString ret(m_buffer); 0255 return ret; 0256 } 0257 0258 void StringBuffer::ensureCapacity(int p_capacity) 0259 { 0260 if (m_capacity >= p_capacity) return; 0261 0262 int newSize = m_capacity + addSize; 0263 if (p_capacity > newSize) newSize = p_capacity; 0264 0265 char* oldBuffer = m_buffer; 0266 char *newBuffer = (char*)calloc(newSize, sizeof(char)); 0267 strcpy(newBuffer, m_buffer); 0268 free(oldBuffer); 0269 m_buffer = newBuffer; 0270 m_capacity = newSize; 0271 } 0272 0273 uint StringBuffer::length() const 0274 { 0275 return m_length; 0276 } 0277 0278 double StringBuffer::toFloat() 0279 { 0280 QString data = toString(); 0281 return data.toFloat(); 0282 } 0283 0284 int StringBuffer::toInt() 0285 { 0286 QString data = toString(); 0287 return data.toInt(); 0288 } 0289 0290 const char *StringBuffer::toLatin1() const 0291 { 0292 return m_buffer; 0293 } 0294 0295 QString StringBuffer::mid(uint index, uint len) const 0296 { 0297 QString data = toString(); 0298 return data.mid(index, len); 0299 } 0300 0301 /* BoundingBoxExtractor */ 0302 BoundingBoxExtractor:: BoundingBoxExtractor() : m_llx(0), m_lly(0), m_urx(0), m_ury(0) {} 0303 BoundingBoxExtractor::~BoundingBoxExtractor() {} 0304 0305 void BoundingBoxExtractor::gotComment(const char *value) 0306 { 0307 QString data(value); 0308 if (data.indexOf("%BoundingBox:") == -1) return; 0309 0310 getRectangle(value, m_llx, m_lly, m_urx, m_ury); 0311 } 0312 0313 bool BoundingBoxExtractor::getRectangle(const char* input, int &llx, int &lly, int &urx, int &ury) 0314 { 0315 if (input == nullptr) return false; 0316 0317 QString s(input); 0318 if (s.contains("(atend)")) return false; 0319 0320 s.remove("%BoundingBox:"); 0321 QStringList values = s.split(' '); 0322 qDebug("size is %d", values.size()); 0323 // if (values.size() < 5) return false; 0324 llx = values[0].toInt(); 0325 lly = values[1].toInt(); 0326 urx = values[2].toInt(); 0327 ury = values[3].toInt(); 0328 0329 return true; 0330 } 0331