File indexing completed on 2025-01-19 04:46:30
0001 /* 0002 SPDX-FileCopyrightText: 2009 Tobias Koenig <tokoe@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "qcsvreader.h" 0008 0009 #include <KLocalizedString> 0010 #include <QIODevice> 0011 #include <QList> 0012 #include <QStringList> 0013 #include <QTextCodec> 0014 #include <QTextStream> 0015 0016 QCsvBuilderInterface::~QCsvBuilderInterface() = default; 0017 0018 class QCsvReaderPrivate 0019 { 0020 public: 0021 explicit QCsvReaderPrivate(QCsvBuilderInterface *builder) 0022 : mBuilder(builder) 0023 , mCodec(QTextCodec::codecForLocale()) 0024 { 0025 } 0026 0027 void emitBeginLine(uint row); 0028 void emitEndLine(uint row); 0029 void emitField(const QString &data, int row, int column); 0030 0031 QCsvBuilderInterface *const mBuilder; 0032 QTextCodec *mCodec = nullptr; 0033 QChar mTextQuote = QLatin1Char('"'); 0034 QChar mDelimiter = QLatin1Char(' '); 0035 0036 uint mStartRow = 0; 0037 bool mNotTerminated = true; 0038 }; 0039 0040 void QCsvReaderPrivate::emitBeginLine(uint row) 0041 { 0042 if ((row - mStartRow) > 0) { 0043 mBuilder->beginLine(); 0044 } 0045 } 0046 0047 void QCsvReaderPrivate::emitEndLine(uint row) 0048 { 0049 if ((row - mStartRow) > 0) { 0050 mBuilder->endLine(); 0051 } 0052 } 0053 0054 void QCsvReaderPrivate::emitField(const QString &data, int row, int column) 0055 { 0056 if ((row - mStartRow) > 0) { 0057 mBuilder->field(data, row - mStartRow - 1, column - 1); 0058 } 0059 } 0060 0061 QCsvReader::QCsvReader(QCsvBuilderInterface *builder) 0062 : d(new QCsvReaderPrivate(builder)) 0063 { 0064 Q_ASSERT(builder); 0065 } 0066 0067 QCsvReader::~QCsvReader() = default; 0068 0069 bool QCsvReader::read(QIODevice *device) 0070 { 0071 enum State { 0072 StartLine, 0073 QuotedField, 0074 QuotedFieldEnd, 0075 NormalField, 0076 EmptyField, 0077 }; 0078 0079 int row; 0080 int column; 0081 0082 QString field; 0083 QChar input; 0084 State currentState = StartLine; 0085 0086 row = column = 1; 0087 0088 d->mBuilder->begin(); 0089 0090 if (!device->isOpen()) { 0091 d->emitBeginLine(row); 0092 d->mBuilder->error(i18n("Device is not open")); 0093 d->emitEndLine(row); 0094 d->mBuilder->end(); 0095 return false; 0096 } 0097 0098 QTextStream inputStream(device); 0099 inputStream.setCodec(d->mCodec); 0100 0101 /** 0102 * We use the following state machine to parse CSV: 0103 * 0104 * digraph { 0105 * StartLine -> StartLine [label="\\r\\n"] 0106 * StartLine -> QuotedField [label="Quote"] 0107 * StartLine -> EmptyField [label="Delimiter"] 0108 * StartLine -> NormalField [label="Other Char"] 0109 * 0110 * QuotedField -> QuotedField [label="\\r\\n"] 0111 * QuotedField -> QuotedFieldEnd [label="Quote"] 0112 * QuotedField -> QuotedField [label="Delimiter"] 0113 * QuotedField -> QuotedField [label="Other Char"] 0114 * 0115 * QuotedFieldEnd -> StartLine [label="\\r\\n"] 0116 * QuotedFieldEnd -> QuotedField [label="Quote"] 0117 * QuotedFieldEnd -> EmptyField [label="Delimiter"] 0118 * QuotedFieldEnd -> EmptyField [label="Other Char"] 0119 * 0120 * EmptyField -> StartLine [label="\\r\\n"] 0121 * EmptyField -> QuotedField [label="Quote"] 0122 * EmptyField -> EmptyField [label="Delimiter"] 0123 * EmptyField -> NormalField [label="Other Char"] 0124 * 0125 * NormalField -> StartLine [label="\\r\\n"] 0126 * NormalField -> NormalField [label="Quote"] 0127 * NormalField -> EmptyField [label="Delimiter"] 0128 * NormalField -> NormalField [label="Other Char"] 0129 * } 0130 */ 0131 0132 while (!inputStream.atEnd() && d->mNotTerminated) { 0133 inputStream >> input; 0134 0135 switch (currentState) { 0136 case StartLine: 0137 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) { 0138 currentState = StartLine; 0139 } else if (input == d->mTextQuote) { 0140 d->emitBeginLine(row); 0141 currentState = QuotedField; 0142 } else if (input == d->mDelimiter) { 0143 d->emitBeginLine(row); 0144 d->emitField(field, row, column); 0145 column++; 0146 currentState = EmptyField; 0147 } else { 0148 d->emitBeginLine(row); 0149 field.append(input); 0150 currentState = NormalField; 0151 } 0152 break; 0153 case QuotedField: 0154 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) { 0155 field.append(input); 0156 currentState = QuotedField; 0157 } else if (input == d->mTextQuote) { 0158 currentState = QuotedFieldEnd; 0159 } else if (input == d->mDelimiter) { 0160 field.append(input); 0161 currentState = QuotedField; 0162 } else { 0163 field.append(input); 0164 currentState = QuotedField; 0165 } 0166 break; 0167 case QuotedFieldEnd: 0168 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) { 0169 d->emitField(field, row, column); 0170 field.clear(); 0171 d->emitEndLine(row); 0172 column = 1; 0173 row++; 0174 currentState = StartLine; 0175 } else if (input == d->mTextQuote) { 0176 field.append(input); 0177 currentState = QuotedField; 0178 } else if (input == d->mDelimiter) { 0179 d->emitField(field, row, column); 0180 field.clear(); 0181 column++; 0182 currentState = EmptyField; 0183 } else { 0184 d->emitField(field, row, column); 0185 field.clear(); 0186 column++; 0187 field.append(input); 0188 currentState = EmptyField; 0189 } 0190 break; 0191 case NormalField: 0192 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) { 0193 d->emitField(field, row, column); 0194 field.clear(); 0195 d->emitEndLine(row); 0196 row++; 0197 column = 1; 0198 currentState = StartLine; 0199 } else if (input == d->mTextQuote) { 0200 field.append(input); 0201 currentState = NormalField; 0202 } else if (input == d->mDelimiter) { 0203 d->emitField(field, row, column); 0204 field.clear(); 0205 column++; 0206 currentState = EmptyField; 0207 } else { 0208 field.append(input); 0209 currentState = NormalField; 0210 } 0211 break; 0212 case EmptyField: 0213 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) { 0214 d->emitField(QString(), row, column); 0215 field.clear(); 0216 d->emitEndLine(row); 0217 column = 1; 0218 row++; 0219 currentState = StartLine; 0220 } else if (input == d->mTextQuote) { 0221 currentState = QuotedField; 0222 } else if (input == d->mDelimiter) { 0223 d->emitField(QString(), row, column); 0224 column++; 0225 currentState = EmptyField; 0226 } else { 0227 field.append(input); 0228 currentState = NormalField; 0229 } 0230 break; 0231 } 0232 } 0233 0234 if (currentState != StartLine) { 0235 if (field.length() > 0) { 0236 d->emitField(field, row, column); 0237 ++row; 0238 field.clear(); 0239 } 0240 d->emitEndLine(row); 0241 } 0242 0243 d->mBuilder->end(); 0244 0245 return true; 0246 } 0247 0248 void QCsvReader::setTextQuote(QChar textQuote) 0249 { 0250 d->mTextQuote = textQuote; 0251 } 0252 0253 QChar QCsvReader::textQuote() const 0254 { 0255 return d->mTextQuote; 0256 } 0257 0258 void QCsvReader::setDelimiter(QChar delimiter) 0259 { 0260 d->mDelimiter = delimiter; 0261 } 0262 0263 QChar QCsvReader::delimiter() const 0264 { 0265 return d->mDelimiter; 0266 } 0267 0268 void QCsvReader::setStartRow(uint startRow) 0269 { 0270 d->mStartRow = startRow; 0271 } 0272 0273 uint QCsvReader::startRow() const 0274 { 0275 return d->mStartRow; 0276 } 0277 0278 void QCsvReader::setTextCodec(QTextCodec *textCodec) 0279 { 0280 d->mCodec = textCodec; 0281 } 0282 0283 QTextCodec *QCsvReader::textCodec() const 0284 { 0285 return d->mCodec; 0286 } 0287 0288 void QCsvReader::terminate() 0289 { 0290 d->mNotTerminated = false; 0291 } 0292 0293 class QCsvStandardBuilderPrivate 0294 { 0295 public: 0296 QCsvStandardBuilderPrivate() 0297 { 0298 init(); 0299 } 0300 0301 void init(); 0302 0303 QString mLastErrorString; 0304 uint mRowCount; 0305 uint mColumnCount; 0306 QList<QStringList> mRows; 0307 }; 0308 0309 void QCsvStandardBuilderPrivate::init() 0310 { 0311 mRows.clear(); 0312 mRowCount = 0; 0313 mColumnCount = 0; 0314 mLastErrorString.clear(); 0315 } 0316 0317 QCsvStandardBuilder::QCsvStandardBuilder() 0318 : d(new QCsvStandardBuilderPrivate) 0319 { 0320 } 0321 0322 QCsvStandardBuilder::~QCsvStandardBuilder() = default; 0323 0324 QString QCsvStandardBuilder::lastErrorString() const 0325 { 0326 return d->mLastErrorString; 0327 } 0328 0329 uint QCsvStandardBuilder::rowCount() const 0330 { 0331 return d->mRowCount; 0332 } 0333 0334 uint QCsvStandardBuilder::columnCount() const 0335 { 0336 return d->mColumnCount; 0337 } 0338 0339 QString QCsvStandardBuilder::data(uint row, uint column) const 0340 { 0341 if (row > d->mRowCount || column > d->mColumnCount || column >= (uint)d->mRows[row].count()) { 0342 return {}; 0343 } 0344 0345 return d->mRows[row][column]; 0346 } 0347 0348 void QCsvStandardBuilder::begin() 0349 { 0350 d->init(); 0351 } 0352 0353 void QCsvStandardBuilder::beginLine() 0354 { 0355 d->mRows.append(QStringList()); 0356 d->mRowCount++; 0357 } 0358 0359 void QCsvStandardBuilder::field(const QString &data, uint row, uint column) 0360 { 0361 const uint size = d->mRows[row].size(); 0362 if (column >= size) { 0363 for (uint i = column; i < size + 1; ++i) { 0364 d->mRows[row].append(QString()); 0365 } 0366 } 0367 0368 d->mRows[row][column] = data; 0369 0370 d->mColumnCount = qMax(d->mColumnCount, column + 1); 0371 } 0372 0373 void QCsvStandardBuilder::endLine() 0374 { 0375 } 0376 0377 void QCsvStandardBuilder::end() 0378 { 0379 } 0380 0381 void QCsvStandardBuilder::error(const QString &errorMsg) 0382 { 0383 d->mLastErrorString = errorMsg; 0384 } 0385 0386 #include "moc_qcsvreader.cpp"