File indexing completed on 2025-01-19 04:46:30

0001 /*
0002   SPDX-FileCopyrightText: 2009 Tobias Koenig <tokoe@kde.org>
0003 
0004   SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "qcsvreader.h"
0008 
0009 #include <KLocalizedString>
0010 #include <QIODevice>
0011 #include <QList>
0012 #include <QStringList>
0013 #include <QTextCodec>
0014 #include <QTextStream>
0015 
0016 QCsvBuilderInterface::~QCsvBuilderInterface() = default;
0017 
0018 class QCsvReaderPrivate
0019 {
0020 public:
0021     explicit QCsvReaderPrivate(QCsvBuilderInterface *builder)
0022         : mBuilder(builder)
0023         , mCodec(QTextCodec::codecForLocale())
0024     {
0025     }
0026 
0027     void emitBeginLine(uint row);
0028     void emitEndLine(uint row);
0029     void emitField(const QString &data, int row, int column);
0030 
0031     QCsvBuilderInterface *const mBuilder;
0032     QTextCodec *mCodec = nullptr;
0033     QChar mTextQuote = QLatin1Char('"');
0034     QChar mDelimiter = QLatin1Char(' ');
0035 
0036     uint mStartRow = 0;
0037     bool mNotTerminated = true;
0038 };
0039 
0040 void QCsvReaderPrivate::emitBeginLine(uint row)
0041 {
0042     if ((row - mStartRow) > 0) {
0043         mBuilder->beginLine();
0044     }
0045 }
0046 
0047 void QCsvReaderPrivate::emitEndLine(uint row)
0048 {
0049     if ((row - mStartRow) > 0) {
0050         mBuilder->endLine();
0051     }
0052 }
0053 
0054 void QCsvReaderPrivate::emitField(const QString &data, int row, int column)
0055 {
0056     if ((row - mStartRow) > 0) {
0057         mBuilder->field(data, row - mStartRow - 1, column - 1);
0058     }
0059 }
0060 
0061 QCsvReader::QCsvReader(QCsvBuilderInterface *builder)
0062     : d(new QCsvReaderPrivate(builder))
0063 {
0064     Q_ASSERT(builder);
0065 }
0066 
0067 QCsvReader::~QCsvReader() = default;
0068 
0069 bool QCsvReader::read(QIODevice *device)
0070 {
0071     enum State {
0072         StartLine,
0073         QuotedField,
0074         QuotedFieldEnd,
0075         NormalField,
0076         EmptyField,
0077     };
0078 
0079     int row;
0080     int column;
0081 
0082     QString field;
0083     QChar input;
0084     State currentState = StartLine;
0085 
0086     row = column = 1;
0087 
0088     d->mBuilder->begin();
0089 
0090     if (!device->isOpen()) {
0091         d->emitBeginLine(row);
0092         d->mBuilder->error(i18n("Device is not open"));
0093         d->emitEndLine(row);
0094         d->mBuilder->end();
0095         return false;
0096     }
0097 
0098     QTextStream inputStream(device);
0099     inputStream.setCodec(d->mCodec);
0100 
0101     /**
0102      * We use the following state machine to parse CSV:
0103      *
0104      * digraph {
0105      *   StartLine -> StartLine [label="\\r\\n"]
0106      *   StartLine -> QuotedField [label="Quote"]
0107      *   StartLine -> EmptyField [label="Delimiter"]
0108      *   StartLine -> NormalField [label="Other Char"]
0109      *
0110      *   QuotedField -> QuotedField [label="\\r\\n"]
0111      *   QuotedField -> QuotedFieldEnd [label="Quote"]
0112      *   QuotedField -> QuotedField [label="Delimiter"]
0113      *   QuotedField -> QuotedField [label="Other Char"]
0114      *
0115      *   QuotedFieldEnd -> StartLine [label="\\r\\n"]
0116      *   QuotedFieldEnd -> QuotedField [label="Quote"]
0117      *   QuotedFieldEnd -> EmptyField [label="Delimiter"]
0118      *   QuotedFieldEnd -> EmptyField [label="Other Char"]
0119      *
0120      *   EmptyField -> StartLine [label="\\r\\n"]
0121      *   EmptyField -> QuotedField [label="Quote"]
0122      *   EmptyField -> EmptyField [label="Delimiter"]
0123      *   EmptyField -> NormalField [label="Other Char"]
0124      *
0125      *   NormalField -> StartLine [label="\\r\\n"]
0126      *   NormalField -> NormalField [label="Quote"]
0127      *   NormalField -> EmptyField [label="Delimiter"]
0128      *   NormalField -> NormalField [label="Other Char"]
0129      * }
0130      */
0131 
0132     while (!inputStream.atEnd() && d->mNotTerminated) {
0133         inputStream >> input;
0134 
0135         switch (currentState) {
0136         case StartLine:
0137             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
0138                 currentState = StartLine;
0139             } else if (input == d->mTextQuote) {
0140                 d->emitBeginLine(row);
0141                 currentState = QuotedField;
0142             } else if (input == d->mDelimiter) {
0143                 d->emitBeginLine(row);
0144                 d->emitField(field, row, column);
0145                 column++;
0146                 currentState = EmptyField;
0147             } else {
0148                 d->emitBeginLine(row);
0149                 field.append(input);
0150                 currentState = NormalField;
0151             }
0152             break;
0153         case QuotedField:
0154             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
0155                 field.append(input);
0156                 currentState = QuotedField;
0157             } else if (input == d->mTextQuote) {
0158                 currentState = QuotedFieldEnd;
0159             } else if (input == d->mDelimiter) {
0160                 field.append(input);
0161                 currentState = QuotedField;
0162             } else {
0163                 field.append(input);
0164                 currentState = QuotedField;
0165             }
0166             break;
0167         case QuotedFieldEnd:
0168             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
0169                 d->emitField(field, row, column);
0170                 field.clear();
0171                 d->emitEndLine(row);
0172                 column = 1;
0173                 row++;
0174                 currentState = StartLine;
0175             } else if (input == d->mTextQuote) {
0176                 field.append(input);
0177                 currentState = QuotedField;
0178             } else if (input == d->mDelimiter) {
0179                 d->emitField(field, row, column);
0180                 field.clear();
0181                 column++;
0182                 currentState = EmptyField;
0183             } else {
0184                 d->emitField(field, row, column);
0185                 field.clear();
0186                 column++;
0187                 field.append(input);
0188                 currentState = EmptyField;
0189             }
0190             break;
0191         case NormalField:
0192             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
0193                 d->emitField(field, row, column);
0194                 field.clear();
0195                 d->emitEndLine(row);
0196                 row++;
0197                 column = 1;
0198                 currentState = StartLine;
0199             } else if (input == d->mTextQuote) {
0200                 field.append(input);
0201                 currentState = NormalField;
0202             } else if (input == d->mDelimiter) {
0203                 d->emitField(field, row, column);
0204                 field.clear();
0205                 column++;
0206                 currentState = EmptyField;
0207             } else {
0208                 field.append(input);
0209                 currentState = NormalField;
0210             }
0211             break;
0212         case EmptyField:
0213             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
0214                 d->emitField(QString(), row, column);
0215                 field.clear();
0216                 d->emitEndLine(row);
0217                 column = 1;
0218                 row++;
0219                 currentState = StartLine;
0220             } else if (input == d->mTextQuote) {
0221                 currentState = QuotedField;
0222             } else if (input == d->mDelimiter) {
0223                 d->emitField(QString(), row, column);
0224                 column++;
0225                 currentState = EmptyField;
0226             } else {
0227                 field.append(input);
0228                 currentState = NormalField;
0229             }
0230             break;
0231         }
0232     }
0233 
0234     if (currentState != StartLine) {
0235         if (field.length() > 0) {
0236             d->emitField(field, row, column);
0237             ++row;
0238             field.clear();
0239         }
0240         d->emitEndLine(row);
0241     }
0242 
0243     d->mBuilder->end();
0244 
0245     return true;
0246 }
0247 
0248 void QCsvReader::setTextQuote(QChar textQuote)
0249 {
0250     d->mTextQuote = textQuote;
0251 }
0252 
0253 QChar QCsvReader::textQuote() const
0254 {
0255     return d->mTextQuote;
0256 }
0257 
0258 void QCsvReader::setDelimiter(QChar delimiter)
0259 {
0260     d->mDelimiter = delimiter;
0261 }
0262 
0263 QChar QCsvReader::delimiter() const
0264 {
0265     return d->mDelimiter;
0266 }
0267 
0268 void QCsvReader::setStartRow(uint startRow)
0269 {
0270     d->mStartRow = startRow;
0271 }
0272 
0273 uint QCsvReader::startRow() const
0274 {
0275     return d->mStartRow;
0276 }
0277 
0278 void QCsvReader::setTextCodec(QTextCodec *textCodec)
0279 {
0280     d->mCodec = textCodec;
0281 }
0282 
0283 QTextCodec *QCsvReader::textCodec() const
0284 {
0285     return d->mCodec;
0286 }
0287 
0288 void QCsvReader::terminate()
0289 {
0290     d->mNotTerminated = false;
0291 }
0292 
0293 class QCsvStandardBuilderPrivate
0294 {
0295 public:
0296     QCsvStandardBuilderPrivate()
0297     {
0298         init();
0299     }
0300 
0301     void init();
0302 
0303     QString mLastErrorString;
0304     uint mRowCount;
0305     uint mColumnCount;
0306     QList<QStringList> mRows;
0307 };
0308 
0309 void QCsvStandardBuilderPrivate::init()
0310 {
0311     mRows.clear();
0312     mRowCount = 0;
0313     mColumnCount = 0;
0314     mLastErrorString.clear();
0315 }
0316 
0317 QCsvStandardBuilder::QCsvStandardBuilder()
0318     : d(new QCsvStandardBuilderPrivate)
0319 {
0320 }
0321 
0322 QCsvStandardBuilder::~QCsvStandardBuilder() = default;
0323 
0324 QString QCsvStandardBuilder::lastErrorString() const
0325 {
0326     return d->mLastErrorString;
0327 }
0328 
0329 uint QCsvStandardBuilder::rowCount() const
0330 {
0331     return d->mRowCount;
0332 }
0333 
0334 uint QCsvStandardBuilder::columnCount() const
0335 {
0336     return d->mColumnCount;
0337 }
0338 
0339 QString QCsvStandardBuilder::data(uint row, uint column) const
0340 {
0341     if (row > d->mRowCount || column > d->mColumnCount || column >= (uint)d->mRows[row].count()) {
0342         return {};
0343     }
0344 
0345     return d->mRows[row][column];
0346 }
0347 
0348 void QCsvStandardBuilder::begin()
0349 {
0350     d->init();
0351 }
0352 
0353 void QCsvStandardBuilder::beginLine()
0354 {
0355     d->mRows.append(QStringList());
0356     d->mRowCount++;
0357 }
0358 
0359 void QCsvStandardBuilder::field(const QString &data, uint row, uint column)
0360 {
0361     const uint size = d->mRows[row].size();
0362     if (column >= size) {
0363         for (uint i = column; i < size + 1; ++i) {
0364             d->mRows[row].append(QString());
0365         }
0366     }
0367 
0368     d->mRows[row][column] = data;
0369 
0370     d->mColumnCount = qMax(d->mColumnCount, column + 1);
0371 }
0372 
0373 void QCsvStandardBuilder::endLine()
0374 {
0375 }
0376 
0377 void QCsvStandardBuilder::end()
0378 {
0379 }
0380 
0381 void QCsvStandardBuilder::error(const QString &errorMsg)
0382 {
0383     d->mLastErrorString = errorMsg;
0384 }
0385 
0386 #include "moc_qcsvreader.cpp"