File indexing completed on 2024-03-24 15:14:49

0001 /*
0002     SPDX-FileCopyrightText: 2012 Rishab Arora <ra.rishab@gmail.com>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #include "ksparser.h"
0008 
0009 #include <QDebug>
0010 
0011 const int KSParser::EBROKEN_INT         = 0;
0012 const double KSParser::EBROKEN_DOUBLE   = 0.0;
0013 const float KSParser::EBROKEN_FLOAT     = 0.0;
0014 const QString KSParser::EBROKEN_QSTRING = "Null";
0015 const bool KSParser::parser_debug_mode_ = false;
0016 
0017 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence,
0018                    const char delimiter)
0019     : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), delimiter_(delimiter)
0020 {
0021     if (!file_reader_.openFullPath(filename_))
0022     {
0023         qWarning() << "Unable to open file: " << filename;
0024         readFunctionPtr = &KSParser::DummyRow;
0025     }
0026     else
0027     {
0028         readFunctionPtr = &KSParser::ReadCSVRow;
0029         qDebug() << Q_FUNC_INFO << "File opened: " << filename;
0030     }
0031 }
0032 
0033 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence,
0034                    const QList<int> &widths)
0035     : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), width_sequence_(widths)
0036 {
0037     if (!file_reader_.openFullPath(filename_))
0038     {
0039         qWarning() << "Unable to open file: " << filename;
0040         readFunctionPtr = &KSParser::DummyRow;
0041     }
0042     else
0043     {
0044         readFunctionPtr = &KSParser::ReadFixedWidthRow;
0045         qDebug() << Q_FUNC_INFO << "File opened: " << filename;
0046     }
0047 }
0048 
0049 QHash<QString, QVariant> KSParser::ReadNextRow()
0050 {
0051     return (this->*readFunctionPtr)();
0052 }
0053 
0054 QHash<QString, QVariant> KSParser::ReadCSVRow()
0055 {
0056     /**
0057      * @brief read_success(bool) signifies if a row has been successfully read.
0058      * If any problem (eg incomplete row) is encountered. The row is discarded
0059      * and the while loop continues till it finds a good row or the file ends.
0060      **/
0061     bool read_success = false;
0062     QString next_line;
0063     QStringList separated;
0064     QHash<QString, QVariant> newRow;
0065 
0066     while (file_reader_.hasMoreLines() && read_success == false)
0067     {
0068         next_line = file_reader_.readLine();
0069         if (next_line.mid(0, 1)[0] == comment_char_)
0070             continue;
0071         separated = next_line.split(delimiter_);
0072         /*
0073             * 1) split along delimiter eg. comma (,)
0074             * 2) check first and last characters.
0075             *    if the first letter is  '"',
0076             *    then combine the nexto ones in it till
0077             *    till you come across the next word which
0078             *    has the last character as '"'
0079             *    (CombineQuoteParts
0080             *
0081         */
0082         if (separated.length() == 1)
0083             continue; // Length will be 1 if there
0084         // is no delimiter
0085 
0086         separated = CombineQuoteParts(separated); // At this point, the
0087         // string has been split
0088         // taking the quote marks into account
0089 
0090         // Check if the generated list has correct size
0091         // If not, continue to next row. (i.e SKIP INCOMPLETE ROW)
0092         if (separated.length() != name_type_sequence_.length())
0093             continue;
0094 
0095         for (int i = 0; i < name_type_sequence_.length(); i++)
0096         {
0097             bool ok;
0098             newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok);
0099             if (!ok && parser_debug_mode_)
0100             {
0101                 qDebug() << Q_FUNC_INFO << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first
0102                          << " & next_line : " << next_line;
0103             }
0104         }
0105         read_success = true;
0106     }
0107     /*
0108      * This signifies that someone tried to read a row
0109      * without checking if HasNextRow is true.
0110      * OR
0111      * The file was truncated OR the file ends with one or more '\n'
0112      */
0113     if (file_reader_.hasMoreLines() == false && newRow.size() <= 1)
0114         newRow = DummyRow();
0115     return newRow;
0116 }
0117 
0118 QHash<QString, QVariant> KSParser::ReadFixedWidthRow()
0119 {
0120     if (name_type_sequence_.length() != (width_sequence_.length() + 1))
0121     {
0122         // line length is appendeded to width_sequence_ by default.
0123         // Hence, the length of width_sequence_ is one less than
0124         // name_type_sequence_
0125         qWarning() << "Unequal fields and widths! Returning dummy row!";
0126         Q_ASSERT(false); // Make sure that in Debug mode, this condition generates an abort.
0127         return DummyRow();
0128     }
0129 
0130     /**
0131     * @brief read_success (bool) signifies if a row has been successfully read.
0132     * If any problem (eg incomplete row) is encountered. The row is discarded
0133     * and the while loop continues till it finds a good row or the file ends.
0134     **/
0135     bool read_success = false;
0136     QString next_line;
0137     QStringList separated;
0138     QHash<QString, QVariant> newRow;
0139     int total_min_length = 0;
0140 
0141     foreach (const int width_value, width_sequence_)
0142     {
0143         total_min_length += width_value;
0144     }
0145     while (file_reader_.hasMoreLines() && read_success == false)
0146     {
0147         /*
0148          * Steps:
0149          * 1) Read Line
0150          * 2) If it is a comment, loop again
0151          * 3) If it is too small, loop again
0152          * 4) Else, a) Break it down according to widths
0153          *          b) Convert each broken down unit to appropriate value
0154          *          c) set read_success to True denoting we have a valid
0155          *             conversion
0156         */
0157         next_line = file_reader_.readLine();
0158         if (next_line.mid(0, 1)[0] == comment_char_)
0159             continue;
0160         if (next_line.length() < total_min_length)
0161             continue;
0162 
0163         int curr_width = 0;
0164         for (int split : width_sequence_)
0165         {
0166             // Build separated stringlist. Then assign it afterwards.
0167             QString temp_split;
0168 
0169             temp_split = next_line.mid(curr_width, split);
0170             // Don't use at(), because it crashes on invalid index
0171             curr_width += split;
0172             separated.append(temp_split.trimmed());
0173         }
0174         separated.append(next_line.mid(curr_width).trimmed()); // Append last segment
0175 
0176         // Conversions
0177         for (int i = 0; i < name_type_sequence_.length(); ++i)
0178         {
0179             bool ok;
0180             newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok);
0181             if (!ok && parser_debug_mode_)
0182             {
0183                 qDebug() << Q_FUNC_INFO << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first
0184                          << " & next_line : " << next_line;
0185             }
0186         }
0187         read_success = true;
0188     }
0189     /*
0190      * This signifies that someone tried to read a row
0191      * without checking if HasNextRow is true.
0192      * OR
0193      * The file was truncated OR the file ends with one or more '\n'
0194      */
0195     if (file_reader_.hasMoreLines() == false && newRow.size() <= 1)
0196         newRow = DummyRow();
0197     return newRow;
0198 }
0199 
0200 QHash<QString, QVariant> KSParser::DummyRow()
0201 {
0202     // qWarning() << "File named " << filename_ << " encountered an error while reading";
0203     QHash<QString, QVariant> newRow;
0204 
0205     for (auto &item : name_type_sequence_)
0206     {
0207         switch (item.second)
0208         {
0209             case D_QSTRING:
0210                 newRow[item.first] = EBROKEN_QSTRING;
0211                 break;
0212             case D_DOUBLE:
0213                 newRow[item.first] = EBROKEN_DOUBLE;
0214                 break;
0215             case D_INT:
0216                 newRow[item.first] = EBROKEN_INT;
0217                 break;
0218             case D_FLOAT:
0219                 newRow[item.first] = EBROKEN_FLOAT;
0220                 break;
0221             case D_SKIP:
0222             default:
0223                 break;
0224         }
0225     }
0226     return newRow;
0227 }
0228 
0229 bool KSParser::HasNextRow()
0230 {
0231     return file_reader_.hasMoreLines();
0232 }
0233 
0234 void KSParser::SetProgress(QString msg, int total_lines, int step_size)
0235 {
0236     file_reader_.setProgress(msg, total_lines, step_size);
0237 }
0238 
0239 void KSParser::ShowProgress()
0240 {
0241     file_reader_.showProgress();
0242 }
0243 
0244 QList<QString> KSParser::CombineQuoteParts(QList<QString> &separated)
0245 {
0246     QString iter_string;
0247     QList<QString> quoteCombined;
0248     QStringList::const_iterator iter;
0249 
0250     if (separated.length() == 0)
0251     {
0252         qDebug() << Q_FUNC_INFO << "Cannot Combine empty list";
0253     }
0254     else
0255     {
0256         /* Algorithm:
0257          * In the following steps, 'word' implies a unit from 'separated'.
0258          * i.e. separated[0], separated[1] etc are 'words'
0259          *
0260          * 1) Read a word
0261          * 2) If word does not start with \" add to final expression. Goto 1)
0262          * 3) If word starts with \", push to queue
0263          * 4) If word ends with \", empty queue and join each with delimiter.
0264          *    Add this to final expression. Go to 6)
0265          * 5) Read next word. Goto 3) until end of list of words is reached
0266          * 6) Goto 1) until end of list of words is reached
0267         */
0268         iter = separated.constBegin();
0269 
0270         while (iter != separated.constEnd())
0271         {
0272             QList<QString> queue;
0273             iter_string = *iter;
0274 
0275             if (iter_string.indexOf("\"") == 0) // if (quote mark is the first character)
0276             {
0277                 iter_string = (iter_string).remove(0, 1); // remove the quote at the start
0278                 while (iter_string.lastIndexOf('\"') != (iter_string.length() - 1) &&
0279                        iter != separated.constEnd()) // handle stuff between parent quotes
0280                 {
0281                     queue.append((iter_string));
0282                     ++iter;
0283                     iter_string = *iter;
0284                 }
0285                 iter_string.chop(1); // remove the quote at the end
0286                 queue.append(iter_string);
0287             }
0288             else
0289             {
0290                 queue.append(iter_string);
0291             }
0292 
0293             QString col_result;
0294             foreach (const QString &join, queue)
0295                 col_result += (join + delimiter_);
0296             col_result.chop(1); // remove extra delimiter
0297             quoteCombined.append(col_result);
0298             ++iter;
0299         }
0300     }
0301     return quoteCombined;
0302 }
0303 
0304 QVariant KSParser::ConvertToQVariant(const QString &input_string, const KSParser::DataTypes &data_type, bool &ok)
0305 {
0306     ok = true;
0307     QVariant converted_object;
0308     switch (data_type)
0309     {
0310         case D_QSTRING:
0311         case D_SKIP:
0312             converted_object = input_string;
0313             break;
0314         case D_DOUBLE:
0315             converted_object = input_string.trimmed().toDouble(&ok);
0316             if (!ok)
0317                 converted_object = EBROKEN_DOUBLE;
0318             break;
0319         case D_INT:
0320             converted_object = input_string.trimmed().toInt(&ok);
0321             if (!ok)
0322                 converted_object = EBROKEN_INT;
0323             break;
0324         case D_FLOAT:
0325             converted_object = input_string.trimmed().toFloat(&ok);
0326             if (!ok)
0327                 converted_object = EBROKEN_FLOAT;
0328             break;
0329     }
0330     return converted_object;
0331 }