File indexing completed on 2024-03-24 15:14:49
0001 /* 0002 SPDX-FileCopyrightText: 2012 Rishab Arora <ra.rishab@gmail.com> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #include "ksparser.h" 0008 0009 #include <QDebug> 0010 0011 const int KSParser::EBROKEN_INT = 0; 0012 const double KSParser::EBROKEN_DOUBLE = 0.0; 0013 const float KSParser::EBROKEN_FLOAT = 0.0; 0014 const QString KSParser::EBROKEN_QSTRING = "Null"; 0015 const bool KSParser::parser_debug_mode_ = false; 0016 0017 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence, 0018 const char delimiter) 0019 : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), delimiter_(delimiter) 0020 { 0021 if (!file_reader_.openFullPath(filename_)) 0022 { 0023 qWarning() << "Unable to open file: " << filename; 0024 readFunctionPtr = &KSParser::DummyRow; 0025 } 0026 else 0027 { 0028 readFunctionPtr = &KSParser::ReadCSVRow; 0029 qDebug() << Q_FUNC_INFO << "File opened: " << filename; 0030 } 0031 } 0032 0033 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence, 0034 const QList<int> &widths) 0035 : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), width_sequence_(widths) 0036 { 0037 if (!file_reader_.openFullPath(filename_)) 0038 { 0039 qWarning() << "Unable to open file: " << filename; 0040 readFunctionPtr = &KSParser::DummyRow; 0041 } 0042 else 0043 { 0044 readFunctionPtr = &KSParser::ReadFixedWidthRow; 0045 qDebug() << Q_FUNC_INFO << "File opened: " << filename; 0046 } 0047 } 0048 0049 QHash<QString, QVariant> KSParser::ReadNextRow() 0050 { 0051 return (this->*readFunctionPtr)(); 0052 } 0053 0054 QHash<QString, QVariant> KSParser::ReadCSVRow() 0055 { 0056 /** 0057 * @brief read_success(bool) signifies if a row has been successfully read. 0058 * If any problem (eg incomplete row) is encountered. The row is discarded 0059 * and the while loop continues till it finds a good row or the file ends. 0060 **/ 0061 bool read_success = false; 0062 QString next_line; 0063 QStringList separated; 0064 QHash<QString, QVariant> newRow; 0065 0066 while (file_reader_.hasMoreLines() && read_success == false) 0067 { 0068 next_line = file_reader_.readLine(); 0069 if (next_line.mid(0, 1)[0] == comment_char_) 0070 continue; 0071 separated = next_line.split(delimiter_); 0072 /* 0073 * 1) split along delimiter eg. comma (,) 0074 * 2) check first and last characters. 0075 * if the first letter is '"', 0076 * then combine the nexto ones in it till 0077 * till you come across the next word which 0078 * has the last character as '"' 0079 * (CombineQuoteParts 0080 * 0081 */ 0082 if (separated.length() == 1) 0083 continue; // Length will be 1 if there 0084 // is no delimiter 0085 0086 separated = CombineQuoteParts(separated); // At this point, the 0087 // string has been split 0088 // taking the quote marks into account 0089 0090 // Check if the generated list has correct size 0091 // If not, continue to next row. (i.e SKIP INCOMPLETE ROW) 0092 if (separated.length() != name_type_sequence_.length()) 0093 continue; 0094 0095 for (int i = 0; i < name_type_sequence_.length(); i++) 0096 { 0097 bool ok; 0098 newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok); 0099 if (!ok && parser_debug_mode_) 0100 { 0101 qDebug() << Q_FUNC_INFO << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first 0102 << " & next_line : " << next_line; 0103 } 0104 } 0105 read_success = true; 0106 } 0107 /* 0108 * This signifies that someone tried to read a row 0109 * without checking if HasNextRow is true. 0110 * OR 0111 * The file was truncated OR the file ends with one or more '\n' 0112 */ 0113 if (file_reader_.hasMoreLines() == false && newRow.size() <= 1) 0114 newRow = DummyRow(); 0115 return newRow; 0116 } 0117 0118 QHash<QString, QVariant> KSParser::ReadFixedWidthRow() 0119 { 0120 if (name_type_sequence_.length() != (width_sequence_.length() + 1)) 0121 { 0122 // line length is appendeded to width_sequence_ by default. 0123 // Hence, the length of width_sequence_ is one less than 0124 // name_type_sequence_ 0125 qWarning() << "Unequal fields and widths! Returning dummy row!"; 0126 Q_ASSERT(false); // Make sure that in Debug mode, this condition generates an abort. 0127 return DummyRow(); 0128 } 0129 0130 /** 0131 * @brief read_success (bool) signifies if a row has been successfully read. 0132 * If any problem (eg incomplete row) is encountered. The row is discarded 0133 * and the while loop continues till it finds a good row or the file ends. 0134 **/ 0135 bool read_success = false; 0136 QString next_line; 0137 QStringList separated; 0138 QHash<QString, QVariant> newRow; 0139 int total_min_length = 0; 0140 0141 foreach (const int width_value, width_sequence_) 0142 { 0143 total_min_length += width_value; 0144 } 0145 while (file_reader_.hasMoreLines() && read_success == false) 0146 { 0147 /* 0148 * Steps: 0149 * 1) Read Line 0150 * 2) If it is a comment, loop again 0151 * 3) If it is too small, loop again 0152 * 4) Else, a) Break it down according to widths 0153 * b) Convert each broken down unit to appropriate value 0154 * c) set read_success to True denoting we have a valid 0155 * conversion 0156 */ 0157 next_line = file_reader_.readLine(); 0158 if (next_line.mid(0, 1)[0] == comment_char_) 0159 continue; 0160 if (next_line.length() < total_min_length) 0161 continue; 0162 0163 int curr_width = 0; 0164 for (int split : width_sequence_) 0165 { 0166 // Build separated stringlist. Then assign it afterwards. 0167 QString temp_split; 0168 0169 temp_split = next_line.mid(curr_width, split); 0170 // Don't use at(), because it crashes on invalid index 0171 curr_width += split; 0172 separated.append(temp_split.trimmed()); 0173 } 0174 separated.append(next_line.mid(curr_width).trimmed()); // Append last segment 0175 0176 // Conversions 0177 for (int i = 0; i < name_type_sequence_.length(); ++i) 0178 { 0179 bool ok; 0180 newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok); 0181 if (!ok && parser_debug_mode_) 0182 { 0183 qDebug() << Q_FUNC_INFO << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first 0184 << " & next_line : " << next_line; 0185 } 0186 } 0187 read_success = true; 0188 } 0189 /* 0190 * This signifies that someone tried to read a row 0191 * without checking if HasNextRow is true. 0192 * OR 0193 * The file was truncated OR the file ends with one or more '\n' 0194 */ 0195 if (file_reader_.hasMoreLines() == false && newRow.size() <= 1) 0196 newRow = DummyRow(); 0197 return newRow; 0198 } 0199 0200 QHash<QString, QVariant> KSParser::DummyRow() 0201 { 0202 // qWarning() << "File named " << filename_ << " encountered an error while reading"; 0203 QHash<QString, QVariant> newRow; 0204 0205 for (auto &item : name_type_sequence_) 0206 { 0207 switch (item.second) 0208 { 0209 case D_QSTRING: 0210 newRow[item.first] = EBROKEN_QSTRING; 0211 break; 0212 case D_DOUBLE: 0213 newRow[item.first] = EBROKEN_DOUBLE; 0214 break; 0215 case D_INT: 0216 newRow[item.first] = EBROKEN_INT; 0217 break; 0218 case D_FLOAT: 0219 newRow[item.first] = EBROKEN_FLOAT; 0220 break; 0221 case D_SKIP: 0222 default: 0223 break; 0224 } 0225 } 0226 return newRow; 0227 } 0228 0229 bool KSParser::HasNextRow() 0230 { 0231 return file_reader_.hasMoreLines(); 0232 } 0233 0234 void KSParser::SetProgress(QString msg, int total_lines, int step_size) 0235 { 0236 file_reader_.setProgress(msg, total_lines, step_size); 0237 } 0238 0239 void KSParser::ShowProgress() 0240 { 0241 file_reader_.showProgress(); 0242 } 0243 0244 QList<QString> KSParser::CombineQuoteParts(QList<QString> &separated) 0245 { 0246 QString iter_string; 0247 QList<QString> quoteCombined; 0248 QStringList::const_iterator iter; 0249 0250 if (separated.length() == 0) 0251 { 0252 qDebug() << Q_FUNC_INFO << "Cannot Combine empty list"; 0253 } 0254 else 0255 { 0256 /* Algorithm: 0257 * In the following steps, 'word' implies a unit from 'separated'. 0258 * i.e. separated[0], separated[1] etc are 'words' 0259 * 0260 * 1) Read a word 0261 * 2) If word does not start with \" add to final expression. Goto 1) 0262 * 3) If word starts with \", push to queue 0263 * 4) If word ends with \", empty queue and join each with delimiter. 0264 * Add this to final expression. Go to 6) 0265 * 5) Read next word. Goto 3) until end of list of words is reached 0266 * 6) Goto 1) until end of list of words is reached 0267 */ 0268 iter = separated.constBegin(); 0269 0270 while (iter != separated.constEnd()) 0271 { 0272 QList<QString> queue; 0273 iter_string = *iter; 0274 0275 if (iter_string.indexOf("\"") == 0) // if (quote mark is the first character) 0276 { 0277 iter_string = (iter_string).remove(0, 1); // remove the quote at the start 0278 while (iter_string.lastIndexOf('\"') != (iter_string.length() - 1) && 0279 iter != separated.constEnd()) // handle stuff between parent quotes 0280 { 0281 queue.append((iter_string)); 0282 ++iter; 0283 iter_string = *iter; 0284 } 0285 iter_string.chop(1); // remove the quote at the end 0286 queue.append(iter_string); 0287 } 0288 else 0289 { 0290 queue.append(iter_string); 0291 } 0292 0293 QString col_result; 0294 foreach (const QString &join, queue) 0295 col_result += (join + delimiter_); 0296 col_result.chop(1); // remove extra delimiter 0297 quoteCombined.append(col_result); 0298 ++iter; 0299 } 0300 } 0301 return quoteCombined; 0302 } 0303 0304 QVariant KSParser::ConvertToQVariant(const QString &input_string, const KSParser::DataTypes &data_type, bool &ok) 0305 { 0306 ok = true; 0307 QVariant converted_object; 0308 switch (data_type) 0309 { 0310 case D_QSTRING: 0311 case D_SKIP: 0312 converted_object = input_string; 0313 break; 0314 case D_DOUBLE: 0315 converted_object = input_string.trimmed().toDouble(&ok); 0316 if (!ok) 0317 converted_object = EBROKEN_DOUBLE; 0318 break; 0319 case D_INT: 0320 converted_object = input_string.trimmed().toInt(&ok); 0321 if (!ok) 0322 converted_object = EBROKEN_INT; 0323 break; 0324 case D_FLOAT: 0325 converted_object = input_string.trimmed().toFloat(&ok); 0326 if (!ok) 0327 converted_object = EBROKEN_FLOAT; 0328 break; 0329 } 0330 return converted_object; 0331 }