File indexing completed on 2024-05-12 03:47:20
0001 /* 0002 File : VariableParser.h 0003 Project : LabPlot 0004 Description : Variable parser for different CAS backends 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2015 Garvit Khatri <garvitdelhi@gmail.com> 0007 SPDX-FileCopyrightText: 2016 Alexander Semke <alexander.semke@web.de> 0008 SPDX-License-Identifier: GPL-2.0-or-later 0009 */ 0010 0011 #include "VariableParser.h" 0012 #include "backend/lib/trace.h" 0013 #include <QRegularExpressionMatch> 0014 #include <QStringList> 0015 #include <cmath> // NAN 0016 0017 VariableParser::VariableParser(QString name, QString value) 0018 : m_backendName(std::move(name)) 0019 , m_string(std::move(value)) { 0020 PERFTRACE(QLatin1String("parsing variable")); 0021 if (m_backendName.compare(QStringLiteral("Maxima"), Qt::CaseInsensitive) == 0) 0022 parseMaximaValues(); 0023 else if ((m_backendName.compare(QStringLiteral("Python"), Qt::CaseInsensitive) == 0) 0024 || (m_backendName.compare(QStringLiteral("Python 3"), Qt::CaseInsensitive) == 0) 0025 || (m_backendName.compare(QStringLiteral("Python 2"), Qt::CaseInsensitive) == 0)) 0026 parsePythonValues(); 0027 else if (m_backendName.compare(QStringLiteral("Sage"), Qt::CaseInsensitive) == 0) 0028 parsePythonValues(); 0029 else if (m_backendName.compare(QStringLiteral("R"), Qt::CaseInsensitive) == 0) 0030 parseRValues(); 0031 else if (m_backendName.compare(QStringLiteral("Julia"), Qt::CaseInsensitive) == 0) 0032 parsePythonValues(); 0033 else if (m_backendName.compare(QStringLiteral("Octave"), Qt::CaseInsensitive) == 0) 0034 parseOctaveValues(); 0035 } 0036 0037 QVector<int>& VariableParser::integers() { 0038 return *static_cast<QVector<int>*>(m_values); 0039 } 0040 0041 QVector<qint64>& VariableParser::bigInt() { 0042 return *static_cast<QVector<qint64>*>(m_values); 0043 } 0044 0045 QVector<double>& VariableParser::doublePrecision() { 0046 return *static_cast<QVector<double>*>(m_values); 0047 } 0048 0049 QVector<QDateTime>& VariableParser::dateTime() { 0050 return *static_cast<QVector<QDateTime>*>(m_values); 0051 } 0052 0053 QVector<QString>& VariableParser::text() { 0054 return *static_cast<QVector<QString>*>(m_values); 0055 } 0056 0057 void VariableParser::parseMaximaValues() { 0058 if (m_string.count(QStringLiteral("[")) > 2) 0059 return; 0060 0061 Datatype dataType = Datatype::float64; 0062 if (m_string.startsWith(QLatin1String("[\""))) // Maxime uses " to quote string values in the output 0063 dataType = Datatype::text; 0064 0065 m_string = m_string.replace(QStringLiteral("["), QString()); 0066 m_string = m_string.replace(QStringLiteral("]"), QString()); 0067 m_string = m_string.trimmed(); 0068 0069 const QStringList valueStringList = m_string.split(QStringLiteral(",")); 0070 parseValues(valueStringList, dataType); 0071 } 0072 0073 /*! 0074 * Python containers that can be parsed: 0075 * * List (a collection which is ordered and changeable, allows duplicate members) 0076 * * Tuple (collection which is ordered and unchangeable, allows duplicate members) 0077 * * Set (collection which is unordered, unchangeable and unindexed, no duplicate members) 0078 * * Numpy's array (with and without the explicit specification of the data type) 0079 * */ 0080 void VariableParser::parsePythonValues() { 0081 QStringList valueStringList; 0082 QString dataType = QStringLiteral("float64"); 0083 m_string = m_string.trimmed(); 0084 if (m_string.startsWith(QLatin1String("array"))) { 0085 // parse numpy arrays, string representation like array([1,2,3,4,5]) or 0086 // array([1, 2,3], dtype=uint32) 0087 0088 // we don't handle array of arrays, e.g. the output of 'np.ones((2,2), dtype=np.int16)' 0089 // which is 'array([[1, 1], [1, 1]], dtype=int16)' 0090 if (m_string.count(QStringLiteral("[")) > 2) 0091 return; 0092 0093 QRegularExpressionMatch match; 0094 auto numpyDatatypeRegex = QStringLiteral("\\s*,\\s*dtype='{0,1}[a-zA-Z0-9\\[\\]]*'{0,1}"); 0095 m_string.indexOf(QRegularExpression(numpyDatatypeRegex), 0, &match); 0096 if (match.isValid() && match.captured() != QString()) 0097 dataType = match.captured().replace(QStringLiteral("'"), QString()).replace(QStringLiteral(", dtype="), QString()); 0098 m_string = m_string.replace(QStringLiteral("array(["), QString()); 0099 m_string = m_string.replace(QRegExp(numpyDatatypeRegex), QString()); 0100 m_string = m_string.replace(QStringLiteral("])"), QString()); 0101 } else if (m_string.startsWith(QStringLiteral("["))) { 0102 // parse python's lists 0103 if (m_string.startsWith(QLatin1String("['"))) // python uses ' to quote string values in the output 0104 dataType = QStringLiteral("text"); 0105 m_string = m_string.replace(QStringLiteral("["), QString()); 0106 m_string = m_string.replace(QStringLiteral("]"), QString()); 0107 } else if (m_string.startsWith(QStringLiteral("("))) { 0108 // parse python's tuples 0109 if (m_string.startsWith(QLatin1String("('"))) 0110 dataType = QStringLiteral("text"); 0111 m_string = m_string.replace(QStringLiteral("("), QString()); 0112 m_string = m_string.replace(QStringLiteral(")"), QString()); 0113 } else if (m_string.startsWith(QStringLiteral("{"))) { 0114 // parse python's sets 0115 if (m_string.startsWith(QLatin1String("{'"))) 0116 dataType = QStringLiteral("text"); 0117 m_string = m_string.replace(QStringLiteral("{"), QString()); 0118 m_string = m_string.replace(QStringLiteral("}"), QString()); 0119 } else { 0120 return; 0121 } 0122 0123 // Fast method to determine the separator. It is assumed if at least one 0124 // commas exist, the comma is the separator 0125 if (m_string.indexOf(QLatin1Char(',')) != -1) 0126 valueStringList = m_string.split(QStringLiteral(",")); 0127 else 0128 valueStringList = m_string.split(QStringLiteral(" ")); 0129 0130 parseValues(valueStringList, convertNumpyDatatype(dataType)); 0131 } 0132 0133 void VariableParser::parseRValues() { 0134 m_string = m_string.trimmed(); 0135 const QStringList valueStringList = m_string.split(QStringLiteral(", ")); 0136 parseValues(valueStringList); 0137 } 0138 0139 void VariableParser::parseOctaveValues() { 0140 m_string = m_string.trimmed(); 0141 0142 QStringList valueStringList; 0143 const QStringList tempStringList = m_string.split(QLatin1Char('\n')); 0144 if (m_string.indexOf(QStringLiteral("; ")) != -1) { // parse column vectors 0145 for (const QString& values : tempStringList) 0146 valueStringList << values.split(QStringLiteral("; ")); 0147 } else { 0148 for (const QString& values : tempStringList) { // parse row vectors 0149 // TODO: in newer version of Cantor the rows with "Columns..." were removed already. 0150 // we can stop looking for this substring in some point in time later. 0151 if (!values.isEmpty() && !values.trimmed().startsWith(QStringLiteral("Columns"))) 0152 valueStringList << values.split(QLatin1Char(' ')); 0153 } 0154 } 0155 0156 valueStringList.removeAll(QString()); 0157 parseValues(valueStringList); 0158 } 0159 0160 bool VariableParser::isParsed() { 0161 return m_parsed; 0162 } 0163 0164 void VariableParser::clearValues() { 0165 switch (m_dataType) { 0166 case AbstractColumn::ColumnMode::Integer: 0167 delete static_cast<QVector<int>*>(m_values); 0168 break; 0169 case AbstractColumn::ColumnMode::BigInt: 0170 delete static_cast<QVector<qlonglong>*>(m_values); 0171 break; 0172 case AbstractColumn::ColumnMode::Double: 0173 delete static_cast<QVector<qreal>*>(m_values); 0174 break; 0175 case AbstractColumn::ColumnMode::Day: 0176 case AbstractColumn::ColumnMode::Month: 0177 case AbstractColumn::ColumnMode::DateTime: 0178 delete static_cast<QVector<QDateTime>*>(m_values); 0179 break; 0180 case AbstractColumn::ColumnMode::Text: 0181 delete static_cast<QVector<QString>*>(m_values); 0182 break; 0183 } 0184 } 0185 0186 VariableParser::Datatype VariableParser::convertNumpyDatatype(const QString& d) { 0187 if (d == QStringLiteral("uint8")) 0188 return Datatype::uint8; 0189 else if (d == QStringLiteral("int8")) 0190 return Datatype::int8; 0191 else if (d == QStringLiteral("uint16")) 0192 return Datatype::uint16; 0193 else if (d == QStringLiteral("int16")) 0194 return Datatype::int16; 0195 else if (d == QStringLiteral("uint32")) 0196 return Datatype::uint32; 0197 else if (d == QStringLiteral("int32")) 0198 return Datatype::int32; 0199 else if (d == QStringLiteral("uint64")) 0200 return Datatype::uint64; 0201 else if (d == QStringLiteral("int64")) 0202 return Datatype::int64; 0203 else if (d == QStringLiteral("float32")) 0204 return Datatype::float32; 0205 else if (d == QStringLiteral("float64")) 0206 return Datatype::float64; 0207 else if (d == QStringLiteral("datetime64[ms]")) 0208 return Datatype::datetime64_ms; 0209 else if (d == QStringLiteral("datetime64[s]")) 0210 return Datatype::datetime64_s; 0211 else if (d == QStringLiteral("datetime64[m]")) 0212 return Datatype::datetime64_m; 0213 else if (d == QStringLiteral("datetime64[h]")) 0214 return Datatype::datetime64_h; 0215 else if (d == QStringLiteral("datetime64[D]") || d == QStringLiteral("datetime64")) 0216 return Datatype::datetime64_D; 0217 0218 // as default text is used 0219 return Datatype::text; 0220 } 0221 0222 void VariableParser::parseValues(const QStringList& values, VariableParser::Datatype dataType) { 0223 PERFTRACE(QStringLiteral("parsing variable values string list")); 0224 switch (dataType) { 0225 case Datatype::uint8: 0226 case Datatype::int8: 0227 case Datatype::uint16: 0228 case Datatype::int16: 0229 case Datatype::int32: 0230 m_values = new QVector<int>(values.size()); 0231 m_dataType = AbstractColumn::ColumnMode::Integer; 0232 break; 0233 case Datatype::uint32: 0234 case Datatype::int64: 0235 m_values = new QVector<qint64>(values.size()); 0236 m_dataType = AbstractColumn::ColumnMode::BigInt; 0237 break; 0238 case Datatype::uint64: // larger than qint64! 0239 case Datatype::float32: 0240 case Datatype::float64: 0241 m_values = new QVector<double>(values.size()); 0242 m_dataType = AbstractColumn::ColumnMode::Double; 0243 break; 0244 case Datatype::datetime64_D: 0245 case Datatype::datetime64_h: 0246 case Datatype::datetime64_m: 0247 case Datatype::datetime64_s: 0248 case Datatype::datetime64_ms: 0249 m_values = new QVector<QDateTime>(values.size()); 0250 m_dataType = AbstractColumn::ColumnMode::DateTime; 0251 break; 0252 case Datatype::text: 0253 m_values = new QVector<QString>(values.size()); 0254 m_dataType = AbstractColumn::ColumnMode::Text; 0255 } 0256 0257 int i = 0; 0258 bool isNumber = false; 0259 switch (dataType) { 0260 case Datatype::uint8: 0261 case Datatype::int8: 0262 case Datatype::uint16: 0263 case Datatype::int16: 0264 case Datatype::int32: { 0265 for (const auto& v : values) { 0266 int value = v.trimmed().toUInt(&isNumber); 0267 0268 // accept the variable only if there is at least one numerical value in the array. 0269 if (isNumber) { 0270 if (!m_parsed) 0271 m_parsed = true; 0272 } else 0273 value = 0; 0274 0275 integers()[i] = value; 0276 i++; 0277 } 0278 break; 0279 } 0280 case Datatype::uint32: 0281 case Datatype::int64: { 0282 for (const auto& v : values) { 0283 qint64 value = v.trimmed().toLongLong(&isNumber); 0284 if (isNumber) { 0285 if (!m_parsed) 0286 m_parsed = true; 0287 } else 0288 value = 0; 0289 0290 bigInt()[i] = value; 0291 i++; 0292 } 0293 break; 0294 } 0295 case Datatype::uint64: 0296 case Datatype::float32: 0297 case Datatype::float64: { 0298 // use the first value in the vector to check whether we need to consider 0299 // the locale specific representation of floats (for example, R's output is locale specific) 0300 bool useLocale = false; 0301 if (!values.isEmpty()) { 0302 values.constFirst().trimmed().toDouble(&isNumber); 0303 if (!isNumber) 0304 useLocale = true; // direct conversion has failed, use QLocale to parse the strings further below 0305 } 0306 0307 if (!useLocale) { 0308 for (const auto& v : values) { 0309 double value = v.trimmed().toDouble(&isNumber); 0310 if (isNumber) { 0311 if (!m_parsed) 0312 m_parsed = true; 0313 } else 0314 value = NAN; 0315 0316 doublePrecision()[i] = value; 0317 i++; 0318 } 0319 } else { 0320 QLocale locale; 0321 for (const auto& v : values) { 0322 double value = locale.toDouble(v.trimmed(), &isNumber); 0323 if (isNumber) { 0324 if (!m_parsed) 0325 m_parsed = true; 0326 } else 0327 value = NAN; 0328 0329 doublePrecision()[i] = value; 0330 i++; 0331 } 0332 } 0333 0334 break; 0335 } 0336 // Adding timezone indicator "Z" is necessary, because specific dates like 0337 // 2017-03-26T02:14:34.000 are not available in different timezones. 0338 // https://forum.qt.io/topic/133181/qdatetime-fromstring-returns-invalid-datetime 0339 case Datatype::datetime64_D: 0340 for (const auto& v : values) { 0341 #if (QT_VERSION >= QT_VERSION_CHECK(5, 14, 0)) 0342 dateTime()[i] = QDate::fromString(v.trimmed().replace(QStringLiteral("'"), QString()) + QStringLiteral("Z"), Qt::ISODate).startOfDay(Qt::UTC); 0343 #else 0344 dateTime()[i] = QDateTime(QDate::fromString(v.trimmed().replace(QStringLiteral("'"), QString()) + QStringLiteral("Z"), Qt::ISODate)); 0345 #endif 0346 m_parsed = true; 0347 i++; 0348 } 0349 break; 0350 case Datatype::datetime64_h: 0351 for (const auto& v : values) { 0352 dateTime()[i] = QDateTime::fromString(v.trimmed().replace(QStringLiteral("'"), QString()) + QStringLiteral("Z"), 0353 QStringLiteral("yyyy-MM-ddThht")); // last t is important. It is the timezone 0354 m_parsed = true; 0355 i++; 0356 } 0357 break; 0358 case Datatype::datetime64_m: 0359 for (const auto& v : values) { 0360 dateTime()[i] = QDateTime::fromString(v.trimmed().replace(QStringLiteral("'"), QString()) + QStringLiteral("Z"), 0361 QStringLiteral("yyyy-MM-ddThh:mmt")); // last t is important. It is the timezone 0362 m_parsed = true; 0363 i++; 0364 } 0365 break; 0366 case Datatype::datetime64_s: 0367 for (const auto& v : values) { 0368 dateTime()[i] = QDateTime::fromString(v.trimmed().replace(QStringLiteral("'"), QString()) + QStringLiteral("Z"), Qt::ISODate); 0369 m_parsed = true; 0370 i++; 0371 } 0372 break; 0373 case Datatype::datetime64_ms: 0374 for (const auto& v : values) { 0375 dateTime()[i] = QDateTime::fromString(v.trimmed().replace(QStringLiteral("'"), QString()) + QStringLiteral("Z"), Qt::ISODateWithMs); 0376 m_parsed = true; 0377 i++; 0378 } 0379 break; 0380 case Datatype::text: 0381 for (const auto& v : values) { 0382 text()[i] = v; 0383 m_parsed = true; 0384 i++; 0385 } 0386 break; 0387 } 0388 }