File indexing completed on 2024-12-22 03:35:41

0001 /*
0002     File                 : AbstractFileFilter.cpp
0003     Project              : LabPlot
0004     Description          : file I/O-filter related interface
0005     --------------------------------------------------------------------
0006     SPDX-FileCopyrightText: 2009-2017 Alexander Semke <alexander.semke@web.de>
0007     SPDX-FileCopyrightText: 2017 Stefan Gerlach <stefan.gerlach@uni.kn>
0008 
0009     SPDX-License-Identifier: GPL-2.0-or-later
0010 */
0011 
0012 #include "backend/datasources/filters/AbstractFileFilter.h"
0013 #include "backend/datasources/filters/SpiceFilter.h"
0014 #include "backend/datasources/filters/VectorBLFFilter.h"
0015 #include "backend/lib/macros.h"
0016 
0017 #include <KLocalizedString>
0018 #include <QDateTime>
0019 #include <QImageReader>
0020 #include <QLocale>
0021 #include <QProcess>
0022 #include <QStandardPaths>
0023 
0024 bool AbstractFileFilter::isNan(const QString& s) {
0025     const static QStringList nanStrings{QStringLiteral("NA"),
0026                                         QStringLiteral("NAN"),
0027                                         QStringLiteral("N/A"),
0028                                         QStringLiteral("-NA"),
0029                                         QStringLiteral("-NAN"),
0030                                         QStringLiteral("NULL")};
0031     if (nanStrings.contains(s, Qt::CaseInsensitive))
0032         return true;
0033 
0034     return false;
0035 }
0036 
0037 AbstractColumn::ColumnMode AbstractFileFilter::columnMode(const QString& valueString, QString& dateTimeFormat, QLocale::Language lang) {
0038     return columnMode(valueString, dateTimeFormat, QLocale(lang));
0039 }
0040 
0041 /*!
0042  * return the column mode for the given value string and settings \c dateTimeFormat and \c locale.
0043  * in case \c dateTimeFormat is empty, all possible datetime formats are tried out to determine the valid datetime object.
0044  */
0045 AbstractColumn::ColumnMode AbstractFileFilter::columnMode(const QString& valueString, QString& dateTimeFormat, const QLocale& locale) {
0046     // TODO: use BigInt as default integer?
0047     auto mode = AbstractColumn::ColumnMode::Integer;
0048     if (valueString.size() == 0) // empty string treated as integer (meaning the non-empty strings will determine the data type)
0049         return mode;
0050 
0051     if (isNan(valueString))
0052         return AbstractColumn::ColumnMode::Double;
0053 
0054     // check if integer first
0055     bool ok;
0056     int intValue = locale.toInt(valueString, &ok);
0057     DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : toInt " << intValue << " ?: " << ok);
0058     Q_UNUSED(intValue)
0059     if (!ok) {
0060         // if not a int, check datetime. if that fails: check double and big int, else it's a string
0061         QDateTime valueDateTime;
0062         if (dateTimeFormat.isEmpty()) {
0063             for (const auto& format : AbstractColumn::dateTimeFormats()) {
0064                 valueDateTime = QDateTime::fromString(valueString, format);
0065                 if (valueDateTime.isValid()) {
0066                     DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : valid DateTime format - " << STDSTRING(format));
0067                     dateTimeFormat = format;
0068                     break;
0069                 }
0070             }
0071         } else
0072             valueDateTime = QDateTime::fromString(valueString, dateTimeFormat);
0073 
0074         if (valueDateTime.isValid()) {
0075             mode = AbstractColumn::ColumnMode::DateTime;
0076         } else {
0077             DEBUG(Q_FUNC_INFO << ", DATETIME invalid! String: " << STDSTRING(valueString) << " DateTime format: " << STDSTRING(dateTimeFormat))
0078 
0079             // check if big integer
0080             qint64 bigIntValue = locale.toLongLong(valueString, &ok);
0081             DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : toBigInt " << bigIntValue << " ?: " << ok);
0082             Q_UNUSED(bigIntValue)
0083             if (ok)
0084                 return AbstractColumn::ColumnMode::BigInt;
0085 
0086             // check if double
0087             double value = locale.toDouble(valueString, &ok);
0088             DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : toDouble " << value << " ?: " << ok);
0089             Q_UNUSED(value)
0090 
0091             mode = ok ? AbstractColumn::ColumnMode::Double : AbstractColumn::ColumnMode::Text;
0092         }
0093     }
0094 
0095     return mode;
0096 }
0097 
0098 QString AbstractFileFilter::dateTimeFormat(const QString& valueString) {
0099     QDateTime valueDateTime;
0100     for (const auto& format : AbstractColumn::dateTimeFormats()) {
0101         valueDateTime = QDateTime::fromString(valueString, format);
0102         if (valueDateTime.isValid())
0103             return format;
0104     }
0105     return QLatin1String("yyyy-MM-dd hh:mm:ss.zzz");
0106 }
0107 
0108 /*
0109 returns the list of all supported locales for numeric data
0110 */
0111 QStringList AbstractFileFilter::numberFormats() {
0112     QStringList formats;
0113     for (int l = 0; l < ENUM_COUNT(QLocale, Language); ++l)
0114         formats << QLocale::languageToString((QLocale::Language)l);
0115 
0116     return formats;
0117 }
0118 
0119 /*!
0120  * \brief AbstractFileFilter::lastErrors
0121  * Errors occured during last parse
0122  * \return
0123  */
0124 QStringList AbstractFileFilter::lastErrors() {
0125     return QStringList();
0126 }
0127 
0128 AbstractFileFilter::FileType AbstractFileFilter::fileType(const QString& fileName) {
0129     DEBUG(Q_FUNC_INFO)
0130     QString fileInfo;
0131 #ifndef HAVE_WINDOWS
0132     // check, if we can guess the file type by content
0133     const QString fileFullPath = QStandardPaths::findExecutable(QLatin1String("file"));
0134     if (!fileFullPath.isEmpty()) {
0135         QProcess proc;
0136         proc.start(fileFullPath, QStringList() << QStringLiteral("-b") << QStringLiteral("-z") << fileName);
0137         if (!proc.waitForFinished(1000)) {
0138             proc.kill();
0139             DEBUG("ERROR: reading file type of file" << STDSTRING(fileName));
0140             return FileType::Binary;
0141         }
0142         fileInfo = QLatin1String(proc.readLine());
0143     }
0144 #endif
0145 
0146     FileType fileType;
0147     QByteArray imageFormat = QImageReader::imageFormat(fileName);
0148     if (fileInfo.contains(QLatin1String("JSON"))
0149         || fileName.endsWith(QLatin1String("json"), Qt::CaseInsensitive)
0150         // json file can be compressed. add all formats supported by KFilterDev, \sa KCompressionDevice::CompressionType
0151         || fileName.endsWith(QLatin1String("json.gz"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("json.bz2"), Qt::CaseInsensitive)
0152         || fileName.endsWith(QLatin1String("json.lzma"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("json.xz"), Qt::CaseInsensitive)
0153         || fileName.endsWith(QLatin1String("har"), Qt::CaseInsensitive)) {
0154         //*.json files can be recognized as ASCII. so, do the check for the json-extension as first.
0155         fileType = FileType::JSON;
0156     } else if (SpiceFilter::isSpiceFile(fileName))
0157         fileType = FileType::Spice;
0158 #ifdef HAVE_QXLSX // before ASCII, because XLSX is XML and XML is ASCII
0159     else if (fileInfo.contains(QLatin1String("Microsoft Excel")) || fileName.endsWith(QLatin1String("xlsx"), Qt::CaseInsensitive))
0160         fileType = FileType::XLSX;
0161 #endif
0162 #ifdef HAVE_ORCUS // before ASCII, because ODS is XML and XML is ASCII
0163     else if (fileInfo.contains(QLatin1String("OpenDocument Spreadsheet")) || fileName.endsWith(QLatin1String("ods"), Qt::CaseInsensitive))
0164         fileType = FileType::Ods;
0165 #endif
0166     else if (fileInfo.contains(QLatin1String("ASCII")) || fileName.endsWith(QLatin1String("txt"), Qt::CaseInsensitive)
0167              || fileName.endsWith(QLatin1String("csv"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("dat"), Qt::CaseInsensitive)
0168              || fileInfo.contains(QLatin1String("compressed data")) /* for gzipped ascii data */) {
0169         if (fileName.endsWith(QLatin1String(".sas7bdat"), Qt::CaseInsensitive))
0170             fileType = FileType::READSTAT;
0171         else // probably ascii data
0172             fileType = FileType::Ascii;
0173     }
0174 #ifdef HAVE_MATIO // before HDF5 to prefer this filter for MAT 7.4 files
0175     else if (fileInfo.contains(QLatin1String("Matlab")) || fileName.endsWith(QLatin1String("mat"), Qt::CaseInsensitive))
0176         fileType = FileType::MATIO;
0177 #endif
0178 #ifdef HAVE_HDF5 // before NETCDF to treat NetCDF 4 files with .nc ending as HDF5 when fileInfo detects it (HDF4 not supported)
0179     else if (fileInfo.contains(QLatin1String("Hierarchical Data Format (version 5)")) || fileName.endsWith(QLatin1String("h5"), Qt::CaseInsensitive)
0180              || (fileName.endsWith(QLatin1String("hdf"), Qt::CaseInsensitive) && !fileInfo.contains(QLatin1String("(version 4)")))
0181              || fileName.endsWith(QLatin1String("hdf5"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("nc4"), Qt::CaseInsensitive))
0182         fileType = FileType::HDF5;
0183 #endif
0184 #ifdef HAVE_NETCDF
0185     else if (fileInfo.contains(QLatin1String("NetCDF Data Format")) || fileName.endsWith(QLatin1String("nc"), Qt::CaseInsensitive)
0186              || fileName.endsWith(QLatin1String("netcdf"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("cdf"), Qt::CaseInsensitive))
0187         fileType = FileType::NETCDF;
0188 #endif
0189 #ifdef HAVE_VECTOR_BLF
0190     else if (fileName.endsWith(QLatin1String("blf")) && VectorBLFFilter::isValid(fileName))
0191         fileType = FileType::VECTOR_BLF;
0192 #endif
0193 #ifdef HAVE_FITS
0194     else if (fileInfo.contains(QLatin1String("FITS image data")) || fileName.endsWith(QLatin1String("fits"), Qt::CaseInsensitive)
0195              || fileName.endsWith(QLatin1String("fit"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("fts"), Qt::CaseInsensitive))
0196         fileType = FileType::FITS;
0197 #endif
0198 #ifdef HAVE_ZIP
0199     else if (fileInfo.contains(QLatin1String("ROOT")) // can be "ROOT Data Format" or "ROOT file Version ??? (Compression: 1)"
0200              || fileName.endsWith(QLatin1String("root"), Qt::CaseInsensitive)) // TODO find out file description
0201         fileType = FileType::ROOT;
0202 #endif
0203 #ifdef HAVE_READSTAT // sas7bdat -> ASCII
0204     else if (fileInfo.startsWith(QLatin1String("SAS")) || fileInfo.startsWith(QLatin1String("SPSS"))
0205              || fileName.endsWith(QLatin1String(".dta"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".sav"), Qt::CaseInsensitive)
0206              || fileName.endsWith(QLatin1String(".zsav"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".por"), Qt::CaseInsensitive)
0207              || fileName.endsWith(QLatin1String(".sas7bcat"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".xpt"), Qt::CaseInsensitive)
0208              || fileName.endsWith(QLatin1String(".xpt5"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".xpt8"), Qt::CaseInsensitive))
0209         fileType = FileType::READSTAT;
0210 #endif
0211     else if (fileInfo.contains(QLatin1String("image")) || fileInfo.contains(QLatin1String("bitmap")) || !imageFormat.isEmpty())
0212         fileType = FileType::Image;
0213     else
0214         fileType = FileType::Binary;
0215 
0216     return fileType;
0217 }
0218 
0219 /*!
0220   returns the list of all supported data file formats
0221 */
0222 QStringList AbstractFileFilter::fileTypes() {
0223     // TODO: Used by what? #ifdef HAVE_QXLSX?
0224     return (QStringList() << i18n("ASCII Data") << i18n("Binary Data") << i18n("Image") << i18n("Excel") << i18n("Hierarchical Data Format 5 (HDF5)")
0225                           << i18n("Network Common Data Format (NetCDF)") << i18n("Flexible Image Transport System Data Format (FITS)") << i18n("JSON Data")
0226                           << i18n("ROOT (CERN) Histograms") << i18n("Spice") << i18n("SAS, Stata or SPSS"));
0227 }
0228 
0229 QString AbstractFileFilter::convertFromNumberToColumn(int n) {
0230     // main code from https://www.geeksforgeeks.org/find-excel-column-name-given-number/
0231     // Function to print column name for a given column number
0232 
0233     char str[1000]; // To store result (column name)
0234     int i = 0; // To store current index in str which is result
0235 
0236     while (n > 0) {
0237         // Find remainder
0238         int rem = n % 26;
0239 
0240         // If remainder is 0, then a 'Z' must be there in output
0241         if (rem == 0) {
0242             str[i++] = 'Z';
0243             n = (n / 26) - 1;
0244         } else // If remainder is non-zero
0245         {
0246             str[i++] = (rem - 1) + 'A';
0247             n = n / 26;
0248         }
0249     }
0250     str[i] = '\0';
0251 
0252     // Reverse the string and print result
0253     std::reverse(str, str + strlen(str));
0254 
0255     return QLatin1String(str);
0256 }