File indexing completed on 2024-12-22 03:35:41
0001 /* 0002 File : AbstractFileFilter.h 0003 Project : LabPlot 0004 Description : file I/O-filter related interface 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2009-2017 Alexander Semke <alexander.semke@web.de> 0007 SPDX-FileCopyrightText: 2017 Stefan Gerlach <stefan.gerlach@uni.kn> 0008 0009 SPDX-License-Identifier: GPL-2.0-or-later 0010 */ 0011 0012 #include "backend/datasources/filters/AbstractFileFilter.h" 0013 #include "backend/datasources/filters/SpiceFilter.h" 0014 #include "backend/datasources/filters/VectorBLFFilter.h" 0015 #include "backend/lib/macros.h" 0016 0017 #include <KLocalizedString> 0018 #include <QDateTime> 0019 #include <QImageReader> 0020 #include <QLocale> 0021 #include <QProcess> 0022 #include <QStandardPaths> 0023 0024 bool AbstractFileFilter::isNan(const QString& s) { 0025 const static QStringList nanStrings{QStringLiteral("NA"), 0026 QStringLiteral("NAN"), 0027 QStringLiteral("N/A"), 0028 QStringLiteral("-NA"), 0029 QStringLiteral("-NAN"), 0030 QStringLiteral("NULL")}; 0031 if (nanStrings.contains(s, Qt::CaseInsensitive)) 0032 return true; 0033 0034 return false; 0035 } 0036 0037 AbstractColumn::ColumnMode AbstractFileFilter::columnMode(const QString& valueString, QString& dateTimeFormat, QLocale::Language lang) { 0038 return columnMode(valueString, dateTimeFormat, QLocale(lang)); 0039 } 0040 0041 /*! 0042 * return the column mode for the given value string and settings \c dateTimeFormat and \c locale. 0043 * in case \c dateTimeFormat is empty, all possible datetime formats are tried out to determine the valid datetime object. 0044 */ 0045 AbstractColumn::ColumnMode AbstractFileFilter::columnMode(const QString& valueString, QString& dateTimeFormat, const QLocale& locale) { 0046 // TODO: use BigInt as default integer? 0047 auto mode = AbstractColumn::ColumnMode::Integer; 0048 if (valueString.size() == 0) // empty string treated as integer (meaning the non-empty strings will determine the data type) 0049 return mode; 0050 0051 if (isNan(valueString)) 0052 return AbstractColumn::ColumnMode::Double; 0053 0054 // check if integer first 0055 bool ok; 0056 int intValue = locale.toInt(valueString, &ok); 0057 DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : toInt " << intValue << " ?: " << ok); 0058 Q_UNUSED(intValue) 0059 if (!ok) { 0060 // if not a int, check datetime. if that fails: check double and big int, else it's a string 0061 QDateTime valueDateTime; 0062 if (dateTimeFormat.isEmpty()) { 0063 for (const auto& format : AbstractColumn::dateTimeFormats()) { 0064 valueDateTime = QDateTime::fromString(valueString, format); 0065 if (valueDateTime.isValid()) { 0066 DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : valid DateTime format - " << STDSTRING(format)); 0067 dateTimeFormat = format; 0068 break; 0069 } 0070 } 0071 } else 0072 valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); 0073 0074 if (valueDateTime.isValid()) { 0075 mode = AbstractColumn::ColumnMode::DateTime; 0076 } else { 0077 DEBUG(Q_FUNC_INFO << ", DATETIME invalid! String: " << STDSTRING(valueString) << " DateTime format: " << STDSTRING(dateTimeFormat)) 0078 0079 // check if big integer 0080 qint64 bigIntValue = locale.toLongLong(valueString, &ok); 0081 DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : toBigInt " << bigIntValue << " ?: " << ok); 0082 Q_UNUSED(bigIntValue) 0083 if (ok) 0084 return AbstractColumn::ColumnMode::BigInt; 0085 0086 // check if double 0087 double value = locale.toDouble(valueString, &ok); 0088 DEBUG(Q_FUNC_INFO << ", " << STDSTRING(valueString) << " : toDouble " << value << " ?: " << ok); 0089 Q_UNUSED(value) 0090 0091 mode = ok ? AbstractColumn::ColumnMode::Double : AbstractColumn::ColumnMode::Text; 0092 } 0093 } 0094 0095 return mode; 0096 } 0097 0098 QString AbstractFileFilter::dateTimeFormat(const QString& valueString) { 0099 QDateTime valueDateTime; 0100 for (const auto& format : AbstractColumn::dateTimeFormats()) { 0101 valueDateTime = QDateTime::fromString(valueString, format); 0102 if (valueDateTime.isValid()) 0103 return format; 0104 } 0105 return QLatin1String("yyyy-MM-dd hh:mm:ss.zzz"); 0106 } 0107 0108 /* 0109 returns the list of all supported locales for numeric data 0110 */ 0111 QStringList AbstractFileFilter::numberFormats() { 0112 QStringList formats; 0113 for (int l = 0; l < ENUM_COUNT(QLocale, Language); ++l) 0114 formats << QLocale::languageToString((QLocale::Language)l); 0115 0116 return formats; 0117 } 0118 0119 /*! 0120 * \brief AbstractFileFilter::lastErrors 0121 * Errors occured during last parse 0122 * \return 0123 */ 0124 QStringList AbstractFileFilter::lastErrors() { 0125 return QStringList(); 0126 } 0127 0128 AbstractFileFilter::FileType AbstractFileFilter::fileType(const QString& fileName) { 0129 DEBUG(Q_FUNC_INFO) 0130 QString fileInfo; 0131 #ifndef HAVE_WINDOWS 0132 // check, if we can guess the file type by content 0133 const QString fileFullPath = QStandardPaths::findExecutable(QLatin1String("file")); 0134 if (!fileFullPath.isEmpty()) { 0135 QProcess proc; 0136 proc.start(fileFullPath, QStringList() << QStringLiteral("-b") << QStringLiteral("-z") << fileName); 0137 if (!proc.waitForFinished(1000)) { 0138 proc.kill(); 0139 DEBUG("ERROR: reading file type of file" << STDSTRING(fileName)); 0140 return FileType::Binary; 0141 } 0142 fileInfo = QLatin1String(proc.readLine()); 0143 } 0144 #endif 0145 0146 FileType fileType; 0147 QByteArray imageFormat = QImageReader::imageFormat(fileName); 0148 if (fileInfo.contains(QLatin1String("JSON")) 0149 || fileName.endsWith(QLatin1String("json"), Qt::CaseInsensitive) 0150 // json file can be compressed. add all formats supported by KFilterDev, \sa KCompressionDevice::CompressionType 0151 || fileName.endsWith(QLatin1String("json.gz"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("json.bz2"), Qt::CaseInsensitive) 0152 || fileName.endsWith(QLatin1String("json.lzma"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("json.xz"), Qt::CaseInsensitive) 0153 || fileName.endsWith(QLatin1String("har"), Qt::CaseInsensitive)) { 0154 //*.json files can be recognized as ASCII. so, do the check for the json-extension as first. 0155 fileType = FileType::JSON; 0156 } else if (SpiceFilter::isSpiceFile(fileName)) 0157 fileType = FileType::Spice; 0158 #ifdef HAVE_QXLSX // before ASCII, because XLSX is XML and XML is ASCII 0159 else if (fileInfo.contains(QLatin1String("Microsoft Excel")) || fileName.endsWith(QLatin1String("xlsx"), Qt::CaseInsensitive)) 0160 fileType = FileType::XLSX; 0161 #endif 0162 #ifdef HAVE_ORCUS // before ASCII, because ODS is XML and XML is ASCII 0163 else if (fileInfo.contains(QLatin1String("OpenDocument Spreadsheet")) || fileName.endsWith(QLatin1String("ods"), Qt::CaseInsensitive)) 0164 fileType = FileType::Ods; 0165 #endif 0166 else if (fileInfo.contains(QLatin1String("ASCII")) || fileName.endsWith(QLatin1String("txt"), Qt::CaseInsensitive) 0167 || fileName.endsWith(QLatin1String("csv"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("dat"), Qt::CaseInsensitive) 0168 || fileInfo.contains(QLatin1String("compressed data")) /* for gzipped ascii data */) { 0169 if (fileName.endsWith(QLatin1String(".sas7bdat"), Qt::CaseInsensitive)) 0170 fileType = FileType::READSTAT; 0171 else // probably ascii data 0172 fileType = FileType::Ascii; 0173 } 0174 #ifdef HAVE_MATIO // before HDF5 to prefer this filter for MAT 7.4 files 0175 else if (fileInfo.contains(QLatin1String("Matlab")) || fileName.endsWith(QLatin1String("mat"), Qt::CaseInsensitive)) 0176 fileType = FileType::MATIO; 0177 #endif 0178 #ifdef HAVE_HDF5 // before NETCDF to treat NetCDF 4 files with .nc ending as HDF5 when fileInfo detects it (HDF4 not supported) 0179 else if (fileInfo.contains(QLatin1String("Hierarchical Data Format (version 5)")) || fileName.endsWith(QLatin1String("h5"), Qt::CaseInsensitive) 0180 || (fileName.endsWith(QLatin1String("hdf"), Qt::CaseInsensitive) && !fileInfo.contains(QLatin1String("(version 4)"))) 0181 || fileName.endsWith(QLatin1String("hdf5"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("nc4"), Qt::CaseInsensitive)) 0182 fileType = FileType::HDF5; 0183 #endif 0184 #ifdef HAVE_NETCDF 0185 else if (fileInfo.contains(QLatin1String("NetCDF Data Format")) || fileName.endsWith(QLatin1String("nc"), Qt::CaseInsensitive) 0186 || fileName.endsWith(QLatin1String("netcdf"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("cdf"), Qt::CaseInsensitive)) 0187 fileType = FileType::NETCDF; 0188 #endif 0189 #ifdef HAVE_VECTOR_BLF 0190 else if (fileName.endsWith(QLatin1String("blf")) && VectorBLFFilter::isValid(fileName)) 0191 fileType = FileType::VECTOR_BLF; 0192 #endif 0193 #ifdef HAVE_FITS 0194 else if (fileInfo.contains(QLatin1String("FITS image data")) || fileName.endsWith(QLatin1String("fits"), Qt::CaseInsensitive) 0195 || fileName.endsWith(QLatin1String("fit"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String("fts"), Qt::CaseInsensitive)) 0196 fileType = FileType::FITS; 0197 #endif 0198 #ifdef HAVE_ZIP 0199 else if (fileInfo.contains(QLatin1String("ROOT")) // can be "ROOT Data Format" or "ROOT file Version ??? (Compression: 1)" 0200 || fileName.endsWith(QLatin1String("root"), Qt::CaseInsensitive)) // TODO find out file description 0201 fileType = FileType::ROOT; 0202 #endif 0203 #ifdef HAVE_READSTAT // sas7bdat -> ASCII 0204 else if (fileInfo.startsWith(QLatin1String("SAS")) || fileInfo.startsWith(QLatin1String("SPSS")) 0205 || fileName.endsWith(QLatin1String(".dta"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".sav"), Qt::CaseInsensitive) 0206 || fileName.endsWith(QLatin1String(".zsav"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".por"), Qt::CaseInsensitive) 0207 || fileName.endsWith(QLatin1String(".sas7bcat"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".xpt"), Qt::CaseInsensitive) 0208 || fileName.endsWith(QLatin1String(".xpt5"), Qt::CaseInsensitive) || fileName.endsWith(QLatin1String(".xpt8"), Qt::CaseInsensitive)) 0209 fileType = FileType::READSTAT; 0210 #endif 0211 else if (fileInfo.contains(QLatin1String("image")) || fileInfo.contains(QLatin1String("bitmap")) || !imageFormat.isEmpty()) 0212 fileType = FileType::Image; 0213 else 0214 fileType = FileType::Binary; 0215 0216 return fileType; 0217 } 0218 0219 /*! 0220 returns the list of all supported data file formats 0221 */ 0222 QStringList AbstractFileFilter::fileTypes() { 0223 // TODO: Used by what? #ifdef HAVE_QXLSX? 0224 return (QStringList() << i18n("ASCII Data") << i18n("Binary Data") << i18n("Image") << i18n("Excel") << i18n("Hierarchical Data Format 5 (HDF5)") 0225 << i18n("Network Common Data Format (NetCDF)") << i18n("Flexible Image Transport System Data Format (FITS)") << i18n("JSON Data") 0226 << i18n("ROOT (CERN) Histograms") << i18n("Spice") << i18n("SAS, Stata or SPSS")); 0227 } 0228 0229 QString AbstractFileFilter::convertFromNumberToColumn(int n) { 0230 // main code from https://www.geeksforgeeks.org/find-excel-column-name-given-number/ 0231 // Function to print column name for a given column number 0232 0233 char str[1000]; // To store result (column name) 0234 int i = 0; // To store current index in str which is result 0235 0236 while (n > 0) { 0237 // Find remainder 0238 int rem = n % 26; 0239 0240 // If remainder is 0, then a 'Z' must be there in output 0241 if (rem == 0) { 0242 str[i++] = 'Z'; 0243 n = (n / 26) - 1; 0244 } else // If remainder is non-zero 0245 { 0246 str[i++] = (rem - 1) + 'A'; 0247 n = n / 26; 0248 } 0249 } 0250 str[i] = '\0'; 0251 0252 // Reverse the string and print result 0253 std::reverse(str, str + strlen(str)); 0254 0255 return QLatin1String(str); 0256 }