// File indexing completed on 2024-05-12 03:47:42
0001 /* 0002 File : DatasetHandler.cpp 0003 Project : LabPlot 0004 Description : Processes a dataset's metadata file 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2019 Kovacs Ferencz <kferike98@gmail.com> 0007 SPDX-FileCopyrightText: 2019-2023 Alexander Semke <alexander.semke@web.de> 0008 0009 SPDX-License-Identifier: GPL-2.0-or-later 0010 */ 0011 0012 #include "backend/datasources/DatasetHandler.h" 0013 #include "backend/datasources/filters/AsciiFilter.h" 0014 #include "backend/lib/macros.h" 0015 0016 #include <QDir> 0017 #include <QFile> 0018 #include <QJsonArray> 0019 #include <QJsonObject> 0020 #include <QMessageBox> 0021 #include <QStandardPaths> 0022 #include <QTextEdit> 0023 #include <QtNetwork/QNetworkAccessManager> 0024 #include <QtNetwork/QNetworkReply> 0025 0026 #include <KLocalizedString> 0027 0028 /*! 0029 \class DatasetHandler 0030 \brief Provides functionality to process a metadata file of a dataset, configure a spreadsheet and filter based on it, download the dataset 0031 and load it into the spreadsheet. 0032 0033 \ingroup datasources 0034 */ 0035 DatasetHandler::DatasetHandler(Spreadsheet* spreadsheet) 0036 : m_spreadsheet(spreadsheet) 0037 , m_filter(new AsciiFilter) 0038 , m_downloadManager(new QNetworkAccessManager) { 0039 connect(m_downloadManager, &QNetworkAccessManager::finished, this, &DatasetHandler::downloadFinished); 0040 connect(this, &DatasetHandler::downloadCompleted, this, &DatasetHandler::processDataset); 0041 } 0042 0043 DatasetHandler::~DatasetHandler() { 0044 delete m_downloadManager; 0045 delete m_filter; 0046 } 0047 0048 /** 0049 * @brief Initiates processing the metadata file,, located at the given path, belonging to a dataset. 
0050 * @param path the path to the metadata file 0051 */ 0052 void DatasetHandler::processMetadata(const QJsonObject& object, const QString& description) { 0053 m_object = new QJsonObject(object); 0054 DEBUG("Start processing dataset..."); 0055 0056 if (!m_object->isEmpty()) { 0057 configureFilter(); 0058 configureSpreadsheet(description); 0059 prepareForDataset(); 0060 } 0061 } 0062 0063 /** 0064 * @brief Marks the metadata file being invalid by setting the value of a flag, also pops up a messagebox. 0065 */ 0066 void DatasetHandler::markMetadataAsInvalid() { 0067 m_invalidMetadataFile = true; 0068 QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("The metadata file for the selected dataset is invalid.")); 0069 } 0070 0071 /** 0072 * @brief Configures the filter, that will be used later, based on the metadata file. 0073 */ 0074 void DatasetHandler::configureFilter() { 0075 // set some default values common to many datasets 0076 m_filter->setNumberFormat(QLocale::C); 0077 m_filter->setSkipEmptyParts(false); 0078 m_filter->setHeaderEnabled(false); 0079 m_filter->setRemoveQuotesEnabled(true); 0080 0081 // read properties specified in the dataset description 0082 if (!m_object->isEmpty()) { 0083 if (m_object->contains(QLatin1String("separator"))) 0084 m_filter->setSeparatingCharacter(m_object->value(QStringLiteral("separator")).toString()); 0085 0086 if (m_object->contains(QLatin1String("comment_character"))) 0087 m_filter->setCommentCharacter(m_object->value(QStringLiteral("comment_character")).toString()); 0088 0089 if (m_object->contains(QLatin1String("create_index_column"))) 0090 m_filter->setCreateIndexEnabled(m_object->value(QStringLiteral("create_index_column")).toBool()); 0091 0092 if (m_object->contains(QLatin1String("skip_empty_parts"))) 0093 m_filter->setSkipEmptyParts(m_object->value(QStringLiteral("skip_empty_parts")).toBool()); 0094 0095 if (m_object->contains(QLatin1String("simplify_whitespaces"))) 0096 
m_filter->setSimplifyWhitespacesEnabled(m_object->value(QStringLiteral("simplify_whitespaces")).toBool()); 0097 0098 if (m_object->contains(QLatin1String("remove_quotes"))) 0099 m_filter->setRemoveQuotesEnabled(m_object->value(QStringLiteral("remove_quotes")).toBool()); 0100 0101 if (m_object->contains(QLatin1String("use_first_row_for_vectorname"))) { 0102 m_filter->setHeaderEnabled(m_object->value(QStringLiteral("use_first_row_for_vectorname")).toBool()); 0103 m_filter->setHeaderLine(1); 0104 } 0105 0106 if (m_object->contains(QLatin1String("number_format"))) 0107 m_filter->setNumberFormat(QLocale::Language(m_object->value(QStringLiteral("number_format")).toInt())); 0108 0109 if (m_object->contains(QLatin1String("DateTime_format"))) 0110 m_filter->setDateTimeFormat(m_object->value(QStringLiteral("DateTime_format")).toString()); 0111 0112 if (m_object->contains(QLatin1String("columns"))) { 0113 const QJsonArray& columnsArray = m_object->value(QStringLiteral("columns")).toArray(); 0114 QStringList columnNames; 0115 for (const auto& col : columnsArray) 0116 columnNames << col.toString(); 0117 0118 m_filter->setVectorNames(columnNames); 0119 } 0120 } else { 0121 DEBUG("Empty object"); 0122 markMetadataAsInvalid(); 0123 } 0124 } 0125 0126 /** 0127 * @brief Configures the spreadsheet based on the metadata file. 
0128 */ 0129 void DatasetHandler::configureSpreadsheet(const QString& description) { 0130 DEBUG("Start preparing spreadsheet"); 0131 if (!m_object->isEmpty()) { 0132 if (m_object->contains(QLatin1String("name"))) 0133 m_spreadsheet->setName(m_object->value(QStringLiteral("name")).toString()); 0134 else 0135 markMetadataAsInvalid(); 0136 0137 if (description.startsWith(QLatin1String("<!DOCTYPE html"))) { 0138 // remove html-formatting 0139 QTextEdit te; 0140 te.setHtml(description); 0141 m_spreadsheet->setComment(te.toPlainText()); 0142 } else 0143 m_spreadsheet->setComment(description); 0144 } else 0145 markMetadataAsInvalid(); 0146 } 0147 0148 /** 0149 * @brief Extracts the download URL of the dataset and initiates the process of download. 0150 */ 0151 void DatasetHandler::prepareForDataset() { 0152 DEBUG("Start downloading dataset"); 0153 if (!m_object->isEmpty()) { 0154 if (m_object->contains(QLatin1String("url"))) { 0155 const QString& url = m_object->value(QStringLiteral("url")).toString(); 0156 doDownload(QUrl(url)); 0157 } else { 0158 QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("There is no download URL present in the metadata file!")); 0159 } 0160 } else 0161 markMetadataAsInvalid(); 0162 } 0163 0164 /** 0165 * @brief Starts the download of the dataset. 
0166 * @param url the download URL of the dataset 0167 */ 0168 void DatasetHandler::doDownload(const QUrl& url) { 0169 QDEBUG("Download request " << url); 0170 QNetworkRequest request(url); 0171 request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, true); 0172 m_currentDownload = m_downloadManager->get(request); 0173 connect(m_currentDownload, &QNetworkReply::downloadProgress, [this](qint64 bytesReceived, qint64 bytesTotal) { 0174 double progress; 0175 if (bytesTotal <= 0) 0176 progress = 0; 0177 else 0178 progress = 100 * (static_cast<double>(bytesReceived) / static_cast<double>(bytesTotal)); 0179 0180 Q_EMIT downloadProgress(progress); 0181 }); 0182 } 0183 0184 /** 0185 * @brief Called when the download of the dataset is finished. 0186 */ 0187 void DatasetHandler::downloadFinished(QNetworkReply* reply) { 0188 DEBUG("Download finished"); 0189 const QUrl& url = reply->url(); 0190 if (reply->error()) { 0191 QMessageBox::critical(nullptr, 0192 i18n("Failed to download the dataset"), 0193 i18n("Failed to download the dataset from %1.\n%2.", url.toDisplayString(), reply->errorString())); 0194 } else { 0195 QString filename = saveFileName(url); 0196 if (saveToDisk(filename, reply)) { 0197 qDebug("Download of %s succeeded (saved to %s)\n", url.toEncoded().constData(), qPrintable(filename)); 0198 m_fileName = filename; 0199 Q_EMIT downloadCompleted(); 0200 } 0201 } 0202 0203 m_currentDownload = nullptr; 0204 reply->deleteLater(); 0205 } 0206 0207 /** 0208 * @brief Returns the name and path of the file that will contain the content of the reply (based on the URL). 0209 * @param url 0210 */ 0211 QString DatasetHandler::saveFileName(const QUrl& url) { 0212 const QString path = url.path(); 0213 0214 // get the extension of the downloaded file 0215 const QString downloadFileName = QFileInfo(path).fileName(); 0216 int lastIndex = downloadFileName.lastIndexOf(QLatin1Char('.')); 0217 const QString fileExtension = lastIndex >= 0 ? 
downloadFileName.right(downloadFileName.length() - lastIndex) : QString(); 0218 0219 QString basename = m_object->value(QStringLiteral("filename")).toString() + fileExtension; 0220 0221 if (basename.isEmpty()) 0222 basename = QStringLiteral("url"); 0223 0224 QDir downloadDir(QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + QStringLiteral("/datasets_local/")); 0225 if (!downloadDir.exists()) { 0226 if (!downloadDir.mkpath(downloadDir.path())) { 0227 QMessageBox::critical(nullptr, 0228 i18n("Failed to save the dataset"), 0229 i18n("Failed to create the directory %1 to save the dataset.", downloadDir.path())); 0230 return {}; 0231 } 0232 } 0233 0234 QString fileName = downloadDir.path() + QLatin1Char('/') + basename; 0235 QFileInfo fileInfo(fileName); 0236 if (QFile::exists(fileName)) { 0237 if (fileInfo.lastModified().addDays(1) < QDateTime::currentDateTime()) { 0238 QFile removeFile(fileName); 0239 removeFile.remove(); 0240 } else 0241 DEBUG("Dataset file already exists, no need to download it again."); 0242 } 0243 return fileName; 0244 } 0245 0246 /** 0247 * @brief Saves the content of the network reply to the given path under the given name. 0248 */ 0249 bool DatasetHandler::saveToDisk(const QString& filename, QIODevice* data) { 0250 QFile file(filename); 0251 if (!file.open(QIODevice::WriteOnly)) { 0252 QMessageBox::critical(nullptr, i18n("Failed to save the dataset"), i18n("Couldn't open the file %1 for writing.\n%2", filename, file.errorString())); 0253 return false; 0254 } 0255 0256 file.write(data->readAll()); 0257 file.close(); 0258 0259 return true; 0260 } 0261 0262 /** 0263 * @brief Processes the downloaded dataset with the help of the already configured filter. 
0264 */ 0265 void DatasetHandler::processDataset() { 0266 m_filter->readDataFromFile(m_fileName, m_spreadsheet); 0267 0268 // set column comments/descriptions, if available 0269 // TODO: 0270 // if (!m_object->isEmpty()) { 0271 // int index = 0; 0272 // const int columnsCount = m_spreadsheet->columnCount(); 0273 // while(m_object->contains(i18n("column_description_%1", index)) && (index < columnsCount)) { 0274 // m_spreadsheet->column(index)->setComment(m_object->value(i18n("column_description_%1", index)).toString()); 0275 // ++index; 0276 // } 0277 // } 0278 }