File indexing completed on 2024-05-12 03:47:42

0001 /*
0002     File                 : DatasetHandler.cpp
0003     Project              : LabPlot
0004     Description          : Processes a dataset's metadata file
0005     --------------------------------------------------------------------
0006     SPDX-FileCopyrightText: 2019 Kovacs Ferencz <kferike98@gmail.com>
0007     SPDX-FileCopyrightText: 2019-2023 Alexander Semke <alexander.semke@web.de>
0008 
0009     SPDX-License-Identifier: GPL-2.0-or-later
0010 */
0011 
0012 #include "backend/datasources/DatasetHandler.h"
0013 #include "backend/datasources/filters/AsciiFilter.h"
0014 #include "backend/lib/macros.h"
0015 
0016 #include <QDir>
0017 #include <QFile>
0018 #include <QJsonArray>
0019 #include <QJsonObject>
0020 #include <QMessageBox>
0021 #include <QStandardPaths>
0022 #include <QTextEdit>
0023 #include <QtNetwork/QNetworkAccessManager>
0024 #include <QtNetwork/QNetworkReply>
0025 
0026 #include <KLocalizedString>
0027 
0028 /*!
0029   \class DatasetHandler
  \brief Provides the functionality to process the metadata of a dataset, to configure a spreadsheet and a filter based on it,
  to download the dataset and to load it into the spreadsheet.
0032 
0033   \ingroup datasources
0034 */
0035 DatasetHandler::DatasetHandler(Spreadsheet* spreadsheet)
0036     : m_spreadsheet(spreadsheet)
0037     , m_filter(new AsciiFilter)
0038     , m_downloadManager(new QNetworkAccessManager) {
0039     connect(m_downloadManager, &QNetworkAccessManager::finished, this, &DatasetHandler::downloadFinished);
0040     connect(this, &DatasetHandler::downloadCompleted, this, &DatasetHandler::processDataset);
0041 }
0042 
0043 DatasetHandler::~DatasetHandler() {
0044     delete m_downloadManager;
0045     delete m_filter;
0046 }
0047 
0048 /**
0049  * @brief Initiates processing the metadata file,, located at the given path, belonging to a dataset.
0050  * @param path the path to the metadata file
0051  */
0052 void DatasetHandler::processMetadata(const QJsonObject& object, const QString& description) {
0053     m_object = new QJsonObject(object);
0054     DEBUG("Start processing dataset...");
0055 
0056     if (!m_object->isEmpty()) {
0057         configureFilter();
0058         configureSpreadsheet(description);
0059         prepareForDataset();
0060     }
0061 }
0062 
0063 /**
0064  * @brief Marks the metadata file being invalid by setting the value of a flag, also pops up a messagebox.
0065  */
0066 void DatasetHandler::markMetadataAsInvalid() {
0067     m_invalidMetadataFile = true;
0068     QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("The metadata file for the selected dataset is invalid."));
0069 }
0070 
0071 /**
0072  * @brief Configures the filter, that will be used later, based on the metadata file.
0073  */
0074 void DatasetHandler::configureFilter() {
0075     // set some default values common to many datasets
0076     m_filter->setNumberFormat(QLocale::C);
0077     m_filter->setSkipEmptyParts(false);
0078     m_filter->setHeaderEnabled(false);
0079     m_filter->setRemoveQuotesEnabled(true);
0080 
0081     // read properties specified in the dataset description
0082     if (!m_object->isEmpty()) {
0083         if (m_object->contains(QLatin1String("separator")))
0084             m_filter->setSeparatingCharacter(m_object->value(QStringLiteral("separator")).toString());
0085 
0086         if (m_object->contains(QLatin1String("comment_character")))
0087             m_filter->setCommentCharacter(m_object->value(QStringLiteral("comment_character")).toString());
0088 
0089         if (m_object->contains(QLatin1String("create_index_column")))
0090             m_filter->setCreateIndexEnabled(m_object->value(QStringLiteral("create_index_column")).toBool());
0091 
0092         if (m_object->contains(QLatin1String("skip_empty_parts")))
0093             m_filter->setSkipEmptyParts(m_object->value(QStringLiteral("skip_empty_parts")).toBool());
0094 
0095         if (m_object->contains(QLatin1String("simplify_whitespaces")))
0096             m_filter->setSimplifyWhitespacesEnabled(m_object->value(QStringLiteral("simplify_whitespaces")).toBool());
0097 
0098         if (m_object->contains(QLatin1String("remove_quotes")))
0099             m_filter->setRemoveQuotesEnabled(m_object->value(QStringLiteral("remove_quotes")).toBool());
0100 
0101         if (m_object->contains(QLatin1String("use_first_row_for_vectorname"))) {
0102             m_filter->setHeaderEnabled(m_object->value(QStringLiteral("use_first_row_for_vectorname")).toBool());
0103             m_filter->setHeaderLine(1);
0104         }
0105 
0106         if (m_object->contains(QLatin1String("number_format")))
0107             m_filter->setNumberFormat(QLocale::Language(m_object->value(QStringLiteral("number_format")).toInt()));
0108 
0109         if (m_object->contains(QLatin1String("DateTime_format")))
0110             m_filter->setDateTimeFormat(m_object->value(QStringLiteral("DateTime_format")).toString());
0111 
0112         if (m_object->contains(QLatin1String("columns"))) {
0113             const QJsonArray& columnsArray = m_object->value(QStringLiteral("columns")).toArray();
0114             QStringList columnNames;
0115             for (const auto& col : columnsArray)
0116                 columnNames << col.toString();
0117 
0118             m_filter->setVectorNames(columnNames);
0119         }
0120     } else {
0121         DEBUG("Empty object");
0122         markMetadataAsInvalid();
0123     }
0124 }
0125 
0126 /**
0127  * @brief Configures the spreadsheet based on the metadata file.
0128  */
0129 void DatasetHandler::configureSpreadsheet(const QString& description) {
0130     DEBUG("Start preparing spreadsheet");
0131     if (!m_object->isEmpty()) {
0132         if (m_object->contains(QLatin1String("name")))
0133             m_spreadsheet->setName(m_object->value(QStringLiteral("name")).toString());
0134         else
0135             markMetadataAsInvalid();
0136 
0137         if (description.startsWith(QLatin1String("<!DOCTYPE html"))) {
0138             // remove html-formatting
0139             QTextEdit te;
0140             te.setHtml(description);
0141             m_spreadsheet->setComment(te.toPlainText());
0142         } else
0143             m_spreadsheet->setComment(description);
0144     } else
0145         markMetadataAsInvalid();
0146 }
0147 
0148 /**
0149  * @brief Extracts the download URL of the dataset and initiates the process of download.
0150  */
0151 void DatasetHandler::prepareForDataset() {
0152     DEBUG("Start downloading dataset");
0153     if (!m_object->isEmpty()) {
0154         if (m_object->contains(QLatin1String("url"))) {
0155             const QString& url = m_object->value(QStringLiteral("url")).toString();
0156             doDownload(QUrl(url));
0157         } else {
0158             QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("There is no download URL present in the metadata file!"));
0159         }
0160     } else
0161         markMetadataAsInvalid();
0162 }
0163 
0164 /**
0165  * @brief Starts the download of the dataset.
0166  * @param url the download URL of the dataset
0167  */
0168 void DatasetHandler::doDownload(const QUrl& url) {
0169     QDEBUG("Download request " << url);
0170     QNetworkRequest request(url);
0171     request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, true);
0172     m_currentDownload = m_downloadManager->get(request);
0173     connect(m_currentDownload, &QNetworkReply::downloadProgress, [this](qint64 bytesReceived, qint64 bytesTotal) {
0174         double progress;
0175         if (bytesTotal <= 0)
0176             progress = 0;
0177         else
0178             progress = 100 * (static_cast<double>(bytesReceived) / static_cast<double>(bytesTotal));
0179 
0180         Q_EMIT downloadProgress(progress);
0181     });
0182 }
0183 
0184 /**
0185  * @brief Called when the download of the dataset is finished.
0186  */
0187 void DatasetHandler::downloadFinished(QNetworkReply* reply) {
0188     DEBUG("Download finished");
0189     const QUrl& url = reply->url();
0190     if (reply->error()) {
0191         QMessageBox::critical(nullptr,
0192                               i18n("Failed to download the dataset"),
0193                               i18n("Failed to download the dataset from %1.\n%2.", url.toDisplayString(), reply->errorString()));
0194     } else {
0195         QString filename = saveFileName(url);
0196         if (saveToDisk(filename, reply)) {
0197             qDebug("Download of %s succeeded (saved to %s)\n", url.toEncoded().constData(), qPrintable(filename));
0198             m_fileName = filename;
0199             Q_EMIT downloadCompleted();
0200         }
0201     }
0202 
0203     m_currentDownload = nullptr;
0204     reply->deleteLater();
0205 }
0206 
0207 /**
0208  * @brief Returns the name and path of the file that will contain the content of the reply (based on the URL).
0209  * @param url
0210  */
0211 QString DatasetHandler::saveFileName(const QUrl& url) {
0212     const QString path = url.path();
0213 
0214     // get the extension of the downloaded file
0215     const QString downloadFileName = QFileInfo(path).fileName();
0216     int lastIndex = downloadFileName.lastIndexOf(QLatin1Char('.'));
0217     const QString fileExtension = lastIndex >= 0 ? downloadFileName.right(downloadFileName.length() - lastIndex) : QString();
0218 
0219     QString basename = m_object->value(QStringLiteral("filename")).toString() + fileExtension;
0220 
0221     if (basename.isEmpty())
0222         basename = QStringLiteral("url");
0223 
0224     QDir downloadDir(QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + QStringLiteral("/datasets_local/"));
0225     if (!downloadDir.exists()) {
0226         if (!downloadDir.mkpath(downloadDir.path())) {
0227             QMessageBox::critical(nullptr,
0228                                   i18n("Failed to save the dataset"),
0229                                   i18n("Failed to create the directory %1 to save the dataset.", downloadDir.path()));
0230             return {};
0231         }
0232     }
0233 
0234     QString fileName = downloadDir.path() + QLatin1Char('/') + basename;
0235     QFileInfo fileInfo(fileName);
0236     if (QFile::exists(fileName)) {
0237         if (fileInfo.lastModified().addDays(1) < QDateTime::currentDateTime()) {
0238             QFile removeFile(fileName);
0239             removeFile.remove();
0240         } else
0241             DEBUG("Dataset file already exists, no need to download it again.");
0242     }
0243     return fileName;
0244 }
0245 
0246 /**
0247  * @brief Saves the content of the network reply to the given path under the given name.
0248  */
0249 bool DatasetHandler::saveToDisk(const QString& filename, QIODevice* data) {
0250     QFile file(filename);
0251     if (!file.open(QIODevice::WriteOnly)) {
0252         QMessageBox::critical(nullptr, i18n("Failed to save the dataset"), i18n("Couldn't open the file %1 for writing.\n%2", filename, file.errorString()));
0253         return false;
0254     }
0255 
0256     file.write(data->readAll());
0257     file.close();
0258 
0259     return true;
0260 }
0261 
0262 /**
0263  * @brief Processes the downloaded dataset with the help of the already configured filter.
0264  */
0265 void DatasetHandler::processDataset() {
0266     m_filter->readDataFromFile(m_fileName, m_spreadsheet);
0267 
0268     // set column comments/descriptions, if available
0269     // TODO:
0270     //  if (!m_object->isEmpty()) {
0271     //      int index = 0;
0272     //      const int columnsCount = m_spreadsheet->columnCount();
0273     //      while(m_object->contains(i18n("column_description_%1", index)) && (index < columnsCount)) {
0274     //          m_spreadsheet->column(index)->setComment(m_object->value(i18n("column_description_%1", index)).toString());
0275     //          ++index;
0276     //      }
0277     //  }
0278 }