File indexing completed on 2024-05-12 15:26:55

0001 /***************************************************************************
0002     File                 : DatasetHandler.cpp
0003     Project              : LabPlot
0004     Description          : Processes a dataset's metadata file
0005     --------------------------------------------------------------------
0006     Copyright            : (C) 2019 Kovacs Ferencz (kferike98@gmail.com)
0007     Copyright            : (C) 2019 by Alexander Semke (alexander.semke@web.de)
0008 
0009 ***************************************************************************/
0010 
0011 /***************************************************************************
0012 *                                                                         *
0013 *  This program is free software; you can redistribute it and/or modify   *
0014 *  it under the terms of the GNU General Public License as published by   *
0015 *  the Free Software Foundation; either version 2 of the License, or      *
0016 *  (at your option) any later version.                                    *
0017 *                                                                         *
0018 *  This program is distributed in the hope that it will be useful,        *
0019 *  but WITHOUT ANY WARRANTY; without even the implied warranty of         *
0020 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
0021 *  GNU General Public License for more details.                           *
0022 *                                                                         *
0023 *   You should have received a copy of the GNU General Public License     *
0024 *   along with this program; if not, write to the Free Software           *
0025 *   Foundation, Inc., 51 Franklin Street, Fifth Floor,                    *
0026 *   Boston, MA  02110-1301  USA                                           *
0027 *                                                                         *
0028 ***************************************************************************/
0029 
0030 #include "backend/datasources/filters/AsciiFilter.h"
0031 #include "backend/datasources/DatasetHandler.h"
0032 
0033 #include <QDir>
0034 #include <QFile>
0035 #include <QJsonArray>
0036 #include <QJsonObject>
0037 #include <QMessageBox>
0038 #include <QStandardPaths>
0039 #include <QtNetwork/QNetworkAccessManager>
0040 #include <QtNetwork/QNetworkReply>
0041 
0042 #include <KLocalizedString>
0043 
0044 /*!
0045   \class DatasetHandler
0046   \brief Provides  functionality to process a metadata file of a dataset, configure a spreadsheet and filter based on it, download the dataset
0047   and load it into the spreadsheet.
0048 
0049   \ingroup datasources
0050 */
0051 DatasetHandler::DatasetHandler(Spreadsheet* spreadsheet) : m_spreadsheet(spreadsheet),
0052     m_filter(new AsciiFilter),
0053     m_downloadManager(new QNetworkAccessManager) {
0054     connect(m_downloadManager, &QNetworkAccessManager::finished, this, &DatasetHandler::downloadFinished);
0055     connect(this, &DatasetHandler::downloadCompleted, this, &DatasetHandler::processDataset);
0056 }
0057 
0058 DatasetHandler::~DatasetHandler() {
0059     delete m_downloadManager;
0060     delete m_filter;
0061 }
0062 
0063 /**
0064  * @brief Initiates processing the metadata file,, located at the given path, belonging to a dataset.
0065  * @param path the path to the metadata file
0066  */
0067 void DatasetHandler::processMetadata(const QJsonObject& object) {
0068     m_object = new QJsonObject(object);
0069     DEBUG("Start processing dataset...");
0070 
0071     if (!m_object->isEmpty()) {
0072         configureFilter();
0073         configureSpreadsheet();
0074         prepareForDataset();
0075     }
0076 }
0077 
0078 /**
0079  * @brief Marks the metadata file being invalid by setting the value of a flag, also pops up a messagebox.
0080  */
0081 void DatasetHandler::markMetadataAsInvalid() {
0082     m_invalidMetadataFile = true;
0083     QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("The metadata file for the selected dataset is invalid."));
0084 }
0085 
0086 /**
0087  * @brief Configures the filter, that will be used later, based on the metadata file.
0088  */
0089 void DatasetHandler::configureFilter() {
0090     //set some default values common to many datasets
0091     m_filter->setNumberFormat(QLocale::C);
0092     m_filter->setSkipEmptyParts(true);
0093     m_filter->setHeaderEnabled(false);
0094 
0095     //read properties specified in the dataset description
0096     if (!m_object->isEmpty()) {
0097         if (m_object->contains("separator"))
0098             m_filter->setSeparatingCharacter(m_object->value("separator").toString());
0099 
0100         if (m_object->contains("comment_character"))
0101             m_filter->setCommentCharacter(m_object->value("comment_character").toString());
0102 
0103         if (m_object->contains("create_index_column"))
0104             m_filter->setCreateIndexEnabled(m_object->value("create_index_column").toBool());
0105 
0106         if (m_object->contains("skip_empty_parts"))
0107             m_filter->setSkipEmptyParts(m_object->value("skip_empty_parts").toBool());
0108 
0109         if (m_object->contains("simplify_whitespaces"))
0110             m_filter->setSimplifyWhitespacesEnabled(m_object->value("simplify_whitespaces").toBool());
0111 
0112         if (m_object->contains("remove_quotes"))
0113             m_filter->setRemoveQuotesEnabled(m_object->value("remove_quotes").toBool());
0114 
0115         if (m_object->contains("use_first_row_for_vectorname"))
0116             m_filter->setHeaderEnabled(m_object->value("use_first_row_for_vectorname").toBool());
0117 
0118         if (m_object->contains("number_format"))
0119             m_filter->setNumberFormat(QLocale::Language(m_object->value("number_format").toInt()));
0120 
0121         if (m_object->contains("DateTime_format"))
0122             m_filter->setDateTimeFormat(m_object->value("DateTime_format").toString());
0123 
0124         if (m_object->contains("columns")) {
0125             const QJsonArray& columnsArray = m_object->value("columns").toArray();
0126             QStringList columnNames;
0127             for (const auto& col : columnsArray)
0128                 columnNames << col.toString();
0129 
0130             m_filter->setVectorNames(columnNames);
0131         }
0132     } else {
0133         DEBUG("Empty object");
0134         markMetadataAsInvalid();
0135     }
0136 }
0137 
0138 /**
0139  * @brief Configures the spreadsheet based on the metadata file.
0140  */
0141 void DatasetHandler::configureSpreadsheet() {
0142     DEBUG("Start preparing spreadsheet");
0143     if (!m_object->isEmpty()) {
0144         if (m_object->contains("name"))
0145             m_spreadsheet->setName( m_object->value("name").toString());
0146         else
0147             markMetadataAsInvalid();
0148 
0149         if (m_object->contains("description_url")) {
0150             auto* manager = new QNetworkAccessManager(this);
0151             connect(manager, &QNetworkAccessManager::finished, [this] (QNetworkReply* reply) {
0152                 if (reply->error() == QNetworkReply::NoError) {
0153                     QByteArray ba = reply->readAll();
0154                     QString info(ba);
0155                     m_spreadsheet->setComment(info);
0156                 } else {
0157                     DEBUG("Failed to fetch the description.");
0158                     if (m_object->contains("description"))
0159                         m_spreadsheet->setComment(m_object->value("description").toString());
0160                 }
0161                 reply->deleteLater();
0162             }
0163             );
0164             manager->get(QNetworkRequest(QUrl(m_object->value("description_url").toString())));
0165         } else if (m_object->contains("description"))
0166             m_spreadsheet->setComment(m_object->value("description").toString());
0167     } else {
0168         markMetadataAsInvalid();
0169     }
0170 }
0171 
0172 /**
0173  * @brief Extracts the download URL of the dataset and initiates the process of download.
0174  */
0175 void DatasetHandler::prepareForDataset() {
0176     DEBUG("Start downloading dataset");
0177     if (!m_object->isEmpty()) {
0178         if (m_object->contains("url")) {
0179             const QString& url =  m_object->value("url").toString();
0180             doDownload(QUrl(url));
0181         }
0182         else {
0183             QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("There is no download URL present in the metadata file!"));
0184         }
0185 
0186     } else {
0187         markMetadataAsInvalid();
0188     }
0189 }
0190 
0191 /**
0192  * @brief Starts the download of the dataset.
0193  * @param url the download URL of the dataset
0194  */
0195 void DatasetHandler::doDownload(const QUrl& url) {
0196     DEBUG("Download request");
0197     QNetworkRequest request(url);
0198     m_currentDownload = m_downloadManager->get(request);
0199     connect(m_currentDownload, &QNetworkReply::downloadProgress, [this] (qint64 bytesReceived, qint64 bytesTotal) {
0200         double progress;
0201         if (bytesTotal == -1)
0202             progress = 0;
0203         else
0204             progress = 100 * (static_cast<double>(bytesReceived) / static_cast<double>(bytesTotal));
0205         qDebug() << "Progress: " << progress;
0206         emit downloadProgress(progress);
0207     });
0208 }
0209 
0210 /**
0211  * @brief Called when the download of the dataset is finished.
0212  */
0213 void DatasetHandler::downloadFinished(QNetworkReply* reply) {
0214     DEBUG("Download finished");
0215     const QUrl& url = reply->url();
0216     if (reply->error()) {
0217         qDebug("Download of %s failed: %s\n",
0218                url.toEncoded().constData(),
0219                qPrintable(reply->errorString()));
0220     } else {
0221         if (isHttpRedirect(reply)) {
0222             qDebug("Request was redirected.\n");
0223         } else {
0224             QString filename = saveFileName(url);
0225             if (saveToDisk(filename, reply)) {
0226                 qDebug("Download of %s succeeded (saved to %s)\n",
0227                        url.toEncoded().constData(), qPrintable(filename));
0228                 m_fileName = filename;
0229                 emit downloadCompleted();
0230             }
0231         }
0232     }
0233 
0234     m_currentDownload = nullptr;
0235     reply->deleteLater();
0236 }
0237 
0238 /**
0239  * @brief Checks whether the GET request was redirected or not.
0240  */
0241 bool DatasetHandler::isHttpRedirect(QNetworkReply* reply) {
0242     const int statusCode = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
0243     // TODO enum/defines for status codes ?
0244     return statusCode == 301 || statusCode == 302 || statusCode == 303
0245             || statusCode == 305 || statusCode == 307 || statusCode == 308;
0246 }
0247 
0248 /**
0249  * @brief Returns the name and path of the file that will contain the content of the reply (based on the URL).
0250  * @param url
0251  */
0252 QString DatasetHandler::saveFileName(const QUrl& url) {
0253     const QString path = url.path();
0254 
0255     //get the extension of the downloaded file
0256     const QString downloadFileName = QFileInfo(path).fileName();
0257     int lastIndex = downloadFileName.lastIndexOf(".");
0258     const QString fileExtension = lastIndex >= 0 ?  downloadFileName.right(downloadFileName.length() - lastIndex) : "";
0259 
0260     QString basename = m_object->value("filename").toString() + fileExtension;
0261 
0262     if (basename.isEmpty())
0263         basename = "url";
0264 
0265     QDir downloadDir(QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + QLatin1String("/datasets_local/"));
0266     if (!downloadDir.exists()) {
0267         if (!downloadDir.mkpath(downloadDir.path())) {
0268             qDebug()<<"Failed to create the directory " << downloadDir.path();
0269             return QString();
0270         }
0271     }
0272 
0273     QString fileName = downloadDir.path() + QLatin1Char('/') + basename;
0274     QFileInfo fileInfo (fileName);
0275     if (QFile::exists(fileName)) {
0276         if (fileInfo.lastModified().addDays(1) < QDateTime::currentDateTime()){
0277             QFile removeFile (fileName);
0278             removeFile.remove();
0279         } else {
0280             qDebug() << "Dataset file already exists, no need to download it again";
0281         }
0282     }
0283     return fileName;
0284 }
0285 
0286 /**
0287  * @brief Saves the content of the network reply to the given path under the given name.
0288  */
0289 bool DatasetHandler::saveToDisk(const QString& filename, QIODevice* data) {
0290     QFile file(filename);
0291     if (!file.open(QIODevice::WriteOnly)) {
0292         qDebug("Could not open %s for writing: %s\n",
0293                qPrintable(filename),
0294                qPrintable(file.errorString()));
0295         return false;
0296     }
0297 
0298     file.write(data->readAll());
0299     file.close();
0300 
0301     return true;
0302 }
0303 
0304 /**
0305  * @brief Processes the downloaded dataset with the help of the already configured filter.
0306  */
0307 void DatasetHandler::processDataset() {
0308     m_filter->readDataFromFile(m_fileName, m_spreadsheet);
0309 
0310     //set column comments/descriptions, if available
0311     //TODO:
0312 //  if (!m_object->isEmpty()) {
0313 //      int index = 0;
0314 //      const int columnsCount = m_spreadsheet->columnCount();
0315 //      while(m_object->contains(i18n("column_description_%1", index)) && (index < columnsCount)) {
0316 //          m_spreadsheet->column(index)->setComment(m_object->value(i18n("column_description_%1", index)).toString());
0317 //          ++index;
0318 //      }
0319 //  }
0320 }