// File indexing completed on 2024-05-12 15:26:55
/***************************************************************************
    File                 : DatasetHandler.cpp
    Project              : LabPlot
    Description          : Processes a dataset's metadata file
    --------------------------------------------------------------------
    Copyright            : (C) 2019 Kovacs Ferencz (kferike98@gmail.com)
    Copyright            : (C) 2019 by Alexander Semke (alexander.semke@web.de)

***************************************************************************/

/***************************************************************************
 *                                                                         *
 *  This program is free software; you can redistribute it and/or modify   *
 *  it under the terms of the GNU General Public License as published by   *
 *  the Free Software Foundation; either version 2 of the License, or      *
 *  (at your option) any later version.                                    *
 *                                                                         *
 *  This program is distributed in the hope that it will be useful,        *
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of         *
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           *
 *  GNU General Public License for more details.                           *
 *                                                                         *
 *  You should have received a copy of the GNU General Public License      *
 *  along with this program; if not, write to the Free Software            *
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     *
 *  Boston, MA 02110-1301 USA                                              *
 *                                                                         *
 ***************************************************************************/

#include "backend/datasources/filters/AsciiFilter.h"
#include "backend/datasources/DatasetHandler.h"

#include <QDir>
#include <QFile>
#include <QJsonArray>
#include <QJsonObject>
#include <QMessageBox>
#include <QStandardPaths>
#include <QtNetwork/QNetworkAccessManager>
#include <QtNetwork/QNetworkReply>

#include <KLocalizedString>

/*!
0045 \class DatasetHandler 0046 \brief Provides functionality to process a metadata file of a dataset, configure a spreadsheet and filter based on it, download the dataset 0047 and load it into the spreadsheet. 0048 0049 \ingroup datasources 0050 */ 0051 DatasetHandler::DatasetHandler(Spreadsheet* spreadsheet) : m_spreadsheet(spreadsheet), 0052 m_filter(new AsciiFilter), 0053 m_downloadManager(new QNetworkAccessManager) { 0054 connect(m_downloadManager, &QNetworkAccessManager::finished, this, &DatasetHandler::downloadFinished); 0055 connect(this, &DatasetHandler::downloadCompleted, this, &DatasetHandler::processDataset); 0056 } 0057 0058 DatasetHandler::~DatasetHandler() { 0059 delete m_downloadManager; 0060 delete m_filter; 0061 } 0062 0063 /** 0064 * @brief Initiates processing the metadata file,, located at the given path, belonging to a dataset. 0065 * @param path the path to the metadata file 0066 */ 0067 void DatasetHandler::processMetadata(const QJsonObject& object) { 0068 m_object = new QJsonObject(object); 0069 DEBUG("Start processing dataset..."); 0070 0071 if (!m_object->isEmpty()) { 0072 configureFilter(); 0073 configureSpreadsheet(); 0074 prepareForDataset(); 0075 } 0076 } 0077 0078 /** 0079 * @brief Marks the metadata file being invalid by setting the value of a flag, also pops up a messagebox. 0080 */ 0081 void DatasetHandler::markMetadataAsInvalid() { 0082 m_invalidMetadataFile = true; 0083 QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("The metadata file for the selected dataset is invalid.")); 0084 } 0085 0086 /** 0087 * @brief Configures the filter, that will be used later, based on the metadata file. 
0088 */ 0089 void DatasetHandler::configureFilter() { 0090 //set some default values common to many datasets 0091 m_filter->setNumberFormat(QLocale::C); 0092 m_filter->setSkipEmptyParts(true); 0093 m_filter->setHeaderEnabled(false); 0094 0095 //read properties specified in the dataset description 0096 if (!m_object->isEmpty()) { 0097 if (m_object->contains("separator")) 0098 m_filter->setSeparatingCharacter(m_object->value("separator").toString()); 0099 0100 if (m_object->contains("comment_character")) 0101 m_filter->setCommentCharacter(m_object->value("comment_character").toString()); 0102 0103 if (m_object->contains("create_index_column")) 0104 m_filter->setCreateIndexEnabled(m_object->value("create_index_column").toBool()); 0105 0106 if (m_object->contains("skip_empty_parts")) 0107 m_filter->setSkipEmptyParts(m_object->value("skip_empty_parts").toBool()); 0108 0109 if (m_object->contains("simplify_whitespaces")) 0110 m_filter->setSimplifyWhitespacesEnabled(m_object->value("simplify_whitespaces").toBool()); 0111 0112 if (m_object->contains("remove_quotes")) 0113 m_filter->setRemoveQuotesEnabled(m_object->value("remove_quotes").toBool()); 0114 0115 if (m_object->contains("use_first_row_for_vectorname")) 0116 m_filter->setHeaderEnabled(m_object->value("use_first_row_for_vectorname").toBool()); 0117 0118 if (m_object->contains("number_format")) 0119 m_filter->setNumberFormat(QLocale::Language(m_object->value("number_format").toInt())); 0120 0121 if (m_object->contains("DateTime_format")) 0122 m_filter->setDateTimeFormat(m_object->value("DateTime_format").toString()); 0123 0124 if (m_object->contains("columns")) { 0125 const QJsonArray& columnsArray = m_object->value("columns").toArray(); 0126 QStringList columnNames; 0127 for (const auto& col : columnsArray) 0128 columnNames << col.toString(); 0129 0130 m_filter->setVectorNames(columnNames); 0131 } 0132 } else { 0133 DEBUG("Empty object"); 0134 markMetadataAsInvalid(); 0135 } 0136 } 0137 0138 /** 0139 * @brief 
Configures the spreadsheet based on the metadata file. 0140 */ 0141 void DatasetHandler::configureSpreadsheet() { 0142 DEBUG("Start preparing spreadsheet"); 0143 if (!m_object->isEmpty()) { 0144 if (m_object->contains("name")) 0145 m_spreadsheet->setName( m_object->value("name").toString()); 0146 else 0147 markMetadataAsInvalid(); 0148 0149 if (m_object->contains("description_url")) { 0150 auto* manager = new QNetworkAccessManager(this); 0151 connect(manager, &QNetworkAccessManager::finished, [this] (QNetworkReply* reply) { 0152 if (reply->error() == QNetworkReply::NoError) { 0153 QByteArray ba = reply->readAll(); 0154 QString info(ba); 0155 m_spreadsheet->setComment(info); 0156 } else { 0157 DEBUG("Failed to fetch the description."); 0158 if (m_object->contains("description")) 0159 m_spreadsheet->setComment(m_object->value("description").toString()); 0160 } 0161 reply->deleteLater(); 0162 } 0163 ); 0164 manager->get(QNetworkRequest(QUrl(m_object->value("description_url").toString()))); 0165 } else if (m_object->contains("description")) 0166 m_spreadsheet->setComment(m_object->value("description").toString()); 0167 } else { 0168 markMetadataAsInvalid(); 0169 } 0170 } 0171 0172 /** 0173 * @brief Extracts the download URL of the dataset and initiates the process of download. 0174 */ 0175 void DatasetHandler::prepareForDataset() { 0176 DEBUG("Start downloading dataset"); 0177 if (!m_object->isEmpty()) { 0178 if (m_object->contains("url")) { 0179 const QString& url = m_object->value("url").toString(); 0180 doDownload(QUrl(url)); 0181 } 0182 else { 0183 QMessageBox::critical(nullptr, i18n("Invalid metadata file"), i18n("There is no download URL present in the metadata file!")); 0184 } 0185 0186 } else { 0187 markMetadataAsInvalid(); 0188 } 0189 } 0190 0191 /** 0192 * @brief Starts the download of the dataset. 
0193 * @param url the download URL of the dataset 0194 */ 0195 void DatasetHandler::doDownload(const QUrl& url) { 0196 DEBUG("Download request"); 0197 QNetworkRequest request(url); 0198 m_currentDownload = m_downloadManager->get(request); 0199 connect(m_currentDownload, &QNetworkReply::downloadProgress, [this] (qint64 bytesReceived, qint64 bytesTotal) { 0200 double progress; 0201 if (bytesTotal == -1) 0202 progress = 0; 0203 else 0204 progress = 100 * (static_cast<double>(bytesReceived) / static_cast<double>(bytesTotal)); 0205 qDebug() << "Progress: " << progress; 0206 emit downloadProgress(progress); 0207 }); 0208 } 0209 0210 /** 0211 * @brief Called when the download of the dataset is finished. 0212 */ 0213 void DatasetHandler::downloadFinished(QNetworkReply* reply) { 0214 DEBUG("Download finished"); 0215 const QUrl& url = reply->url(); 0216 if (reply->error()) { 0217 qDebug("Download of %s failed: %s\n", 0218 url.toEncoded().constData(), 0219 qPrintable(reply->errorString())); 0220 } else { 0221 if (isHttpRedirect(reply)) { 0222 qDebug("Request was redirected.\n"); 0223 } else { 0224 QString filename = saveFileName(url); 0225 if (saveToDisk(filename, reply)) { 0226 qDebug("Download of %s succeeded (saved to %s)\n", 0227 url.toEncoded().constData(), qPrintable(filename)); 0228 m_fileName = filename; 0229 emit downloadCompleted(); 0230 } 0231 } 0232 } 0233 0234 m_currentDownload = nullptr; 0235 reply->deleteLater(); 0236 } 0237 0238 /** 0239 * @brief Checks whether the GET request was redirected or not. 0240 */ 0241 bool DatasetHandler::isHttpRedirect(QNetworkReply* reply) { 0242 const int statusCode = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt(); 0243 // TODO enum/defines for status codes ? 
0244 return statusCode == 301 || statusCode == 302 || statusCode == 303 0245 || statusCode == 305 || statusCode == 307 || statusCode == 308; 0246 } 0247 0248 /** 0249 * @brief Returns the name and path of the file that will contain the content of the reply (based on the URL). 0250 * @param url 0251 */ 0252 QString DatasetHandler::saveFileName(const QUrl& url) { 0253 const QString path = url.path(); 0254 0255 //get the extension of the downloaded file 0256 const QString downloadFileName = QFileInfo(path).fileName(); 0257 int lastIndex = downloadFileName.lastIndexOf("."); 0258 const QString fileExtension = lastIndex >= 0 ? downloadFileName.right(downloadFileName.length() - lastIndex) : ""; 0259 0260 QString basename = m_object->value("filename").toString() + fileExtension; 0261 0262 if (basename.isEmpty()) 0263 basename = "url"; 0264 0265 QDir downloadDir(QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + QLatin1String("/datasets_local/")); 0266 if (!downloadDir.exists()) { 0267 if (!downloadDir.mkpath(downloadDir.path())) { 0268 qDebug()<<"Failed to create the directory " << downloadDir.path(); 0269 return QString(); 0270 } 0271 } 0272 0273 QString fileName = downloadDir.path() + QLatin1Char('/') + basename; 0274 QFileInfo fileInfo (fileName); 0275 if (QFile::exists(fileName)) { 0276 if (fileInfo.lastModified().addDays(1) < QDateTime::currentDateTime()){ 0277 QFile removeFile (fileName); 0278 removeFile.remove(); 0279 } else { 0280 qDebug() << "Dataset file already exists, no need to download it again"; 0281 } 0282 } 0283 return fileName; 0284 } 0285 0286 /** 0287 * @brief Saves the content of the network reply to the given path under the given name. 
0288 */ 0289 bool DatasetHandler::saveToDisk(const QString& filename, QIODevice* data) { 0290 QFile file(filename); 0291 if (!file.open(QIODevice::WriteOnly)) { 0292 qDebug("Could not open %s for writing: %s\n", 0293 qPrintable(filename), 0294 qPrintable(file.errorString())); 0295 return false; 0296 } 0297 0298 file.write(data->readAll()); 0299 file.close(); 0300 0301 return true; 0302 } 0303 0304 /** 0305 * @brief Processes the downloaded dataset with the help of the already configured filter. 0306 */ 0307 void DatasetHandler::processDataset() { 0308 m_filter->readDataFromFile(m_fileName, m_spreadsheet); 0309 0310 //set column comments/descriptions, if available 0311 //TODO: 0312 // if (!m_object->isEmpty()) { 0313 // int index = 0; 0314 // const int columnsCount = m_spreadsheet->columnCount(); 0315 // while(m_object->contains(i18n("column_description_%1", index)) && (index < columnsCount)) { 0316 // m_spreadsheet->column(index)->setComment(m_object->value(i18n("column_description_%1", index)).toString()); 0317 // ++index; 0318 // } 0319 // } 0320 }