File indexing completed on 2024-05-12 05:10:13

0001 /***************************************************************************
0002     Copyright (C) 2008-2020 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "tellicoimporter.h"
0026 #include "tellicoxmlreader.h"
0027 #include "tellico_xml.h"
0028 #include "../images/imagefactory.h"
0029 #include "../core/tellico_strings.h"
0030 #include "../utils/guiproxy.h"
0031 #include "../tellico_debug.h"
0032 
0033 #include <KLocalizedString>
0034 #include <KZip>
0035 
0036 #include <QBuffer>
0037 #include <QFile>
0038 #include <QTimer>
0039 #include <QApplication>
0040 #include <QPointer>
0041 
namespace {
  // Lower bound, in bytes, for the size of each chunk of XML data fed to
  // the reader in one step: 100 kB.
  const int MIN_BLOCK_SIZE = 100 * 1024;
}
0045 
0046 using Tellico::Import::TellicoImporter;
0047 
0048 TellicoImporter::TellicoImporter(const QUrl& url_, bool loadAllImages_) : DataImporter(url_),
0049     m_loadAllImages(loadAllImages_), m_format(Unknown), m_modified(false),
0050     m_cancelled(false), m_hasImages(false), m_buffer(nullptr), m_zip(nullptr), m_imgDir(nullptr) {
0051 }
0052 
0053 TellicoImporter::TellicoImporter(const QString& text_) : DataImporter(text_),
0054     m_loadAllImages(true), m_format(Unknown), m_modified(false),
0055     m_cancelled(false), m_hasImages(false), m_buffer(nullptr), m_zip(nullptr), m_imgDir(nullptr) {
0056 }
0057 
0058 TellicoImporter::~TellicoImporter() {
0059 }
0060 
0061 Tellico::Data::CollPtr TellicoImporter::collection() {
0062   if(m_coll) {
0063     return m_coll;
0064   }
0065 
0066   QByteArray s; // read first 5 characters
0067   if(source() == URL) {
0068     if(!fileRef().open()) {
0069       return Data::CollPtr();
0070     }
0071     QIODevice* f = fileRef().file();
0072     s = f->peek(5);
0073   } else {
0074     if(data().size() < 5) {
0075       m_format = Error;
0076       return Data::CollPtr();
0077     }
0078     s = QByteArray(data().constData(), 6);
0079   }
0080 
0081   // hack for processEvents and deletion
0082   QPointer<TellicoImporter> thisPtr(this);
0083 
0084   // need to decide if the data is xml text, or a zip file
0085   // if the first 5 characters are <?xml then treat it like text
0086   if(s[0] == '<' && s[1] == '?' && s[2] == 'x' && s[3] == 'm' && s[4] == 'l') {
0087     m_format = XML;
0088     loadXMLData(source() == URL ? fileRef().file()->readAll() : data(), true);
0089   } else {
0090     m_format = Zip;
0091     loadZipData();
0092   }
0093   return thisPtr ? m_coll : Data::CollPtr();
0094 }
0095 
0096 void TellicoImporter::loadXMLData(const QByteArray& data_, bool loadImages_) {
0097   const bool showProgress = options() & ImportProgress;
0098 
0099   TellicoXmlReader reader(m_baseUrl);
0100   reader.setLoadImages(loadImages_);
0101   reader.setShowImageLoadErrors(options() & ImportShowImageErrors);
0102   bool success = true;
0103 
0104   const int blockSize = qMax(data_.size()/100 + 1, MIN_BLOCK_SIZE);
0105   int pos = 0;
0106   emit signalTotalSteps(this, data_.size());
0107 
0108   // hack to allow processEvents
0109   QPointer<TellicoImporter> thisPtr(this);
0110   while(thisPtr && success && !m_cancelled && pos < data_.size()) {
0111     const uint size = qMin(blockSize, data_.size() - pos);
0112     const QByteArray block = QByteArray::fromRawData(data_.data() + pos, size);
0113     success = reader.readNext(block);
0114     if(!success && reader.isNotWellFormed()) {
0115       // could be bug 418067 where version of Tellico < 3.3 could use invalid XML names
0116       // try to recover. If it's not a bad field name, this should be a pretty quick check
0117       myDebug() << "XML parsing failed. Attempting to recover.";
0118       QByteArray newData = XML::recoverFromBadXMLName(data_);
0119       if(newData.length() == data_.length()) {
0120         // might be bug 443845 with invalid XML control characters
0121         newData = XML::removeInvalidXml(data_);
0122       }
0123       if(newData.length() < data_.length()) {
0124         myDebug() << "Reloading the XML data.";
0125         loadXMLData(newData, loadImages_);
0126         return;
0127       }
0128     }
0129     pos += blockSize;
0130     if(thisPtr && showProgress) {
0131       emit signalProgress(this, pos);
0132       qApp->processEvents();
0133     }
0134   }
0135   if(!thisPtr) {
0136     return;
0137   }
0138 
0139   if(!success) {
0140     m_format = Error;
0141     QString error;
0142     if(!url().isEmpty()) {
0143       error = i18n(errorLoad, url().fileName());
0144     }
0145     const QString errorString = reader.errorString();
0146     if(!errorString.isEmpty()) {
0147       error += QStringLiteral("\n") + errorString;
0148     }
0149     myDebug() << error;
0150     setStatusMessage(error);
0151     return;
0152   }
0153 
0154   if(!m_cancelled) {
0155     m_hasImages = reader.hasImages();
0156     m_coll = reader.collection();
0157   }
0158 }
0159 
0160 void TellicoImporter::loadZipData() {
0161   std::unique_ptr<KZip> zip;
0162   std::unique_ptr<QBuffer> buffer;
0163   if(source() == URL) {
0164     buffer.reset();
0165     zip.reset(new KZip(fileRef().fileName()));
0166   } else {
0167     myDebug() << "Attempting to read text as a zip file";
0168     return;
0169   }
0170   if(!zip->open(QIODevice::ReadOnly)) {
0171     setStatusMessage(i18n(errorLoad, url().fileName()));
0172     m_format = Error;
0173     return;
0174   }
0175 
0176   const KArchiveDirectory* dir = zip->directory();
0177   if(!dir) {
0178     QString str = i18n(errorLoad, url().fileName()) + QLatin1Char('\n');
0179     str += i18n("The file is empty.");
0180     setStatusMessage(str);
0181     m_format = Error;
0182     return;
0183   }
0184 
0185   // main file was changed from bookcase.xml to tellico.xml as of version 0.13
0186   const KArchiveEntry* entry = dir->entry(QStringLiteral("tellico.xml"));
0187   if(!entry) {
0188     entry = dir->entry(QStringLiteral("bookcase.xml"));
0189   }
0190   if(!entry || !entry->isFile()) {
0191     QString str = i18n(errorLoad, url().fileName()) + QLatin1Char('\n');
0192     str += i18n("The file contains no collection data.");
0193     setStatusMessage(str);
0194     m_format = Error;
0195     return;
0196   }
0197 
0198   const QByteArray xmlData = static_cast<const KArchiveFile*>(entry)->data();
0199   // hack to account for processEvents and deletion
0200   QPointer<TellicoImporter> thisPtr(this);
0201   loadXMLData(xmlData, false);
0202   if(!thisPtr) {
0203     return;
0204   }
0205   if(!m_coll) {
0206     m_format = Error;
0207     return;
0208   }
0209 
0210   if(m_cancelled) {
0211     return;
0212   }
0213 
0214   const KArchiveEntry* imgDirEntry = dir->entry(QStringLiteral("images"));
0215   if(!imgDirEntry || !imgDirEntry->isDirectory()) {
0216     return;
0217   }
0218 
0219   // past the point of dropping errors, so retain ownership of the objects
0220   m_zip = std::move(zip);
0221   m_buffer = std::move(buffer);
0222 
0223   m_imgDir = static_cast<const KArchiveDirectory*>(imgDirEntry);
0224   m_images.clear();
0225   m_images.add(m_imgDir->entries());
0226   m_hasImages = !m_images.isEmpty();
0227 
0228   // if all the images are not to be loaded, then we're done
0229   if(!m_loadAllImages) {
0230 //    myLog() << "delayed loading for " << m_images.count() << " images";
0231     return;
0232   }
0233 
0234   const QStringList images = m_imgDir->entries();
0235   const uint stepSize = qMax(s_stepSize, static_cast<uint>(images.count())/100);
0236 
0237   uint j = 0;
0238   for(QStringList::ConstIterator it = images.begin(); !m_cancelled && it != images.end(); ++it, ++j) {
0239     const KArchiveEntry* file = m_imgDir->entry(*it);
0240     if(file && file->isFile()) {
0241       ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(),
0242                              (*it).section(QLatin1Char('.'), -1).toUpper(), (*it));
0243       m_images.remove(*it);
0244     }
0245     if(j%stepSize == 0) {
0246       qApp->processEvents();
0247     }
0248   }
0249 
0250   if(thisPtr && m_images.isEmpty()) {
0251     // give it some time
0252     QTimer::singleShot(3000, this, &QObject::deleteLater);
0253   }
0254 }
0255 
0256 bool TellicoImporter::hasImages() const {
0257   return m_hasImages;
0258 }
0259 
0260 bool TellicoImporter::loadImage(const QString& id_) {
0261 //  myLog() << "id =  " << id_;
0262   if(m_format != Zip || !m_imgDir) {
0263     return false;
0264   }
0265   const KArchiveEntry* file = m_imgDir->entry(id_);
0266   if(!file || !file->isFile()) {
0267     return false;
0268   }
0269   QString newID = ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(),
0270                                          id_.section(QLatin1Char('.'), -1).toUpper(), id_);
0271   m_images.remove(id_);
0272   if(m_images.isEmpty()) {
0273     // give it some time
0274     QTimer::singleShot(3000, this, &QObject::deleteLater);
0275   }
0276   return !newID.isEmpty();
0277 }
0278 
0279 std::unique_ptr<KZip> TellicoImporter::takeImages() {
0280   return std::move(m_zip);
0281 }
0282 
0283 void TellicoImporter::slotCancel() {
0284   m_cancelled = true;
0285   m_format = Cancel;
0286 }
0287 
0288 // static
0289 bool TellicoImporter::loadAllImages(const QUrl& url_) {
0290   // only local files are allowed
0291   if(url_.isEmpty() || !url_.isValid() || !url_.isLocalFile()) {
0292 //    myDebug() << "returning";
0293     return false;
0294   }
0295 
0296   // keep track of url for error reporting
0297   static QUrl u;
0298 
0299   KZip zip(url_.path());
0300   if(!zip.open(QIODevice::ReadOnly)) {
0301     if(u != url_) {
0302       GUI::Proxy::sorry(i18n(errorImageLoad, url_.fileName()));
0303     }
0304     u = url_;
0305     return false;
0306   }
0307 
0308   const KArchiveDirectory* dir = zip.directory();
0309   if(!dir) {
0310     if(u != url_) {
0311       GUI::Proxy::sorry(i18n(errorImageLoad, url_.fileName()));
0312     }
0313     u = url_;
0314     return false;
0315   }
0316 
0317   const KArchiveEntry* imgDirEntry = dir->entry(QStringLiteral("images"));
0318   if(!imgDirEntry || !imgDirEntry->isDirectory()) {
0319     return false;
0320   }
0321   const QStringList images = static_cast<const KArchiveDirectory*>(imgDirEntry)->entries();
0322   for(QStringList::ConstIterator it = images.begin(); it != images.end(); ++it) {
0323     const KArchiveEntry* file = static_cast<const KArchiveDirectory*>(imgDirEntry)->entry(*it);
0324     if(file && file->isFile()) {
0325       ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(),
0326                              (*it).section(QLatin1Char('.'), -1).toUpper(), (*it));
0327     }
0328   }
0329   return true;
0330 }