File indexing completed on 2024-05-12 16:46:33

0001 /***************************************************************************
0002     Copyright (C) 2008-2020 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "tellicoimporter.h"
0026 #include "tellicoxmlreader.h"
0027 #include "tellico_xml.h"
0028 #include "../collectionfactory.h"
0029 #include "../entry.h"
0030 #include "../field.h"
0031 #include "../images/imagefactory.h"
0032 #include "../images/image.h"
0033 #include "../utils/isbnvalidator.h"
0034 #include "../core/tellico_strings.h"
0035 #include "../utils/guiproxy.h"
0036 #include "../utils/tellico_utils.h"
0037 #include "../tellico_debug.h"
0038 
0039 #include <KLocalizedString>
0040 #include <KZip>
0041 
0042 #include <QBuffer>
0043 #include <QFile>
0044 #include <QTimer>
0045 #include <QApplication>
0046 #include <QPointer>
0047 
namespace {
  // Lower bound, in bytes, for the size of each chunk of XML data that is
  // handed to the reader in one step (100 kB).
  constexpr int MIN_BLOCK_SIZE = 100 * 1024;
}
0051 
0052 using Tellico::Import::TellicoImporter;
0053 
0054 TellicoImporter::TellicoImporter(const QUrl& url_, bool loadAllImages_) : DataImporter(url_),
0055     m_loadAllImages(loadAllImages_), m_format(Unknown), m_modified(false),
0056     m_cancelled(false), m_hasImages(false), m_buffer(nullptr), m_zip(nullptr), m_imgDir(nullptr) {
0057 }
0058 
0059 TellicoImporter::TellicoImporter(const QString& text_) : DataImporter(text_),
0060     m_loadAllImages(true), m_format(Unknown), m_modified(false),
0061     m_cancelled(false), m_hasImages(false), m_buffer(nullptr), m_zip(nullptr), m_imgDir(nullptr) {
0062 }
0063 
0064 TellicoImporter::~TellicoImporter() {
0065 }
0066 
0067 Tellico::Data::CollPtr TellicoImporter::collection() {
0068   if(m_coll) {
0069     return m_coll;
0070   }
0071 
0072   QByteArray s; // read first 5 characters
0073   if(source() == URL) {
0074     if(!fileRef().open()) {
0075       return Data::CollPtr();
0076     }
0077     QIODevice* f = fileRef().file();
0078     char c;
0079     for(int i = 0; i < 5; ++i) {
0080       if(f->getChar(&c)) {
0081         s += c;
0082       }
0083     }
0084     f->reset();
0085   } else {
0086     if(data().size() < 5) {
0087       m_format = Error;
0088       return Data::CollPtr();
0089     }
0090     s = QByteArray(data().constData(), 6);
0091   }
0092 
0093   // hack for processEvents and deletion
0094   QPointer<TellicoImporter> thisPtr(this);
0095 
0096   // need to decide if the data is xml text, or a zip file
0097   // if the first 5 characters are <?xml then treat it like text
0098   if(s[0] == '<' && s[1] == '?' && s[2] == 'x' && s[3] == 'm' && s[4] == 'l') {
0099     m_format = XML;
0100     loadXMLData(source() == URL ? fileRef().file()->readAll() : data(), true);
0101   } else {
0102     m_format = Zip;
0103     loadZipData();
0104   }
0105   return thisPtr ? m_coll : Data::CollPtr();
0106 }
0107 
0108 void TellicoImporter::loadXMLData(const QByteArray& data_, bool loadImages_) {
0109   const bool showProgress = options() & ImportProgress;
0110 
0111   TellicoXmlReader reader;
0112   reader.setLoadImages(loadImages_);
0113   reader.setShowImageLoadErrors(options() & ImportShowImageErrors);
0114   bool success = true;
0115 
0116   const int blockSize = qMax(data_.size()/100 + 1, MIN_BLOCK_SIZE);
0117   int pos = 0;
0118   emit signalTotalSteps(this, data_.size());
0119 
0120   // hack to allow processEvents
0121   QPointer<TellicoImporter> thisPtr(this);
0122   while(thisPtr && success && !m_cancelled && pos < data_.size()) {
0123     const uint size = qMin(blockSize, data_.size() - pos);
0124     const QByteArray block = QByteArray::fromRawData(data_.data() + pos, size);
0125     success = reader.readNext(block);
0126     if(!success && reader.isNotWellFormed()) {
0127       // could be bug 418067 where version of Tellico < 3.3 could use invalid XML names
0128       // try to recover. If it's not a bad field name, this should be a pretty quick check
0129       myDebug() << "XML parsing failed. Attempting to recover.";
0130       QByteArray newData = XML::recoverFromBadXMLName(data_);
0131       if(newData.length() == data_.length()) {
0132         // might be bug 443845 with invalid XML control characters
0133         newData = XML::removeInvalidXml(data_);
0134       }
0135       if(newData.length() < data_.length()) {
0136         myDebug() << "Reloading the XML data.";
0137         loadXMLData(newData, loadImages_);
0138         return;
0139       }
0140     }
0141     pos += blockSize;
0142     if(thisPtr && showProgress) {
0143       emit signalProgress(this, pos);
0144       qApp->processEvents();
0145     }
0146   }
0147   if(!thisPtr) {
0148     return;
0149   }
0150 
0151   if(!success) {
0152     m_format = Error;
0153     QString error;
0154     if(!url().isEmpty()) {
0155       error = i18n(errorLoad, url().fileName());
0156     }
0157     const QString errorString = reader.errorString();
0158     if(!errorString.isEmpty()) {
0159       error += QStringLiteral("\n") + errorString;
0160     }
0161     myDebug() << error;
0162     setStatusMessage(error);
0163     return;
0164   }
0165 
0166   if(!m_cancelled) {
0167     m_hasImages = reader.hasImages();
0168     m_coll = reader.collection();
0169   }
0170 }
0171 
0172 void TellicoImporter::loadZipData() {
0173   std::unique_ptr<KZip> zip;
0174   std::unique_ptr<QBuffer> buffer;
0175   if(source() == URL) {
0176     buffer.reset();
0177     zip.reset(new KZip(fileRef().fileName()));
0178   } else {
0179     QByteArray allData = data();
0180     buffer.reset(new QBuffer(&allData));
0181     zip.reset(new KZip(buffer.get()));
0182   }
0183   if(!zip->open(QIODevice::ReadOnly)) {
0184     setStatusMessage(i18n(errorLoad, url().fileName()));
0185     m_format = Error;
0186     return;
0187   }
0188 
0189   const KArchiveDirectory* dir = zip->directory();
0190   if(!dir) {
0191     QString str = i18n(errorLoad, url().fileName()) + QLatin1Char('\n');
0192     str += i18n("The file is empty.");
0193     setStatusMessage(str);
0194     m_format = Error;
0195     return;
0196   }
0197 
0198   // main file was changed from bookcase.xml to tellico.xml as of version 0.13
0199   const KArchiveEntry* entry = dir->entry(QStringLiteral("tellico.xml"));
0200   if(!entry) {
0201     entry = dir->entry(QStringLiteral("bookcase.xml"));
0202   }
0203   if(!entry || !entry->isFile()) {
0204     QString str = i18n(errorLoad, url().fileName()) + QLatin1Char('\n');
0205     str += i18n("The file contains no collection data.");
0206     setStatusMessage(str);
0207     m_format = Error;
0208     return;
0209   }
0210 
0211   const QByteArray xmlData = static_cast<const KArchiveFile*>(entry)->data();
0212   // hack to account for processEvents and deletion
0213   QPointer<TellicoImporter> thisPtr(this);
0214   loadXMLData(xmlData, false);
0215   if(!thisPtr) {
0216     return;
0217   }
0218   if(!m_coll) {
0219     m_format = Error;
0220     return;
0221   }
0222 
0223   if(m_cancelled) {
0224     return;
0225   }
0226 
0227   const KArchiveEntry* imgDirEntry = dir->entry(QStringLiteral("images"));
0228   if(!imgDirEntry || !imgDirEntry->isDirectory()) {
0229     return;
0230   }
0231 
0232   // past the point of dropping errors, so retain ownership of the objects
0233   m_zip = std::move(zip);
0234   m_buffer = std::move(buffer);
0235 
0236   m_imgDir = static_cast<const KArchiveDirectory*>(imgDirEntry);
0237   m_images.clear();
0238   m_images.add(m_imgDir->entries());
0239   m_hasImages = !m_images.isEmpty();
0240 
0241   // if all the images are not to be loaded, then we're done
0242   if(!m_loadAllImages) {
0243 //    myLog() << "delayed loading for " << m_images.count() << " images";
0244     return;
0245   }
0246 
0247   const QStringList images = m_imgDir->entries();
0248   const uint stepSize = qMax(s_stepSize, static_cast<uint>(images.count())/100);
0249 
0250   uint j = 0;
0251   for(QStringList::ConstIterator it = images.begin(); !m_cancelled && it != images.end(); ++it, ++j) {
0252     const KArchiveEntry* file = m_imgDir->entry(*it);
0253     if(file && file->isFile()) {
0254       ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(),
0255                              (*it).section(QLatin1Char('.'), -1).toUpper(), (*it));
0256       m_images.remove(*it);
0257     }
0258     if(j%stepSize == 0) {
0259       qApp->processEvents();
0260     }
0261   }
0262 
0263   if(thisPtr && m_images.isEmpty()) {
0264     // give it some time
0265     QTimer::singleShot(3000, this, &QObject::deleteLater);
0266   }
0267 }
0268 
0269 bool TellicoImporter::hasImages() const {
0270   return m_hasImages;
0271 }
0272 
0273 bool TellicoImporter::loadImage(const QString& id_) {
0274 //  myLog() << "id =  " << id_;
0275   if(m_format != Zip || !m_imgDir) {
0276     return false;
0277   }
0278   const KArchiveEntry* file = m_imgDir->entry(id_);
0279   if(!file || !file->isFile()) {
0280     return false;
0281   }
0282   QString newID = ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(),
0283                                          id_.section(QLatin1Char('.'), -1).toUpper(), id_);
0284   m_images.remove(id_);
0285   if(m_images.isEmpty()) {
0286     // give it some time
0287     QTimer::singleShot(3000, this, &QObject::deleteLater);
0288   }
0289   return !newID.isEmpty();
0290 }
0291 
0292 std::unique_ptr<KZip> TellicoImporter::takeImages() {
0293   return std::move(m_zip);
0294 }
0295 
0296 void TellicoImporter::slotCancel() {
0297   m_cancelled = true;
0298   m_format = Cancel;
0299 }
0300 
0301 // static
0302 bool TellicoImporter::loadAllImages(const QUrl& url_) {
0303   // only local files are allowed
0304   if(url_.isEmpty() || !url_.isValid() || !url_.isLocalFile()) {
0305 //    myDebug() << "returning";
0306     return false;
0307   }
0308 
0309   // keep track of url for error reporting
0310   static QUrl u;
0311 
0312   KZip zip(url_.path());
0313   if(!zip.open(QIODevice::ReadOnly)) {
0314     if(u != url_) {
0315       GUI::Proxy::sorry(i18n(errorImageLoad, url_.fileName()));
0316     }
0317     u = url_;
0318     return false;
0319   }
0320 
0321   const KArchiveDirectory* dir = zip.directory();
0322   if(!dir) {
0323     if(u != url_) {
0324       GUI::Proxy::sorry(i18n(errorImageLoad, url_.fileName()));
0325     }
0326     u = url_;
0327     return false;
0328   }
0329 
0330   const KArchiveEntry* imgDirEntry = dir->entry(QStringLiteral("images"));
0331   if(!imgDirEntry || !imgDirEntry->isDirectory()) {
0332     return false;
0333   }
0334   const QStringList images = static_cast<const KArchiveDirectory*>(imgDirEntry)->entries();
0335   for(QStringList::ConstIterator it = images.begin(); it != images.end(); ++it) {
0336     const KArchiveEntry* file = static_cast<const KArchiveDirectory*>(imgDirEntry)->entry(*it);
0337     if(file && file->isFile()) {
0338       ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(),
0339                              (*it).section(QLatin1Char('.'), -1).toUpper(), (*it));
0340     }
0341   }
0342   return true;
0343 }