File indexing completed on 2024-05-12 16:46:33
0001 /*************************************************************************** 0002 Copyright (C) 2008-2020 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "tellicoimporter.h" 0026 #include "tellicoxmlreader.h" 0027 #include "tellico_xml.h" 0028 #include "../collectionfactory.h" 0029 #include "../entry.h" 0030 #include "../field.h" 0031 #include "../images/imagefactory.h" 0032 #include "../images/image.h" 0033 #include "../utils/isbnvalidator.h" 0034 #include "../core/tellico_strings.h" 0035 #include "../utils/guiproxy.h" 0036 #include "../utils/tellico_utils.h" 0037 #include "../tellico_debug.h" 0038 0039 #include <KLocalizedString> 0040 #include <KZip> 0041 0042 #include <QBuffer> 0043 #include <QFile> 0044 #include <QTimer> 0045 #include <QApplication> 0046 #include <QPointer> 0047 0048 namespace { 0049 static const int MIN_BLOCK_SIZE = 100*1024; // minimum read size of 100 kB 0050 } 0051 0052 using Tellico::Import::TellicoImporter; 0053 0054 TellicoImporter::TellicoImporter(const QUrl& url_, bool loadAllImages_) : DataImporter(url_), 0055 m_loadAllImages(loadAllImages_), m_format(Unknown), m_modified(false), 0056 m_cancelled(false), m_hasImages(false), m_buffer(nullptr), m_zip(nullptr), m_imgDir(nullptr) { 0057 } 0058 0059 TellicoImporter::TellicoImporter(const QString& text_) : DataImporter(text_), 0060 m_loadAllImages(true), m_format(Unknown), m_modified(false), 0061 m_cancelled(false), m_hasImages(false), m_buffer(nullptr), m_zip(nullptr), m_imgDir(nullptr) { 0062 } 0063 0064 TellicoImporter::~TellicoImporter() { 0065 } 0066 0067 Tellico::Data::CollPtr TellicoImporter::collection() { 0068 if(m_coll) { 0069 return m_coll; 0070 } 0071 0072 QByteArray s; // read first 5 characters 0073 if(source() == URL) { 0074 if(!fileRef().open()) { 0075 return Data::CollPtr(); 0076 } 0077 QIODevice* f = fileRef().file(); 0078 char c; 0079 for(int i = 0; i < 5; ++i) { 0080 if(f->getChar(&c)) { 0081 s += c; 0082 } 0083 } 0084 f->reset(); 0085 } else { 0086 if(data().size() < 5) { 0087 m_format = Error; 0088 return Data::CollPtr(); 0089 } 0090 s = QByteArray(data().constData(), 6); 0091 } 0092 0093 // hack for processEvents and deletion 0094 QPointer<TellicoImporter> thisPtr(this); 0095 0096 // need to decide if the data is xml text, or a zip file 0097 // if the first 5 characters are <?xml then treat it like text 0098 if(s[0] == '<' && s[1] == '?' && s[2] == 'x' && s[3] == 'm' && s[4] == 'l') { 0099 m_format = XML; 0100 loadXMLData(source() == URL ? fileRef().file()->readAll() : data(), true); 0101 } else { 0102 m_format = Zip; 0103 loadZipData(); 0104 } 0105 return thisPtr ? m_coll : Data::CollPtr(); 0106 } 0107 0108 void TellicoImporter::loadXMLData(const QByteArray& data_, bool loadImages_) { 0109 const bool showProgress = options() & ImportProgress; 0110 0111 TellicoXmlReader reader; 0112 reader.setLoadImages(loadImages_); 0113 reader.setShowImageLoadErrors(options() & ImportShowImageErrors); 0114 bool success = true; 0115 0116 const int blockSize = qMax(data_.size()/100 + 1, MIN_BLOCK_SIZE); 0117 int pos = 0; 0118 emit signalTotalSteps(this, data_.size()); 0119 0120 // hack to allow processEvents 0121 QPointer<TellicoImporter> thisPtr(this); 0122 while(thisPtr && success && !m_cancelled && pos < data_.size()) { 0123 const uint size = qMin(blockSize, data_.size() - pos); 0124 const QByteArray block = QByteArray::fromRawData(data_.data() + pos, size); 0125 success = reader.readNext(block); 0126 if(!success && reader.isNotWellFormed()) { 0127 // could be bug 418067 where version of Tellico < 3.3 could use invalid XML names 0128 // try to recover. If it's not a bad field name, this should be a pretty quick check 0129 myDebug() << "XML parsing failed. Attempting to recover."; 0130 QByteArray newData = XML::recoverFromBadXMLName(data_); 0131 if(newData.length() == data_.length()) { 0132 // might be bug 443845 with invalid XML control characters 0133 newData = XML::removeInvalidXml(data_); 0134 } 0135 if(newData.length() < data_.length()) { 0136 myDebug() << "Reloading the XML data."; 0137 loadXMLData(newData, loadImages_); 0138 return; 0139 } 0140 } 0141 pos += blockSize; 0142 if(thisPtr && showProgress) { 0143 emit signalProgress(this, pos); 0144 qApp->processEvents(); 0145 } 0146 } 0147 if(!thisPtr) { 0148 return; 0149 } 0150 0151 if(!success) { 0152 m_format = Error; 0153 QString error; 0154 if(!url().isEmpty()) { 0155 error = i18n(errorLoad, url().fileName()); 0156 } 0157 const QString errorString = reader.errorString(); 0158 if(!errorString.isEmpty()) { 0159 error += QStringLiteral("\n") + errorString; 0160 } 0161 myDebug() << error; 0162 setStatusMessage(error); 0163 return; 0164 } 0165 0166 if(!m_cancelled) { 0167 m_hasImages = reader.hasImages(); 0168 m_coll = reader.collection(); 0169 } 0170 } 0171 0172 void TellicoImporter::loadZipData() { 0173 std::unique_ptr<KZip> zip; 0174 std::unique_ptr<QBuffer> buffer; 0175 if(source() == URL) { 0176 buffer.reset(); 0177 zip.reset(new KZip(fileRef().fileName())); 0178 } else { 0179 QByteArray allData = data(); 0180 buffer.reset(new QBuffer(&allData)); 0181 zip.reset(new KZip(buffer.get())); 0182 } 0183 if(!zip->open(QIODevice::ReadOnly)) { 0184 setStatusMessage(i18n(errorLoad, url().fileName())); 0185 m_format = Error; 0186 return; 0187 } 0188 0189 const KArchiveDirectory* dir = zip->directory(); 0190 if(!dir) { 0191 QString str = i18n(errorLoad, url().fileName()) + QLatin1Char('\n'); 0192 str += i18n("The file is empty."); 0193 setStatusMessage(str); 0194 m_format = Error; 0195 return; 0196 } 0197 0198 // main file was changed from bookcase.xml to tellico.xml as of version 0.13 0199 const KArchiveEntry* entry = dir->entry(QStringLiteral("tellico.xml")); 0200 if(!entry) { 0201 entry = dir->entry(QStringLiteral("bookcase.xml")); 0202 } 0203 if(!entry || !entry->isFile()) { 0204 QString str = i18n(errorLoad, url().fileName()) + QLatin1Char('\n'); 0205 str += i18n("The file contains no collection data."); 0206 setStatusMessage(str); 0207 m_format = Error; 0208 return; 0209 } 0210 0211 const QByteArray xmlData = static_cast<const KArchiveFile*>(entry)->data(); 0212 // hack to account for processEvents and deletion 0213 QPointer<TellicoImporter> thisPtr(this); 0214 loadXMLData(xmlData, false); 0215 if(!thisPtr) { 0216 return; 0217 } 0218 if(!m_coll) { 0219 m_format = Error; 0220 return; 0221 } 0222 0223 if(m_cancelled) { 0224 return; 0225 } 0226 0227 const KArchiveEntry* imgDirEntry = dir->entry(QStringLiteral("images")); 0228 if(!imgDirEntry || !imgDirEntry->isDirectory()) { 0229 return; 0230 } 0231 0232 // past the point of dropping errors, so retain ownership of the objects 0233 m_zip = std::move(zip); 0234 m_buffer = std::move(buffer); 0235 0236 m_imgDir = static_cast<const KArchiveDirectory*>(imgDirEntry); 0237 m_images.clear(); 0238 m_images.add(m_imgDir->entries()); 0239 m_hasImages = !m_images.isEmpty(); 0240 0241 // if all the images are not to be loaded, then we're done 0242 if(!m_loadAllImages) { 0243 // myLog() << "delayed loading for " << m_images.count() << " images"; 0244 return; 0245 } 0246 0247 const QStringList images = m_imgDir->entries(); 0248 const uint stepSize = qMax(s_stepSize, static_cast<uint>(images.count())/100); 0249 0250 uint j = 0; 0251 for(QStringList::ConstIterator it = images.begin(); !m_cancelled && it != images.end(); ++it, ++j) { 0252 const KArchiveEntry* file = m_imgDir->entry(*it); 0253 if(file && file->isFile()) { 0254 ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(), 0255 (*it).section(QLatin1Char('.'), -1).toUpper(), (*it)); 0256 m_images.remove(*it); 0257 } 0258 if(j%stepSize == 0) { 0259 qApp->processEvents(); 0260 } 0261 } 0262 0263 if(thisPtr && m_images.isEmpty()) { 0264 // give it some time 0265 QTimer::singleShot(3000, this, &QObject::deleteLater); 0266 } 0267 } 0268 0269 bool TellicoImporter::hasImages() const { 0270 return m_hasImages; 0271 } 0272 0273 bool TellicoImporter::loadImage(const QString& id_) { 0274 // myLog() << "id = " << id_; 0275 if(m_format != Zip || !m_imgDir) { 0276 return false; 0277 } 0278 const KArchiveEntry* file = m_imgDir->entry(id_); 0279 if(!file || !file->isFile()) { 0280 return false; 0281 } 0282 QString newID = ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(), 0283 id_.section(QLatin1Char('.'), -1).toUpper(), id_); 0284 m_images.remove(id_); 0285 if(m_images.isEmpty()) { 0286 // give it some time 0287 QTimer::singleShot(3000, this, &QObject::deleteLater); 0288 } 0289 return !newID.isEmpty(); 0290 } 0291 0292 std::unique_ptr<KZip> TellicoImporter::takeImages() { 0293 return std::move(m_zip); 0294 } 0295 0296 void TellicoImporter::slotCancel() { 0297 m_cancelled = true; 0298 m_format = Cancel; 0299 } 0300 0301 // static 0302 bool TellicoImporter::loadAllImages(const QUrl& url_) { 0303 // only local files are allowed 0304 if(url_.isEmpty() || !url_.isValid() || !url_.isLocalFile()) { 0305 // myDebug() << "returning"; 0306 return false; 0307 } 0308 0309 // keep track of url for error reporting 0310 static QUrl u; 0311 0312 KZip zip(url_.path()); 0313 if(!zip.open(QIODevice::ReadOnly)) { 0314 if(u != url_) { 0315 GUI::Proxy::sorry(i18n(errorImageLoad, url_.fileName())); 0316 } 0317 u = url_; 0318 return false; 0319 } 0320 0321 const KArchiveDirectory* dir = zip.directory(); 0322 if(!dir) { 0323 if(u != url_) { 0324 GUI::Proxy::sorry(i18n(errorImageLoad, url_.fileName())); 0325 } 0326 u = url_; 0327 return false; 0328 } 0329 0330 const KArchiveEntry* imgDirEntry = dir->entry(QStringLiteral("images")); 0331 if(!imgDirEntry || !imgDirEntry->isDirectory()) { 0332 return false; 0333 } 0334 const QStringList images = static_cast<const KArchiveDirectory*>(imgDirEntry)->entries(); 0335 for(QStringList::ConstIterator it = images.begin(); it != images.end(); ++it) { 0336 const KArchiveEntry* file = static_cast<const KArchiveDirectory*>(imgDirEntry)->entry(*it); 0337 if(file && file->isFile()) { 0338 ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(), 0339 (*it).section(QLatin1Char('.'), -1).toUpper(), (*it)); 0340 } 0341 } 0342 return true; 0343 }