File indexing completed on 2024-05-12 05:10:07

0001 /***************************************************************************
0002     Copyright (C) 2003-2020 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "alexandriaimporter.h"
0026 #include "../collections/bookcollection.h"
0027 #include "../entry.h"
0028 #include "../field.h"
0029 #include "../fieldformat.h"
0030 #include "../images/imagefactory.h"
0031 #include "../utils/isbnvalidator.h"
0032 #include "../tellico_debug.h"
0033 
0034 #include <KComboBox>
0035 #include <KStringHandler>
0036 #include <KLocalizedString>
0037 
0038 #include <QLabel>
0039 #include <QGroupBox>
0040 #include <QTextStream>
0041 #include <QByteArray>
0042 #include <QHBoxLayout>
0043 #include <QApplication>
0044 
0045 using Tellico::Import::AlexandriaImporter;
0046 
0047 bool AlexandriaImporter::canImport(int type) const {
0048   return type == Data::Collection::Book;
0049 }
0050 
0051 Tellico::Data::CollPtr AlexandriaImporter::collection() {
0052   QDir dataDir;
0053   if(m_libraryDir.exists() && m_library && m_library->count() > 0) {
0054     dataDir = m_libraryDir;
0055     dataDir.cd(m_library->currentText());
0056   } else if(!m_libraryPath.isEmpty()) {
0057     dataDir.setPath(m_libraryPath);
0058   } else {
0059     // no widget and no explicit set of the library path means we fail
0060     myWarning() << "no widget and no library path";
0061     return Data::CollPtr();
0062   }
0063   // just a sanity check
0064   if(!dataDir.exists()) {
0065     myDebug() << dataDir.path() << "doesn't exist";
0066     return Data::CollPtr();
0067   }
0068 
0069   dataDir.setFilter(QDir::Files | QDir::Readable | QDir::NoSymLinks);
0070 
0071   m_coll = new Data::BookCollection(true);
0072 
0073   const QString author = QStringLiteral("author");
0074   const QString year = QStringLiteral("pub_year");
0075   const QString binding = QStringLiteral("binding");
0076   const QString isbn = QStringLiteral("isbn");
0077   const QString cover = QStringLiteral("cover");
0078   const QString comments = QStringLiteral("comments");
0079 
0080   // start with yaml files
0081   dataDir.setNameFilters(QStringList() << QStringLiteral("*.yaml"));
0082   const QStringList files = dataDir.entryList();
0083   const uint numFiles = files.count();
0084   const uint stepSize = qMax(s_stepSize, numFiles/100);
0085   const bool showProgress = options() & ImportProgress;
0086 
0087   emit signalTotalSteps(this, numFiles);
0088 
0089   QStringList covers;
0090   covers << QStringLiteral(".cover")
0091          << QStringLiteral("_medium.jpg")
0092          << QStringLiteral("_small.jpg");
0093 
0094   static const QRegularExpression begin(QLatin1String("^\\s*-\\s+"));
0095   static const QRegularExpression spaces(QLatin1String("^ +"));
0096 
0097   QTextStream ts;
0098   ts.setCodec("UTF-8"); // YAML is always utf8?
0099   uint j = 0;
0100   for(QStringList::ConstIterator it = files.begin(); !m_cancelled && it != files.end(); ++it, ++j) {
0101     QFile file(dataDir.absoluteFilePath(*it));
0102     if(!file.open(QIODevice::ReadOnly)) {
0103       myLog() << "can't open" << file.fileName();
0104       continue;
0105     }
0106     Data::EntryPtr entry(new Data::Entry(m_coll));
0107 
0108     bool readNextLine = true;
0109     ts.setDevice(&file);
0110     ts.setCodec("UTF-8"); // YAML is always utf8?
0111     QString line;
0112     while(!ts.atEnd()) {
0113       if(readNextLine) {
0114         line = ts.readLine();
0115       } else {
0116         readNextLine = true;
0117       }
0118       // skip the line that starts with ---
0119       if(line.isEmpty() || line.startsWith(QLatin1String("---"))) {
0120         continue;
0121       }
0122       if(line.endsWith(QLatin1Char('\\'))) {
0123         line.truncate(line.length()-1); // remove last character
0124         line += ts.readLine();
0125       }
0126 
0127       cleanLine(line);
0128       QString alexField = line.section(QLatin1Char(':'), 0, 0);
0129       QString alexValue = line.section(QLatin1Char(':'), 1).trimmed();
0130       clean(alexValue);
0131 
0132       // Alexandria uses "n/a" for empty values, and it is translated
0133       // only thing we can do is check for english value and continue
0134       if(alexValue == QLatin1String("n/a") || alexValue == QLatin1String("false")) {
0135         continue;
0136       }
0137 
0138       if(alexField == QLatin1String("redd"))  {
0139         alexField = QStringLiteral("read");
0140       }
0141 
0142       if(alexField == QLatin1String("authors")) {
0143         QStringList authors;
0144         line = ts.readLine();
0145         while(!line.isNull() && line.indexOf(begin) > -1) {
0146           line.remove(begin);
0147           authors += clean(line);
0148           line = ts.readLine();
0149         }
0150         entry->setField(author, authors.join(FieldFormat::delimiterString()));
0151         // the next line has already been read
0152         readNextLine = false;
0153 
0154         // Alexandria calls the edition the binding
0155       } else if(alexField == QLatin1String("edition")) {
0156         // special case if it's "Hardcover"
0157         if(alexValue.toLower() == QLatin1String("hardcover")) {
0158           alexValue = i18n("Hardback");
0159         }
0160         entry->setField(binding, alexValue);
0161 
0162       } else if(alexField == QLatin1String("publishing_year")) {
0163         entry->setField(year, alexValue);
0164 
0165       } else if(alexField == QLatin1String("isbn")) {
0166         const ISBNValidator val;
0167         val.fixup(alexValue);
0168         entry->setField(isbn, alexValue);
0169 
0170         // now find cover image
0171         alexValue.remove(QLatin1Char('-'));
0172         for(QStringList::Iterator ext = covers.begin(); ext != covers.end(); ++ext) {
0173           QUrl u = QUrl::fromLocalFile(dataDir.absoluteFilePath(alexValue + *ext));
0174           if(!QFile::exists(u.path())) {
0175             continue;
0176           }
0177           QString id = ImageFactory::addImage(u, true);
0178           if(!id.isEmpty()) {
0179             entry->setField(cover, id);
0180             break;
0181           }
0182         }
0183       } else if(alexField == QLatin1String("notes")) {
0184         if(alexValue.startsWith(QLatin1Char('|'))) {
0185           line = ts.readLine();
0186           QRegularExpressionMatch m = spaces.match(line);
0187           if(m.hasMatch()) {
0188             alexValue.clear();
0189             const int spaceCount = m.capturedLength();
0190             QRegularExpression begin(QStringLiteral("^ {%1,%2}").arg(spaceCount).arg(spaceCount));
0191             while(!line.isNull() && line.indexOf(begin) > -1) {
0192               line.remove(begin);
0193               alexValue += clean(line) + QLatin1Char('\n');
0194               line = ts.readLine();
0195             }
0196             alexValue.chop(1); // remove last newline char
0197             alexValue.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
0198           }
0199           readNextLine = false;
0200         }
0201 
0202         entry->setField(comments, alexValue);
0203 
0204       // now try by name then title
0205       } else if(m_coll->fieldByName(alexField)) {
0206         entry->setField(alexField, alexValue);
0207 
0208       } else if(m_coll->fieldByTitle(alexField)) {
0209         entry->setField(m_coll->fieldByTitle(alexField), alexValue);
0210       }
0211     }
0212     m_coll->addEntries(entry);
0213 
0214     if(showProgress && j%stepSize == 0) {
0215       emit signalProgress(this, j);
0216       qApp->processEvents();
0217     }
0218   }
0219 
0220   return m_coll;
0221 }
0222 
0223 QWidget* AlexandriaImporter::widget(QWidget* parent_) {
0224   if(m_widget) {
0225     return m_widget;
0226   }
0227 
0228   m_libraryDir = QDir::home();
0229   m_libraryDir.setFilter(QDir::Dirs | QDir::Readable | QDir::NoSymLinks | QDir::NoDotAndDotDot);
0230 
0231   m_widget = new QWidget(parent_);
0232   QVBoxLayout* l = new QVBoxLayout(m_widget);
0233 
0234   QGroupBox* gbox = new QGroupBox(i18n("Alexandria Options"), m_widget);
0235   QHBoxLayout* hlay = new QHBoxLayout(gbox);
0236 
0237   QLabel* label = new QLabel(i18n("&Library:"), gbox);
0238   m_library = new KComboBox(gbox);
0239   label->setBuddy(m_library);
0240 
0241   hlay->addWidget(label);
0242   hlay->addWidget(m_library);
0243 
0244   // .alexandria might not exist
0245   if(m_libraryDir.cd(QStringLiteral(".alexandria"))) {
0246     m_library->addItems(m_libraryDir.entryList());
0247   }
0248 
0249   l->addWidget(gbox);
0250   l->addStretch(1);
0251 
0252   // now that we set a widget, it should override library path
0253   m_libraryPath.clear();
0254 
0255   return m_widget;
0256 }
0257 
0258 QString& AlexandriaImporter::cleanLine(QString& str_) {
0259   static const QRegularExpression escRx(QLatin1String("\\\\x(\\w\\w)"), QRegularExpression::CaseInsensitiveOption);
0260   str_.remove(QStringLiteral("\\r"));
0261   str_.replace(QLatin1String("\\n"), QLatin1String("\n"));
0262   str_.replace(QLatin1String("\\t"), QLatin1String("\t"));
0263 
0264   // YAML uses escape sequences like \xC3
0265   QRegularExpressionMatch m = escRx.match(str_);
0266   int pos = m.capturedStart();
0267   int origPos = pos;
0268   QByteArray bytes;
0269   while(pos > -1) {
0270     bool ok;
0271     char c = static_cast<char>(m.captured(1).toInt(&ok, 16));
0272     if(ok) {
0273       bytes += c;
0274     } else {
0275       bytes.clear();
0276       break;
0277     }
0278     m = escRx.match(str_, pos+1);
0279     pos = m.capturedStart();
0280   }
0281   if(!bytes.isEmpty()) {
0282     str_.replace(origPos, bytes.length()*4, QString::fromUtf8(bytes.data()));
0283   }
0284   return str_;
0285 }
0286 
0287 QString& AlexandriaImporter::clean(QString& str_) {
0288   static const QRegularExpression quote(QLatin1String("\\\\\"")); // equals \"
0289   static const QRegularExpression yamlTags(QLatin1String("^![^\\s]*\\s+"));
0290   if(str_.startsWith(QLatin1Char('\'')) || str_.startsWith(QLatin1Char('"'))) {
0291     str_.remove(0, 1);
0292   }
0293   if(str_.endsWith(QLatin1Char('\'')) || str_.endsWith(QLatin1Char('"'))) {
0294     str_.truncate(str_.length()-1);
0295   }
0296   // we ignore YAML tags, this is not actually a good parser, but will do for now
0297   str_.remove(yamlTags);
0298   return str_.replace(quote, QStringLiteral("\""));
0299 }
0300 
0301 void AlexandriaImporter::slotCancel() {
0302   m_cancelled = true;
0303 }