// File indexing completed on 2024-05-12 05:10:07
0001 /*************************************************************************** 0002 Copyright (C) 2003-2020 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "alexandriaimporter.h" 0026 #include "../collections/bookcollection.h" 0027 #include "../entry.h" 0028 #include "../field.h" 0029 #include "../fieldformat.h" 0030 #include "../images/imagefactory.h" 0031 #include "../utils/isbnvalidator.h" 0032 #include "../tellico_debug.h" 0033 0034 #include <KComboBox> 0035 #include <KStringHandler> 0036 #include <KLocalizedString> 0037 0038 #include <QLabel> 0039 #include <QGroupBox> 0040 #include <QTextStream> 0041 #include <QByteArray> 0042 #include <QHBoxLayout> 0043 #include <QApplication> 0044 0045 using Tellico::Import::AlexandriaImporter; 0046 0047 bool AlexandriaImporter::canImport(int type) const { 0048 return type == Data::Collection::Book; 0049 } 0050 0051 Tellico::Data::CollPtr AlexandriaImporter::collection() { 0052 QDir dataDir; 0053 if(m_libraryDir.exists() && m_library && m_library->count() > 0) { 0054 dataDir = m_libraryDir; 0055 dataDir.cd(m_library->currentText()); 0056 } else if(!m_libraryPath.isEmpty()) { 0057 dataDir.setPath(m_libraryPath); 0058 } else { 0059 // no widget and no explicit set of the library path means we fail 0060 myWarning() << "no widget and no library path"; 0061 return Data::CollPtr(); 0062 } 0063 // just a sanity check 0064 if(!dataDir.exists()) { 0065 myDebug() << dataDir.path() << "doesn't exist"; 0066 return Data::CollPtr(); 0067 } 0068 0069 dataDir.setFilter(QDir::Files | QDir::Readable | QDir::NoSymLinks); 0070 0071 m_coll = new Data::BookCollection(true); 0072 0073 const QString author = QStringLiteral("author"); 0074 const QString year = QStringLiteral("pub_year"); 0075 const QString binding = QStringLiteral("binding"); 0076 const QString isbn = QStringLiteral("isbn"); 0077 const QString cover = QStringLiteral("cover"); 0078 const QString comments = QStringLiteral("comments"); 0079 0080 // start with yaml files 0081 dataDir.setNameFilters(QStringList() << 
QStringLiteral("*.yaml")); 0082 const QStringList files = dataDir.entryList(); 0083 const uint numFiles = files.count(); 0084 const uint stepSize = qMax(s_stepSize, numFiles/100); 0085 const bool showProgress = options() & ImportProgress; 0086 0087 emit signalTotalSteps(this, numFiles); 0088 0089 QStringList covers; 0090 covers << QStringLiteral(".cover") 0091 << QStringLiteral("_medium.jpg") 0092 << QStringLiteral("_small.jpg"); 0093 0094 static const QRegularExpression begin(QLatin1String("^\\s*-\\s+")); 0095 static const QRegularExpression spaces(QLatin1String("^ +")); 0096 0097 QTextStream ts; 0098 ts.setCodec("UTF-8"); // YAML is always utf8? 0099 uint j = 0; 0100 for(QStringList::ConstIterator it = files.begin(); !m_cancelled && it != files.end(); ++it, ++j) { 0101 QFile file(dataDir.absoluteFilePath(*it)); 0102 if(!file.open(QIODevice::ReadOnly)) { 0103 myLog() << "can't open" << file.fileName(); 0104 continue; 0105 } 0106 Data::EntryPtr entry(new Data::Entry(m_coll)); 0107 0108 bool readNextLine = true; 0109 ts.setDevice(&file); 0110 ts.setCodec("UTF-8"); // YAML is always utf8? 
0111 QString line; 0112 while(!ts.atEnd()) { 0113 if(readNextLine) { 0114 line = ts.readLine(); 0115 } else { 0116 readNextLine = true; 0117 } 0118 // skip the line that starts with --- 0119 if(line.isEmpty() || line.startsWith(QLatin1String("---"))) { 0120 continue; 0121 } 0122 if(line.endsWith(QLatin1Char('\\'))) { 0123 line.truncate(line.length()-1); // remove last character 0124 line += ts.readLine(); 0125 } 0126 0127 cleanLine(line); 0128 QString alexField = line.section(QLatin1Char(':'), 0, 0); 0129 QString alexValue = line.section(QLatin1Char(':'), 1).trimmed(); 0130 clean(alexValue); 0131 0132 // Alexandria uses "n/a" for empty values, and it is translated 0133 // only thing we can do is check for english value and continue 0134 if(alexValue == QLatin1String("n/a") || alexValue == QLatin1String("false")) { 0135 continue; 0136 } 0137 0138 if(alexField == QLatin1String("redd")) { 0139 alexField = QStringLiteral("read"); 0140 } 0141 0142 if(alexField == QLatin1String("authors")) { 0143 QStringList authors; 0144 line = ts.readLine(); 0145 while(!line.isNull() && line.indexOf(begin) > -1) { 0146 line.remove(begin); 0147 authors += clean(line); 0148 line = ts.readLine(); 0149 } 0150 entry->setField(author, authors.join(FieldFormat::delimiterString())); 0151 // the next line has already been read 0152 readNextLine = false; 0153 0154 // Alexandria calls the edition the binding 0155 } else if(alexField == QLatin1String("edition")) { 0156 // special case if it's "Hardcover" 0157 if(alexValue.toLower() == QLatin1String("hardcover")) { 0158 alexValue = i18n("Hardback"); 0159 } 0160 entry->setField(binding, alexValue); 0161 0162 } else if(alexField == QLatin1String("publishing_year")) { 0163 entry->setField(year, alexValue); 0164 0165 } else if(alexField == QLatin1String("isbn")) { 0166 const ISBNValidator val; 0167 val.fixup(alexValue); 0168 entry->setField(isbn, alexValue); 0169 0170 // now find cover image 0171 alexValue.remove(QLatin1Char('-')); 0172 
for(QStringList::Iterator ext = covers.begin(); ext != covers.end(); ++ext) { 0173 QUrl u = QUrl::fromLocalFile(dataDir.absoluteFilePath(alexValue + *ext)); 0174 if(!QFile::exists(u.path())) { 0175 continue; 0176 } 0177 QString id = ImageFactory::addImage(u, true); 0178 if(!id.isEmpty()) { 0179 entry->setField(cover, id); 0180 break; 0181 } 0182 } 0183 } else if(alexField == QLatin1String("notes")) { 0184 if(alexValue.startsWith(QLatin1Char('|'))) { 0185 line = ts.readLine(); 0186 QRegularExpressionMatch m = spaces.match(line); 0187 if(m.hasMatch()) { 0188 alexValue.clear(); 0189 const int spaceCount = m.capturedLength(); 0190 QRegularExpression begin(QStringLiteral("^ {%1,%2}").arg(spaceCount).arg(spaceCount)); 0191 while(!line.isNull() && line.indexOf(begin) > -1) { 0192 line.remove(begin); 0193 alexValue += clean(line) + QLatin1Char('\n'); 0194 line = ts.readLine(); 0195 } 0196 alexValue.chop(1); // remove last newline char 0197 alexValue.replace(QLatin1Char('\n'), QLatin1String("<br/>")); 0198 } 0199 readNextLine = false; 0200 } 0201 0202 entry->setField(comments, alexValue); 0203 0204 // now try by name then title 0205 } else if(m_coll->fieldByName(alexField)) { 0206 entry->setField(alexField, alexValue); 0207 0208 } else if(m_coll->fieldByTitle(alexField)) { 0209 entry->setField(m_coll->fieldByTitle(alexField), alexValue); 0210 } 0211 } 0212 m_coll->addEntries(entry); 0213 0214 if(showProgress && j%stepSize == 0) { 0215 emit signalProgress(this, j); 0216 qApp->processEvents(); 0217 } 0218 } 0219 0220 return m_coll; 0221 } 0222 0223 QWidget* AlexandriaImporter::widget(QWidget* parent_) { 0224 if(m_widget) { 0225 return m_widget; 0226 } 0227 0228 m_libraryDir = QDir::home(); 0229 m_libraryDir.setFilter(QDir::Dirs | QDir::Readable | QDir::NoSymLinks | QDir::NoDotAndDotDot); 0230 0231 m_widget = new QWidget(parent_); 0232 QVBoxLayout* l = new QVBoxLayout(m_widget); 0233 0234 QGroupBox* gbox = new QGroupBox(i18n("Alexandria Options"), m_widget); 0235 QHBoxLayout* 
hlay = new QHBoxLayout(gbox); 0236 0237 QLabel* label = new QLabel(i18n("&Library:"), gbox); 0238 m_library = new KComboBox(gbox); 0239 label->setBuddy(m_library); 0240 0241 hlay->addWidget(label); 0242 hlay->addWidget(m_library); 0243 0244 // .alexandria might not exist 0245 if(m_libraryDir.cd(QStringLiteral(".alexandria"))) { 0246 m_library->addItems(m_libraryDir.entryList()); 0247 } 0248 0249 l->addWidget(gbox); 0250 l->addStretch(1); 0251 0252 // now that we set a widget, it should override library path 0253 m_libraryPath.clear(); 0254 0255 return m_widget; 0256 } 0257 0258 QString& AlexandriaImporter::cleanLine(QString& str_) { 0259 static const QRegularExpression escRx(QLatin1String("\\\\x(\\w\\w)"), QRegularExpression::CaseInsensitiveOption); 0260 str_.remove(QStringLiteral("\\r")); 0261 str_.replace(QLatin1String("\\n"), QLatin1String("\n")); 0262 str_.replace(QLatin1String("\\t"), QLatin1String("\t")); 0263 0264 // YAML uses escape sequences like \xC3 0265 QRegularExpressionMatch m = escRx.match(str_); 0266 int pos = m.capturedStart(); 0267 int origPos = pos; 0268 QByteArray bytes; 0269 while(pos > -1) { 0270 bool ok; 0271 char c = static_cast<char>(m.captured(1).toInt(&ok, 16)); 0272 if(ok) { 0273 bytes += c; 0274 } else { 0275 bytes.clear(); 0276 break; 0277 } 0278 m = escRx.match(str_, pos+1); 0279 pos = m.capturedStart(); 0280 } 0281 if(!bytes.isEmpty()) { 0282 str_.replace(origPos, bytes.length()*4, QString::fromUtf8(bytes.data())); 0283 } 0284 return str_; 0285 } 0286 0287 QString& AlexandriaImporter::clean(QString& str_) { 0288 static const QRegularExpression quote(QLatin1String("\\\\\"")); // equals \" 0289 static const QRegularExpression yamlTags(QLatin1String("^![^\\s]*\\s+")); 0290 if(str_.startsWith(QLatin1Char('\'')) || str_.startsWith(QLatin1Char('"'))) { 0291 str_.remove(0, 1); 0292 } 0293 if(str_.endsWith(QLatin1Char('\'')) || str_.endsWith(QLatin1Char('"'))) { 0294 str_.truncate(str_.length()-1); 0295 } 0296 // we ignore YAML tags, this is 
not actually a good parser, but will do for now 0297 str_.remove(yamlTags); 0298 return str_.replace(quote, QStringLiteral("\"")); 0299 } 0300 0301 void AlexandriaImporter::slotCancel() { 0302 m_cancelled = true; 0303 }