File indexing completed on 2024-05-12 05:10:08
0001 /*************************************************************************** 0002 Copyright (C) 2003-2009 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include <config.h> 0026 #include "bibteximporter.h" 0027 #include "../utils/bibtexhandler.h" 0028 #include "../collections/bibtexcollection.h" 0029 #include "../entry.h" 0030 #include "../fieldformat.h" 0031 #include "../core/filehandler.h" 0032 #include "../tellico_debug.h" 0033 0034 #include <KLocalizedString> 0035 #include <KSharedConfig> 0036 #include <KConfigGroup> 0037 0038 #include <QRegularExpression> 0039 #include <QGroupBox> 0040 #include <QRadioButton> 0041 #include <QTextCodec> 0042 #include <QVBoxLayout> 0043 #include <QButtonGroup> 0044 #include <QFile> 0045 #include <QApplication> 0046 0047 using namespace Tellico; 0048 using Tellico::Import::BibtexImporter; 0049 0050 #ifndef ENABLE_BTPARSE 0051 void bt_cleanup() {} 0052 void bt_initialize() {} 0053 #endif 0054 0055 int BibtexImporter::s_initCount = 0; 0056 0057 BibtexImporter::BibtexImporter(const QList<QUrl>& urls_) : Importer(urls_) 0058 , m_widget(nullptr), m_readUTF8(nullptr), m_readLocale(nullptr), m_cancelled(false) { 0059 init(); 0060 } 0061 0062 BibtexImporter::BibtexImporter(const QString& text_) : Importer(text_) 0063 , m_widget(nullptr), m_readUTF8(nullptr), m_readLocale(nullptr), m_cancelled(false) { 0064 init(); 0065 } 0066 0067 BibtexImporter::~BibtexImporter() { 0068 --s_initCount; 0069 if(s_initCount == 0) { 0070 bt_cleanup(); 0071 } 0072 if(m_readUTF8) { 0073 KConfigGroup config(KSharedConfig::openConfig(), "Import Options"); 0074 config.writeEntry("Bibtex UTF8", m_readUTF8->isChecked()); 0075 } 0076 } 0077 0078 void BibtexImporter::init() { 0079 if(s_initCount == 0) { 0080 bt_initialize(); 0081 } 0082 ++s_initCount; 0083 } 0084 0085 bool BibtexImporter::canImport(int type) const { 0086 return type == Data::Collection::Bibtex; 0087 } 0088 0089 Tellico::Data::CollPtr BibtexImporter::collection() { 0090 if(m_coll) { 0091 return m_coll; 0092 } 0093 0094 emit signalTotalSteps(this, urls().count() * 100); 0095 0096 bool useUTF8 = m_widget && m_readUTF8->isChecked(); 0097 0098 m_coll = new Data::BibtexCollection(true); 0099 0100 int count = 0; 0101 // might be importing text only 0102 if(!text().isEmpty()) { 0103 QString text = this->text(); 0104 Data::CollPtr coll = readCollection(text, count); 0105 if(!coll || coll->entryCount() == 0) { 0106 setStatusMessage(i18n("No valid bibtex entries were found")); 0107 } else { 0108 appendCollection(coll); 0109 } 0110 } 0111 0112 foreach(const QUrl& url, urls()) { 0113 if(m_cancelled) { 0114 return Data::CollPtr(); 0115 } 0116 if(!url.isValid()) { 0117 continue; 0118 } 0119 QString text = FileHandler::readTextFile(url, false, useUTF8); 0120 if(text.isEmpty()) { 0121 continue; 0122 } 0123 Data::CollPtr coll = readCollection(text, count); 0124 if(!coll || coll->entryCount() == 0) { 0125 setStatusMessage(i18n("No valid bibtex entries were found in file - %1", this->url().fileName())); 0126 continue; 0127 } 0128 appendCollection(coll); 0129 } 0130 0131 if(m_cancelled) { 0132 return Data::CollPtr(); 0133 } 0134 0135 return m_coll; 0136 } 0137 0138 Tellico::Data::CollPtr BibtexImporter::readCollection(const QString& text, int urlCount) { 0139 #ifdef ENABLE_BTPARSE 0140 if(text.isEmpty()) { 0141 myDebug() << "no text"; 0142 return Data::CollPtr(); 0143 } 0144 Data::CollPtr ptr(new Data::BibtexCollection(true)); 0145 Data::BibtexCollection* c = static_cast<Data::BibtexCollection*>(ptr.data()); 0146 0147 parseText(text); // populates m_nodes 0148 if(m_cancelled) { 0149 return Data::CollPtr(); 0150 } 0151 0152 if(m_nodes.isEmpty()) { 0153 return Data::CollPtr(); 0154 } 0155 0156 QString str; 0157 const uint count = m_nodes.count(); 0158 const uint stepSize = qMax(s_stepSize, count/100); 0159 const bool showProgress = options() & ImportProgress; 0160 0161 Data::CollPtr currentColl = currentCollection(); 0162 if(!currentColl || currentColl->type() != Data::Collection::Bibtex) { 0163 currentColl = ptr; 0164 } 0165 0166 uint j = 0; 0167 for(int i = 0; !m_cancelled && i < m_nodes.count(); ++i, ++j) { 0168 AST* node = m_nodes[i]; 0169 // if we're parsing a macro string, comment or preamble, skip it for now 0170 if(bt_entry_metatype(node) == BTE_PREAMBLE) { 0171 char* preamble = bt_get_text(node); 0172 if(preamble) { 0173 c->setPreamble(QString::fromUtf8(preamble)); 0174 } 0175 continue; 0176 } 0177 0178 if(bt_entry_metatype(node) == BTE_MACRODEF) { 0179 char* macro; 0180 (void) bt_next_field(node, nullptr, ¯o); 0181 // FIXME: replace macros within macro definitions! 0182 // lookup lowercase macro in map 0183 c->addMacro(m_macros[QString::fromUtf8(macro)], QString::fromUtf8(bt_macro_text(macro, nullptr, 0))); 0184 continue; 0185 } 0186 0187 if(bt_entry_metatype(node) == BTE_COMMENT) { 0188 continue; 0189 } 0190 0191 // now we're parsing a regular entry 0192 Data::EntryPtr entry(new Data::Entry(ptr)); 0193 0194 str = QString::fromUtf8(bt_entry_type(node)); 0195 // myDebug() << "entry type: " << str; 0196 // text is automatically put into lower-case by btparse 0197 Data::BibtexCollection::setFieldValue(entry, QStringLiteral("entry-type"), str, currentColl); 0198 0199 str = QString::fromUtf8(bt_entry_key(node)); 0200 // myDebug() << "entry key: " << str; 0201 Data::BibtexCollection::setFieldValue(entry, QStringLiteral("key"), str, currentColl); 0202 0203 static const QRegularExpression andRx(QLatin1String("\\sand\\s")); 0204 char* name; 0205 AST* field = nullptr; 0206 while((field = bt_next_field(node, field, &name))) { 0207 // myDebug() << "\tfound: " << name; 0208 // str = QLatin1String(bt_get_text(field)); 0209 str.clear(); 0210 AST* value = nullptr; 0211 bt_nodetype type; 0212 char* svalue; 0213 bool end_macro = false; 0214 while((value = bt_next_value(field, value, &type, &svalue))) { 0215 switch(type) { 0216 case BTAST_STRING: 0217 case BTAST_NUMBER: 0218 str += BibtexHandler::importText(svalue).simplified(); 0219 end_macro = false; 0220 break; 0221 case BTAST_MACRO: 0222 str += QString::fromUtf8(svalue) + QLatin1Char('#'); 0223 end_macro = true; 0224 break; 0225 default: 0226 break; 0227 } 0228 } 0229 if(end_macro) { 0230 // remove last character '#' 0231 str.truncate(str.length() - 1); 0232 } 0233 QString fieldName = QString::fromUtf8(name); 0234 if(fieldName == QLatin1String("author") || fieldName == QLatin1String("editor")) { 0235 str.replace(andRx,FieldFormat::delimiterString()); 0236 } 0237 // there's a 'key' field different from the citation key 0238 // https://nwalsh.com/tex/texhelp/bibtx-37.html 0239 // TODO account for this later 0240 if(fieldName == QLatin1String("key")) { 0241 myLog() << "skipping bibtex 'key' field for" << str; 0242 } else { 0243 Data::BibtexCollection::setFieldValue(entry, fieldName, str, currentColl); 0244 } 0245 } 0246 0247 ptr->addEntries(entry); 0248 0249 if(showProgress && j%stepSize == 0) { 0250 emit signalProgress(this, urlCount*100 + 100*j/count); 0251 qApp->processEvents(); 0252 } 0253 } 0254 0255 if(m_cancelled) { 0256 ptr = nullptr; 0257 } 0258 0259 // clean-up 0260 foreach(AST* node, m_nodes) { 0261 bt_free_ast(node); 0262 } 0263 0264 return ptr; 0265 #else 0266 return Data::CollPtr(); 0267 #endif // ENABLE_BTPARSE 0268 } 0269 0270 void BibtexImporter::parseText(const QString& text) { 0271 #ifdef ENABLE_BTPARSE 0272 m_nodes.clear(); 0273 m_macros.clear(); 0274 0275 ushort bt_options = 0; // ushort is defined in btparse.h 0276 boolean ok; // boolean is defined in btparse.h as an int 0277 0278 // for regular nodes (entries), do NOT convert numbers to strings, do NOT expand macros 0279 bt_set_stringopts(BTE_REGULAR, 0); 0280 bt_set_stringopts(BTE_MACRODEF, 0); 0281 // bt_set_stringopts(BTE_PREAMBLE, BTO_CONVERT | BTO_EXPAND); 0282 0283 QString entry; 0284 static const QRegularExpression rx(QLatin1String("[{}]")); 0285 static const QRegularExpression macroName(QLatin1String("@string\\s*\\{\\s*(.*?)="), QRegularExpression::CaseInsensitiveOption); 0286 0287 int line = 1; 0288 bool needsCleanup = false; 0289 int brace = 0; 0290 int startpos = 0; 0291 QRegularExpressionMatch m = rx.match(text); 0292 int pos = m.capturedStart(); 0293 while(pos > 0 && !m_cancelled) { 0294 if(text[pos] == QLatin1Char('{')) { 0295 ++brace; 0296 } else if(text[pos] == QLatin1Char('}') && brace > 0) { 0297 --brace; 0298 } 0299 if(brace == 0) { 0300 entry = text.mid(startpos, pos-startpos+1); 0301 // All the downstream text processing on the AST node will assume utf-8 0302 QByteArray entryText = entry.toUtf8(); 0303 QByteArray filename = QFile::encodeName(url().fileName()); 0304 AST* node = bt_parse_entry_s(entryText.data(), 0305 filename.data(), 0306 line, bt_options, &ok); 0307 if(ok && node) { 0308 QRegularExpressionMatch macroMatch = macroName.match(entry); 0309 if(bt_entry_metatype(node) == BTE_MACRODEF && macroMatch.hasMatch()) { 0310 char* macro; 0311 (void) bt_next_field(node, nullptr, ¯o); 0312 m_macros.insert(QString::fromUtf8(macro), macroMatch.captured(1).trimmed()); 0313 } 0314 m_nodes.append(node); 0315 needsCleanup = true; 0316 } 0317 startpos = pos+1; 0318 line += entry.count(QLatin1Char('\n')); 0319 } 0320 m = rx.match(text, pos+1); 0321 pos = m.capturedStart(); 0322 } 0323 if(needsCleanup) { 0324 // clean up some structures 0325 bt_parse_entry_s(nullptr, nullptr, 1, 0, nullptr); 0326 } 0327 #endif // ENABLE_BTPARSE 0328 } 0329 0330 void BibtexImporter::slotCancel() { 0331 m_cancelled = true; 0332 } 0333 0334 QWidget* BibtexImporter::widget(QWidget* parent_) { 0335 if(m_widget) { 0336 return m_widget; 0337 } 0338 0339 m_widget = new QWidget(parent_); 0340 QVBoxLayout* l = new QVBoxLayout(m_widget); 0341 0342 QGroupBox* gbox = new QGroupBox(i18n("Bibtex Options"), m_widget); 0343 QVBoxLayout* vlay = new QVBoxLayout(gbox); 0344 0345 m_readUTF8 = new QRadioButton(i18n("Use Unicode (UTF-8) encoding"), gbox); 0346 m_readUTF8->setWhatsThis(i18n("Read the imported file in Unicode (UTF-8).")); 0347 QString localStr = i18n("Use user locale (%1) encoding", 0348 QLatin1String(QTextCodec::codecForLocale()->name())); 0349 m_readLocale = new QRadioButton(localStr, gbox); 0350 m_readLocale->setChecked(true); 0351 m_readLocale->setWhatsThis(i18n("Read the imported file in the local encoding.")); 0352 0353 vlay->addWidget(m_readUTF8); 0354 vlay->addWidget(m_readLocale); 0355 0356 QButtonGroup* bg = new QButtonGroup(gbox); 0357 bg->addButton(m_readUTF8); 0358 bg->addButton(m_readLocale); 0359 0360 KConfigGroup config(KSharedConfig::openConfig(), "Import Options"); 0361 bool useUTF8 = config.readEntry("Bibtex UTF8", false); 0362 if(useUTF8) { 0363 m_readUTF8->setChecked(true); 0364 } else { 0365 m_readLocale->setChecked(true); 0366 } 0367 0368 l->addWidget(gbox); 0369 l->addStretch(1); 0370 return m_widget; 0371 } 0372 0373 bool BibtexImporter::maybeBibtex(const QUrl& url_) { 0374 QString text = FileHandler::readTextFile(url_, true /*quiet*/); 0375 if(text.isEmpty()) { 0376 return false; 0377 } 0378 return maybeBibtex(text, url_); 0379 } 0380 0381 bool BibtexImporter::maybeBibtex(const QString& text, const QUrl& url_) { 0382 bool foundOne = false; 0383 #ifdef ENABLE_BTPARSE 0384 bt_initialize(); 0385 static const QRegularExpression rx(QLatin1String("[{}]")); 0386 0387 ushort bt_options = 0; // ushort is defined in btparse.h 0388 boolean ok; // boolean is defined in btparse.h as an int 0389 int brace = 0; 0390 int startpos = 0; 0391 QRegularExpressionMatch m = rx.match(text); 0392 int pos = m.capturedStart(); 0393 while(pos > 0) { 0394 if(text[pos] == QLatin1Char('{')) { 0395 ++brace; 0396 } else if(text[pos] == QLatin1Char('}') && brace > 0) { 0397 --brace; 0398 } 0399 if(brace == 0) { 0400 QString entry = text.mid(startpos, pos-startpos+1).trimmed(); 0401 // All the downstream text processing on the AST node will assume utf-8 0402 AST* node = bt_parse_entry_s(const_cast<char*>(entry.toUtf8().data()), 0403 const_cast<char*>(url_.fileName().toLocal8Bit().data()), 0404 0, bt_options, &ok); 0405 if(ok && node) { 0406 foundOne = true; 0407 break; 0408 } 0409 startpos = pos+1; 0410 } 0411 m = rx.match(text, pos+1); 0412 pos = m.capturedStart(); 0413 } 0414 if(foundOne) { 0415 // clean up some structures 0416 bt_parse_entry_s(nullptr, nullptr, 1, 0, nullptr); 0417 } 0418 bt_cleanup(); 0419 #endif // ENABLE_BTPARSE 0420 return foundOne; 0421 } 0422 0423 void BibtexImporter::appendCollection(Data::CollPtr coll_) { 0424 Data::BibtexCollection* mainColl = static_cast<Data::BibtexCollection*>(m_coll.data()); 0425 Data::BibtexCollection* newColl = static_cast<Data::BibtexCollection*>(coll_.data()); 0426 0427 foreach(Data::FieldPtr field, coll_->fields()) { 0428 m_coll->mergeField(field); 0429 } 0430 0431 mainColl->addEntries(newColl->entries()); 0432 // append the preamble and macro lists 0433 if(!newColl->preamble().isEmpty()) { 0434 QString pre = mainColl->preamble(); 0435 if(!pre.isEmpty()) { 0436 pre += QLatin1Char('\n'); 0437 } 0438 mainColl->setPreamble(pre + newColl->preamble()); 0439 } 0440 StringMap macros = mainColl->macroList(); 0441 #if (QT_VERSION < QT_VERSION_CHECK(5, 15, 0)) 0442 macros.unite(newColl->macroList()); 0443 #else 0444 macros.insert(newColl->macroList()); 0445 #endif 0446 mainColl->setMacroList(macros); 0447 }