File indexing completed on 2024-05-12 16:46:36

0001 /***************************************************************************
0002     Copyright (C) 2003-2009 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "bibtexhandler.h"
0026 #include "../collections/bibtexcollection.h"
0027 #include "../entry.h"
0028 #include "../field.h"
0029 #include "../core/filehandler.h"
0030 #include "../utils/datafileregistry.h"
0031 #include "../tellico_debug.h"
0032 
0033 #include <QUrl>
0034 
0035 #include <QDomDocument>
0036 
0037 // don't add braces around capital letters by default
0038 #define TELLICO_BIBTEX_BRACES 0
0039 
0040 using Tellico::BibtexHandler;
0041 
// Translation table filled lazily by loadTranslationMaps(): each key is the
// "char" attribute of a <key> element in bibtex-translation.xml and each value
// lists the texts of its <string> children (the LaTeX spellings of that character).
BibtexHandler::StringListHash BibtexHandler::s_utf8LatexMap;
// Delimiter style exportText() wraps around exported values; braces by default.
BibtexHandler::QuoteStyle BibtexHandler::s_quoteStyle = BibtexHandler::BRACES;
// Matches every character that is stripped from a generated bibtex key,
// i.e. anything other than an ASCII digit, an ASCII letter, or '-'.
const QRegularExpression BibtexHandler::s_badKeyChars(QLatin1String("[^0-9a-zA-Z-]"));
0045 
0046 QStringList BibtexHandler::bibtexKeys(const Tellico::Data::EntryList& entries_) {
0047   QStringList keys;
0048   foreach(Data::EntryPtr entry, entries_) {
0049     QString s = bibtexKey(entry);
0050     if(!s.isEmpty()) {
0051       keys << s;
0052     }
0053   }
0054   return keys;
0055 }
0056 
0057 QString BibtexHandler::bibtexKey(Tellico::Data::EntryPtr entry_) {
0058   if(!entry_ || !entry_->collection() || entry_->collection()->type() != Data::Collection::Bibtex) {
0059     return QString();
0060   }
0061 
0062   const Data::BibtexCollection* c = static_cast<const Data::BibtexCollection*>(entry_->collection().data());
0063   Data::FieldPtr f = c->fieldByBibtexName(QStringLiteral("key"));
0064   if(f) {
0065     const QString key = entry_->field(f);
0066     if(!key.isEmpty()) {
0067       return key;
0068     }
0069   }
0070 
0071   QString author;
0072   Data::FieldPtr authorField = c->fieldByBibtexName(QStringLiteral("author"));
0073   if(authorField) {
0074     if(authorField->hasFlag(Data::Field::AllowMultiple)) {
0075       // grab first author only;
0076       QString tmp = entry_->field(authorField);
0077       author = tmp.section(QLatin1Char(';'), 0, 0);
0078     } else {
0079       author = entry_->field(authorField);
0080     }
0081   }
0082 
0083   Data::FieldPtr titleField = c->fieldByBibtexName(QStringLiteral("title"));
0084   QString title;
0085   if(titleField) {
0086     title = entry_->field(titleField);
0087   }
0088 
0089   Data::FieldPtr yearField = c->fieldByBibtexName(QStringLiteral("year"));
0090   QString year;
0091   if(yearField) {
0092     year = entry_->field(yearField);
0093   }
0094   if(year.isEmpty()) {
0095     year = entry_->field(QStringLiteral("pub_year"));
0096     if(year.isEmpty()) {
0097       year = entry_->field(QStringLiteral("cr_year"));
0098     }
0099   }
0100   year = year.section(QLatin1Char(';'), 0, 0);
0101 
0102   return bibtexKey(author, title, year);
0103 }
0104 
0105 QString BibtexHandler::bibtexKey(const QString& author_, const QString& title_, const QString& year_) {
0106   QString key;
0107   // if no comma, take the last word
0108   if(!author_.isEmpty()) {
0109     if(author_.indexOf(QLatin1Char(',')) == -1) {
0110       key += author_.section(QLatin1Char(' '), -1).toLower() + QLatin1Char('-');
0111     } else {
0112       // if there is a comma, take the string up to the first comma
0113       key += author_.section(QLatin1Char(','), 0, 0).toLower() + QLatin1Char('-');
0114     }
0115   }
0116 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0117   QStringList words = title_.split(QLatin1Char(' '), QString::SkipEmptyParts);
0118 #else
0119   QStringList words = title_.split(QLatin1Char(' '), Qt::SkipEmptyParts);
0120 #endif
0121   foreach(const QString& word, words) {
0122     key += word.at(0).toLower();
0123   }
0124   key += year_;
0125   // bibtex key may only contain [0-9a-zA-Z-]
0126   return key.remove(s_badKeyChars);
0127 }
0128 
0129 void BibtexHandler::loadTranslationMaps() {
0130   QString mapfile = DataFileRegistry::self()->locate(QStringLiteral("bibtex-translation.xml"));
0131   if(mapfile.isEmpty()) {
0132     static bool showMsg = true;
0133     if(showMsg) {
0134       myWarning() << "bibtex-translation.xml not found";
0135       showMsg = false;
0136     }
0137     return;
0138   }
0139 
0140   QUrl u = QUrl::fromLocalFile(mapfile);
0141   // no namespace processing
0142   QDomDocument dom = FileHandler::readXMLDocument(u, false);
0143 
0144   QDomNodeList keyList = dom.elementsByTagName(QStringLiteral("key"));
0145 
0146   for(int i = 0; i < keyList.count(); ++i) {
0147     QDomNodeList strList = keyList.item(i).toElement().elementsByTagName(QStringLiteral("string"));
0148     // the strList might have more than one node since there are multiple ways
0149     // to represent a character in LaTex.
0150     QString s = keyList.item(i).toElement().attribute(QStringLiteral("char"));
0151     for(int j = 0; j < strList.count(); ++j) {
0152       s_utf8LatexMap[s].append(strList.item(j).toElement().text());
0153 //      myDebug() << s << " = " << strList.item(j).toElement().text();
0154     }
0155   }
0156 }
0157 
0158 QString BibtexHandler::importText(char* text_) {
0159   QString str = QString::fromUtf8(text_);
0160 
0161   if(s_utf8LatexMap.isEmpty()) {
0162     loadTranslationMaps();
0163   }
0164 
0165   for(StringListHash::ConstIterator it = s_utf8LatexMap.constBegin(); it != s_utf8LatexMap.constEnd(); ++it) {
0166     foreach(const QString& word, it.value()) {
0167       str.replace(word, it.key());
0168     }
0169   }
0170 
0171   // now replace capitalized letters, such as {X}
0172   // but since we don't want to turn "... X" into "... {X}" later when exporting
0173   // we need to lower-case any capitalized text after the first letter that is
0174   // NOT contained in braces
0175 
0176   QRegularExpression rx(QStringLiteral("\\{([A-Z]+?)\\}"));
0177   str.replace(rx, QStringLiteral("\\1"));
0178 
0179   return str;
0180 }
0181 
0182 QString BibtexHandler::exportText(const QString& text_, const QStringList& macros_) {
0183   if(s_utf8LatexMap.isEmpty()) {
0184     loadTranslationMaps();
0185   }
0186 
0187   QChar lquote, rquote;
0188   switch(s_quoteStyle) {
0189     case BRACES:
0190       lquote = QLatin1Char('{');
0191       rquote = QLatin1Char('}');
0192       break;
0193     case QUOTES:
0194       lquote =  QLatin1Char('"');
0195       rquote =  QLatin1Char('"');
0196       break;
0197   }
0198 
0199   QString text = text_;
0200 
0201   for(StringListHash::ConstIterator it = s_utf8LatexMap.constBegin(); it != s_utf8LatexMap.constEnd(); ++it) {
0202     text.replace(it.key(), it.value()[0]);
0203   }
0204 
0205   if(macros_.isEmpty()) {
0206     return lquote + addBraces(text) + rquote;
0207   }
0208 
0209 // Now, split the text by the character QLatin1Char('#'), and examine each token to see if it is in
0210 // the macro list. If it is not, then add left-quote and right-quote around it. If it is, don't
0211 // change it. Then, in case QLatin1Char('#') occurs in a non-macro string, replace any occurrences of '}#{' with '#'
0212 
0213 // list of new tokens
0214   QStringList list;
0215 
0216 // first, split the text
0217 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0218   const QStringList tokens = text.split(QLatin1Char('#'), QString::KeepEmptyParts);
0219 #else
0220   const QStringList tokens = text.split(QLatin1Char('#'), Qt::KeepEmptyParts);
0221 #endif
0222   foreach(const QString& token, tokens) {
0223     // check to see if token is a macro
0224     if(macros_.indexOf(token.trimmed()) == -1) {
0225       // the token is NOT a macro, add braces around whole words and also around capitals
0226       list << lquote + addBraces(token) + rquote;
0227     } else {
0228       list << token;
0229     }
0230   }
0231 
0232   const QChar octo = QLatin1Char('#');
0233   text = list.join(octo);
0234   text.replace(QString(rquote)+octo+lquote, octo);
0235 
0236   return text;
0237 }
0238 
0239 QString& BibtexHandler::cleanText(QString& text_) {
0240   // FIXME: need to improve this for removing all Latex entities
0241 //  QRegularExpression rx(QLatin1String("(?=[^\\\\])\\\\.+\\{"));
0242   static const QRegularExpression rx(QLatin1String("\\\\.+?\\{"));
0243   static const QRegularExpression brackets(QLatin1String("[{}]"));
0244   text_.remove(rx);
0245   text_.remove(brackets);
0246   return text_;
0247 }
0248 
0249 // add braces around capital letters
0250 QString BibtexHandler::addBraces(const QString& text_) {
0251   QString text = text_;
0252 #if !TELLICO_BIBTEX_BRACES
0253   return text;
0254 #else
0255   int inside = 0;
0256   uint l = text.length();
0257   // start at first letter, but skip if only the first is capitalized
0258   for(uint i = 0; i < l; ++i) {
0259     const QChar c = text.at(i);
0260     if(inside == 0 && c >= 'A' && c <= 'Z') {
0261       uint j = i+1;
0262       while(text.at(j) >= 'A' && text.at(j) <= 'Z' && j < l) {
0263         ++j;
0264       }
0265       if(i == 0 && j == 1) {
0266         continue; // no need to do anything to first letter
0267       }
0268       text.insert(i, '{');
0269       // now j should be incremented
0270       text.insert(j+1, '}');
0271       i = j+1;
0272       l += 2; // the length changed
0273     } else if(c == '{') {
0274       ++inside;
0275     } else if(c == '}') {
0276       --inside;
0277     }
0278   }
0279   return text;
0280 #endif
0281 }