File indexing completed on 2024-05-12 05:10:15
0001 /*************************************************************************** 0002 Copyright (C) 2003-2009 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "bibtexhandler.h" 0026 #include "../collections/bibtexcollection.h" 0027 #include "../entry.h" 0028 #include "../field.h" 0029 #include "../core/filehandler.h" 0030 #include "../utils/datafileregistry.h" 0031 #include "../tellico_debug.h" 0032 0033 #include <QUrl> 0034 0035 #include <QDomDocument> 0036 0037 // don't add braces around capital letters by default 0038 #define TELLICO_BIBTEX_BRACES 0 0039 0040 using Tellico::BibtexHandler; 0041 0042 BibtexHandler::StringListHash BibtexHandler::s_utf8LatexMap; 0043 BibtexHandler::QuoteStyle BibtexHandler::s_quoteStyle = BibtexHandler::BRACES; 0044 const QRegularExpression BibtexHandler::s_badKeyChars(QLatin1String("[^0-9a-zA-Z-]")); 0045 0046 QStringList BibtexHandler::bibtexKeys(const Tellico::Data::EntryList& entries_) { 0047 QStringList keys; 0048 foreach(Data::EntryPtr entry, entries_) { 0049 QString s = bibtexKey(entry); 0050 if(!s.isEmpty()) { 0051 keys << s; 0052 } 0053 } 0054 return keys; 0055 } 0056 0057 QString BibtexHandler::bibtexKey(Tellico::Data::EntryPtr entry_) { 0058 if(!entry_ || !entry_->collection() || entry_->collection()->type() != Data::Collection::Bibtex) { 0059 return QString(); 0060 } 0061 0062 const Data::BibtexCollection* c = static_cast<const Data::BibtexCollection*>(entry_->collection().data()); 0063 Data::FieldPtr f = c->fieldByBibtexName(QStringLiteral("key")); 0064 if(f) { 0065 const QString key = entry_->field(f); 0066 if(!key.isEmpty()) { 0067 return key; 0068 } 0069 } 0070 0071 QString author; 0072 Data::FieldPtr authorField = c->fieldByBibtexName(QStringLiteral("author")); 0073 if(authorField) { 0074 if(authorField->hasFlag(Data::Field::AllowMultiple)) { 0075 // grab first author only; 0076 QString tmp = entry_->field(authorField); 0077 author = tmp.section(QLatin1Char(';'), 0, 0); 0078 } else { 0079 author = entry_->field(authorField); 0080 } 0081 } 0082 0083 Data::FieldPtr titleField = c->fieldByBibtexName(QStringLiteral("title")); 0084 QString title; 0085 if(titleField) { 0086 title = entry_->field(titleField); 0087 } 0088 0089 Data::FieldPtr yearField = c->fieldByBibtexName(QStringLiteral("year")); 0090 QString year; 0091 if(yearField) { 0092 year = entry_->field(yearField); 0093 } 0094 if(year.isEmpty()) { 0095 year = entry_->field(QStringLiteral("pub_year")); 0096 if(year.isEmpty()) { 0097 year = entry_->field(QStringLiteral("cr_year")); 0098 } 0099 } 0100 year = year.section(QLatin1Char(';'), 0, 0); 0101 0102 return bibtexKey(author, title, year); 0103 } 0104 0105 QString BibtexHandler::bibtexKey(const QString& author_, const QString& title_, const QString& year_) { 0106 QString key; 0107 // if no comma, take the last word 0108 if(!author_.isEmpty()) { 0109 if(author_.indexOf(QLatin1Char(',')) == -1) { 0110 key += author_.section(QLatin1Char(' '), -1).toLower() + QLatin1Char('-'); 0111 } else { 0112 // if there is a comma, take the string up to the first comma 0113 key += author_.section(QLatin1Char(','), 0, 0).toLower() + QLatin1Char('-'); 0114 } 0115 } 0116 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0117 QStringList words = title_.split(QLatin1Char(' '), QString::SkipEmptyParts); 0118 #else 0119 QStringList words = title_.split(QLatin1Char(' '), Qt::SkipEmptyParts); 0120 #endif 0121 foreach(const QString& word, words) { 0122 key += word.at(0).toLower(); 0123 } 0124 key += year_; 0125 // bibtex key may only contain [0-9a-zA-Z-] 0126 return key.remove(s_badKeyChars); 0127 } 0128 0129 void BibtexHandler::loadTranslationMaps() { 0130 QString mapfile = DataFileRegistry::self()->locate(QStringLiteral("bibtex-translation.xml")); 0131 if(mapfile.isEmpty()) { 0132 static bool showMsg = true; 0133 if(showMsg) { 0134 myWarning() << "bibtex-translation.xml not found"; 0135 showMsg = false; 0136 } 0137 return; 0138 } 0139 0140 QUrl u = QUrl::fromLocalFile(mapfile); 0141 // no namespace processing 0142 QDomDocument dom = FileHandler::readXMLDocument(u, false); 0143 0144 QDomNodeList keyList = dom.elementsByTagName(QStringLiteral("key")); 0145 0146 for(int i = 0; i < keyList.count(); ++i) { 0147 QDomNodeList strList = keyList.item(i).toElement().elementsByTagName(QStringLiteral("string")); 0148 // the strList might have more than one node since there are multiple ways 0149 // to represent a character in LaTex. 0150 QString s = keyList.item(i).toElement().attribute(QStringLiteral("char")); 0151 for(int j = 0; j < strList.count(); ++j) { 0152 s_utf8LatexMap[s].append(strList.item(j).toElement().text()); 0153 // myDebug() << s << " = " << strList.item(j).toElement().text(); 0154 } 0155 } 0156 } 0157 0158 QString BibtexHandler::importText(char* text_) { 0159 QString str = QString::fromUtf8(text_); 0160 0161 if(s_utf8LatexMap.isEmpty()) { 0162 loadTranslationMaps(); 0163 } 0164 0165 for(StringListHash::ConstIterator it = s_utf8LatexMap.constBegin(); it != s_utf8LatexMap.constEnd(); ++it) { 0166 foreach(const QString& word, it.value()) { 0167 str.replace(word, it.key()); 0168 } 0169 } 0170 0171 // now replace capitalized letters, such as {X} 0172 // but since we don't want to turn "... X" into "... {X}" later when exporting 0173 // we need to lower-case any capitalized text after the first letter that is 0174 // NOT contained in braces 0175 0176 QRegularExpression rx(QStringLiteral("\\{([A-Z]+?)\\}")); 0177 str.replace(rx, QStringLiteral("\\1")); 0178 0179 return str; 0180 } 0181 0182 QString BibtexHandler::exportText(const QString& text_, const QStringList& macros_) { 0183 if(s_utf8LatexMap.isEmpty()) { 0184 loadTranslationMaps(); 0185 } 0186 0187 QChar lquote, rquote; 0188 switch(s_quoteStyle) { 0189 case BRACES: 0190 lquote = QLatin1Char('{'); 0191 rquote = QLatin1Char('}'); 0192 break; 0193 case QUOTES: 0194 lquote = QLatin1Char('"'); 0195 rquote = QLatin1Char('"'); 0196 break; 0197 } 0198 0199 QString text = text_; 0200 0201 for(StringListHash::ConstIterator it = s_utf8LatexMap.constBegin(); it != s_utf8LatexMap.constEnd(); ++it) { 0202 text.replace(it.key(), it.value()[0]); 0203 } 0204 0205 if(macros_.isEmpty()) { 0206 return lquote + addBraces(text) + rquote; 0207 } 0208 0209 // Now, split the text by the character QLatin1Char('#'), and examine each token to see if it is in 0210 // the macro list. If it is not, then add left-quote and right-quote around it. If it is, don't 0211 // change it. Then, in case QLatin1Char('#') occurs in a non-macro string, replace any occurrences of '}#{' with '#' 0212 0213 // list of new tokens 0214 QStringList list; 0215 0216 // first, split the text 0217 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0218 const QStringList tokens = text.split(QLatin1Char('#'), QString::KeepEmptyParts); 0219 #else 0220 const QStringList tokens = text.split(QLatin1Char('#'), Qt::KeepEmptyParts); 0221 #endif 0222 foreach(const QString& token, tokens) { 0223 // check to see if token is a macro 0224 if(macros_.indexOf(token.trimmed()) == -1) { 0225 // the token is NOT a macro, add braces around whole words and also around capitals 0226 list << lquote + addBraces(token) + rquote; 0227 } else { 0228 list << token; 0229 } 0230 } 0231 0232 const QChar octo = QLatin1Char('#'); 0233 text = list.join(octo); 0234 text.replace(QString(rquote)+octo+lquote, octo); 0235 0236 return text; 0237 } 0238 0239 QString& BibtexHandler::cleanText(QString& text_) { 0240 // FIXME: need to improve this for removing all Latex entities 0241 // QRegularExpression rx(QLatin1String("(?=[^\\\\])\\\\.+\\{")); 0242 static const QRegularExpression rx(QLatin1String("\\\\.+?\\{")); 0243 static const QRegularExpression brackets(QLatin1String("[{}]")); 0244 text_.remove(rx); 0245 text_.remove(brackets); 0246 return text_; 0247 } 0248 0249 // add braces around capital letters 0250 QString BibtexHandler::addBraces(const QString& text_) { 0251 QString text = text_; 0252 #if !TELLICO_BIBTEX_BRACES 0253 return text; 0254 #else 0255 int inside = 0; 0256 uint l = text.length(); 0257 // start at first letter, but skip if only the first is capitalized 0258 for(uint i = 0; i < l; ++i) { 0259 const QChar c = text.at(i); 0260 if(inside == 0 && c >= 'A' && c <= 'Z') { 0261 uint j = i+1; 0262 while(text.at(j) >= 'A' && text.at(j) <= 'Z' && j < l) { 0263 ++j; 0264 } 0265 if(i == 0 && j == 1) { 0266 continue; // no need to do anything to first letter 0267 } 0268 text.insert(i, '{'); 0269 // now j should be incremented 0270 text.insert(j+1, '}'); 0271 i = j+1; 0272 l += 2; // the length changed 0273 } else if(c == '{') { 0274 ++inside; 0275 } else if(c == '}') { 0276 --inside; 0277 } 0278 } 0279 return text; 0280 #endif 0281 }