// File indexing completed on 2024-05-19 05:05:33
0001 /*************************************************************************** 0002 * SPDX-License-Identifier: GPL-2.0-or-later 0003 * * 0004 * SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de> 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 * This program is distributed in the hope that it will be useful, * 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0014 * GNU General Public License for more details. * 0015 * * 0016 * You should have received a copy of the GNU General Public License * 0017 * along with this program; if not, see <https://www.gnu.org/licenses/>. * 0018 ***************************************************************************/ 0019 0020 #include "fileexporterbibtex.h" 0021 0022 #include <typeinfo> 0023 0024 #include <QTextCodec> 0025 #include <QTextStream> 0026 #include <QStringList> 0027 #include <QBuffer> 0028 0029 #include <BibTeXEntries> 0030 #include <BibTeXFields> 0031 #include <Preferences> 0032 #include <File> 0033 #include <Element> 0034 #include <Entry> 0035 #include <Macro> 0036 #include <Preamble> 0037 #include <Value> 0038 #include <Comment> 0039 #include "encoderlatex.h" 0040 #include "fileexporter_p.h" 0041 #include "logging_io.h" 0042 0043 #define normalizeText(text) (text).normalized(QString::NormalizationForm_C) 0044 0045 class FileExporterBibTeX::Private 0046 { 0047 private: 0048 FileExporterBibTeX *parent; 0049 0050 /** 0051 * Determine a codec to use based on various settings such as 0052 * the global preferences, per-file settings, or configuration 0053 * settings passed to this FileExporterBibTeX instance. 
0054 */ 0055 QPair<QString, QTextCodec *> determineTargetCodec() { 0056 QString encoding = QStringLiteral("utf-8"); ///< default encoding if nothing else is set 0057 if (!this->encoding.isEmpty()) 0058 /// Encoding as loaded in loadPreferencesAndProperties(..) has low priority 0059 encoding = this->encoding; 0060 if (!forcedEncoding.isEmpty()) 0061 /// Encoding as set via setEncoding(..) has high priority 0062 encoding = forcedEncoding; 0063 encoding = encoding.toLower(); 0064 if (encoding == QStringLiteral("utf-8") || encoding == QStringLiteral("utf8")) 0065 return QPair<QString, QTextCodec *>(QStringLiteral("utf-8"), nullptr); ///< a 'nullptr' encoder signifies UTF-8 0066 else if (encoding == QStringLiteral("latex")) 0067 return QPair<QString, QTextCodec *>(encoding, nullptr); ///< "LaTeX" encoding is actually just UTF-8 0068 else 0069 return QPair<QString, QTextCodec *>(encoding, QTextCodec::codecForName(encoding.toLatin1().constData())); 0070 } 0071 0072 inline bool canEncode(const QChar &c, QTextCodec *codec) { 0073 if (codec == nullptr) 0074 return true; ///< no codec means 'use UTF-8'; assume that UTF-8 can encode anything 0075 0076 /// QTextCodec::canEncode has some issues and cannot be relied upon 0077 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull); 0078 const QByteArray conversionResult = codec->fromUnicode(&c, 1, &state); 0079 /// Conversion failed if codec gave a single byte back which is 0x00 0080 /// (due to QTextCodec::ConvertInvalidToNull above) 0081 return conversionResult.length() != 1 || conversionResult.at(0) != QLatin1Char('\0'); 0082 } 0083 0084 public: 0085 QChar stringOpenDelimiter; 0086 QChar stringCloseDelimiter; 0087 KBibTeX::Casing keywordCasing; 0088 QString encoding, forcedEncoding; 0089 Qt::CheckState protectCasing; 0090 QString personNameFormatting; 0091 QString listSeparator; 0092 bool sortedByIdentifier; 0093 bool cancelFlag; 0094 0095 Private(FileExporterBibTeX *p) 0096 : parent(p), cancelFlag(false) 0097 { 
0098 // Initialize variables like 'keywordCasing' or 'personNameFormatting' from Preferences 0099 loadPreferencesAndProperties(nullptr /** no File object to evaluate properties from */); 0100 } 0101 0102 void loadPreferencesAndProperties(const File *bibtexfile) { 0103 #ifdef HAVE_KF 0104 encoding = Preferences::instance().bibTeXEncoding(); 0105 QString stringDelimiter = Preferences::instance().bibTeXStringDelimiter(); 0106 if (stringDelimiter.length() != 2) 0107 stringDelimiter = Preferences::defaultBibTeXStringDelimiter; 0108 #else // HAVE_KF 0109 encoding = QStringLiteral("LaTeX"); 0110 const QString stringDelimiter = QStringLiteral("{}"); 0111 #endif // HAVE_KF 0112 stringOpenDelimiter = stringDelimiter[0]; 0113 stringCloseDelimiter = stringDelimiter[1]; 0114 #ifdef HAVE_KF 0115 keywordCasing = Preferences::instance().bibTeXKeywordCasing(); 0116 protectCasing = Preferences::instance().bibTeXProtectCasing() ? Qt::Checked : Qt::Unchecked; 0117 listSeparator = Preferences::instance().bibTeXListSeparator(); 0118 sortedByIdentifier = Preferences::instance().bibTeXEntriesSortedByIdentifier(); 0119 #else // HAVE_KF 0120 keywordCasing = KBibTeX::Casing::LowerCase; 0121 protectCasing = Qt::PartiallyChecked; 0122 listSeparator = QStringLiteral("; "); 0123 sortedByIdentifier = false; 0124 #endif // HAVE_KF 0125 personNameFormatting = Preferences::instance().personNameFormat(); 0126 0127 /// Check if a valid File object was provided 0128 if (bibtexfile != nullptr) { 0129 /// If there is a File object, extract its properties which 0130 /// overturn the global preferences 0131 if (bibtexfile->hasProperty(File::Encoding)) 0132 encoding = bibtexfile->property(File::Encoding).toString(); 0133 if (bibtexfile->hasProperty(File::StringDelimiter)) { 0134 QString stringDelimiter = bibtexfile->property(File::StringDelimiter).toString(); 0135 if (stringDelimiter.length() != 2) 0136 stringDelimiter = Preferences::defaultBibTeXStringDelimiter; 0137 stringOpenDelimiter = 
stringDelimiter[0]; 0138 stringCloseDelimiter = stringDelimiter[1]; 0139 } 0140 if (bibtexfile->hasProperty(File::KeywordCasing)) 0141 keywordCasing = static_cast<KBibTeX::Casing>(bibtexfile->property(File::KeywordCasing).toInt()); 0142 if (bibtexfile->hasProperty(File::ProtectCasing)) 0143 protectCasing = static_cast<Qt::CheckState>(bibtexfile->property(File::ProtectCasing).toInt()); 0144 if (bibtexfile->hasProperty(File::NameFormatting)) { 0145 /// if the user set "use global default", this property is an empty string 0146 /// in this case, keep default value 0147 const QString buffer = bibtexfile->property(File::NameFormatting).toString(); 0148 personNameFormatting = buffer.isEmpty() ? personNameFormatting : buffer; 0149 } 0150 if (bibtexfile->hasProperty(File::ListSeparator)) 0151 listSeparator = bibtexfile->property(File::ListSeparator).toString(); 0152 if (bibtexfile->hasProperty(File::SortedByIdentifier)) 0153 sortedByIdentifier = bibtexfile->property(File::SortedByIdentifier).toBool(); 0154 } 0155 } 0156 0157 QString internalValueToBibTeX(const Value &value, const Encoder::TargetEncoding targetEncoding, const QString &key = QString()) 0158 { 0159 if (value.isEmpty()) 0160 return QString(); 0161 0162 QString result; 0163 result.reserve(1024); 0164 bool isOpen = false; 0165 QSharedPointer<const ValueItem> prev; 0166 for (const auto &valueItem : value) { 0167 QSharedPointer<const MacroKey> macroKey = valueItem.dynamicCast<const MacroKey>(); 0168 if (!macroKey.isNull()) { 0169 if (isOpen) result.append(stringCloseDelimiter); 0170 isOpen = false; 0171 if (!result.isEmpty()) result.append(QStringLiteral(" # ")); 0172 result.append(macroKey->text()); 0173 prev = macroKey; 0174 } else { 0175 QSharedPointer<const PlainText> plainText = valueItem.dynamicCast<const PlainText>(); 0176 if (!plainText.isNull()) { 0177 QString textBody = EncoderLaTeX::instance().encode(plainText->text(), targetEncoding); 0178 if (!isOpen) { 0179 if (!result.isEmpty()) 
result.append(QStringLiteral(" # ")); 0180 result.append(stringOpenDelimiter); 0181 } else if (!prev.dynamicCast<const PlainText>().isNull()) { 0182 if (key.toLower().startsWith(Entry::ftKeywords)) 0183 // Keywords in the 'keywords' field are separated by semicolons 0184 result.append(QStringLiteral(";")); 0185 else 0186 result.append(QStringLiteral(" ")); 0187 } else if (!prev.dynamicCast<const Person>().isNull()) { 0188 /// handle "et al." i.e. "and others" 0189 result.append(QStringLiteral(" and ")); 0190 } else { 0191 result.append(stringCloseDelimiter).append(QStringLiteral(" # ")).append(stringOpenDelimiter); 0192 } 0193 isOpen = true; 0194 0195 if (stringOpenDelimiter == QLatin1Char('"')) 0196 protectQuotationMarks(textBody); 0197 result.append(textBody); 0198 prev = plainText; 0199 } else { 0200 QSharedPointer<const VerbatimText> verbatimText = valueItem.dynamicCast<const VerbatimText>(); 0201 if (!verbatimText.isNull()) { 0202 const QString keyToLower(key.toLower()); 0203 QString textBody = verbatimText->text(); 0204 if (!isOpen) { 0205 if (!result.isEmpty()) result.append(QStringLiteral(" # ")); 0206 result.append(stringOpenDelimiter); 0207 } else if (!prev.dynamicCast<const VerbatimText>().isNull()) { 0208 if (keyToLower.startsWith(Entry::ftUrl) || keyToLower.startsWith(Entry::ftLocalFile) || keyToLower.startsWith(Entry::ftFile) || keyToLower.startsWith(Entry::ftDOI)) 0209 /// Filenames and alike have be separated by a semicolon, 0210 /// as a plain comma may be part of the filename or URL 0211 result.append(QStringLiteral("; ")); 0212 else 0213 result.append(QStringLiteral(" ")); 0214 } else { 0215 result.append(stringCloseDelimiter).append(QStringLiteral(" # ")).append(stringOpenDelimiter); 0216 } 0217 isOpen = true; 0218 0219 if (stringOpenDelimiter == QLatin1Char('"')) 0220 protectQuotationMarks(textBody); 0221 if (keyToLower == Entry::ftFile && verbatimText->hasComment()) { 0222 /// Special case: This verbatim text is for a 'file' field and contains 
a comment. 0223 /// This means it most probably came from JabRef which makes use of the non-standard 0224 /// format of comment:filename:filetype 0225 /// To be compatible with JabRef, rebuild a string that matches what JabRef would 0226 /// generate. As filetype is not stored, make an educated guess here. 0227 /// Also, filenames are not verbatim for JabRef, so _ must be written as \_ 0228 const int p = qMin(textBody.length(), qMin(8, qMax(2, textBody.lastIndexOf(QLatin1Char('.'))))); 0229 const QString extension = textBody.right(p).toLower(); 0230 const QString filetype = extension == QStringLiteral(".pdf") ? QStringLiteral("PDF") : extension == QStringLiteral(".html") || extension == QStringLiteral(".htm") ? QStringLiteral("HTML") : extension == QStringLiteral(".doc") ? QStringLiteral("DOC") : extension == QStringLiteral(".docx") ? QStringLiteral("DOCX") : QStringLiteral("BINARY"); 0231 result.append(verbatimText->comment()).append(QLatin1Char(':')).append(EncoderLaTeX::instance().encode(textBody, EncoderLaTeX::TargetEncoding::ASCII)).append(QLatin1Char(':')).append(filetype); 0232 } else 0233 result.append(textBody); 0234 prev = verbatimText; 0235 } else { 0236 QSharedPointer<const Person> person = valueItem.dynamicCast<const Person>(); 0237 if (!person.isNull()) { 0238 QString firstName = person->firstName(); 0239 if (!firstName.isEmpty() && requiresPersonQuoting(firstName, false)) 0240 firstName = firstName.prepend(QStringLiteral("{")).append(QStringLiteral("}")); 0241 0242 QString lastName = person->lastName(); 0243 if (!lastName.isEmpty() && requiresPersonQuoting(lastName, true)) 0244 lastName = lastName.prepend(QStringLiteral("{")).append(QStringLiteral("}")); 0245 0246 QString suffix = person->suffix(); 0247 0248 /// Fall back and enforce comma-based name formatting 0249 /// if name contains a suffix like "Jr." 0250 /// Otherwise name could not be parsed again reliable 0251 const QString pnf = suffix.isEmpty() ? 
personNameFormatting : Preferences::personNameFormatLastFirst; 0252 QString thisName = EncoderLaTeX::instance().encode(Person::transcribePersonName(pnf, firstName, lastName, suffix), targetEncoding); 0253 0254 if (!isOpen) { 0255 if (!result.isEmpty()) result.append(QStringLiteral(" # ")); 0256 result.append(stringOpenDelimiter); 0257 } else if (!prev.dynamicCast<const Person>().isNull()) 0258 result.append(QStringLiteral(" and ")); 0259 else { 0260 result.append(stringCloseDelimiter).append(QStringLiteral(" # ")).append(stringOpenDelimiter); 0261 } 0262 isOpen = true; 0263 0264 if (stringOpenDelimiter == QLatin1Char('"')) 0265 protectQuotationMarks(thisName); 0266 result.append(thisName); 0267 prev = person; 0268 } else { 0269 QSharedPointer<const Keyword> keyword = valueItem.dynamicCast<const Keyword>(); 0270 if (!keyword.isNull()) { 0271 QString textBody = EncoderLaTeX::instance().encode(keyword->text(), targetEncoding); 0272 if (!isOpen) { 0273 if (!result.isEmpty()) result.append(QStringLiteral(" # ")); 0274 result.append(stringOpenDelimiter); 0275 } else if (!prev.dynamicCast<const Keyword>().isNull()) 0276 result.append(listSeparator); 0277 else { 0278 result.append(stringCloseDelimiter).append(QStringLiteral(" # ")).append(stringOpenDelimiter); 0279 } 0280 isOpen = true; 0281 0282 if (stringOpenDelimiter == QLatin1Char('"')) 0283 protectQuotationMarks(textBody); 0284 result.append(textBody); 0285 prev = keyword; 0286 } 0287 } 0288 } 0289 } 0290 } 0291 prev = valueItem; 0292 } 0293 0294 if (isOpen) result.append(stringCloseDelimiter); 0295 0296 result.squeeze(); 0297 return result; 0298 } 0299 0300 bool writeEntry(QString &output, const Entry &entry, const Encoder::TargetEncoding &targetEncoding) { 0301 /// write start of a entry (entry type and id) in plain ASCII 0302 output.append(QLatin1Char('@')).append(BibTeXEntries::instance().format(entry.type(), keywordCasing)); 0303 
output.append(QLatin1Char('{')).append(Encoder::instance().convertToPlainAscii(entry.id())); 0304 0305 for (Entry::ConstIterator it = entry.constBegin(); it != entry.constEnd(); ++it) { 0306 const QString &key = it.key(); 0307 const Value &value = it.value(); 0308 if (value.isEmpty()) continue; ///< ignore empty key-value pairs 0309 0310 QString text = internalValueToBibTeX(value, targetEncoding, key); 0311 if (text.isEmpty()) { 0312 /// ignore empty key-value pairs 0313 qCWarning(LOG_KBIBTEX_IO) << "Value for field " << key << " is empty"; 0314 continue; 0315 } 0316 0317 // FIXME hack! 0318 const QSharedPointer<const ValueItem> &first = *value.constBegin(); 0319 if (PlainText::isPlainText(*first) && (key == Entry::ftTitle || key == Entry::ftBookTitle || key == Entry::ftSeries)) { 0320 if (protectCasing == Qt::Checked) 0321 addProtectiveCasing(text); 0322 else if (protectCasing == Qt::Unchecked) 0323 removeProtectiveCasing(text); 0324 } 0325 0326 output.append(QStringLiteral(",\n\t")); 0327 output.append(Encoder::instance().convertToPlainAscii(BibTeXFields::instance().format(key, keywordCasing))); 0328 output.append(QStringLiteral(" = ")).append(normalizeText(text)); 0329 } 0330 output.append(QStringLiteral("\n}\n\n")); 0331 0332 return true; 0333 } 0334 0335 bool writeMacro(QString &output, const Macro ¯o, const Encoder::TargetEncoding &targetEncoding) { 0336 QString text = internalValueToBibTeX(macro.value(), targetEncoding); 0337 if (protectCasing == Qt::Checked) 0338 addProtectiveCasing(text); 0339 else if (protectCasing == Qt::Unchecked) 0340 removeProtectiveCasing(text); 0341 0342 output.append(QLatin1Char('@')).append(BibTeXEntries::instance().format(QStringLiteral("String"), keywordCasing)); 0343 output.append(QLatin1Char('{')).append(normalizeText(macro.key())); 0344 output.append(QStringLiteral(" = ")).append(normalizeText(text)); 0345 output.append(QStringLiteral("}\n\n")); 0346 0347 return true; 0348 } 0349 0350 bool writeComment(QString &output, const 
Comment &comment) { 0351 QString text = comment.text() ; 0352 0353 switch (comment.context()) { 0354 case Preferences::CommentContext::Verbatim: 0355 output.append(normalizeText(text)).append(QStringLiteral("\n\n")); 0356 break; 0357 case Preferences::CommentContext::Prefix: { 0358 const QStringList commentLines = text.split(QStringLiteral("\n")); 0359 for (const QString &line : commentLines) 0360 output.append(comment.prefix()).append(normalizeText(line)).append(QLatin1Char('\n')); 0361 output.append(QLatin1Char('\n')); 0362 } 0363 break; 0364 case Preferences::CommentContext::Command: 0365 output.append(QLatin1Char('@')).append(BibTeXEntries::instance().format(QStringLiteral("Comment"), keywordCasing)); 0366 output.append(QLatin1Char('{')).append(normalizeText(text)); 0367 output.append(QLatin1Char('}')).append(QStringLiteral("\n\n")); 0368 break; 0369 } 0370 0371 return true; 0372 } 0373 0374 bool writePreamble(QString &output, const Preamble &preamble) { 0375 output.append(QLatin1Char('@')).append(BibTeXEntries::instance().format(QStringLiteral("Preamble"), keywordCasing)).append(QLatin1Char('{')); 0376 /// Strings from preamble do not get LaTeX-encoded, may contain raw LaTeX commands and code 0377 output.append(normalizeText(internalValueToBibTeX(preamble.value(), Encoder::TargetEncoding::RAW))); 0378 output.append(QStringLiteral("}\n\n")); 0379 0380 return true; 0381 } 0382 0383 QString addProtectiveCasing(QString &text) { 0384 /// Check if either 0385 /// - text is too short (less than two characters) or 0386 /// - text neither starts/stops with double quotation marks 0387 /// nor starts with { and stops with } 0388 if (text.length() < 2 || ((text[0] != QLatin1Char('"') || text[text.length() - 1] != QLatin1Char('"')) && (text[0] != QLatin1Char('{') || text[text.length() - 1] != QLatin1Char('}')))) { 0389 /// Nothing to protect, as this is no text string 0390 return text; 0391 } 0392 0393 bool addBrackets = true; 0394 0395 if (text[1] == QLatin1Char('{') && 
text[text.length() - 2] == QLatin1Char('}')) { 0396 /// If the given text looks like this: {{...}} or "{...}" 0397 /// still check that it is not like this: {{..}..{..}} 0398 addBrackets = false; 0399 for (int i = text.length() - 2, count = 0; !addBrackets && i > 1; --i) { 0400 if (text[i] == QLatin1Char('{')) ++count; 0401 else if (text[i] == QLatin1Char('}')) --count; 0402 if (count == 0) addBrackets = true; 0403 } 0404 } 0405 0406 if (addBrackets) 0407 text.insert(1, QStringLiteral("{")).insert(text.length() - 1, QStringLiteral("}")); 0408 0409 return text; 0410 } 0411 0412 QString removeProtectiveCasing(QString &text) { 0413 /// Check if either 0414 /// - text is too short (less than two characters) or 0415 /// - text neither starts/stops with double quotation marks 0416 /// nor starts with { and stops with } 0417 if (text.length() < 2 || ((text[0] != QLatin1Char('"') || text[text.length() - 1] != QLatin1Char('"')) && (text[0] != QLatin1Char('{') || text[text.length() - 1] != QLatin1Char('}')))) { 0418 /// Nothing to protect, as this is no text string 0419 return text; 0420 } 0421 0422 if (text[1] != QLatin1Char('{') || text[text.length() - 2] != QLatin1Char('}')) 0423 /// Nothing to remove 0424 return text; 0425 0426 /// If the given text looks like this: {{...}} or "{...}" 0427 /// still check that it is not like this: {{..}..{..}} 0428 bool removeBrackets = true; 0429 for (int i = text.length() - 2, count = 0; removeBrackets && i > 1; --i) { 0430 if (text[i] == QLatin1Char('{')) ++count; 0431 else if (text[i] == QLatin1Char('}')) --count; 0432 if (count == 0) removeBrackets = false; 0433 } 0434 0435 if (removeBrackets) 0436 text.remove(text.length() - 2, 1).remove(1, 1); 0437 0438 return text; 0439 } 0440 0441 QString &protectQuotationMarks(QString &text) { 0442 int p = -1; 0443 while ((p = text.indexOf(QLatin1Char('"'), p + 1)) > 0) 0444 if (p == 0 || text[p - 1] != QLatin1Char('\\')) { 0445 text.insert(p + 1, QStringLiteral("}")).insert(p, 
QStringLiteral("{")); 0446 ++p; 0447 } 0448 return text; 0449 } 0450 0451 bool requiresPersonQuoting(const QString &text, bool isLastName) { 0452 if (isLastName && !text.contains(QLatin1Char(' '))) 0453 /** Last name contains NO spaces, no quoting necessary */ 0454 return false; 0455 else if (!isLastName && !text.contains(QStringLiteral(" and "))) 0456 /** First name contains no " and " no quoting necessary */ 0457 return false; 0458 else if (isLastName && !text.isEmpty() && text[0].isLower()) 0459 /** Last name starts with lower-case character (von, van, de, ...) */ 0460 // FIXME does not work yet 0461 return false; 0462 else if (text[0] != QLatin1Char('{') || text[text.length() - 1] != QLatin1Char('}')) 0463 /** as either last name contains spaces or first name contains " and " and there is no protective quoting yet, there must be a protective quoting added */ 0464 return true; 0465 0466 int bracketCounter = 0; 0467 for (int i = text.length() - 1; i >= 0; --i) { 0468 if (text[i] == QLatin1Char('{')) 0469 ++bracketCounter; 0470 else if (text[i] == QLatin1Char('}')) 0471 --bracketCounter; 0472 if (bracketCounter == 0 && i > 0) 0473 return true; 0474 } 0475 return false; 0476 } 0477 0478 bool saveAsString(QString &output, const File *bibtexfile) { 0479 const Encoder::TargetEncoding targetEncoding = determineTargetCodec().first == QStringLiteral("latex") || determineTargetCodec().first == QStringLiteral("us-ascii") ? Encoder::TargetEncoding::ASCII : Encoder::TargetEncoding::UTF8; 0480 0481 const File *_bibtexfile = sortedByIdentifier ? 
File::sortByIdentifier(bibtexfile) : bibtexfile; 0482 0483 /// Memorize which entries are used in a crossref field 0484 QHash<QString, QStringList> crossRefMap; 0485 for (File::ConstIterator it = _bibtexfile->constBegin(); it != _bibtexfile->constEnd() && !cancelFlag; ++it) { 0486 QSharedPointer<const Entry> entry = (*it).dynamicCast<const Entry>(); 0487 if (!entry.isNull()) { 0488 const QString crossRef = PlainTextValue::text(entry->value(Entry::ftCrossRef)); 0489 if (!crossRef.isEmpty()) { 0490 QStringList crossRefList = crossRefMap.value(crossRef, QStringList()); 0491 crossRefList.append(entry->id()); 0492 crossRefMap.insert(crossRef, crossRefList); 0493 } 0494 } 0495 } 0496 0497 int currentPos = 0, totalElements = _bibtexfile->count(); 0498 bool result = true; 0499 bool allPreamblesAndMacrosProcessed = false; 0500 QSet<QString> processedEntryIds; 0501 for (File::ConstIterator it = _bibtexfile->constBegin(); it != _bibtexfile->constEnd() && result && !cancelFlag; ++it) { 0502 QSharedPointer<const Element> element = (*it); 0503 QSharedPointer<const Entry> entry = element.dynamicCast<const Entry>(); 0504 0505 if (!entry.isNull()) { 0506 processedEntryIds.insert(entry->id()); 0507 0508 /// Postpone entries that are crossref'ed 0509 const QStringList crossRefList = crossRefMap.value(entry->id(), QStringList()); 0510 if (!crossRefList.isEmpty()) { 0511 bool allProcessed = true; 0512 for (const QString &origin : crossRefList) 0513 allProcessed &= processedEntryIds.contains(origin); 0514 if (allProcessed) 0515 crossRefMap.remove(entry->id()); 0516 else 0517 continue; 0518 } 0519 0520 if (!allPreamblesAndMacrosProcessed) { 0521 /// Guarantee that all macros and the preamble are written 0522 /// before the first entry (@article, ...) 
is written 0523 for (File::ConstIterator msit = it + 1; msit != _bibtexfile->constEnd() && result && !cancelFlag; ++msit) { 0524 QSharedPointer<const Preamble> preamble = (*msit).dynamicCast<const Preamble>(); 0525 if (!preamble.isNull()) { 0526 result &= writePreamble(output, *preamble); 0527 /// Instead of an 'emit' ... 0528 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0529 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0530 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0531 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0532 #endif 0533 } else { 0534 QSharedPointer<const Macro> macro = (*msit).dynamicCast<const Macro>(); 0535 if (!macro.isNull()) { 0536 result &= writeMacro(output, *macro, targetEncoding); 0537 /// Instead of an 'emit' ... 0538 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0539 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0540 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0541 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0542 #endif 0543 } 0544 } 0545 } 0546 allPreamblesAndMacrosProcessed = true; 0547 } 0548 0549 result &= writeEntry(output, *entry, targetEncoding); 0550 /// Instead of an 'emit' ... 
0551 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0552 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0553 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0554 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0555 #endif 0556 } else { 0557 QSharedPointer<const Comment> comment = element.dynamicCast<const Comment>(); 0558 if (!comment.isNull() && !comment->text().startsWith(QStringLiteral("x-kbibtex-"))) { 0559 result &= writeComment(output, *comment); 0560 /// Instead of an 'emit' ... 0561 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0562 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0563 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0564 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0565 #endif 0566 } else if (!allPreamblesAndMacrosProcessed) { 0567 QSharedPointer<const Preamble> preamble = element.dynamicCast<const Preamble>(); 0568 if (!preamble.isNull()) { 0569 result &= writePreamble(output, *preamble); 0570 /// Instead of an 'emit' ... 
0571 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0572 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0573 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0574 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0575 #endif 0576 } else { 0577 QSharedPointer<const Macro> macro = element.dynamicCast<const Macro>(); 0578 if (!macro.isNull()) { 0579 result &= writeMacro(output, *macro, targetEncoding); 0580 /// Instead of an 'emit' ... 0581 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0582 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0583 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0584 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0585 #endif 0586 } 0587 } 0588 } 0589 } 0590 } 0591 0592 /// Crossref'ed entries are written last 0593 if (!crossRefMap.isEmpty()) 0594 for (File::ConstIterator it = _bibtexfile->constBegin(); it != _bibtexfile->constEnd() && result && !cancelFlag; ++it) { 0595 QSharedPointer<const Entry> entry = (*it).dynamicCast<const Entry>(); 0596 if (entry.isNull()) continue; 0597 if (!crossRefMap.contains(entry->id())) continue; 0598 0599 result &= writeEntry(output, *entry, targetEncoding); 0600 /// Instead of an 'emit' ... 
0601 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0602 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0603 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0604 QMetaObject::invokeMethod(parent, "progress", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(int, ++currentPos), Q_ARG(int, totalElements)); 0605 #endif 0606 } 0607 0608 if (_bibtexfile != bibtexfile) 0609 /// _bibtexfile is not the origianl bibtexfile passed to this function, 0610 /// but was generated as part of the sorting by identifier, 0611 /// thus destroy it now 0612 delete _bibtexfile; 0613 0614 return result; 0615 } 0616 0617 bool saveAsString(QString &output, const QSharedPointer<const Element> &element) { 0618 const Encoder::TargetEncoding targetEncoding = determineTargetCodec().first == QStringLiteral("latex") ? Encoder::TargetEncoding::ASCII : Encoder::TargetEncoding::UTF8; 0619 0620 const QSharedPointer<const Entry> &entry = element.dynamicCast<const Entry>(); 0621 if (!entry.isNull()) 0622 return writeEntry(output, *entry, targetEncoding); 0623 else { 0624 const QSharedPointer<const Macro> ¯o = element.dynamicCast<const Macro>(); 0625 if (!macro.isNull()) 0626 return writeMacro(output, *macro, targetEncoding); 0627 else { 0628 const QSharedPointer<const Comment> &comment = element.dynamicCast<const Comment>(); 0629 if (!comment.isNull()) 0630 return writeComment(output, *comment); 0631 else { 0632 const QSharedPointer<const Preamble> &preamble = element.dynamicCast<const Preamble>(); 0633 if (!preamble.isNull()) 0634 return writePreamble(output, *preamble); 0635 else 0636 qCWarning(LOG_KBIBTEX_IO) << "Trying to save unsupported Element to BibTeX"; 0637 } 0638 } 0639 } 0640 0641 return false; 0642 } 0643 0644 QByteArray applyEncoding(const QString &input) { 0645 QTextCodec *codec = determineTargetCodec().second; 0646 0647 QString rewrittenInput; 0648 
rewrittenInput.reserve(input.length() * 12 / 10 /* add 20% */ + 1024 /* plus 1K */); 0649 const Encoder &laTeXEncoder = EncoderLaTeX::instance(); 0650 for (const QChar &c : input) { 0651 if (codec == nullptr /** meaning UTF-8, which can encode anything */ || canEncode(c, codec)) 0652 rewrittenInput.append(c); 0653 else 0654 rewrittenInput.append(laTeXEncoder.encode(QString(c), Encoder::TargetEncoding::ASCII)); 0655 } 0656 0657 if (codec == nullptr || (codec->name().toLower() != "utf-16" && codec->name().toLower() != "utf-32")) { 0658 // Unless encoding is UTF-16 or UTF-32 (those have BOM to detect encoding) ... 0659 0660 // Determine which (if at all) encoding comment to be included in BibTeX data 0661 QString encodingForComment; //< empty by default 0662 if (!forcedEncoding.isEmpty()) 0663 // For this exporter instance, a specific encoding was forced upon 0664 encodingForComment = forcedEncoding; 0665 else if (!encoding.isEmpty()) 0666 // File had an encoding in its properties 0667 // (variable 'encoding' was set in 'loadPreferencesAndProperties') 0668 encodingForComment = encoding; 0669 0670 if (!encodingForComment.isEmpty()) { 0671 // Verify that 'encodingForComment', which labels an encoding, 0672 // is compatible with the target codec 0673 #define normalizeCodecName(codecname) codecname.toLower().remove(QLatin1Char(' ')).remove(QLatin1Char('-')).remove(QLatin1Char('_')).replace(QStringLiteral("euckr"),QStringLiteral("windows949")) 0674 const QString lowerNormalizedEncodingForComment = normalizeCodecName(encodingForComment); 0675 const QString lowerNormalizedCodecName = codec != nullptr ? 
normalizeCodecName(QString::fromLatin1(codec->name())) : QString(); 0676 if (codec == nullptr) { 0677 if (lowerNormalizedEncodingForComment != QStringLiteral("utf8") && lowerNormalizedEncodingForComment != QStringLiteral("latex")) { 0678 qCWarning(LOG_KBIBTEX_IO) << "No codec (means UTF-8 encoded output) does not match with encoding" << encodingForComment; 0679 return QByteArray(); 0680 } 0681 } else if (lowerNormalizedCodecName != lowerNormalizedEncodingForComment) { 0682 qCWarning(LOG_KBIBTEX_IO) << "Codec with name" << codec->name() << "does not match with encoding" << encodingForComment; 0683 return QByteArray(); 0684 } 0685 } 0686 0687 if (!encodingForComment.isEmpty() && encodingForComment.toLower() != QStringLiteral("latex") && encodingForComment.toLower() != QStringLiteral("us-ascii")) 0688 // Only if encoding is not pure ASCII (i.e. 'LaTeX' or 'US-ASCII') add 0689 // a comment at the beginning of the file to tell which encoding was used 0690 rewrittenInput.prepend(QString(QStringLiteral("@comment{x-kbibtex-encoding=%1}\n\n")).arg(encodingForComment)); 0691 } else { 0692 // For UTF-16 and UTF-32, no special comment needs to be added: 0693 // Those encodings are recognized by their BOM or the regular 0694 // occurrence of 0x00 bytes which is typically if encoding 0695 // ASCII text. 0696 } 0697 0698 rewrittenInput.squeeze(); 0699 0700 return codec == nullptr ? 
rewrittenInput.toUtf8() : codec->fromUnicode(rewrittenInput); 0701 } 0702 0703 bool writeOutString(const QString &outputString, QIODevice *iodevice) { 0704 bool result = outputString.length() > 0; 0705 0706 if (result) { 0707 const QByteArray outputData = applyEncoding(outputString); 0708 result &= outputData.length() > 0; 0709 if (!result) 0710 qCWarning(LOG_KBIBTEX_IO) << "outputData.length() is" << outputData.length(); 0711 if (result) 0712 result &= iodevice->write(outputData) == outputData.length(); 0713 if (!result) 0714 qCWarning(LOG_KBIBTEX_IO) << "Writing data to IO device failed, not everything was written"; 0715 } else 0716 qCWarning(LOG_KBIBTEX_IO) << "outputString.length() is" << outputString.length(); 0717 0718 return result; 0719 } 0720 }; 0721 0722 0723 FileExporterBibTeX::FileExporterBibTeX(QObject *parent) 0724 : FileExporter(parent), d(new Private(this)) 0725 { 0726 /// nothing 0727 } 0728 0729 FileExporterBibTeX::~FileExporterBibTeX() 0730 { 0731 delete d; 0732 } 0733 0734 void FileExporterBibTeX::setEncoding(const QString &encoding) 0735 { 0736 d->forcedEncoding = encoding; 0737 } 0738 0739 QString FileExporterBibTeX::toString(const QSharedPointer<const Element> &element, const File *bibtexfile) 0740 { 0741 d->cancelFlag = false; 0742 0743 if (bibtexfile != nullptr) 0744 d->loadPreferencesAndProperties(bibtexfile); 0745 0746 QString outputString; 0747 outputString.reserve(1024); 0748 bool result = d->saveAsString(outputString, element); 0749 if (!result) { 0750 qCWarning(LOG_KBIBTEX_IO) << "saveInString(..) 
failed"; 0751 return QString(); 0752 } 0753 0754 outputString.squeeze(); 0755 return outputString.normalized(QString::NormalizationForm_C); 0756 } 0757 0758 QString FileExporterBibTeX::toString(const File *bibtexfile) 0759 { 0760 d->cancelFlag = false; 0761 0762 if (bibtexfile == nullptr) { 0763 qCWarning(LOG_KBIBTEX_IO) << "No bibliography to write given"; 0764 return QString(); 0765 } else if (bibtexfile->isEmpty()) { 0766 qCDebug(LOG_KBIBTEX_IO) << "Bibliography is empty"; 0767 return QString(); 0768 } 0769 0770 d->loadPreferencesAndProperties(bibtexfile); 0771 0772 QString outputString; 0773 outputString.reserve(bibtexfile->length() * 1024); //< reserve 1K per element 0774 bool result = d->saveAsString(outputString, bibtexfile); 0775 if (!result) { 0776 qCWarning(LOG_KBIBTEX_IO) << "saveInString(..) failed"; 0777 return QString(); 0778 } 0779 0780 outputString.squeeze(); 0781 return outputString.normalized(QString::NormalizationForm_C); 0782 } 0783 0784 bool FileExporterBibTeX::save(QIODevice *iodevice, const File *bibtexfile) 0785 { 0786 d->cancelFlag = false; 0787 0788 check_if_bibtexfile_or_iodevice_invalid(bibtexfile, iodevice); 0789 0790 // Call 'toString' to get an in-memory representation of the BibTeX data, 0791 // then rewrite the output either protect only sensitive text (e.g. '&') 0792 // or rewrite all known non-ASCII characters to their LaTeX equivalents 0793 // (e.g. 
U+00E4 to '{\"a}') 0794 const bool result = d->writeOutString(toString(bibtexfile), iodevice); 0795 0796 return result && !d->cancelFlag; 0797 } 0798 0799 bool FileExporterBibTeX::save(QIODevice *iodevice, const QSharedPointer<const Element> &element, const File *bibtexfile) 0800 { 0801 d->cancelFlag = false; 0802 0803 check_if_iodevice_invalid(iodevice); 0804 0805 const bool result = d->writeOutString(toString(element, bibtexfile), iodevice); 0806 0807 iodevice->close(); 0808 return result && !d->cancelFlag; 0809 } 0810 0811 void FileExporterBibTeX::cancel() 0812 { 0813 d->cancelFlag = true; 0814 } 0815 0816 QString FileExporterBibTeX::valueToBibTeX(const Value &value, Encoder::TargetEncoding targetEncoding, const QString &key) 0817 { 0818 FileExporterBibTeX staticFileExporterBibTeX(nullptr); 0819 staticFileExporterBibTeX.d->cancelFlag = false; 0820 return staticFileExporterBibTeX.d->internalValueToBibTeX(value, targetEncoding, key); 0821 } 0822 0823 QString FileExporterBibTeX::editionNumberToString(const int edition, const Preferences::BibliographySystem bibliographySystem) 0824 { 0825 if (edition <= 0) { 0826 qCWarning(LOG_KBIBTEX_IO) << "Cannot convert a non-positive number (" << edition << ") into a textual representation"; 0827 return QString(); 0828 } 0829 0830 // According to http://mirrors.ctan.org/biblio/bibtex/contrib/doc/btxFAQ.pdf, 0831 // edition values should look like this: 0832 // - for first to fifth, write "First" to "Fifth" 0833 // - starting from sixth, use numeric form like "17th" 0834 // According to http://mirrors.ctan.org/macros/latex/contrib/biblatex/doc/biblatex.pdf, 0835 // edition values should by just numbers (digits) without text, 0836 // such as '1' in a @sa PlainText. 
0837 0838 if (bibliographySystem == Preferences::BibliographySystem::BibLaTeX) 0839 return QString::number(edition); 0840 else if (bibliographySystem == Preferences::BibliographySystem::BibTeX) 0841 // BibTeX uses ordinals 0842 return numberToOrdinal(edition); 0843 else 0844 return QString(); 0845 } 0846 0847 bool FileExporterBibTeX::isFileExporterBibTeX(const FileExporter &other) { 0848 return typeid(other) == typeid(FileExporterBibTeX); 0849 }