File indexing completed on 2024-05-19 05:05:35

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "fileexporterxml.h"
0021 
0022 #include <QIODevice>
0023 #include <QRegularExpression>
0024 #include <QStringList>
0025 
0026 #include <KBibTeX>
0027 #include <File>
0028 #include <Entry>
0029 #include <Macro>
0030 #include <Comment>
0031 #include "fileimporterbibtex.h"
0032 #include "fileexporter.h"
0033 #include "fileexporter_p.h"
0034 #include "encoderxml.h"
0035 #include "logging_io.h"
0036 
0037 #if QT_VERSION >= 0x050e00
0038 #define ENDL Qt::endl
0039 #else // QT_VERSION < 0x050e00
0040 #define ENDL endl
0041 #endif // QT_VERSION >= 0x050e00
0042 
0043 
0044 QString htmlify(const QString &input)
0045 {
0046     QString output;
0047     output.reserve(input.length() * 4 / 3 + 128);
0048     QChar prev_c;
0049     for (const QChar &c : input) {
0050         if (c.unicode() < 128) {
0051             static const QSet<QChar> skipChar{QLatin1Char('{'), QLatin1Char('}')};
0052             if (!skipChar.contains(c) || prev_c == QLatin1Char('\\'))
0053                 output.append(c);
0054         } else
0055             output.append(QString(QStringLiteral("&#x%1;")).arg(c.unicode(), 4, 16, QLatin1Char('0')));
0056         prev_c = c;
0057     }
0058     return output;
0059 }
0060 
0061 QString cleanXML(const QString &text)
0062 {
0063     static const QRegularExpression removal(QStringLiteral("[{}]+"));
0064     static const QRegularExpression lineBreaksRegExp(QStringLiteral("[ \\t]*[\\n\\r]"));
0065     QString result = text;
0066     result = result.replace(lineBreaksRegExp, QStringLiteral("<br/>")).remove(removal).remove(QStringLiteral("\\ensuremath"));
0067     return result;
0068 }
0069 
0070 QString valueItemToXML(const QSharedPointer<const ValueItem> &valueItem)
0071 {
0072     QSharedPointer<const PlainText> plainText = valueItem.dynamicCast<const PlainText>();
0073     if (!plainText.isNull())
0074         return QStringLiteral("<text>") + cleanXML(EncoderXML::instance().encode(PlainTextValue::text(valueItem), Encoder::TargetEncoding::UTF8)) + QStringLiteral("</text>");
0075     else {
0076         QSharedPointer<const Person> p = valueItem.dynamicCast<const Person>();
0077         if (!p.isNull()) {
0078             QString result(QStringLiteral("<person>"));
0079             if (!p->firstName().isEmpty())
0080                 result.append(QStringLiteral("<firstname>") + cleanXML(EncoderXML::instance().encode(p->firstName(), Encoder::TargetEncoding::UTF8)) + QStringLiteral("</firstname>"));
0081             if (!p->lastName().isEmpty())
0082                 result.append(QStringLiteral("<lastname>") + cleanXML(EncoderXML::instance().encode(p->lastName(), Encoder::TargetEncoding::UTF8)) + QStringLiteral("</lastname>"));
0083             if (!p->suffix().isEmpty())
0084                 result.append(QStringLiteral("<suffix>") + cleanXML(EncoderXML::instance().encode(p->suffix(), Encoder::TargetEncoding::UTF8)) + QStringLiteral("</suffix>"));
0085             result.append(QStringLiteral("</person>"));
0086             return result;
0087         }
0088         // TODO: Other data types
0089         else
0090             return QStringLiteral("<text>") + cleanXML(EncoderXML::instance().encode(PlainTextValue::text(valueItem), Encoder::TargetEncoding::UTF8)) + QStringLiteral("</text>");
0091     }
0092 }
0093 
0094 QString valueToXML(const Value &value)
0095 {
0096     QString result;
0097 
0098     for (const auto &valueItem : value)
0099         result.append(valueItemToXML(valueItem));
0100 
0101     return result;
0102 }
0103 
0104 struct RewriteFunctions {
0105     std::function<bool(QTextStream &)> header, footer;
0106     std::function<bool(QTextStream &, const QSharedPointer<const Entry> &)> writeEntry;
0107     std::function<bool(QTextStream &, const QSharedPointer<const Macro> &)> writeMacro;
0108     std::function<bool(QTextStream &, const QSharedPointer<const Comment> &)> writeComment;
0109 };
0110 
0111 QHash<FileExporterXML::OutputStyle, struct RewriteFunctions> rewriteFunctions =
0112 {
0113     {
0114         FileExporterXML::OutputStyle::XML_KBibTeX, {
0115             /*.header =*/ [](QTextStream &stream) {
0116                 stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << ENDL;
0117                 stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << ENDL;
0118                 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << ENDL;
0119                 stream << "<bibliography>" << ENDL;
0120                 return true;
0121             },
0122             /*.footer =*/ [](QTextStream &stream) {
0123                 stream << "</bibliography>" << ENDL;
0124                 return true;
0125             },
0126             /*.writeEntry =*/ [](QTextStream &stream, const QSharedPointer<const Entry> &entry) {
0127                 stream << " <entry id=\"" << EncoderXML::instance().encode(entry->id(), Encoder::TargetEncoding::UTF8) << "\" type=\"" << entry->type().toLower() << "\">" << ENDL;
0128                 for (Entry::ConstIterator it = entry->constBegin(); it != entry->constEnd(); ++it) {
0129                     const QString key = it.key().toLower();
0130                     const Value value = it.value();
0131 
0132                     if (key == Entry::ftAuthor || key == Entry::ftEditor) {
0133                         Value internal = value;
0134                         Value::ConstIterator lastIt = internal.constEnd();
0135                         --lastIt;
0136                         const QSharedPointer<const ValueItem> &last = *lastIt;
0137                         stream << "  <" << key << "s";
0138 
0139                         if (!value.isEmpty() && PlainText::isPlainText(*last)) {
0140                             const QSharedPointer<const PlainText> pt = internal.last().staticCast<const PlainText>();
0141                             if (pt->text() == QStringLiteral("others")) {
0142                                 internal.erase(internal.end() - 1);
0143                                 stream << " etal=\"true\"";
0144                             }
0145                         }
0146                         stream << ">" << ENDL;
0147                         stream << valueToXML(internal) << ENDL;
0148                         stream << "  </" << key << "s>" << ENDL;
0149                     } else if (key == Entry::ftAbstract) {
0150                         static const QRegularExpression abstractRegExp(QStringLiteral("\\bAbstract[:]?([ ]|&nbsp;|&amp;nbsp;)*"), QRegularExpression::CaseInsensitiveOption);
0151                         /// clean up HTML artifacts
0152                         QString text = valueToXML(value);
0153                         text = text.remove(abstractRegExp);
0154                         stream << "  <" << key << ">" << text << "</" << key << ">" << ENDL;
0155                     } else if (key == Entry::ftPages) {
0156                         // Guess a ints representing first and last page
0157                         const QString textualRepresentation = PlainTextValue::text(value);
0158                         static const QRegularExpression fromPageRegExp(QStringLiteral("^\\s*([1-9]\\d*)\\b"));
0159                         static const QRegularExpression toPageRegExp(QStringLiteral("\\b([1-9]\\d*)\\s*$"));
0160                         const QRegularExpressionMatch fromPageMatch = fromPageRegExp.match(textualRepresentation);
0161                         const QRegularExpressionMatch toPageMatch = toPageRegExp.match(textualRepresentation);
0162                         bool okFromPage = false, okToPage = false;
0163                         const int fromPage = fromPageMatch.hasMatch() ? fromPageMatch.captured(1).toInt(&okFromPage) : -1;
0164                         const int toPage = toPageMatch.hasMatch() ? toPageMatch.captured(1).toInt(&okToPage) : -1;
0165 
0166                         stream << "  <pages";
0167                         if (okFromPage && fromPage > 0)
0168                             stream << " firstpage=\"" << fromPage << "\"";
0169                         if (okToPage && toPage > 0)
0170                             stream << " lastpage=\"" << toPage << "\"";
0171                         stream << '>' << valueToXML(value) << "</pages>" << ENDL;
0172                     } else if (key == Entry::ftEdition) {
0173                         const QString textualRepresentation = PlainTextValue::text(value);
0174                         bool ok = false;
0175                         const int asInt = FileExporter::editionStringToNumber(textualRepresentation, &ok);
0176                         const QString asText = ok && asInt > 0 ? QStringLiteral("<text>") + FileExporter::numberToOrdinal(asInt) + QStringLiteral("</text>") : valueToXML(value);
0177 
0178                         stream << "  <edition";
0179                         if (ok && asInt > 0)
0180                             stream << " number=\"" << asInt << "\"";
0181                         stream << '>' << asText << "</edition>" << ENDL;
0182                     } else if (key == Entry::ftYear) {
0183                         // Guess an int representing the year
0184                         const QString textualRepresentation = value.count() > 0 ? PlainTextValue::text(value.first()) : QString();
0185                         static const QRegularExpression yearRegExp(QStringLiteral("^(1[2-9]|2[01])\\d{2}$"));
0186                         bool ok = false;
0187                         const int asInt = yearRegExp.match(textualRepresentation).hasMatch() ? textualRepresentation.toInt(&ok) : -1;
0188 
0189                         stream << "  <year";
0190                         if (ok && asInt > 0)
0191                             stream << " number=\"" << asInt << "\"";
0192                         stream << '>' << valueToXML(value) << "</year>" << ENDL;
0193                     } else if (key == Entry::ftMonth) {
0194                         int asInt = -1;
0195                         QString content;
0196                         for (const auto &valueItem : value) {
0197                             bool ok = false;
0198                             if (asInt < 0)
0199                                 asInt = FileExporter::monthStringToNumber(PlainTextValue::text(valueItem), &ok);
0200                             if (ok)
0201                                 content.append(QStringLiteral("<text>") + KBibTeX::Months[asInt - 1] + QStringLiteral("</text>"));
0202                             else {
0203                                 content.append(valueItemToXML(valueItem));
0204                                 asInt = -1;
0205                             }
0206                         }
0207 
0208                         stream << "  <month";
0209                         if (asInt >= 1 && asInt <= 12)
0210                             stream << " triple=\"" << KBibTeX::MonthsTriple[asInt - 1] << "\" number=\"" << asInt << "\"";
0211                         stream << '>' << content << "</month>" << ENDL;
0212                     } else {
0213                         // Guess an int representing of this value
0214                         const QString textualRepresentation = value.count() > 0 ? PlainTextValue::text(value.first()) : QString();
0215                         static const QRegularExpression numberRegExp(QStringLiteral("^[1-9]\\d*$"));
0216                         bool ok = false;
0217                         const int asInt = numberRegExp.match(textualRepresentation).hasMatch() ? textualRepresentation.toInt(&ok) : -1;
0218 
0219                         stream << "  <" << key;
0220                         if (ok && asInt > 0)
0221                             stream << " number=\"" << asInt << "\"";
0222                         stream << '>' << valueToXML(value) << "</" << key << ">" << ENDL;
0223                     }
0224                 }
0225                 stream << " </entry>" << ENDL;
0226 
0227                 return true;
0228             },
0229             /*.writeMacro =*/ [](QTextStream &stream, const QSharedPointer<const Macro> &macro) {
0230                 stream << " <string key=\"" << macro->key() << "\">";
0231                 stream << valueToXML(macro->value());
0232                 stream << "</string>" << ENDL;
0233                 return true;
0234             },
0235             /*.writeComment =*/ [](QTextStream &stream, const QSharedPointer<const Comment> &comment) {
0236                 stream << " <comment>" ;
0237                 stream << EncoderXML::instance().encode(comment->text(), Encoder::TargetEncoding::UTF8);
0238                 stream << "</comment>" << ENDL;
0239                 return true;
0240             }
0241         }
0242     }, {
0243         FileExporterXML::OutputStyle::HTML_Standard, {
0244             /*.header =*/ [](QTextStream &stream) {
0245                 stream << "<!DOCTYPE html>" << ENDL;
0246                 stream << "<!-- HTML document written by KBibTeXIO as part of KBibTeX -->" << ENDL;
0247                 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << ENDL;
0248                 stream << "<html xmlns=\"http://www.w3.org/1999/xhtml\">" << ENDL;
0249                 stream << "<head><title>Bibliography</title><meta charset=\"utf-8\"></head>" << ENDL;
0250                 stream << "<body>" << ENDL;
0251                 return true;
0252             },
0253             /*.footer =*/ [](QTextStream &stream) {
0254                 stream << "</body></html>" << ENDL << ENDL;
0255                 return true;
0256             },
0257             /*.writeEntry =*/ [](QTextStream &stream, const QSharedPointer<const Entry> &entry) {
0258                 static const auto formatPersons = [](QTextStream &stream, const Value & value) {
0259                     bool firstPerson = true;
0260                     for (const QSharedPointer<ValueItem> &vi : value) {
0261                         const QSharedPointer<Person> &p = vi.dynamicCast<Person>();
0262                         if (!p.isNull()) {
0263                             if (!firstPerson)
0264                                 stream << ", ";
0265                             else
0266                                 firstPerson = false;
0267                             stream << htmlify(p->lastName()).toUtf8().constData();
0268                             if (!p->firstName().isEmpty())
0269                                 stream << "<span style=\"opacity:0.75;\">, " << htmlify(p->firstName()).toUtf8().constData() << "</span>";
0270                         } else {
0271                             const QString text = PlainTextValue::text(vi);
0272                             if (text == QStringLiteral("others"))
0273                                 stream << "<span style=\"opacity:0.75;\"><i>et&#160;al.</i></span>";
0274                         }
0275                     }
0276                 };
0277 
0278                 stream << "<p>";
0279                 const Value &authors = entry->contains(Entry::ftAuthor) ? entry->value(Entry::ftAuthor) : Value();
0280                 formatPersons(stream, authors);
0281                 if (!authors.isEmpty())
0282                     stream << ": ";
0283 
0284                 const QString title = PlainTextValue::text(entry->contains(Entry::ftTitle) ? entry->value(Entry::ftTitle) : Value());
0285                 if (!title.isEmpty())
0286                     stream << "<strong>" << htmlify(title).toUtf8().constData() << "</strong>";
0287 
0288                 const QString booktitle = PlainTextValue::text(entry->contains(Entry::ftBookTitle) ? entry->value(Entry::ftBookTitle) : Value());
0289                 if (!booktitle.isEmpty())
0290                     stream << ", <i>" << htmlify(booktitle).toUtf8().constData() << "</i>";
0291                 else {
0292                     const QString journal = PlainTextValue::text(entry->contains(Entry::ftJournal) ? entry->value(Entry::ftJournal) : Value());
0293                     if (!journal.isEmpty()) {
0294                         stream << ", <i>" << htmlify(journal).toUtf8().constData() << "</i>";
0295                         const QString volume = PlainTextValue::text(entry->contains(Entry::ftVolume) ? entry->value(Entry::ftVolume) : Value());
0296                         if (!volume.isEmpty()) {
0297                             stream << " " << volume;
0298                             const QString number = PlainTextValue::text(entry->contains(Entry::ftNumber) ? entry->value(Entry::ftNumber) : Value());
0299                             if (!number.isEmpty()) {
0300                                 stream << "(" << number << ")";
0301                             }
0302                         }
0303                     }
0304                 }
0305                 const QString series = PlainTextValue::text(entry->contains(Entry::ftSeries) ? entry->value(Entry::ftSeries) : Value());
0306                 if (!series.isEmpty())
0307                     stream << ", <i>" << htmlify(series).toUtf8().constData() << "</i>";
0308 
0309                 const Value &editors = entry->contains(Entry::ftEditor) ? entry->value(Entry::ftEditor) : Value();
0310                 if (!editors.isEmpty()) {
0311                     stream << (editors.length() > 1 ? ", Eds. " : ", Ed. ");
0312                     formatPersons(stream, editors);
0313                 }
0314 
0315                 const QString school = PlainTextValue::text(entry->contains(Entry::ftSchool) ? entry->value(Entry::ftSchool) : Value());
0316                 if (!school.isEmpty())
0317                     stream << ", " << htmlify(school).toUtf8().constData();
0318 
0319                 QString edition = PlainTextValue::text(entry->contains(Entry::ftEdition) ? entry->value(Entry::ftEdition) : Value());
0320                 if (!edition.isEmpty()) {
0321                     bool ok = false;
0322                     int edInt = FileExporter::editionStringToNumber(edition, &ok);
0323                     if (ok && edInt > 0) {
0324                         edition = FileExporter::numberToOrdinal(edInt);
0325                     }
0326                     stream << ", " << htmlify(edition).toUtf8().constData() << " ed.";
0327                 }
0328 
0329                 const QString publisher = PlainTextValue::text(entry->contains(Entry::ftPublisher) ? entry->value(Entry::ftPublisher) : Value());
0330                 if (!publisher.isEmpty())
0331                     stream << ", " << htmlify(publisher).toUtf8().constData();
0332 
0333                 const QString pages = PlainTextValue::text(entry->contains(Entry::ftPages) ? entry->value(Entry::ftPages) : Value());
0334                 if (!pages.isEmpty())
0335                     stream << ", p. " << htmlify(pages).toUtf8().constData();
0336 
0337                 const QString year = PlainTextValue::text(entry->contains(Entry::ftYear) ? entry->value(Entry::ftYear) : Value());
0338                 const QString month = PlainTextValue::text(entry->contains(Entry::ftMonth) ? entry->value(Entry::ftMonth) : Value());
0339                 if (!year.isEmpty()) {
0340                     stream << ", ";
0341                     if (!month.isEmpty()) {
0342                         bool ok = false;
0343                         int iMonth = FileExporter::monthStringToNumber(month, &ok);
0344                         if (ok && iMonth >= 1 && iMonth <= 12)
0345                             stream << KBibTeX::Months[iMonth - 1].toUtf8().constData() << " ";
0346                         else
0347                             stream << month.toUtf8().constData() << " ";
0348                     }
0349                     stream << year.toUtf8().constData();
0350                 }
0351 
0352                 const QString abstract = PlainTextValue::text(entry->contains(Entry::ftAbstract) ? entry->value(Entry::ftAbstract) : Value());
0353                 if (!abstract.isEmpty()) {
0354                     stream << "<br/><small><i>Abstract</i>: " << htmlify(abstract).toUtf8().constData() << "</small>";
0355                 }
0356 
0357                 stream << "</p>" << ENDL;
0358                 return true;
0359             },
0360             /*.writeMacro =*/ [](QTextStream &stream, const QSharedPointer<const Macro> &macro) {
0361                 stream << "<p>" << htmlify(PlainTextValue::text(macro->value())).toUtf8().constData() << "</p>" << ENDL;
0362                 return true;
0363             },
0364             /*.writeComment =*/ [](QTextStream &stream, const QSharedPointer<const Comment> &comment) {
0365                 stream << "<p>" << htmlify(comment->text()).toUtf8().constData() << "</p>" << ENDL;
0366                 return true;
0367             }
0368         }
0369     }, {
0370         FileExporterXML::OutputStyle::HTML_AbstractOnly, {
0371             /*.header =*/ [](QTextStream &stream) {
0372                 stream << "<!DOCTYPE html>" << ENDL;
0373                 stream << "<!-- HTML document written by KBibTeXIO as part of KBibTeX -->" << ENDL;
0374                 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << ENDL;
0375                 stream << "<html xmlns=\"http://www.w3.org/1999/xhtml\">" << ENDL;
0376                 stream << "<head><title>Bibliography</title><meta charset=\"utf-8\"></head>" << ENDL;
0377                 stream << "<body>" << ENDL;
0378                 return true;
0379             },
0380             /*.footer =*/ [](QTextStream &stream) {
0381                 stream << "</body></html>" << ENDL << ENDL;
0382                 return true;
0383             },
0384             /*.writeEntry =*/ [](QTextStream &stream, const QSharedPointer<const Entry> &entry) {
0385                 const QString abstract = PlainTextValue::text(entry->contains(Entry::ftAbstract) ? entry->value(Entry::ftAbstract) : Value());
0386                 if (!abstract.isEmpty()) {
0387                     stream << "<p>" << htmlify(abstract).toUtf8().constData() << "</p>" << ENDL;
0388                     return true;
0389                 } else
0390                     return false;
0391             },
0392             /*.writeMacro =*/ [](QTextStream &stream, const QSharedPointer<const Macro> &macro) {
0393                 stream << "<p>" << htmlify(PlainTextValue::text(macro->value())).toUtf8().constData() << "</p>" << ENDL;
0394                 return true;
0395             },
0396             /*.writeComment =*/ [](QTextStream &stream, const QSharedPointer<const Comment> &comment) {
0397                 stream << "<p>" << htmlify(comment->text()).toUtf8().constData() << "</p>" << ENDL;
0398                 return true;
0399             }
0400         }
0401     }, {
0402         FileExporterXML::OutputStyle::Plain_WikipediaCite, {
0403             /*.header =*/ [](QTextStream &stream) {
0404                 Q_UNUSED(stream);
0405                 // No header for Wikipedia Cite output
0406                 return true;
0407             },
0408             /*.footer =*/ [](QTextStream &stream) {
0409                 Q_UNUSED(stream);
0410                 // No footer for Wikipedia Cite output
0411                 return true;
0412             },
0413             /*.writeEntry =*/ [](QTextStream &stream, const QSharedPointer<const Entry> &entry) {
0414                 stream << "{{Citation";
0415 
0416                 QSet<QString> insertedKeys;
0417                 const QVector<QPair<QString, QString>> values{
0418                     {QStringLiteral("title"), Entry::ftTitle},
0419                     {QStringLiteral("year"), Entry::ftYear},
0420                     {QStringLiteral("journal"), Entry::ftJournal},
0421                     {QStringLiteral("publisher"), Entry::ftPublisher},
0422                     {QStringLiteral("volume"), Entry::ftVolume},
0423                     {QStringLiteral("issue"), Entry::ftNumber},
0424                     {QStringLiteral("url"), Entry::ftUrl},
0425                     {QStringLiteral("doi"), Entry::ftDOI},
0426                     {QStringLiteral("isbn"), Entry::ftISBN},
0427                     {QStringLiteral("issn"), Entry::ftISSN},
0428                     {QStringLiteral("pages"), Entry::ftPages},
0429                     {QStringLiteral("jstor"), QStringLiteral("jstor_id")},
0430                     {QStringLiteral("pmid"), entry->contains(QStringLiteral("pmid")) ? QStringLiteral("pmid") : (entry->contains(QStringLiteral("pubmed")) ? QStringLiteral("pubmed") : QString())}
0431                 };
0432                 for (auto it = values.constBegin(); it != values.constEnd(); ++it) {
0433                     const QString value{entry->contains(it->second) ? PlainTextValue::text(entry->value(it->second)) : QString()};
0434                     if (!value.isEmpty() && !insertedKeys.contains(it->first)) {
0435                         stream << ENDL << "| " << it->first << " = " << value;
0436                         insertedKeys.insert(it->first);
0437                     }
0438                 }
0439 
0440                 static const QHash<QRegularExpression, QSet<QString>> substrings{
0441                     {QRegularExpression(QStringLiteral("jstor[:_-]*(?<jstor>.+)")), {QStringLiteral("^id")}},
0442                     {QRegularExpression(QStringLiteral("https?://hdl\\.handle\\.net/(?<hdl>.+)")), {Entry::ftUrl}},
0443                     {QRegularExpression(QStringLiteral("/pubmed/(?<pmid>[^/]+)")), {Entry::ftUrl}},
0444                     {KBibTeX::doiRegExp, {Entry::ftDOI}},
0445                     {KBibTeX::arXivRegExp, {QStringLiteral("eprint"), QStringLiteral("arxiv")}}
0446                 };
0447                 for (auto it = substrings.constBegin(); it != substrings.constEnd(); ++it) {
0448                     const QRegularExpression &re = it.key();
0449                     const QSet<QString> &bibKeys = it.value();
0450                     for (const QString &bibKey : bibKeys) {
0451                         const QString bibText = bibKey == QStringLiteral("^id") ? entry->id() : (entry->contains(bibKey) ? PlainTextValue::text(entry->value(bibKey)) : QString());
0452                         if (bibText.isEmpty())
0453                             continue;
0454                         const QRegularExpressionMatch match = re.match(bibText);
0455                         if (match.hasMatch()) {
0456                             for (const QString &namedGroup : re.namedCaptureGroups()) {
0457                                 if (namedGroup.isEmpty() || insertedKeys.contains(namedGroup))
0458                                     continue;
0459                                 const QString foundText = match.captured(namedGroup);
0460                                 if (!foundText.isEmpty()) {
0461                                     stream << ENDL << "| " << namedGroup << " = " << foundText;
0462                                     insertedKeys.insert(namedGroup);
0463                                 }
0464                             }
0465                         }
0466                     }
0467                 }
0468 
0469                 int authorCounter = 0;
0470                 const Value &authors = entry->contains(Entry::ftAuthor) ? entry->value(Entry::ftAuthor) : Value();
0471                 for (const QSharedPointer<ValueItem> &vi : authors) {
0472                     const QSharedPointer<Person> &p = vi.dynamicCast<Person>();
0473                     if (!p.isNull()) {
0474                         ++authorCounter;
0475                         stream << ENDL << "|last" << authorCounter << " = " << p->lastName();
0476                         if (!p->firstName().isEmpty())
0477                             stream << ENDL << "|first" << authorCounter << " = " << p->firstName();
0478                     }
0479                 }
0480 
0481                 stream  << ENDL << "}}" << ENDL;
0482                 return true;
0483             },
0484             /*.writeMacro =*/ [](QTextStream &stream, const QSharedPointer<const Macro> &macro) {
0485                 Q_UNUSED(stream);
0486                 Q_UNUSED(macro);
0487                 // Wikipedia Cite output does not support BibTeX macros
0488                 return true;
0489             },
0490             /*.writeComment =*/ [](QTextStream &stream, const QSharedPointer<const Comment> &comment) {
0491                 Q_UNUSED(stream);
0492                 Q_UNUSED(comment);
0493                 // Wikipedia Cite output does not support BibTeX comments
0494                 return true;
0495             }
0496         }
0497     }
0498 };
0499 
0500 
0501 class FileExporterXML::Private
0502 {
0503 private:
0504     FileExporterXML *parent;
0505 
0506 public:
0507     static const QHash<FileExporterXML::OutputStyle, std::function<bool(const QTextStream &)>> outputHeader;
0508 
0509     OutputStyle outputStyle;
0510 
0511     Private(FileExporterXML *p)
0512             : parent(p), outputStyle(FileExporterXML::OutputStyle::XML_KBibTeX)
0513     {
0514         // nothing
0515     }
0516 
0517     bool writeElement(QTextStream &stream, const QSharedPointer<const Element> element, const File *bibtexfile = nullptr)
0518     {
0519         bool result = false;
0520 
0521         const QSharedPointer<const Entry> &entry = element.dynamicCast<const Entry>();
0522         if (!entry.isNull()) {
0523             if (bibtexfile == nullptr)
0524                 result |= rewriteFunctions[outputStyle].writeEntry(stream, entry);
0525             else {
0526                 const QSharedPointer<const Entry> resolvedEntry(entry->resolveCrossref(bibtexfile));
0527                 result |= rewriteFunctions[outputStyle].writeEntry(stream, resolvedEntry);
0528             }
0529         } else {
0530             const QSharedPointer<const Macro> &macro = element.dynamicCast<const Macro>();
0531             if (!macro.isNull())
0532                 result |= rewriteFunctions[outputStyle].writeMacro(stream, macro);
0533             else {
0534                 const QSharedPointer<const Comment> &comment = element.dynamicCast<const Comment>();
0535                 if (!comment.isNull())
0536                     result |= rewriteFunctions[outputStyle].writeComment(stream, comment);
0537                 else {
0538                     // preambles are ignored, make no sense in XML files
0539                 }
0540             }
0541         }
0542 
0543         return result;
0544     }
0545 };
0546 
0547 
0548 FileExporterXML::FileExporterXML(QObject *parent)
0549         : FileExporter(parent), d(new Private(this))
0550 {
0551     // nothing
0552 }
0553 
0554 FileExporterXML::~FileExporterXML()
0555 {
0556     delete d;
0557 }
0558 
0559 void FileExporterXML::setOutputStyle(OutputStyle outputStyle)
0560 {
0561     d->outputStyle = outputStyle;
0562 }
0563 
0564 bool FileExporterXML::save(QIODevice *iodevice, const File *bibtexfile)
0565 {
0566     check_if_bibtexfile_or_iodevice_invalid(bibtexfile, iodevice);
0567 
0568     bool result = true;
0569     QTextStream stream(iodevice);
0570     // https://forum.qt.io/topic/135724/qt-6-replacement-for-qtextcodec
0571 #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0))
0572     stream.setCodec("UTF-8");
0573 #else
0574     stream.setEncoding(QStringConverter::Utf8);
0575 #endif
0576 
0577     result &= rewriteFunctions[d->outputStyle].header(stream);
0578 
0579     for (File::ConstIterator it = bibtexfile->constBegin(); it != bibtexfile->constEnd() && result; ++it)
0580         result &= d->writeElement(stream, *it, bibtexfile);
0581 
0582     result &= rewriteFunctions[d->outputStyle].footer(stream);
0583 
0584     return result;
0585 }
0586 
0587 bool FileExporterXML::save(QIODevice *iodevice, const QSharedPointer<const Element> &element, const File *bibtexfile)
0588 {
0589     Q_UNUSED(bibtexfile)
0590 
0591     check_if_iodevice_invalid(iodevice);
0592 
0593     QTextStream stream(iodevice);
0594     // https://forum.qt.io/topic/135724/qt-6-replacement-for-qtextcodec
0595 #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0))
0596     stream.setCodec("UTF-8");
0597 #else
0598     stream.setEncoding(QStringConverter::Utf8);
0599 #endif
0600 
0601     bool result = rewriteFunctions[d->outputStyle].header(stream);
0602 
0603     result &= d->writeElement(stream, element);
0604 
0605     result &= rewriteFunctions[d->outputStyle].footer(stream);
0606 
0607     return result;
0608 }
0609 
0610 QDebug operator<<(QDebug dbg, FileExporterXML::OutputStyle outputStyle)
0611 {
0612     switch (outputStyle) {
0613     case FileExporterXML::OutputStyle::XML_KBibTeX:
0614         dbg << "FileExporterXML::OutputStyle::XML_KBibTeX";
0615         break;
0616     case FileExporterXML::OutputStyle::HTML_Standard:
0617         dbg << "FileExporterXML::OutputStyle::HTML_Standard";
0618         break;
0619     case FileExporterXML::OutputStyle::HTML_AbstractOnly:
0620         dbg << "FileExporterXML::OutputStyle::HTML_AbstractOnly";
0621         break;
0622     case FileExporterXML::OutputStyle::Plain_WikipediaCite:
0623         dbg << "FileExporterXML::OutputStyle::Plain_WikipediaCite";
0624         break;
0625     default:
0626         dbg << "FileExporterXML::OutputStyle::???";
0627         break;
0628     }
0629     return dbg;
0630 }