File indexing completed on 2024-05-19 05:05:35

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2023 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "fileexporterwordbibxml.h"
0021 
0022 #include <QIODevice>
0023 #include <QTextStream>
0024 #include <QRegularExpression>
0025 
0026 #include <File>
0027 #include <Entry>
0028 #include "encoderxml.h"
0029 #include "fileexporter_p.h"
0030 #include "logging_io.h"
0031 
0032 class FileExporterWordBibXML::Private
0033 {
0034 public:
0035     bool cancelFlag;
0036 
0037     Private(FileExporterWordBibXML *)
0038         : cancelFlag(false)
0039     {
0040         // nothing
0041     }
0042 
0043     ~Private() {
0044         // nothing
0045     }
0046 
0047     bool insideProtectiveCurleyBrackets(const QString &input) {
0048         if (input.length() < 3 || input[0] != QLatin1Char('{') || input[input.length() - 1] != QLatin1Char('}'))
0049             return false;
0050 
0051         int depth = 0;
0052         QChar prev;
0053         for (const QChar &c : input) {
0054             if (c == QLatin1Char('{') && prev != QLatin1Char('\\'))
0055                 ++depth;
0056             else if (c == QLatin1Char('}') && prev != QLatin1Char('\\'))
0057                 --depth;
0058             prev = c;
0059         }
0060 
0061         return depth == 0;
0062     }
0063 
0064     QString removeUnwantedChars(const QString &input) {
0065         QString result;
0066         result.reserve(input.length());
0067         static const QSet<QChar> skip{QLatin1Char('{'), QLatin1Char('}'), QLatin1Char('<'), QLatin1Char('>'), QLatin1Char('&')};
0068         static const QHash<QChar, QString> replace{{QLatin1Char('~'), QStringLiteral(" ")}};
0069         for (const QChar &c : input)
0070             if (skip.contains(c))
0071                 continue;
0072             else if (replace.contains(c))
0073                 result.append(replace[c]);
0074             else
0075                 result.append(c);
0076         return result;
0077     }
0078 
0079     QString entryTypeToSourceType(const QString &entryType) {
0080         if (entryType == Entry::etBook)
0081             return QStringLiteral("Book");
0082         else if (entryType == Entry::etInBook || entryType == QStringLiteral("incollection"))
0083             return QStringLiteral("BookSection");
0084         else if (entryType == Entry::etArticle)
0085             return QStringLiteral("JournalArticle");
0086         else if (entryType == Entry::etInProceedings || entryType == Entry::etProceedings || entryType == QStringLiteral("conference"))
0087             return QStringLiteral("ConferenceProceedings");
0088         else if (entryType == Entry::etUnpublished || entryType == Entry::etMastersThesis || entryType == Entry::etPhDThesis || entryType == Entry::etTechReport || entryType == QStringLiteral("manual"))
0089             return QStringLiteral("Report");
0090         else if (entryType == Entry::etMisc)
0091             return QStringLiteral("Misc");
0092         else {
0093             qCDebug(LOG_KBIBTEX_IO) << "Unsupported entry type:" << entryType;
0094             return QStringLiteral("Misc");
0095         }
0096     }
0097 
0098     QString fieldTypeToXMLkey(const QString &fieldType) {
0099         if (fieldType == Entry::ftTitle)
0100             return QStringLiteral("Title");
0101         else if (fieldType == Entry::ftPublisher)
0102             return QStringLiteral("Publisher");
0103         else if (fieldType == Entry::ftJournal || fieldType == QStringLiteral("journaltitle"))
0104             return QStringLiteral("JournalName");
0105         else if (fieldType == Entry::ftVolume)
0106             return QStringLiteral("Volume");
0107         else if (fieldType == Entry::ftNote)
0108             return QStringLiteral("Comments");
0109         else if (fieldType == Entry::ftEdition)
0110             return QStringLiteral("Edititon");
0111         else if (fieldType == Entry::ftBookTitle)
0112             return QStringLiteral("BookTitle");
0113         else if (fieldType == Entry::ftChapter)
0114             return QStringLiteral("ChapterNumber");
0115         else if (fieldType == Entry::ftNumber)
0116             return QStringLiteral("Issue");
0117         else if (fieldType == Entry::ftSchool)
0118             return QStringLiteral("Department");
0119         else if (fieldType == Entry::ftDOI)
0120             return QStringLiteral("DOI");
0121         else if (fieldType == Entry::ftUrl)
0122             return QStringLiteral("URL");
0123         else if (fieldType == Entry::ftPages)
0124             return QStringLiteral("Pages");
0125         else if (fieldType == Entry::ftLocation)
0126             return QStringLiteral("City");
0127         else {
0128             qCDebug(LOG_KBIBTEX_IO) << "Unsupported field type:" << fieldType;
0129             return QString();
0130         }
0131     }
0132 
0133 
0134     bool writeEntry(QTextStream &stream, const QSharedPointer<const Entry> &entry)
0135     {
0136         // Documentation of Word XML Bibliography:
0137         //  - https://docs.jabref.org/advanced/knowledge/msofficebibfieldmapping
0138         stream << "<b:Source><b:Tag>" << EncoderXML::instance().encode(entry->id(), Encoder::TargetEncoding::UTF8) << "</b:Tag><b:SourceType>" << entryTypeToSourceType(entry->type().toLower()) << "</b:SourceType>";
0139 
0140         static const QSet<QString> standardNumberKeys{Entry::ftISBN, Entry::ftISSN, QStringLiteral("lccn")};
0141         QString standardNumber;
0142 
0143         // Authors and editors are grouped
0144         static const QHash<QString, QString> personFields{{QStringLiteral("Author"), Entry::ftAuthor}, {QStringLiteral("Editor"), Entry::ftEditor}, {QStringLiteral("Translator"), QStringLiteral("translator")}, {QStringLiteral("BookAuthor"), QStringLiteral("bookauthor")}};
0145         if (entry->contains(Entry::ftAuthor) || entry->contains(Entry::ftEditor)) {
0146             stream << "<b:Author>";
0147             for (auto it = personFields.constBegin(); it != personFields.constEnd(); ++it) {
0148                 if (entry->contains(it.value())) {
0149                     stream << "<b:" << it.key() << ">";
0150 
0151                     bool nameListOpened = false;
0152                     const Value value = entry->value(it.value());
0153                     for (const auto &valueItem : value) {
0154                         const QSharedPointer<const Person> p = valueItem.dynamicCast<const Person>();
0155                         if (!p.isNull()) {
0156                             if (!nameListOpened && p->firstName().isEmpty() && insideProtectiveCurleyBrackets(p->lastName())) {
0157                                 // Person's last name looks like  {KDE e.V.}  so treat as organization name instead of a person's name
0158                                 stream << "<b:Corporate>" << removeUnwantedChars(p->lastName()) << "</b:Corporate>";
0159                                 break; //< only one corporate, nothing more
0160                             } else {
0161                                 if (!nameListOpened) {
0162                                     stream << "<b:NameList>";
0163                                     nameListOpened = true;
0164                                 }
0165                                 stream << "<b:Person><b:Last>" << removeUnwantedChars(p->lastName()) << "</b:Last><b:First>" << removeUnwantedChars(p->firstName()) << "</b:First></b:Person>";
0166                             }
0167                         } else {
0168                             qCWarning(LOG_KBIBTEX_IO) << it.value() << "field contains something else than a Person:" << PlainTextValue::text(value);
0169                         }
0170                     }
0171 
0172                     if (nameListOpened)
0173                         stream << "</b:NameList>";
0174                     stream << "</b:" << it.key() << ">";
0175                 }
0176             }
0177             stream << "</b:Author>";
0178         }
0179 
0180         for (Entry::ConstIterator it = entry->constBegin(); it != entry->constEnd(); ++it) {
0181             const QString &key = it.key().toLower();
0182             if (personFields.values().contains(key)) {
0183                 // Authors, editors, etc. were processed above
0184                 continue;
0185             }
0186             const Value &value = it.value();
0187 
0188             static const QSet<QString> fieldsKeptAsIs{Entry::ftTitle, Entry::ftPublisher, Entry::ftJournal, Entry::ftVolume, Entry::ftNote, Entry::ftEdition, Entry::ftBookTitle, Entry::ftChapter, Entry::ftNumber, Entry::ftSchool, Entry::ftDOI, Entry::ftUrl, Entry::ftPages, Entry::ftLocation};
0189             static const QSet<QString> ignoredFields{Entry::ftAbstract, Entry::ftLocalFile, Entry::ftSeries, Entry::ftKeywords, Entry::ftCrossRef, Entry::ftAddress, QStringLiteral("acmid"), QStringLiteral("articleno"), QStringLiteral("numpages"), QStringLiteral("added-at"), QStringLiteral("biburl"), QStringLiteral("organization"), QStringLiteral("ee"), QStringLiteral("interhash"), QStringLiteral("intrahash"), QStringLiteral("howpublished"), QStringLiteral("key"), QStringLiteral("type"), QStringLiteral("institution"), QStringLiteral("issue"), QStringLiteral("eprint"), QStringLiteral("affiliation"), QStringLiteral("keyword"), QStringLiteral("urldate"), QStringLiteral("date"), QStringLiteral("shortauthor")};
0190 
0191             if (ignoredFields.contains(key) || key.startsWith(QStringLiteral("x-"))) {
0192                 // qCDebug(LOG_KBIBTEX_IO) << "Ignoring field" << key << "for entry" << entry->id();
0193             } else if (key == Entry::ftYear) {
0194                 const QString textualRepresentation = PlainTextValue::text(value);
0195                 static const QRegularExpression yearRegExp(QStringLiteral("\\b(1[2-9]|2[01])\\d{2}\\b"));
0196                 const auto m = yearRegExp.match(textualRepresentation);
0197                 if (m.hasMatch())
0198                     stream << "<b:Year>" << m.captured() << "</b:Year>";
0199             } else if (standardNumberKeys.contains(key)) {
0200                 standardNumber = PlainTextValue::text(value);
0201             } else if (key == Entry::ftMonth) {
0202                 // TODO
0203             } else if (fieldsKeptAsIs.contains(key)) {
0204                 const QString xmlKey{fieldTypeToXMLkey(key)};
0205                 const QString textualRepresentation{removeUnwantedChars(PlainTextValue::text(value))};
0206                 stream << "<b:" << xmlKey << ">" << textualRepresentation << "</b:" << xmlKey << ">";
0207             } else {
0208                 qCDebug(LOG_KBIBTEX_IO) << "Field not supported by Word XML exporter:" << key;
0209             }
0210         }
0211 
0212         if (!standardNumber.isEmpty()) {
0213             stream << "<b:StandardNumber>" << removeUnwantedChars(standardNumber) << "</b:StandardNumber>";
0214         }
0215 
0216         stream << "</b:Source>";
0217 
0218         return true;
0219     }
0220 
0221 
0222     bool write(QTextStream &stream, const QSharedPointer<const Element> &element, const File *bibtexfile = nullptr) {
0223         bool result = false;
0224 
0225         const QSharedPointer<const Entry> &entry = element.dynamicCast<const Entry>();
0226         if (!entry.isNull()) {
0227             if (bibtexfile == nullptr)
0228                 result |= writeEntry(stream, entry);
0229             else {
0230                 const QSharedPointer<const Entry> resolvedEntry(entry->resolveCrossref(bibtexfile));
0231                 result |= writeEntry(stream, resolvedEntry);
0232             }
0233         } else {
0234             // not (yet) supported
0235             return true;
0236         }
0237 
0238         return result;
0239     }
0240 };
0241 
0242 FileExporterWordBibXML::FileExporterWordBibXML(QObject *parent)
0243     : FileExporter(parent), d(new FileExporterWordBibXML::Private(this))
0244 {
0245     /// nothing
0246 }
0247 
0248 FileExporterWordBibXML::~FileExporterWordBibXML()
0249 {
0250     delete d;
0251 }
0252 
0253 bool FileExporterWordBibXML::save(QIODevice *iodevice, const File *bibtexfile)
0254 {
0255     check_if_bibtexfile_or_iodevice_invalid(bibtexfile, iodevice);
0256 
0257     bool result = true;
0258     d->cancelFlag = false;
0259     QTextStream stream(iodevice);
0260     // https://forum.qt.io/topic/135724/qt-6-replacement-for-qtextcodec
0261 #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0))
0262     stream.setCodec("UTF-8");
0263 #else
0264     stream.setEncoding(QStringConverter::Utf8);
0265 #endif
0266 
0267 #if QT_VERSION >= 0x050e00
0268     stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << Qt::endl;
0269     stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << Qt::endl;
0270     stream << "<!-- https://userbase.kde.org/KBibTeX -->" << Qt::endl;
0271     stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << Qt::endl;
0272 #else // QT_VERSION < 0x050e00
0273     stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl;
0274     stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << endl;
0275     stream << "<!-- https://userbase.kde.org/KBibTeX -->" << endl;
0276     stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << endl;
0277 #endif // QT_VERSION >= 0x050e00
0278 
0279     for (File::ConstIterator it = bibtexfile->constBegin(); it != bibtexfile->constEnd() && result && !d->cancelFlag; ++it)
0280         result &= d->write(stream, *it, bibtexfile);
0281 
0282 #if QT_VERSION >= 0x050e00
0283     stream << "</b:Sources>" << Qt::endl;
0284 #else // QT_VERSION < 0x050e00
0285     stream << "</b:Sources>" << endl;
0286 #endif // QT_VERSION >= 0x050e00
0287 
0288     return result && !d->cancelFlag;
0289 }
0290 
0291 bool FileExporterWordBibXML::save(QIODevice *iodevice, const QSharedPointer<const Element> &element, const File *bibtexfile)
0292 {
0293     check_if_iodevice_invalid(iodevice);
0294 
0295     d->cancelFlag = false;
0296     QTextStream stream(iodevice);
0297     // https://forum.qt.io/topic/135724/qt-6-replacement-for-qtextcodec
0298 #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0))
0299     stream.setCodec("UTF-8");
0300 #else
0301     stream.setEncoding(QStringConverter::Utf8);
0302 #endif
0303 
0304 #if QT_VERSION >= 0x050e00
0305     stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << Qt::endl;
0306     stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << Qt::endl;
0307     stream << "<!-- https://userbase.kde.org/KBibTeX -->" << Qt::endl;
0308     stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << Qt::endl;
0309 #else // QT_VERSION < 0x050e00
0310     stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl;
0311     stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << endl;
0312     stream << "<!-- https://userbase.kde.org/KBibTeX -->" << endl;
0313     stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << endl;
0314 #endif // QT_VERSION >= 0x050e00
0315 
0316     const bool result = d->write(stream, element, bibtexfile);
0317 
0318 #if QT_VERSION >= 0x050e00
0319     stream << "</b:Sources>" << Qt::endl;
0320 #else // QT_VERSION < 0x050e00
0321     stream << "</b:Sources>" << endl;
0322 #endif // QT_VERSION >= 0x050e00
0323 
0324     return result && !d->cancelFlag;
0325 }
0326 
0327 void FileExporterWordBibXML::cancel()
0328 {
0329     d->cancelFlag = true;
0330 }
0331