File indexing completed on 2024-05-19 05:05:35
0001 /*************************************************************************** 0002 * SPDX-License-Identifier: GPL-2.0-or-later 0003 * * 0004 * SPDX-FileCopyrightText: 2023 Thomas Fischer <fischer@unix-ag.uni-kl.de> 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 * This program is distributed in the hope that it will be useful, * 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0014 * GNU General Public License for more details. * 0015 * * 0016 * You should have received a copy of the GNU General Public License * 0017 * along with this program; if not, see <https://www.gnu.org/licenses/>. * 0018 ***************************************************************************/ 0019 0020 #include "fileexporterwordbibxml.h" 0021 0022 #include <QIODevice> 0023 #include <QTextStream> 0024 #include <QRegularExpression> 0025 0026 #include <File> 0027 #include <Entry> 0028 #include "encoderxml.h" 0029 #include "fileexporter_p.h" 0030 #include "logging_io.h" 0031 0032 class FileExporterWordBibXML::Private 0033 { 0034 public: 0035 bool cancelFlag; 0036 0037 Private(FileExporterWordBibXML *) 0038 : cancelFlag(false) 0039 { 0040 // nothing 0041 } 0042 0043 ~Private() { 0044 // nothing 0045 } 0046 0047 bool insideProtectiveCurleyBrackets(const QString &input) { 0048 if (input.length() < 3 || input[0] != QLatin1Char('{') || input[input.length() - 1] != QLatin1Char('}')) 0049 return false; 0050 0051 int depth = 0; 0052 QChar prev; 0053 for (const QChar &c : input) { 0054 if (c == QLatin1Char('{') && prev != QLatin1Char('\\')) 0055 ++depth; 0056 else if (c == QLatin1Char('}') && prev != QLatin1Char('\\')) 0057 --depth; 0058 prev = c; 0059 } 0060 0061 return depth == 0; 0062 } 0063 0064 QString removeUnwantedChars(const QString &input) { 0065 QString result; 0066 result.reserve(input.length()); 0067 static const QSet<QChar> skip{QLatin1Char('{'), QLatin1Char('}'), QLatin1Char('<'), QLatin1Char('>'), QLatin1Char('&')}; 0068 static const QHash<QChar, QString> replace{{QLatin1Char('~'), QStringLiteral(" ")}}; 0069 for (const QChar &c : input) 0070 if (skip.contains(c)) 0071 continue; 0072 else if (replace.contains(c)) 0073 result.append(replace[c]); 0074 else 0075 result.append(c); 0076 return result; 0077 } 0078 0079 QString entryTypeToSourceType(const QString &entryType) { 0080 if (entryType == Entry::etBook) 0081 return QStringLiteral("Book"); 0082 else if (entryType == Entry::etInBook || entryType == QStringLiteral("incollection")) 0083 return QStringLiteral("BookSection"); 0084 else if (entryType == Entry::etArticle) 0085 return QStringLiteral("JournalArticle"); 0086 else if (entryType == Entry::etInProceedings || entryType == Entry::etProceedings || entryType == QStringLiteral("conference")) 0087 return QStringLiteral("ConferenceProceedings"); 0088 else if (entryType == Entry::etUnpublished || entryType == Entry::etMastersThesis || entryType == Entry::etPhDThesis || entryType == Entry::etTechReport || entryType == QStringLiteral("manual")) 0089 return QStringLiteral("Report"); 0090 else if (entryType == Entry::etMisc) 0091 return QStringLiteral("Misc"); 0092 else { 0093 qCDebug(LOG_KBIBTEX_IO) << "Unsupported entry type:" << entryType; 0094 return QStringLiteral("Misc"); 0095 } 0096 } 0097 0098 QString fieldTypeToXMLkey(const QString &fieldType) { 0099 if (fieldType == Entry::ftTitle) 0100 return QStringLiteral("Title"); 0101 else if (fieldType == Entry::ftPublisher) 0102 return QStringLiteral("Publisher"); 0103 else if (fieldType == Entry::ftJournal || fieldType == QStringLiteral("journaltitle")) 0104 return QStringLiteral("JournalName"); 0105 else if (fieldType == Entry::ftVolume) 0106 return QStringLiteral("Volume"); 0107 else if (fieldType == Entry::ftNote) 0108 return QStringLiteral("Comments"); 0109 else if (fieldType == Entry::ftEdition) 0110 return QStringLiteral("Edititon"); 0111 else if (fieldType == Entry::ftBookTitle) 0112 return QStringLiteral("BookTitle"); 0113 else if (fieldType == Entry::ftChapter) 0114 return QStringLiteral("ChapterNumber"); 0115 else if (fieldType == Entry::ftNumber) 0116 return QStringLiteral("Issue"); 0117 else if (fieldType == Entry::ftSchool) 0118 return QStringLiteral("Department"); 0119 else if (fieldType == Entry::ftDOI) 0120 return QStringLiteral("DOI"); 0121 else if (fieldType == Entry::ftUrl) 0122 return QStringLiteral("URL"); 0123 else if (fieldType == Entry::ftPages) 0124 return QStringLiteral("Pages"); 0125 else if (fieldType == Entry::ftLocation) 0126 return QStringLiteral("City"); 0127 else { 0128 qCDebug(LOG_KBIBTEX_IO) << "Unsupported field type:" << fieldType; 0129 return QString(); 0130 } 0131 } 0132 0133 0134 bool writeEntry(QTextStream &stream, const QSharedPointer<const Entry> &entry) 0135 { 0136 // Documentation of Word XML Bibliography: 0137 // - https://docs.jabref.org/advanced/knowledge/msofficebibfieldmapping 0138 stream << "<b:Source><b:Tag>" << EncoderXML::instance().encode(entry->id(), Encoder::TargetEncoding::UTF8) << "</b:Tag><b:SourceType>" << entryTypeToSourceType(entry->type().toLower()) << "</b:SourceType>"; 0139 0140 static const QSet<QString> standardNumberKeys{Entry::ftISBN, Entry::ftISSN, QStringLiteral("lccn")}; 0141 QString standardNumber; 0142 0143 // Authors and editors are grouped 0144 static const QHash<QString, QString> personFields{{QStringLiteral("Author"), Entry::ftAuthor}, {QStringLiteral("Editor"), Entry::ftEditor}, {QStringLiteral("Translator"), QStringLiteral("translator")}, {QStringLiteral("BookAuthor"), QStringLiteral("bookauthor")}}; 0145 if (entry->contains(Entry::ftAuthor) || entry->contains(Entry::ftEditor)) { 0146 stream << "<b:Author>"; 0147 for (auto it = personFields.constBegin(); it != personFields.constEnd(); ++it) { 0148 if (entry->contains(it.value())) { 0149 stream << "<b:" << it.key() << ">"; 0150 0151 bool nameListOpened = false; 0152 const Value value = entry->value(it.value()); 0153 for (const auto &valueItem : value) { 0154 const QSharedPointer<const Person> p = valueItem.dynamicCast<const Person>(); 0155 if (!p.isNull()) { 0156 if (!nameListOpened && p->firstName().isEmpty() && insideProtectiveCurleyBrackets(p->lastName())) { 0157 // Person's last name looks like {KDE e.V.} so treat as organization name instead of a person's name 0158 stream << "<b:Corporate>" << removeUnwantedChars(p->lastName()) << "</b:Corporate>"; 0159 break; //< only one corporate, nothing more 0160 } else { 0161 if (!nameListOpened) { 0162 stream << "<b:NameList>"; 0163 nameListOpened = true; 0164 } 0165 stream << "<b:Person><b:Last>" << removeUnwantedChars(p->lastName()) << "</b:Last><b:First>" << removeUnwantedChars(p->firstName()) << "</b:First></b:Person>"; 0166 } 0167 } else { 0168 qCWarning(LOG_KBIBTEX_IO) << it.value() << "field contains something else than a Person:" << PlainTextValue::text(value); 0169 } 0170 } 0171 0172 if (nameListOpened) 0173 stream << "</b:NameList>"; 0174 stream << "</b:" << it.key() << ">"; 0175 } 0176 } 0177 stream << "</b:Author>"; 0178 } 0179 0180 for (Entry::ConstIterator it = entry->constBegin(); it != entry->constEnd(); ++it) { 0181 const QString &key = it.key().toLower(); 0182 if (personFields.values().contains(key)) { 0183 // Authors, editors, etc. were processed above 0184 continue; 0185 } 0186 const Value &value = it.value(); 0187 0188 static const QSet<QString> fieldsKeptAsIs{Entry::ftTitle, Entry::ftPublisher, Entry::ftJournal, Entry::ftVolume, Entry::ftNote, Entry::ftEdition, Entry::ftBookTitle, Entry::ftChapter, Entry::ftNumber, Entry::ftSchool, Entry::ftDOI, Entry::ftUrl, Entry::ftPages, Entry::ftLocation}; 0189 static const QSet<QString> ignoredFields{Entry::ftAbstract, Entry::ftLocalFile, Entry::ftSeries, Entry::ftKeywords, Entry::ftCrossRef, Entry::ftAddress, QStringLiteral("acmid"), QStringLiteral("articleno"), QStringLiteral("numpages"), QStringLiteral("added-at"), QStringLiteral("biburl"), QStringLiteral("organization"), QStringLiteral("ee"), QStringLiteral("interhash"), QStringLiteral("intrahash"), QStringLiteral("howpublished"), QStringLiteral("key"), QStringLiteral("type"), QStringLiteral("institution"), QStringLiteral("issue"), QStringLiteral("eprint"), QStringLiteral("affiliation"), QStringLiteral("keyword"), QStringLiteral("urldate"), QStringLiteral("date"), QStringLiteral("shortauthor")}; 0190 0191 if (ignoredFields.contains(key) || key.startsWith(QStringLiteral("x-"))) { 0192 // qCDebug(LOG_KBIBTEX_IO) << "Ignoring field" << key << "for entry" << entry->id(); 0193 } else if (key == Entry::ftYear) { 0194 const QString textualRepresentation = PlainTextValue::text(value); 0195 static const QRegularExpression yearRegExp(QStringLiteral("\\b(1[2-9]|2[01])\\d{2}\\b")); 0196 const auto m = yearRegExp.match(textualRepresentation); 0197 if (m.hasMatch()) 0198 stream << "<b:Year>" << m.captured() << "</b:Year>"; 0199 } else if (standardNumberKeys.contains(key)) { 0200 standardNumber = PlainTextValue::text(value); 0201 } else if (key == Entry::ftMonth) { 0202 // TODO 0203 } else if (fieldsKeptAsIs.contains(key)) { 0204 const QString xmlKey{fieldTypeToXMLkey(key)}; 0205 const QString textualRepresentation{removeUnwantedChars(PlainTextValue::text(value))}; 0206 stream << "<b:" << xmlKey << ">" << textualRepresentation << "</b:" << xmlKey << ">"; 0207 } else { 0208 qCDebug(LOG_KBIBTEX_IO) << "Field not supported by Word XML exporter:" << key; 0209 } 0210 } 0211 0212 if (!standardNumber.isEmpty()) { 0213 stream << "<b:StandardNumber>" << removeUnwantedChars(standardNumber) << "</b:StandardNumber>"; 0214 } 0215 0216 stream << "</b:Source>"; 0217 0218 return true; 0219 } 0220 0221 0222 bool write(QTextStream &stream, const QSharedPointer<const Element> &element, const File *bibtexfile = nullptr) { 0223 bool result = false; 0224 0225 const QSharedPointer<const Entry> &entry = element.dynamicCast<const Entry>(); 0226 if (!entry.isNull()) { 0227 if (bibtexfile == nullptr) 0228 result |= writeEntry(stream, entry); 0229 else { 0230 const QSharedPointer<const Entry> resolvedEntry(entry->resolveCrossref(bibtexfile)); 0231 result |= writeEntry(stream, resolvedEntry); 0232 } 0233 } else { 0234 // not (yet) supported 0235 return true; 0236 } 0237 0238 return result; 0239 } 0240 }; 0241 0242 FileExporterWordBibXML::FileExporterWordBibXML(QObject *parent) 0243 : FileExporter(parent), d(new FileExporterWordBibXML::Private(this)) 0244 { 0245 /// nothing 0246 } 0247 0248 FileExporterWordBibXML::~FileExporterWordBibXML() 0249 { 0250 delete d; 0251 } 0252 0253 bool FileExporterWordBibXML::save(QIODevice *iodevice, const File *bibtexfile) 0254 { 0255 check_if_bibtexfile_or_iodevice_invalid(bibtexfile, iodevice); 0256 0257 bool result = true; 0258 d->cancelFlag = false; 0259 QTextStream stream(iodevice); 0260 // https://forum.qt.io/topic/135724/qt-6-replacement-for-qtextcodec 0261 #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) 0262 stream.setCodec("UTF-8"); 0263 #else 0264 stream.setEncoding(QStringConverter::Utf8); 0265 #endif 0266 0267 #if QT_VERSION >= 0x050e00 0268 stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << Qt::endl; 0269 stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << Qt::endl; 0270 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << Qt::endl; 0271 stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << Qt::endl; 0272 #else // QT_VERSION < 0x050e00 0273 stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl; 0274 stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << endl; 0275 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << endl; 0276 stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << endl; 0277 #endif // QT_VERSION >= 0x050e00 0278 0279 for (File::ConstIterator it = bibtexfile->constBegin(); it != bibtexfile->constEnd() && result && !d->cancelFlag; ++it) 0280 result &= d->write(stream, *it, bibtexfile); 0281 0282 #if QT_VERSION >= 0x050e00 0283 stream << "</b:Sources>" << Qt::endl; 0284 #else // QT_VERSION < 0x050e00 0285 stream << "</b:Sources>" << endl; 0286 #endif // QT_VERSION >= 0x050e00 0287 0288 return result && !d->cancelFlag; 0289 } 0290 0291 bool FileExporterWordBibXML::save(QIODevice *iodevice, const QSharedPointer<const Element> &element, const File *bibtexfile) 0292 { 0293 check_if_iodevice_invalid(iodevice); 0294 0295 d->cancelFlag = false; 0296 QTextStream stream(iodevice); 0297 // https://forum.qt.io/topic/135724/qt-6-replacement-for-qtextcodec 0298 #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) 0299 stream.setCodec("UTF-8"); 0300 #else 0301 stream.setEncoding(QStringConverter::Utf8); 0302 #endif 0303 0304 #if QT_VERSION >= 0x050e00 0305 stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << Qt::endl; 0306 stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << Qt::endl; 0307 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << Qt::endl; 0308 stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << Qt::endl; 0309 #else // QT_VERSION < 0x050e00 0310 stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl; 0311 stream << "<!-- XML document written by KBibTeXIO as part of KBibTeX -->" << endl; 0312 stream << "<!-- https://userbase.kde.org/KBibTeX -->" << endl; 0313 stream << "<b:Sources xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">" << endl; 0314 #endif // QT_VERSION >= 0x050e00 0315 0316 const bool result = d->write(stream, element, bibtexfile); 0317 0318 #if QT_VERSION >= 0x050e00 0319 stream << "</b:Sources>" << Qt::endl; 0320 #else // QT_VERSION < 0x050e00 0321 stream << "</b:Sources>" << endl; 0322 #endif // QT_VERSION >= 0x050e00 0323 0324 return result && !d->cancelFlag; 0325 } 0326 0327 void FileExporterWordBibXML::cancel() 0328 { 0329 d->cancelFlag = true; 0330 } 0331