// File indexing completed on 2025-03-16 12:49:35
0001 /* 0002 SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de> 0003 0004 SPDX-License-Identifier: LGPL-2.1-or-later 0005 */ 0006 0007 #include "fb2extractor.h" 0008 #include "kfilemetadata_debug.h" 0009 0010 #include <QDateTime> 0011 #include <QFile> 0012 #include <QXmlStreamReader> 0013 0014 #include <KZip> 0015 0016 #include <memory> 0017 0018 using namespace KFileMetaData; 0019 0020 Fb2Extractor::Fb2Extractor(QObject *parent) 0021 : ExtractorPlugin(parent) 0022 { 0023 } 0024 0025 namespace 0026 { 0027 static const QString regularMimeType() 0028 { 0029 return QStringLiteral("application/x-fictionbook+xml"); 0030 } 0031 0032 static const QString compressedMimeType() 0033 { 0034 return QStringLiteral("application/x-zip-compressed-fb2"); 0035 } 0036 0037 static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()}; 0038 0039 } 0040 0041 QStringList Fb2Extractor::mimetypes() const 0042 { 0043 return supportedMimeTypes; 0044 } 0045 0046 void Fb2Extractor::extract(ExtractionResult *result) 0047 { 0048 std::unique_ptr<QIODevice> device; 0049 std::unique_ptr<KZip> zip; 0050 0051 if (result->inputMimetype() == regularMimeType()) { 0052 device.reset(new QFile(result->inputUrl())); 0053 if (!device->open(QIODevice::ReadOnly | QIODevice::Text)) { 0054 return; 0055 } 0056 0057 } else if (result->inputMimetype() == compressedMimeType()) { 0058 zip.reset(new KZip(result->inputUrl())); 0059 if (!zip->open(QIODevice::ReadOnly)) { 0060 return; 0061 } 0062 0063 const auto entries = zip->directory()->entries(); 0064 if (entries.count() != 1) { 0065 return; 0066 } 0067 0068 const QString entryPath = entries.first(); 0069 if (!entryPath.endsWith(QLatin1String(".fb2"))) { 0070 return; 0071 } 0072 0073 const auto *entry = zip->directory()->file(entryPath); 0074 if (!entry) { 0075 return; 0076 } 0077 0078 device.reset(entry->createDevice()); 0079 } 0080 0081 result->addType(Type::Document); 0082 0083 QXmlStreamReader xml(device.get()); 0084 0085 
bool inFictionBook = false; 0086 bool inDescription = false; 0087 bool inTitleInfo = false; 0088 bool inAuthor = false; 0089 bool inDocumentInfo = false; 0090 bool inPublishInfo = false; 0091 bool inBody = false; 0092 0093 QString authorFirstName; 0094 QString authorMiddleName; 0095 QString authorLastName; 0096 QString authorNickName; 0097 0098 while (!xml.atEnd() && !xml.hasError()) { 0099 xml.readNext(); 0100 0101 if (xml.name() == QLatin1String("FictionBook")) { 0102 if (xml.isStartElement()) { 0103 inFictionBook = true; 0104 } else if (xml.isEndElement()) { 0105 break; 0106 } 0107 } else if (xml.name() == QLatin1String("description")) { 0108 if (xml.isStartElement()) { 0109 inDescription = true; 0110 } else if (xml.isEndElement()) { 0111 inDescription = false; 0112 } 0113 } else if (xml.name() == QLatin1String("title-info")) { 0114 if (xml.isStartElement()) { 0115 inTitleInfo = true; 0116 } else if (xml.isEndElement()) { 0117 inTitleInfo = false; 0118 } 0119 } else if (xml.name() == QLatin1String("document-info")) { 0120 if (xml.isStartElement()) { 0121 inDocumentInfo = true; 0122 } else if (xml.isEndElement()) { 0123 inDocumentInfo = false; 0124 } 0125 } else if (xml.name() == QLatin1String("publish-info")) { 0126 if (xml.isStartElement()) { 0127 inPublishInfo = true; 0128 } else if (xml.isEndElement()) { 0129 inPublishInfo = false; 0130 } 0131 } else if (xml.name() == QLatin1String("body")) { 0132 if (xml.isStartElement()) { 0133 inBody = true; 0134 } else if (xml.isEndElement()) { 0135 inBody = false; 0136 } 0137 } 0138 0139 if (!inFictionBook) { 0140 continue; 0141 } 0142 0143 if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) { 0144 if (inTitleInfo) { 0145 if (xml.isStartElement()) { 0146 if (xml.name() == QLatin1String("author")) { 0147 inAuthor = true; 0148 } else if (inAuthor) { 0149 if (xml.name() == QLatin1String("first-name")) { 0150 authorFirstName = xml.readElementText(); 0151 } else if (xml.name() == 
QLatin1String("middle-name")) { 0152 authorMiddleName = xml.readElementText(); 0153 } else if (xml.name() == QLatin1String("last-name")) { 0154 authorLastName = xml.readElementText(); 0155 } else if (xml.name() == QLatin1String("nickname")) { 0156 authorNickName = xml.readElementText(); 0157 } 0158 } else if (xml.name() == QLatin1String("book-title")) { 0159 result->add(Property::Title, xml.readElementText()); 0160 } else if (xml.name() == QLatin1String("annotation")) { 0161 result->add(Property::Description, xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed()); 0162 } else if (xml.name() == QLatin1String("lang")) { 0163 result->add(Property::Language, xml.readElementText()); 0164 } else if (xml.name() == QLatin1String("genre")) { 0165 result->add(Property::Genre, xml.readElementText()); 0166 } 0167 } else if (xml.isEndElement()) { 0168 inAuthor = false; 0169 0170 QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName}; 0171 nameParts.removeAll(QString()); 0172 0173 if (!nameParts.isEmpty()) { 0174 result->add(Property::Author, nameParts.join(QLatin1Char(' '))); 0175 } else if (!authorNickName.isEmpty()) { 0176 result->add(Property::Author, authorNickName); 0177 } 0178 0179 authorFirstName.clear(); 0180 authorMiddleName.clear(); 0181 authorLastName.clear(); 0182 authorNickName.clear(); 0183 } 0184 } else if (inDocumentInfo) { 0185 if (xml.name() == QLatin1String("date")) { 0186 // Date can be "not exact" but date "value", if present, is an xs:date 0187 const auto dateValue = xml.attributes().value(QLatin1String("value")); 0188 QDateTime dt = QDateTime::fromString(dateValue.toString()); 0189 0190 if (!dt.isValid()) { 0191 dt = ExtractorPlugin::dateTimeFromString(xml.readElementText()); 0192 } 0193 0194 if (dt.isValid()) { 0195 result->add(Property::CreationDate, dt); 0196 } 0197 } else if (xml.name() == QLatin1String("program-used")) { 0198 result->add(Property::Generator, xml.readElementText()); 0199 // "Owner of the fb2 
document copyrights" 0200 } else if (xml.name() == QLatin1String("publisher")) { 0201 result->add(Property::Copyright, xml.readElementText()); 0202 } 0203 } else if (inPublishInfo) { 0204 if (xml.name() == QLatin1String("publisher")) { 0205 result->add(Property::Publisher, xml.readElementText()); 0206 } else if (xml.name() == QLatin1String("year")) { 0207 bool ok; 0208 const int releaseYear = xml.readElementText().toInt(&ok); 0209 if (ok) { 0210 result->add(Property::ReleaseYear, releaseYear); 0211 } 0212 } 0213 } 0214 } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) { 0215 result->append(xml.text().toString()); 0216 } 0217 } 0218 } 0219 0220 #include "moc_fb2extractor.cpp"