File indexing completed on 2024-05-05 16:09:02

0001 /*
0002     This file is part of the KFileMetaData project
0003     SPDX-FileCopyrightText: 2016 Varun Joshi <varunj.1011@gmail.com>
0004     SPDX-FileCopyrightText: 2015 Boudhayan Gupta <bgupta@kde.org>
0005 
0006     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0007 */
0008 
0009 #include "externalextractor.h"
0010 #include "kfilemetadata_debug.h"
0011 #include "properties.h"
0012 #include "propertyinfo.h"
0013 #include "typeinfo.h"
0014 
0015 #include <QDir>
0016 #include <QProcess>
0017 #include <QJsonDocument>
0018 #include <QJsonObject>
0019 #include <QJsonArray>
0020 
0021 #define EXTRACTOR_TIMEOUT_MS 30000
0022 
0023 namespace KFileMetaData
0024 {
0025 class ExternalExtractorPrivate
0026 {
0027 public:
0028     QString path;
0029     QStringList writeMimetypes;
0030     QString mainPath;
0031 };
0032 }
0033 
0034 using namespace KFileMetaData;
0035 
0036 ExternalExtractor::ExternalExtractor(QObject* parent)
0037     : ExtractorPlugin(parent),
0038       d_ptr(new ExternalExtractorPrivate)
0039 {
0040 }
0041 
0042 ExternalExtractor::ExternalExtractor(const QString& pluginPath)
0043     : ExtractorPlugin(nullptr),
0044       d_ptr(new ExternalExtractorPrivate)
0045 {
0046     Q_D(ExternalExtractor);
0047 
0048     d->path = pluginPath;
0049 
0050     QDir pluginDir(pluginPath);
0051     QStringList pluginDirContents = pluginDir.entryList(QDir::Files | QDir::NoDotAndDotDot);
0052 
0053     if (!pluginDirContents.contains(QStringLiteral("manifest.json"))) {
0054         qCDebug(KFILEMETADATA_LOG) << pluginPath << "does not seem to contain a valid plugin";
0055         return;
0056     }
0057 
0058     QFile manifest(pluginDir.filePath(QStringLiteral("manifest.json")));
0059     manifest.open(QIODevice::ReadOnly);
0060     QJsonDocument manifestDoc = QJsonDocument::fromJson(manifest.readAll());
0061     if (!manifestDoc.isObject()) {
0062         qCDebug(KFILEMETADATA_LOG) << "Manifest does not seem to be a valid JSON Object";
0063         return;
0064     }
0065 
0066     QJsonObject rootObject = manifestDoc.object();
0067     const QJsonArray mimetypesArray = rootObject.value(QStringLiteral("mimetypes")).toArray();
0068     QStringList mimetypes;
0069     mimetypes.reserve(mimetypesArray.count());
0070 
0071      for (const QJsonValue &mimetype : mimetypesArray) {
0072         mimetypes << mimetype.toString();
0073     }
0074 
0075     d->writeMimetypes.append(mimetypes);
0076     d->mainPath = pluginDir.absoluteFilePath(rootObject[QStringLiteral("main")].toString());
0077 }
0078 
// Defaulted here, in the source file where ExternalExtractorPrivate is fully defined.
ExternalExtractor::~ExternalExtractor() = default;
0080 
0081 QStringList ExternalExtractor::mimetypes() const
0082 {
0083     Q_D(const ExternalExtractor);
0084 
0085     return d->writeMimetypes;
0086 }
0087 
0088 void ExternalExtractor::extract(ExtractionResult* result)
0089 {
0090     Q_D(ExternalExtractor);
0091 
0092     QJsonDocument writeData;
0093     QJsonObject writeRootObject;
0094     QByteArray output;
0095     QByteArray errorOutput;
0096 
0097     writeRootObject[QStringLiteral("path")] = QJsonValue(result->inputUrl());
0098     writeRootObject[QStringLiteral("mimetype")] = result->inputMimetype();
0099     writeData.setObject(writeRootObject);
0100 
0101     QProcess extractorProcess;
0102     extractorProcess.start(d->mainPath, QStringList(), QIODevice::ReadWrite);
0103     bool started = extractorProcess.waitForStarted();
0104     if (!started) {
0105         qCWarning(KFILEMETADATA_LOG) << "External extractor" << d->mainPath
0106             << "failed to start:" << extractorProcess.errorString();
0107         return;
0108     }
0109 
0110     extractorProcess.write(writeData.toJson());
0111     extractorProcess.closeWriteChannel();
0112     extractorProcess.waitForFinished(EXTRACTOR_TIMEOUT_MS);
0113 
0114     output = extractorProcess.readAll();
0115     errorOutput = extractorProcess.readAllStandardError();
0116 
0117     if (extractorProcess.exitStatus()) {
0118         qCWarning(KFILEMETADATA_LOG) << "External extractor" << d->mainPath
0119             << "failed to index" << result->inputUrl() << "-" << errorOutput;
0120         return;
0121     }
0122 
0123     // now we read in the output (which is a standard json format) into the
0124     // ExtractionResult
0125 
0126     QJsonDocument extractorData = QJsonDocument::fromJson(output);
0127     if (!extractorData.isObject()) {
0128         return;
0129     }
0130     QJsonObject rootObject = extractorData.object();
0131     QJsonObject propertiesObject = rootObject[QStringLiteral("properties")].toObject();
0132 
0133     const auto propertiesObjectEnd = propertiesObject.constEnd();
0134     auto i = propertiesObject.constBegin();
0135     for (; i != propertiesObjectEnd; ++i) {
0136         if (i.key() == QStringLiteral("typeInfo")) {
0137             TypeInfo info = TypeInfo::fromName(i.value().toString());
0138             result->addType(info.type());
0139             continue;
0140         }
0141 
0142         // for plaintext extraction
0143         if (i.key() == QStringLiteral("text")) {
0144             result->append(i.value().toString());
0145             continue;
0146         }
0147 
0148         PropertyInfo info = PropertyInfo::fromName(i.key());
0149         if (info.name() != i.key()) {
0150             continue;
0151         }
0152         result->add(info.property(), i.value().toVariant());
0153     }
0154 
0155     if (rootObject[QStringLiteral("status")].toString() != QStringLiteral("OK")) {
0156         qCDebug(KFILEMETADATA_LOG) << rootObject[QStringLiteral("error")].toString();
0157     }
0158 }
0159 
0160 #include "moc_externalextractor.cpp"