File indexing completed on 2024-05-05 16:09:02

0001 /*
0002     SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in>
0003     SPDX-FileCopyrightText: 2016 Varun Joshi <varunj.1011@gmail.com>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-or-later
0006 */
0007 
0008 #include "extractorcollection.h"
0009 #include "extractor_p.h"
0010 #include "extractorplugin.h"
0011 #include "externalextractor.h"
0012 #include "kfilemetadata_debug.h"
0013 #include "config-kfilemetadata.h"
0014 
0015 #include <KPluginMetaData>
0016 #include <QCoreApplication>
0017 #include <QDir>
0018 #include <QMimeDatabase>
0019 #include <vector>
0020 
0021 using namespace KFileMetaData;
0022 
0023 class KFileMetaData::ExtractorCollectionPrivate
0024 {
0025 public:
0026     QMultiHash<QString, Extractor*> m_mimeExtractors;
0027 
0028     std::vector<Extractor> m_allExtractors;
0029 
0030     void findExtractors();
0031     QList<Extractor*> getExtractors(const QString& mimetype);
0032 };
0033 
0034 ExtractorCollection::ExtractorCollection()
0035     : d(new ExtractorCollectionPrivate)
0036 {
0037     d->findExtractors();
0038 }
0039 
0040 ExtractorCollection::~ExtractorCollection() = default;
0041 
0042 
0043 QList<Extractor*> ExtractorCollection::allExtractors()
0044 {
0045     QList<Extractor*> plugins;
0046     for (auto& ex : d->m_allExtractors) {
0047         if (ex.d->initPlugin()) {
0048             plugins.push_back(&ex);
0049         }
0050     }
0051     return plugins;
0052 }
0053 
0054 void ExtractorCollectionPrivate::findExtractors()
0055 {
0056     const QVector<KPluginMetaData> kfilemetadataPlugins =
0057         KPluginMetaData::findPlugins(QStringLiteral("kf" QT_STRINGIFY(QT_VERSION_MAJOR) "/kfilemetadata"), {}, KPluginMetaData::AllowEmptyMetaData);
0058     for (const KPluginMetaData &plugin : kfilemetadataPlugins) {
0059         Extractor extractor;
0060         extractor.d->m_pluginPath = plugin.fileName();
0061         extractor.setAutoDeletePlugin(Extractor::DoNotDeletePlugin);
0062 
0063         if (!plugin.rawData().isEmpty()) {
0064             qCDebug(KFILEMETADATA_LOG) << "Found plugin with metadata:" << extractor.d->m_pluginPath;
0065             extractor.setMetaData(plugin.rawData().toVariantMap());
0066             m_allExtractors.push_back(std::move(extractor));
0067         } else {
0068             qCDebug(KFILEMETADATA_LOG) << "Found plugin without metadata:" << extractor.d->m_pluginPath;
0069             if (extractor.d->initPlugin() && !extractor.mimetypes().isEmpty()) {
0070                 m_allExtractors.push_back(std::move(extractor));
0071             }
0072         }
0073     }
0074 
0075     QStringList externalPlugins;
0076     const QDir externalPluginDir(QStringLiteral(LIBEXEC_INSTALL_DIR "/kfilemetadata/externalextractors"));
0077     qCDebug(KFILEMETADATA_LOG) << "Searching for external extractors:" << externalPluginDir.path();
0078     // For external plugins, we look into the directories
0079     const QStringList externalPluginEntryList = externalPluginDir.entryList(QDir::Dirs | QDir::NoDotAndDotDot);
0080     for (const QString &externalPlugin : externalPluginEntryList) {
0081         if (externalPlugins.contains(externalPlugin)) {
0082             qCDebug(KFILEMETADATA_LOG) << "Skipping duplicate - " << externalPluginDir.path() << ":" << externalPlugin;
0083             continue;
0084         }
0085 
0086         qCDebug(KFILEMETADATA_LOG) << "Adding plugin - " << externalPluginDir.path() << ":" << externalPlugin;
0087         externalPlugins << externalPlugin;
0088 
0089         Extractor extractor;
0090         auto pluginPath = externalPluginDir.absoluteFilePath(externalPlugin);
0091         ExternalExtractor *plugin = new ExternalExtractor(pluginPath);
0092         if (plugin && !plugin->mimetypes().isEmpty()) {
0093               extractor.setExtractorPlugin(plugin);
0094               extractor.setAutoDeletePlugin(Extractor::AutoDeletePlugin);
0095               m_allExtractors.push_back(std::move(extractor));
0096         }
0097     }
0098     externalPlugins.clear();
0099 
0100     for (Extractor& extractor : m_allExtractors) {
0101         auto pluginProperties = extractor.extractorProperties();
0102         if (!pluginProperties.isEmpty()) {
0103             auto mimetypeProperties = pluginProperties[QLatin1String("MimeTypes")];
0104             const auto mimetypes = mimetypeProperties.toMap().keys();
0105             for (const QString &mimetype : mimetypes) {
0106                 m_mimeExtractors.insert(mimetype, &extractor);
0107             }
0108         } else if (extractor.d->m_plugin) {
0109             const auto mimetypes = extractor.mimetypes();
0110             for (const QString &mimetype : mimetypes) {
0111                 m_mimeExtractors.insert(mimetype, &extractor);
0112             }
0113         }
0114     }
0115 }
0116 
0117 QList<Extractor*> ExtractorCollectionPrivate::getExtractors(const QString& mimetype)
0118 {
0119     QList<Extractor*> extractors = m_mimeExtractors.values(mimetype);
0120 
0121     if (extractors.isEmpty()) {
0122         qCDebug(KFILEMETADATA_LOG) << "No extractor for" << mimetype;
0123         return extractors;
0124     }
0125 
0126     qCDebug(KFILEMETADATA_LOG) << "Fetching extractors for" << mimetype;
0127     Extractor* failed = nullptr;
0128     for (auto ex : extractors) {
0129         if (!ex->d->initPlugin()) {
0130             failed = ex;
0131             break;
0132         }
0133     }
0134 
0135     if (!failed) {
0136         return extractors;
0137     }
0138 
0139     auto it = m_mimeExtractors.begin();
0140     while (it != m_mimeExtractors.end()) {
0141         if (it.value() == failed) {
0142             it = m_mimeExtractors.erase(it);
0143         } else {
0144             ++it;
0145         }
0146     }
0147     return getExtractors(mimetype);
0148 }
0149 
0150 QList<Extractor*> ExtractorCollection::fetchExtractors(const QString& mimetype) const
0151 {
0152     QList<Extractor*> plugins = d->getExtractors(mimetype);
0153     if (!plugins.isEmpty()) {
0154         return plugins;
0155     }
0156 
0157     // try to find the best matching more generic extractor by mimetype inheritance
0158     QMimeDatabase db;
0159     auto type = db.mimeTypeForName(mimetype);
0160     const QStringList ancestors = type.allAncestors();
0161 
0162     for (const auto &ancestor : ancestors) {
0163         if (ancestor == QLatin1String("application/octet-stream")) {
0164             continue;
0165         }
0166         QList<Extractor*> plugins = d->getExtractors(ancestor);
0167         if (!plugins.isEmpty()) {
0168             qCDebug(KFILEMETADATA_LOG) << "Using inherited mimetype" << ancestor <<  "for" << mimetype;
0169             return plugins;
0170         }
0171     }
0172 
0173     return plugins;
0174 }