File indexing completed on 2024-04-28 07:40:07

0001 /*
0002     This file is part of the KDE Baloo Project
0003     SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006 */
0007 
0008 #include "basicindexingjob.h"
0009 #include "termgenerator.h"
0010 #include "idutils.h"
0011 
0012 #include <QStringList>
0013 #include <QFile>
0014 
0015 #include <KFileMetaData/Types>
0016 #include <KFileMetaData/UserMetaData>
0017 
0018 using namespace Baloo;
0019 
0020 BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype,
0021                                    IndexingLevel level)
0022     : m_filePath(filePath)
0023     , m_mimetype(mimetype)
0024     , m_indexingLevel(level)
0025 {
0026     if (m_filePath.endsWith(QLatin1Char('/'))) {
0027     m_filePath.chop(1);
0028     }
0029 }
0030 
0031 namespace {
0032 
0033 void indexXAttr(const QString& url, Document& doc)
0034 {
0035     KFileMetaData::UserMetaData userMetaData(url);
0036 
0037     using Attribute = KFileMetaData::UserMetaData::Attribute;
0038     auto attributes = userMetaData.queryAttributes(Attribute::Tags |
0039         Attribute::Rating | Attribute::Comment);
0040     if (attributes == Attribute::None) {
0041     return;
0042     }
0043 
0044     TermGenerator tg(doc);
0045 
0046     const QStringList tags = userMetaData.tags();
0047     for (const QString& tag : tags) {
0048         tg.indexXattrText(tag, QByteArray("TA"));
0049         doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8());
0050     }
0051 
0052     int rating = userMetaData.rating();
0053     if (rating) {
0054         doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating));
0055     }
0056 
0057     QString comment = userMetaData.userComment();
0058     if (!comment.isEmpty()) {
0059         tg.indexXattrText(comment, QByteArray("C"));
0060     }
0061 }
0062 
0063 QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType)
0064 {
0065     using namespace KFileMetaData;
0066     QVector<Type::Type> types;
0067     types.reserve(2);
0068 
0069     // Basic types
0070     if (mimeType.startsWith(QLatin1String("audio/"))) {
0071         types << Type::Audio;
0072     }
0073     if (mimeType.startsWith(QLatin1String("video/"))) {
0074         types << Type::Video;
0075     }
0076     if (mimeType.startsWith(QLatin1String("image/"))) {
0077         types << Type::Image;
0078     }
0079     if (mimeType.startsWith(QLatin1String("text/"))) {
0080         types << Type::Text;
0081     }
0082     if (mimeType.contains(QLatin1String("document"))) {
0083         types << Type::Document;
0084     }
0085     if (mimeType.startsWith(QLatin1String("model/"))) {
0086         types << Type::Model;
0087     }
0088     if (mimeType.contains(QLatin1String("powerpoint"))) {
0089         types << Type::Presentation;
0090         types << Type::Document;
0091     }
0092     if (mimeType.contains(QLatin1String("excel"))) {
0093         types << Type::Spreadsheet;
0094         types << Type::Document;
0095     }
0096     // Compressed tar archives: "application/x-<compression>-compressed-tar"
0097     if ((mimeType.startsWith(QLatin1String("application/x-"))) &&
0098         (mimeType.endsWith(QLatin1String("-compressed-tar")))) {
0099         types << Type::Archive;
0100     }
0101 
0102     static QMultiHash<QString, Type::Type> typeMapper {
0103         {QStringLiteral("text/plain"), Type::Document},
0104         // MS Office
0105         {QStringLiteral("application/msword"), Type::Document},
0106         {QStringLiteral("application/x-scribus"), Type::Document},
0107         // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above:
0108         // - application/vnd.ms-powerpoint
0109         // - application/vnd.ms-excel
0110         // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document
0111         // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
0112         // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
0113         // - application/vnd.openxmlformats-officedocument.presentationml.presentation
0114         // - application/vnd.oasis.opendocument.text
0115         // - application/vnd.oasis.opendocument.spreadsheet
0116         // - application/vnd.oasis.opendocument.presentation
0117         // Office 2007
0118         {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation},
0119         {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation},
0120         {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation},
0121         {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet},
0122         // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification
0123         {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation},
0124         {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet},
0125         {QStringLiteral("application/pdf"), Type::Document},
0126         {QStringLiteral("application/postscript"), Type::Document},
0127         {QStringLiteral("application/x-dvi"), Type::Document},
0128         {QStringLiteral("application/rtf"), Type::Document},
0129         // EBooks
0130         {QStringLiteral("application/epub+zip"), Type::Document},
0131         {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document},
0132         {QStringLiteral("application/x-mobipocket-ebook"), Type::Document},
0133         // Graphic EBooks
0134         {QStringLiteral("application/vnd.comicbook-rar"), Type::Document},
0135         {QStringLiteral("application/vnd.comicbook+zip"), Type::Document},
0136         {QStringLiteral("application/x-cb7"), Type::Document},
0137         {QStringLiteral("application/x-cbt"), Type::Document},
0138         // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats
0139         {QStringLiteral("application/gzip"), Type::Archive},
0140         {QStringLiteral("application/x-tar"), Type::Archive},
0141         {QStringLiteral("application/x-tarz"), Type::Archive},
0142         {QStringLiteral("application/x-arc"), Type::Archive},
0143         {QStringLiteral("application/x-archive"), Type::Archive},
0144         {QStringLiteral("application/x-bzip"), Type::Archive},
0145         {QStringLiteral("application/x-cpio"), Type::Archive},
0146         {QStringLiteral("application/x-lha"), Type::Archive},
0147         {QStringLiteral("application/x-lhz"), Type::Archive},
0148         {QStringLiteral("application/x-lrzip"), Type::Archive},
0149         {QStringLiteral("application/x-lz4"), Type::Archive},
0150         {QStringLiteral("application/x-lzip"), Type::Archive},
0151         {QStringLiteral("application/x-lzma"), Type::Archive},
0152         {QStringLiteral("application/x-lzop"), Type::Archive},
0153         {QStringLiteral("application/x-7z-compressed"), Type::Archive},
0154         {QStringLiteral("application/x-ace"), Type::Archive},
0155         {QStringLiteral("application/x-astrotite-afa"), Type::Archive},
0156         {QStringLiteral("application/x-alz"), Type::Archive},
0157         {QStringLiteral("application/vnd.android.package-archive"), Type::Archive},
0158         {QStringLiteral("application/x-arj"), Type::Archive},
0159         {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive},
0160         {QStringLiteral("application/x-cfs-compressed"), Type::Archive},
0161         {QStringLiteral("application/x-dar"), Type::Archive},
0162         {QStringLiteral("application/x-lzh"), Type::Archive},
0163         {QStringLiteral("application/x-lzx"), Type::Archive},
0164         {QStringLiteral("application/vnd.rar"), Type::Archive},
0165         {QStringLiteral("application/x-stuffit"), Type::Archive},
0166         {QStringLiteral("application/x-stuffitx"), Type::Archive},
0167         {QStringLiteral("application/x-tzo"), Type::Archive},
0168         {QStringLiteral("application/x-ustar"), Type::Archive},
0169         {QStringLiteral("application/x-xar"), Type::Archive},
0170         {QStringLiteral("application/x-xz"), Type::Archive},
0171         {QStringLiteral("application/x-zoo"), Type::Archive},
0172         {QStringLiteral("application/zip"), Type::Archive},
0173         {QStringLiteral("application/zlib"), Type::Archive},
0174         {QStringLiteral("application/zstd"), Type::Archive},
0175         // WPS office
0176         {QStringLiteral("application/wps-office.doc"), Type::Document},
0177         {QStringLiteral("application/wps-office.xls"), Type::Document},
0178         {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet},
0179         {QStringLiteral("application/wps-office.pot"), Type::Document},
0180         {QStringLiteral("application/wps-office.pot"), Type::Presentation},
0181         {QStringLiteral("application/wps-office.wps"), Type::Document},
0182         {QStringLiteral("application/wps-office.docx"), Type::Document},
0183         {QStringLiteral("application/wps-office.xlsx"), Type::Document},
0184         {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet},
0185         {QStringLiteral("application/wps-office.pptx"), Type::Document},
0186         {QStringLiteral("application/wps-office.pptx"), Type::Presentation},
0187         // Other
0188         {QStringLiteral("text/markdown"), Type::Document},
0189         {QStringLiteral("image/vnd.djvu+multipage"), Type::Document},
0190         {QStringLiteral("application/x-lyx"), Type::Document},
0191     };
0192 
0193     auto hashIt = typeMapper.find(mimeType);
0194     while (hashIt != typeMapper.end() && hashIt.key() == mimeType) {
0195         types.append(hashIt.value());
0196         ++hashIt;
0197     }
0198 
0199     return types;
0200 }
0201 } // namespace
0202 
0203 BasicIndexingJob::~BasicIndexingJob()
0204 {
0205 }
0206 
0207 bool BasicIndexingJob::index()
0208 {
0209     const QByteArray url = QFile::encodeName(m_filePath);
0210     auto lastSlash = url.lastIndexOf('/');
0211 
0212     const QByteArray fileName = url.mid(lastSlash + 1);
0213     const QByteArray filePath = url.left(lastSlash);
0214 
0215     QT_STATBUF statBuf;
0216     if (filePathToStat(filePath, statBuf) != 0) {
0217         return false;
0218     }
0219 
0220     Document doc;
0221     doc.setParentId(statBufToId(statBuf));
0222 
0223     if (filePathToStat(url, statBuf) != 0) {
0224         return false;
0225     }
0226     doc.setId(statBufToId(statBuf));
0227     doc.setUrl(url);
0228 
0229     TermGenerator tg(doc);
0230     tg.indexFileNameText(QFile::decodeName(fileName));
0231     if (statBuf.st_size == 0) {
0232         tg.indexText(QStringLiteral("application/x-zerosize"), QByteArray("M"));
0233     } else {
0234         tg.indexText(m_mimetype, QByteArray("M"));
0235     }
0236 
0237     // (Content) Modification time, Metadata (e.g. XAttr) change time
0238     doc.setMTime(statBuf.st_mtime);
0239     doc.setCTime(statBuf.st_ctime);
0240 
0241     if (S_ISDIR(statBuf.st_mode)) {
0242         static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder));
0243         doc.addTerm(type);
0244         // For folders we do not need to go through file indexing, so we do not set contentIndexing
0245 
0246     } else if (statBuf.st_size > 0) {
0247         if (m_indexingLevel == MarkForContentIndexing) {
0248             doc.setContentIndexing(true);
0249         }
0250         // Types
0251         const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(m_mimetype);
0252         for (KFileMetaData::Type::Type type : tList) {
0253             QByteArray num = QByteArray::number(static_cast<int>(type));
0254             doc.addTerm(QByteArray("T") + num);
0255         }
0256     }
0257 
0258     indexXAttr(m_filePath, doc);
0259 
0260     m_doc = doc;
0261     return true;
0262 }