File indexing completed on 2024-04-28 15:17:35

0001 /*
0002     This file is part of the KDE Baloo Project
0003     SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006 */
0007 
0008 #include "basicindexingjob.h"
0009 #include "termgenerator.h"
0010 #include "idutils.h"
0011 
0012 #include <QStringList>
0013 #include <QFile>
0014 
0015 #include <KFileMetaData/Types>
0016 #include <KFileMetaData/UserMetaData>
0017 
0018 using namespace Baloo;
0019 
0020 BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype,
0021                                    IndexingLevel level)
0022     : m_filePath(filePath)
0023     , m_mimetype(mimetype)
0024     , m_indexingLevel(level)
0025 {
0026     if (m_filePath.endsWith(QLatin1Char('/'))) {
0027     m_filePath.chop(1);
0028     }
0029 }
0030 
0031 namespace {
0032 
0033 void indexXAttr(const QString& url, Document& doc)
0034 {
0035     KFileMetaData::UserMetaData userMetaData(url);
0036 
0037     using Attribute = KFileMetaData::UserMetaData::Attribute;
0038     auto attributes = userMetaData.queryAttributes(Attribute::Tags |
0039         Attribute::Rating | Attribute::Comment);
0040     if (attributes == Attribute::None) {
0041     return;
0042     }
0043 
0044     TermGenerator tg(doc);
0045 
0046     const QStringList tags = userMetaData.tags();
0047     for (const QString& tag : tags) {
0048         tg.indexXattrText(tag, QByteArray("TA"));
0049         doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8());
0050     }
0051 
0052     int rating = userMetaData.rating();
0053     if (rating) {
0054         doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating));
0055     }
0056 
0057     QString comment = userMetaData.userComment();
0058     if (!comment.isEmpty()) {
0059         tg.indexXattrText(comment, QByteArray("C"));
0060     }
0061 }
0062 
0063 QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType)
0064 {
0065     using namespace KFileMetaData;
0066     QVector<Type::Type> types;
0067     types.reserve(2);
0068 
0069     // Basic types
0070     if (mimeType.startsWith(QLatin1String("audio/"))) {
0071         types << Type::Audio;
0072     }
0073     if (mimeType.startsWith(QLatin1String("video/"))) {
0074         types << Type::Video;
0075     }
0076     if (mimeType.startsWith(QLatin1String("image/"))) {
0077         types << Type::Image;
0078     }
0079     if (mimeType.startsWith(QLatin1String("text/"))) {
0080         types << Type::Text;
0081     }
0082     if (mimeType.contains(QLatin1String("document"))) {
0083         types << Type::Document;
0084     }
0085 
0086     if (mimeType.contains(QLatin1String("powerpoint"))) {
0087         types << Type::Presentation;
0088         types << Type::Document;
0089     }
0090     if (mimeType.contains(QLatin1String("excel"))) {
0091         types << Type::Spreadsheet;
0092         types << Type::Document;
0093     }
0094     // Compressed tar archives: "application/x-<compression>-compressed-tar"
0095     if ((mimeType.startsWith(QLatin1String("application/x-"))) &&
0096         (mimeType.endsWith(QLatin1String("-compressed-tar")))) {
0097         types << Type::Archive;
0098     }
0099 
0100     static QMultiHash<QString, Type::Type> typeMapper {
0101         {QStringLiteral("text/plain"), Type::Document},
0102         // MS Office
0103         {QStringLiteral("application/msword"), Type::Document},
0104         {QStringLiteral("application/x-scribus"), Type::Document},
0105         // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above:
0106         // - application/vnd.ms-powerpoint
0107         // - application/vnd.ms-excel
0108         // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document
0109         // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
0110         // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
0111         // - application/vnd.openxmlformats-officedocument.presentationml.presentation
0112         // - application/vnd.oasis.opendocument.text
0113         // - application/vnd.oasis.opendocument.spreadsheet
0114         // - application/vnd.oasis.opendocument.presentation
0115         // Office 2007
0116         {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation},
0117         {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation},
0118         {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation},
0119         {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet},
0120         // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification
0121         {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation},
0122         {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet},
0123         {QStringLiteral("application/pdf"), Type::Document},
0124         {QStringLiteral("application/postscript"), Type::Document},
0125         {QStringLiteral("application/x-dvi"), Type::Document},
0126         {QStringLiteral("application/rtf"), Type::Document},
0127         // EBooks
0128         {QStringLiteral("application/epub+zip"), Type::Document},
0129         {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document},
0130         {QStringLiteral("application/x-mobipocket-ebook"), Type::Document},
0131         // Graphic EBooks
0132         {QStringLiteral("application/vnd.comicbook-rar"), Type::Document},
0133         {QStringLiteral("application/vnd.comicbook+zip"), Type::Document},
0134         {QStringLiteral("application/x-cb7"), Type::Document},
0135         {QStringLiteral("application/x-cbt"), Type::Document},
0136         // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats
0137         {QStringLiteral("application/gzip"), Type::Archive},
0138         {QStringLiteral("application/x-tar"), Type::Archive},
0139         {QStringLiteral("application/x-tarz"), Type::Archive},
0140         {QStringLiteral("application/x-arc"), Type::Archive},
0141         {QStringLiteral("application/x-archive"), Type::Archive},
0142         {QStringLiteral("application/x-bzip"), Type::Archive},
0143         {QStringLiteral("application/x-cpio"), Type::Archive},
0144         {QStringLiteral("application/x-lha"), Type::Archive},
0145         {QStringLiteral("application/x-lhz"), Type::Archive},
0146         {QStringLiteral("application/x-lrzip"), Type::Archive},
0147         {QStringLiteral("application/x-lz4"), Type::Archive},
0148         {QStringLiteral("application/x-lzip"), Type::Archive},
0149         {QStringLiteral("application/x-lzma"), Type::Archive},
0150         {QStringLiteral("application/x-lzop"), Type::Archive},
0151         {QStringLiteral("application/x-7z-compressed"), Type::Archive},
0152         {QStringLiteral("application/x-ace"), Type::Archive},
0153         {QStringLiteral("application/x-astrotite-afa"), Type::Archive},
0154         {QStringLiteral("application/x-alz"), Type::Archive},
0155         {QStringLiteral("application/vnd.android.package-archive"), Type::Archive},
0156         {QStringLiteral("application/x-arj"), Type::Archive},
0157         {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive},
0158         {QStringLiteral("application/x-cfs-compressed"), Type::Archive},
0159         {QStringLiteral("application/x-dar"), Type::Archive},
0160         {QStringLiteral("application/x-lzh"), Type::Archive},
0161         {QStringLiteral("application/x-lzx"), Type::Archive},
0162         {QStringLiteral("application/vnd.rar"), Type::Archive},
0163         {QStringLiteral("application/x-stuffit"), Type::Archive},
0164         {QStringLiteral("application/x-stuffitx"), Type::Archive},
0165         {QStringLiteral("application/x-tzo"), Type::Archive},
0166         {QStringLiteral("application/x-ustar"), Type::Archive},
0167         {QStringLiteral("application/x-xar"), Type::Archive},
0168         {QStringLiteral("application/x-xz"), Type::Archive},
0169         {QStringLiteral("application/x-zoo"), Type::Archive},
0170         {QStringLiteral("application/zip"), Type::Archive},
0171         {QStringLiteral("application/zlib"), Type::Archive},
0172         {QStringLiteral("application/zstd"), Type::Archive},
0173         // WPS office
0174         {QStringLiteral("application/wps-office.doc"), Type::Document},
0175         {QStringLiteral("application/wps-office.xls"), Type::Document},
0176         {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet},
0177         {QStringLiteral("application/wps-office.pot"), Type::Document},
0178         {QStringLiteral("application/wps-office.pot"), Type::Presentation},
0179         {QStringLiteral("application/wps-office.wps"), Type::Document},
0180         {QStringLiteral("application/wps-office.docx"), Type::Document},
0181         {QStringLiteral("application/wps-office.xlsx"), Type::Document},
0182         {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet},
0183         {QStringLiteral("application/wps-office.pptx"), Type::Document},
0184         {QStringLiteral("application/wps-office.pptx"), Type::Presentation},
0185         // Other
0186         {QStringLiteral("text/markdown"), Type::Document},
0187         {QStringLiteral("image/vnd.djvu+multipage"), Type::Document},
0188         {QStringLiteral("application/x-lyx"), Type::Document}
0189     };
0190 
0191     auto hashIt = typeMapper.find(mimeType);
0192     while (hashIt != typeMapper.end() && hashIt.key() == mimeType) {
0193         types.append(hashIt.value());
0194         ++hashIt;
0195     }
0196 
0197     return types;
0198 }
0199 } // namespace
0200 
0201 BasicIndexingJob::~BasicIndexingJob()
0202 {
0203 }
0204 
0205 bool BasicIndexingJob::index()
0206 {
0207     const QByteArray url = QFile::encodeName(m_filePath);
0208     auto lastSlash = url.lastIndexOf('/');
0209 
0210     const QByteArray fileName = url.mid(lastSlash + 1);
0211     const QByteArray filePath = url.left(lastSlash);
0212 
0213     QT_STATBUF statBuf;
0214     if (filePathToStat(filePath, statBuf) != 0) {
0215         return false;
0216     }
0217 
0218     Document doc;
0219     doc.setParentId(statBufToId(statBuf));
0220 
0221     if (filePathToStat(url, statBuf) != 0) {
0222         return false;
0223     }
0224     doc.setId(statBufToId(statBuf));
0225     doc.setUrl(url);
0226 
0227     TermGenerator tg(doc);
0228     tg.indexFileNameText(QFile::decodeName(fileName));
0229     if (statBuf.st_size == 0) {
0230         tg.indexText(QStringLiteral("application/x-zerosize"), QByteArray("M"));
0231     } else {
0232         tg.indexText(m_mimetype, QByteArray("M"));
0233     }
0234 
0235     // (Content) Modification time, Metadata (e.g. XAttr) change time
0236     doc.setMTime(statBuf.st_mtime);
0237     doc.setCTime(statBuf.st_ctime);
0238 
0239     if (S_ISDIR(statBuf.st_mode)) {
0240         static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder));
0241         doc.addTerm(type);
0242         // For folders we do not need to go through file indexing, so we do not set contentIndexing
0243 
0244     } else if (statBuf.st_size > 0) {
0245         if (m_indexingLevel == MarkForContentIndexing) {
0246             doc.setContentIndexing(true);
0247         }
0248         // Types
0249         const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(m_mimetype);
0250         for (KFileMetaData::Type::Type type : tList) {
0251             QByteArray num = QByteArray::number(static_cast<int>(type));
0252             doc.addTerm(QByteArray("T") + num);
0253         }
0254     }
0255 
0256     indexXAttr(m_filePath, doc);
0257 
0258     m_doc = doc;
0259     return true;
0260 }