File indexing completed on 2024-03-24 03:54:32
0001 /* 0002 This file is part of the KDE Baloo Project 0003 SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in> 0004 0005 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0006 */ 0007 0008 #include "basicindexingjob.h" 0009 #include "termgenerator.h" 0010 #include "idutils.h" 0011 0012 #include <QStringList> 0013 #include <QFile> 0014 0015 #include <KFileMetaData/Types> 0016 #include <KFileMetaData/UserMetaData> 0017 0018 using namespace Baloo; 0019 0020 BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype, 0021 IndexingLevel level) 0022 : m_filePath(filePath) 0023 , m_mimetype(mimetype) 0024 , m_indexingLevel(level) 0025 { 0026 if (m_filePath.endsWith(QLatin1Char('/'))) { 0027 m_filePath.chop(1); 0028 } 0029 } 0030 0031 namespace { 0032 0033 void indexXAttr(const QString& url, Document& doc) 0034 { 0035 KFileMetaData::UserMetaData userMetaData(url); 0036 0037 using Attribute = KFileMetaData::UserMetaData::Attribute; 0038 auto attributes = userMetaData.queryAttributes(Attribute::Tags | 0039 Attribute::Rating | Attribute::Comment); 0040 if (attributes == Attribute::None) { 0041 return; 0042 } 0043 0044 TermGenerator tg(doc); 0045 0046 const QStringList tags = userMetaData.tags(); 0047 for (const QString& tag : tags) { 0048 tg.indexXattrText(tag, QByteArray("TA")); 0049 doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8()); 0050 } 0051 0052 int rating = userMetaData.rating(); 0053 if (rating) { 0054 doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating)); 0055 } 0056 0057 QString comment = userMetaData.userComment(); 0058 if (!comment.isEmpty()) { 0059 tg.indexXattrText(comment, QByteArray("C")); 0060 } 0061 } 0062 0063 QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType) 0064 { 0065 using namespace KFileMetaData; 0066 QVector<Type::Type> types; 0067 types.reserve(2); 0068 0069 // Basic types 0070 if (mimeType.startsWith(QLatin1String("audio/"))) { 0071 types << Type::Audio; 0072 } 0073 if (mimeType.startsWith(QLatin1String("video/"))) { 0074 types << Type::Video; 0075 } 0076 if (mimeType.startsWith(QLatin1String("image/"))) { 0077 types << Type::Image; 0078 } 0079 if (mimeType.startsWith(QLatin1String("text/"))) { 0080 types << Type::Text; 0081 } 0082 if (mimeType.contains(QLatin1String("document"))) { 0083 types << Type::Document; 0084 } 0085 if (mimeType.startsWith(QLatin1String("model/"))) { 0086 types << Type::Model; 0087 } 0088 if (mimeType.contains(QLatin1String("powerpoint"))) { 0089 types << Type::Presentation; 0090 types << Type::Document; 0091 } 0092 if (mimeType.contains(QLatin1String("excel"))) { 0093 types << Type::Spreadsheet; 0094 types << Type::Document; 0095 } 0096 // Compressed tar archives: "application/x-<compression>-compressed-tar" 0097 if ((mimeType.startsWith(QLatin1String("application/x-"))) && 0098 (mimeType.endsWith(QLatin1String("-compressed-tar")))) { 0099 types << Type::Archive; 0100 } 0101 0102 static QMultiHash<QString, Type::Type> typeMapper { 0103 {QStringLiteral("text/plain"), Type::Document}, 0104 // MS Office 0105 {QStringLiteral("application/msword"), Type::Document}, 0106 {QStringLiteral("application/x-scribus"), Type::Document}, 0107 // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above: 0108 // - application/vnd.ms-powerpoint 0109 // - application/vnd.ms-excel 0110 // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document 0111 // - application/vnd.openxmlformats-officedocument.wordprocessingml.document 0112 // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet 0113 // - application/vnd.openxmlformats-officedocument.presentationml.presentation 0114 // - application/vnd.oasis.opendocument.text 0115 // - application/vnd.oasis.opendocument.spreadsheet 0116 // - application/vnd.oasis.opendocument.presentation 0117 // Office 2007 0118 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation}, 0119 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation}, 0120 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation}, 0121 {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet}, 0122 // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification 0123 {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation}, 0124 {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet}, 0125 {QStringLiteral("application/pdf"), Type::Document}, 0126 {QStringLiteral("application/postscript"), Type::Document}, 0127 {QStringLiteral("application/x-dvi"), Type::Document}, 0128 {QStringLiteral("application/rtf"), Type::Document}, 0129 // EBooks 0130 {QStringLiteral("application/epub+zip"), Type::Document}, 0131 {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document}, 0132 {QStringLiteral("application/x-mobipocket-ebook"), Type::Document}, 0133 // Graphic EBooks 0134 {QStringLiteral("application/vnd.comicbook-rar"), Type::Document}, 0135 {QStringLiteral("application/vnd.comicbook+zip"), Type::Document}, 0136 {QStringLiteral("application/x-cb7"), Type::Document}, 0137 {QStringLiteral("application/x-cbt"), Type::Document}, 0138 // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats 0139 {QStringLiteral("application/gzip"), Type::Archive}, 0140 {QStringLiteral("application/x-tar"), Type::Archive}, 0141 {QStringLiteral("application/x-tarz"), Type::Archive}, 0142 {QStringLiteral("application/x-arc"), Type::Archive}, 0143 {QStringLiteral("application/x-archive"), Type::Archive}, 0144 {QStringLiteral("application/x-bzip"), Type::Archive}, 0145 {QStringLiteral("application/x-cpio"), Type::Archive}, 0146 {QStringLiteral("application/x-lha"), Type::Archive}, 0147 {QStringLiteral("application/x-lhz"), Type::Archive}, 0148 {QStringLiteral("application/x-lrzip"), Type::Archive}, 0149 {QStringLiteral("application/x-lz4"), Type::Archive}, 0150 {QStringLiteral("application/x-lzip"), Type::Archive}, 0151 {QStringLiteral("application/x-lzma"), Type::Archive}, 0152 {QStringLiteral("application/x-lzop"), Type::Archive}, 0153 {QStringLiteral("application/x-7z-compressed"), Type::Archive}, 0154 {QStringLiteral("application/x-ace"), Type::Archive}, 0155 {QStringLiteral("application/x-astrotite-afa"), Type::Archive}, 0156 {QStringLiteral("application/x-alz"), Type::Archive}, 0157 {QStringLiteral("application/vnd.android.package-archive"), Type::Archive}, 0158 {QStringLiteral("application/x-arj"), Type::Archive}, 0159 {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive}, 0160 {QStringLiteral("application/x-cfs-compressed"), Type::Archive}, 0161 {QStringLiteral("application/x-dar"), Type::Archive}, 0162 {QStringLiteral("application/x-lzh"), Type::Archive}, 0163 {QStringLiteral("application/x-lzx"), Type::Archive}, 0164 {QStringLiteral("application/vnd.rar"), Type::Archive}, 0165 {QStringLiteral("application/x-stuffit"), Type::Archive}, 0166 {QStringLiteral("application/x-stuffitx"), Type::Archive}, 0167 {QStringLiteral("application/x-tzo"), Type::Archive}, 0168 {QStringLiteral("application/x-ustar"), Type::Archive}, 0169 {QStringLiteral("application/x-xar"), Type::Archive}, 0170 {QStringLiteral("application/x-xz"), Type::Archive}, 0171 {QStringLiteral("application/x-zoo"), Type::Archive}, 0172 {QStringLiteral("application/zip"), Type::Archive}, 0173 {QStringLiteral("application/zlib"), Type::Archive}, 0174 {QStringLiteral("application/zstd"), Type::Archive}, 0175 // WPS office 0176 {QStringLiteral("application/wps-office.doc"), Type::Document}, 0177 {QStringLiteral("application/wps-office.xls"), Type::Document}, 0178 {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet}, 0179 {QStringLiteral("application/wps-office.pot"), Type::Document}, 0180 {QStringLiteral("application/wps-office.pot"), Type::Presentation}, 0181 {QStringLiteral("application/wps-office.wps"), Type::Document}, 0182 {QStringLiteral("application/wps-office.docx"), Type::Document}, 0183 {QStringLiteral("application/wps-office.xlsx"), Type::Document}, 0184 {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet}, 0185 {QStringLiteral("application/wps-office.pptx"), Type::Document}, 0186 {QStringLiteral("application/wps-office.pptx"), Type::Presentation}, 0187 // Other 0188 {QStringLiteral("text/markdown"), Type::Document}, 0189 {QStringLiteral("image/vnd.djvu+multipage"), Type::Document}, 0190 {QStringLiteral("application/x-lyx"), Type::Document}, 0191 }; 0192 0193 auto hashIt = typeMapper.find(mimeType); 0194 while (hashIt != typeMapper.end() && hashIt.key() == mimeType) { 0195 types.append(hashIt.value()); 0196 ++hashIt; 0197 } 0198 0199 return types; 0200 } 0201 } // namespace 0202 0203 BasicIndexingJob::~BasicIndexingJob() 0204 { 0205 } 0206 0207 bool BasicIndexingJob::index() 0208 { 0209 const QByteArray url = QFile::encodeName(m_filePath); 0210 auto lastSlash = url.lastIndexOf('/'); 0211 0212 const QByteArray fileName = url.mid(lastSlash + 1); 0213 const QByteArray filePath = url.left(lastSlash); 0214 0215 QT_STATBUF statBuf; 0216 if (filePathToStat(filePath, statBuf) != 0) { 0217 return false; 0218 } 0219 0220 Document doc; 0221 doc.setParentId(statBufToId(statBuf)); 0222 0223 if (filePathToStat(url, statBuf) != 0) { 0224 return false; 0225 } 0226 doc.setId(statBufToId(statBuf)); 0227 doc.setUrl(url); 0228 0229 TermGenerator tg(doc); 0230 tg.indexFileNameText(QFile::decodeName(fileName)); 0231 if (statBuf.st_size == 0) { 0232 tg.indexText(QStringLiteral("application/x-zerosize"), QByteArray("M")); 0233 } else { 0234 tg.indexText(m_mimetype, QByteArray("M")); 0235 } 0236 0237 // (Content) Modification time, Metadata (e.g. XAttr) change time 0238 doc.setMTime(statBuf.st_mtime); 0239 doc.setCTime(statBuf.st_ctime); 0240 0241 if (S_ISDIR(statBuf.st_mode)) { 0242 static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder)); 0243 doc.addTerm(type); 0244 // For folders we do not need to go through file indexing, so we do not set contentIndexing 0245 0246 } else if (statBuf.st_size > 0) { 0247 if (m_indexingLevel == MarkForContentIndexing) { 0248 doc.setContentIndexing(true); 0249 } 0250 // Types 0251 const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(m_mimetype); 0252 for (KFileMetaData::Type::Type type : tList) { 0253 QByteArray num = QByteArray::number(static_cast<int>(type)); 0254 doc.addTerm(QByteArray("T") + num); 0255 } 0256 } 0257 0258 indexXAttr(m_filePath, doc); 0259 0260 m_doc = doc; 0261 return true; 0262 }