File indexing completed on 2024-10-06 08:17:31
0001 /* 0002 kquery.cpp 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 0006 */ 0007 0008 #include "kquery.h" 0009 #include "kfind_debug.h" 0010 #include <stdlib.h> 0011 0012 #include <QCoreApplication> 0013 #include <QMimeDatabase> 0014 #include <QRegularExpression> 0015 #include <QStandardPaths> 0016 #include <QTextCodec> 0017 #include <QTextStream> 0018 0019 #include <KFileItem> 0020 #include <KLocalizedString> 0021 #include <KMessageBox> 0022 #include <KZip> 0023 0024 #include <KFileMetaData/Extractor> 0025 #include <KFileMetaData/ExtractorCollection> 0026 #include <KFileMetaData/PropertyInfo> 0027 #include <KFileMetaData/SimpleExtractionResult> 0028 0029 KQuery::KQuery(QObject *parent) 0030 : QObject(parent) 0031 , m_filetype(0) 0032 , m_sizemode(0) 0033 , m_sizeboundary1(0) 0034 , m_sizeboundary2(0) 0035 , m_timeFrom(0) 0036 , m_timeTo(0) 0037 , m_recursive(false) 0038 , m_casesensitive(false) 0039 , m_search_binary(false) 0040 , m_useLocate(false) 0041 , m_showHiddenFiles(false) 0042 , job(nullptr) 0043 , m_insideCheckEntries(false) 0044 , m_result(0) 0045 { 0046 processLocate = new KProcess(this); 0047 connect(processLocate, &KProcess::readyReadStandardOutput, this, &KQuery::slotreadyReadStandardOutput); 0048 connect(processLocate, &KProcess::readyReadStandardError, this, &KQuery::slotreadyReadStandardError); 0049 connect(processLocate, QOverload<int, QProcess::ExitStatus>::of(&KProcess::finished), this, &KQuery::slotendProcessLocate); 0050 0051 // Files with these mime types can be ignored, even if 0052 // findFormatByFileContent() in some cases may claim that 0053 // these are text files: 0054 ignore_mimetypes.append(QStringLiteral("application/pdf")); 0055 ignore_mimetypes.append(QStringLiteral("application/postscript")); 0056 0057 // PLEASE update the documentation when you add another 0058 // file type here: 0059 ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.writer")); 0060 ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.calc")); 0061 ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.impress")); 0062 // OASIS mimetypes, used by OOo-2.x and KOffice >= 1.4 0063 //ooo_mimetypes.append("application/vnd.oasis.opendocument.chart"); 0064 //ooo_mimetypes.append("application/vnd.oasis.opendocument.graphics"); 0065 //ooo_mimetypes.append("application/vnd.oasis.opendocument.graphics-template"); 0066 //ooo_mimetypes.append("application/vnd.oasis.opendocument.formula"); 0067 //ooo_mimetypes.append("application/vnd.oasis.opendocument.image"); 0068 ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.presentation-template")); 0069 ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.presentation")); 0070 ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.spreadsheet-template")); 0071 ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.spreadsheet")); 0072 ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.text-template")); 0073 ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.text")); 0074 // KOffice-1.3 mimetypes 0075 koffice_mimetypes.append(QStringLiteral("application/x-kword")); 0076 koffice_mimetypes.append(QStringLiteral("application/x-kspread")); 0077 koffice_mimetypes.append(QStringLiteral("application/x-kpresenter")); 0078 } 0079 0080 KQuery::~KQuery() 0081 { 0082 qDeleteAll(m_regexps); 0083 m_fileItems.clear(); 0084 if (processLocate->state() == QProcess::Running) { 0085 disconnect(processLocate); 0086 processLocate->kill(); 0087 processLocate->waitForFinished(5000); 0088 delete processLocate; 0089 } 0090 } 0091 0092 void KQuery::kill() 0093 { 0094 if (job) { 0095 job->kill(KJob::EmitResult); 0096 } 0097 if (processLocate->state() == QProcess::Running) { 0098 processLocate->kill(); 0099 } 0100 m_fileItems.clear(); 0101 } 0102 0103 void KQuery::start() 0104 { 0105 m_fileItems.clear(); 0106 if (m_useLocate) { //Use "locate" instead of the internal search method 0107 bufferLocate.clear(); 0108 m_url = m_url.adjusted(QUrl::NormalizePathSegments); 0109 0110 processLocate->clearProgram(); 0111 processLocate->setProgram(QStandardPaths::findExecutable(QStringLiteral("locate")), QStringList{m_url.toLocalFile()}); 0112 0113 processLocate->setOutputChannelMode(KProcess::SeparateChannels); 0114 processLocate->start(); 0115 } else { //Use KIO 0116 if (m_recursive) { 0117 job = KIO::listRecursive(m_url, KIO::HideProgressInfo); 0118 } else { 0119 job = KIO::listDir(m_url, KIO::HideProgressInfo); 0120 } 0121 0122 connect(job, &KIO::ListJob::entries, this, QOverload<KIO::Job*, const KIO::UDSEntryList&>::of(&KQuery::slotListEntries)); 0123 connect(job, &KIO::ListJob::result, this, &KQuery::slotResult); 0124 } 0125 } 0126 0127 void KQuery::slotResult(KJob *_job) 0128 { 0129 if (job != _job) { 0130 return; 0131 } 0132 job = nullptr; 0133 0134 m_result = _job->error(); 0135 if (m_result == KIO::ERR_USER_CANCELED) { 0136 m_fileItems.clear(); 0137 } 0138 checkEntries(); 0139 } 0140 0141 void KQuery::slotListEntries(KIO::Job *, const KIO::UDSEntryList &list) 0142 { 0143 for (const KIO::UDSEntry &entry : list) { 0144 m_fileItems.enqueue(KFileItem(entry, m_url, true, true)); 0145 } 0146 0147 checkEntries(); 0148 } 0149 0150 void KQuery::checkEntries() 0151 { 0152 if (m_insideCheckEntries) { 0153 return; 0154 } 0155 0156 m_insideCheckEntries = true; 0157 0158 metaKeyRx = QRegExp(m_metainfokey); 0159 metaKeyRx.setPatternSyntax(QRegExp::Wildcard); 0160 0161 m_foundFilesList.clear(); 0162 0163 int processingCount = 0; 0164 while (!m_fileItems.isEmpty()) 0165 { 0166 processQuery(m_fileItems.dequeue()); 0167 processingCount++; 0168 0169 /* This is a workaround. As the qApp->processEvents() call inside processQuery 0170 * will bring more KIO entries, m_fileItems will increase even inside this loop 0171 * and that will lead to a big loop, it will take time to report found items to the GUI 0172 * so we are going to force Q_EMIT results every 100 files processed */ 0173 if (processingCount == 100) { 0174 processingCount = 0; 0175 if (m_foundFilesList.size() > 0) { 0176 Q_EMIT foundFileList(m_foundFilesList); 0177 m_foundFilesList.clear(); 0178 } 0179 } 0180 } 0181 0182 if (m_foundFilesList.size() > 0) { 0183 Q_EMIT foundFileList(m_foundFilesList); 0184 } 0185 0186 if (job == nullptr) { 0187 Q_EMIT result(m_result); 0188 } 0189 0190 m_insideCheckEntries = false; 0191 } 0192 0193 /* List of files found using slocate */ 0194 void KQuery::slotListEntries(const QStringList &list) 0195 { 0196 metaKeyRx = QRegExp(m_metainfokey); 0197 metaKeyRx.setPatternSyntax(QRegExp::Wildcard); 0198 0199 m_foundFilesList.clear(); 0200 for (const auto &file : list) { 0201 processQuery(KFileItem(QUrl::fromLocalFile(file))); 0202 } 0203 0204 if (!m_foundFilesList.isEmpty()) { 0205 Q_EMIT foundFileList(m_foundFilesList); 0206 } 0207 } 0208 0209 /* Check if file meets the find's requirements*/ 0210 void KQuery::processQuery(const KFileItem &file) 0211 { 0212 if (file.name() == QLatin1String(".") || file.name() == QLatin1String("..")) { 0213 return; 0214 } 0215 0216 if (!m_showHiddenFiles && file.isHidden()) { 0217 return; 0218 } 0219 0220 bool matched = false; 0221 0222 for (const QRegExp *reg : std::as_const(m_regexps)) { 0223 matched = matched || (reg == nullptr) || (reg->exactMatch(file.url().adjusted(QUrl::StripTrailingSlash).fileName())); 0224 } 0225 if (!matched) { 0226 return; 0227 } 0228 0229 // make sure the files are in the correct range 0230 switch (m_sizemode) { 0231 case 1: // "at least" 0232 if (file.size() < m_sizeboundary1) { 0233 return; 0234 } 0235 break; 0236 case 2: // "at most" 0237 if (file.size() > m_sizeboundary1) { 0238 return; 0239 } 0240 break; 0241 case 3: // "equal" 0242 if (file.size() != m_sizeboundary1) { 0243 return; 0244 } 0245 break; 0246 case 4: // "between" 0247 if ((file.size() < m_sizeboundary1) 0248 || (file.size() > m_sizeboundary2)) { 0249 return; 0250 } 0251 break; 0252 case 0: // "none" -> Fall to default 0253 default: 0254 break; 0255 } 0256 0257 // make sure it's in the correct date range 0258 // what about 0 times? 0259 if (m_timeFrom && ((uint)m_timeFrom) > file.time(KFileItem::ModificationTime).toSecsSinceEpoch()) { 0260 return; 0261 } 0262 if (m_timeTo && ((uint)m_timeTo) < file.time(KFileItem::ModificationTime).toSecsSinceEpoch()) { 0263 return; 0264 } 0265 0266 // username / group match 0267 if ((!m_username.isEmpty()) && (m_username != file.user())) { 0268 return; 0269 } 0270 if ((!m_groupname.isEmpty()) && (m_groupname != file.group())) { 0271 return; 0272 } 0273 0274 // file type 0275 switch (m_filetype) { 0276 case 0: 0277 break; 0278 case 1: // plain file 0279 if (!S_ISREG(file.mode())) { 0280 return; 0281 } 0282 break; 0283 case 2: 0284 if (!file.isDir()) { 0285 return; 0286 } 0287 break; 0288 case 3: 0289 if (!file.isLink()) { 0290 return; 0291 } 0292 break; 0293 case 4: 0294 if (!S_ISCHR(file.mode()) && !S_ISBLK(file.mode()) 0295 && !S_ISFIFO(file.mode()) && !S_ISSOCK(file.mode())) { 0296 return; 0297 } 0298 break; 0299 case 5: // binary 0300 if ((file.permissions() & 0111) != 0111 || file.isDir()) { 0301 return; 0302 } 0303 break; 0304 case 6: // suid 0305 if ((file.permissions() & 04000) != 04000) { // fixme 0306 return; 0307 } 0308 break; 0309 default: 0310 if (!m_mimetype.isEmpty() && !m_mimetype.contains(file.mimetype())) { 0311 return; 0312 } 0313 } 0314 0315 // match data in metainfo... 0316 if ((!m_metainfo.isEmpty()) && (!m_metainfokey.isEmpty())) { 0317 //Avoid sequential files (fifo,char devices) 0318 if (!file.isRegularFile()) { 0319 return; 0320 } 0321 0322 bool foundmeta = false; 0323 QString filename = file.url().path(); 0324 0325 if (filename.startsWith(QLatin1String("/dev/"))) { 0326 return; 0327 } 0328 0329 QMimeDatabase mimeDb; 0330 QString mimetype = mimeDb.mimeTypeForFile(filename).name(); 0331 QString strmetakeycontent; 0332 0333 KFileMetaData::ExtractorCollection extractors; 0334 const QList<KFileMetaData::Extractor*> exList = extractors.fetchExtractors(mimetype); 0335 0336 for (KFileMetaData::Extractor* ex : exList) { 0337 KFileMetaData::SimpleExtractionResult result(filename, mimetype, 0338 KFileMetaData::ExtractionResult::ExtractMetaData); 0339 ex->extract(&result); 0340 0341 const KFileMetaData::PropertyMultiMap properties = result.properties(); 0342 for (auto it = properties.cbegin(); it != properties.cend(); ++it) { 0343 if (!metaKeyRx.exactMatch(KFileMetaData::PropertyInfo(it.key()).displayName())) { 0344 continue; 0345 } 0346 strmetakeycontent = it.value().toString(); 0347 if (strmetakeycontent.indexOf(m_metainfo) != -1) { 0348 foundmeta = true; 0349 break; 0350 } 0351 } 0352 } 0353 if (!foundmeta) { 0354 return; 0355 } 0356 } 0357 0358 // match contents... 0359 QString matchingLine; 0360 if (!m_context.isEmpty()) { 0361 //Avoid sequential files (fifo,char devices) 0362 if (!file.isRegularFile()) { 0363 return; 0364 } 0365 0366 if (!m_search_binary && ignore_mimetypes.indexOf(file.mimetype()) != -1) { 0367 return; 0368 } 0369 0370 bool found = false; 0371 bool isZippedOfficeDocument = false; 0372 int matchingLineNumber = 0; 0373 0374 // FIXME: doesn't work with non local files 0375 0376 const QRegularExpression xmlTags(QStringLiteral("<.*?>")); 0377 QString filename; 0378 QTextStream *stream = nullptr; 0379 QFile qf; 0380 QByteArray zippedXmlFileContent; 0381 0382 // KWord's and OpenOffice.org's files are zipped... 0383 if (ooo_mimetypes.indexOf(file.mimetype()) != -1 0384 || koffice_mimetypes.indexOf(file.mimetype()) != -1) { 0385 KZip zipfile(file.url().path()); 0386 KZipFileEntry *zipfileEntry; 0387 0388 if (zipfile.open(QIODevice::ReadOnly)) { 0389 const KArchiveDirectory *zipfileContent = zipfile.directory(); 0390 0391 if (koffice_mimetypes.indexOf(file.mimetype()) != -1) { 0392 zipfileEntry = (KZipFileEntry *)zipfileContent->entry(QStringLiteral("maindoc.xml")); 0393 } else { 0394 zipfileEntry = (KZipFileEntry *)zipfileContent->entry(QStringLiteral("content.xml")); //for OpenOffice.org 0395 } 0396 if (!zipfileEntry) { 0397 qCWarning(KFING_LOG) << "Expected XML file not found in ZIP archive " << file.url(); 0398 return; 0399 } 0400 0401 zippedXmlFileContent = zipfileEntry->data(); 0402 stream = new QTextStream(zippedXmlFileContent, QIODevice::ReadOnly); 0403 0404 isZippedOfficeDocument = true; 0405 } else { 0406 qCWarning(KFING_LOG) << "Cannot open supposed ZIP file " << file.url(); 0407 } 0408 } else if (!m_search_binary && !file.mimetype().startsWith(QLatin1String("text/")) 0409 && file.url().isLocalFile() && !file.url().path().startsWith(QLatin1String("/dev"))) { 0410 QFile binfile(file.url().toLocalFile()); 0411 if (!binfile.open(QIODevice::ReadOnly)) { 0412 return; // err, whatever 0413 } 0414 // Check the first 128 bytes (see shared-mime spec) 0415 const QByteArray bindata = binfile.read(128); 0416 const char* pbin = bindata.data(); 0417 const int end = qMin(128, bindata.size()); 0418 for (int i = 0; i < end; ++i) { 0419 if ((unsigned char)(pbin[i]) < 32 && pbin[i] != 9 && pbin[i] != 10 && pbin[i] != 13) // ASCII control character 0420 return; 0421 } 0422 } 0423 0424 if (!isZippedOfficeDocument) { //any other file or non-compressed KWord 0425 filename = file.url().path(); 0426 if (filename.startsWith(QLatin1String("/dev/"))) { 0427 return; 0428 } 0429 qf.setFileName(filename); 0430 qf.open(QIODevice::ReadOnly); 0431 stream = new QTextStream(&qf); 0432 stream->setEncoding(QStringConverter::System); 0433 } 0434 0435 while (!stream->atEnd()) 0436 { 0437 QString str = stream->readLine(); 0438 matchingLineNumber++; 0439 0440 //If the stream ended (readLine().isNull() is true) the file was read completely 0441 //Do *not* use isEmpty() because that will exit if there is an empty line in the file 0442 if (str.isNull()) { 0443 break; 0444 } 0445 if (isZippedOfficeDocument) { 0446 str.remove(xmlTags); 0447 } 0448 0449 if (str.indexOf(m_context, 0, m_casesensitive ? Qt::CaseSensitive : Qt::CaseInsensitive) != -1) { 0450 matchingLine = QString::number(matchingLineNumber) + QStringLiteral(": ") + str.trimmed(); 0451 found = true; 0452 break; 0453 } 0454 qApp->processEvents(); 0455 } 0456 0457 delete stream; 0458 0459 if (!found) { 0460 return; 0461 } 0462 } 0463 0464 m_foundFilesList.append(QPair<KFileItem, QString>(file, matchingLine)); 0465 } 0466 0467 void KQuery::setContext(const QString &context, bool casesensitive, bool search_binary) 0468 { 0469 m_context = context; 0470 m_casesensitive = casesensitive; 0471 m_search_binary = search_binary; 0472 m_regexp.setPatternSyntax(QRegExp::Wildcard); 0473 if (casesensitive) { 0474 m_regexp.setCaseSensitivity(Qt::CaseSensitive); 0475 } else { 0476 m_regexp.setCaseSensitivity(Qt::CaseInsensitive); 0477 } 0478 } 0479 0480 void KQuery::setMetaInfo(const QString &metainfo, const QString &metainfokey) 0481 { 0482 m_metainfo = metainfo; 0483 m_metainfokey = metainfokey; 0484 } 0485 0486 void KQuery::setMimeType(const QStringList &mimetype) 0487 { 0488 m_mimetype = mimetype; 0489 } 0490 0491 void KQuery::setFileType(int filetype) 0492 { 0493 m_filetype = filetype; 0494 } 0495 0496 void KQuery::setSizeRange(int mode, KIO::filesize_t value1, KIO::filesize_t value2) 0497 { 0498 m_sizemode = mode; 0499 m_sizeboundary1 = value1; 0500 m_sizeboundary2 = value2; 0501 } 0502 0503 void KQuery::setTimeRange(time_t from, time_t to) 0504 { 0505 m_timeFrom = from; 0506 m_timeTo = to; 0507 } 0508 0509 void KQuery::setUsername(const QString &username) 0510 { 0511 m_username = username; 0512 } 0513 0514 void KQuery::setGroupname(const QString &groupname) 0515 { 0516 m_groupname = groupname; 0517 } 0518 0519 void KQuery::setRegExp(const QString ®exp, bool caseSensitive) 0520 { 0521 const QStringList strList = regexp.split(QLatin1Char(';'), Qt::SkipEmptyParts); 0522 // QRegExp globChars ("[\\*\\?\\[\\]]", TRUE, FALSE); 0523 qDeleteAll(m_regexps); 0524 m_regexps.clear(); 0525 m_regexps.reserve(strList.size()); 0526 0527 // m_regexpsContainsGlobs.clear(); 0528 for (const auto &str : strList) { 0529 //m_regexpsContainsGlobs.append(regExp->pattern().contains(globChars)); 0530 m_regexps.append(new QRegExp(str, (caseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive), QRegExp::Wildcard)); 0531 } 0532 } 0533 0534 void KQuery::setRecursive(bool recursive) 0535 { 0536 m_recursive = recursive; 0537 } 0538 0539 void KQuery::setPath(const QUrl &url) 0540 { 0541 m_url = url; 0542 } 0543 0544 void KQuery::setUseFileIndex(bool useLocate) 0545 { 0546 m_useLocate = useLocate; 0547 } 0548 0549 void KQuery::setShowHiddenFiles(bool showHidden) 0550 { 0551 m_showHiddenFiles = showHidden; 0552 } 0553 0554 void KQuery::slotreadyReadStandardError() 0555 { 0556 KMessageBox::error(nullptr, QString::fromLocal8Bit(processLocate->readAllStandardOutput()), i18nc("@title:window", "Error while using locate")); 0557 } 0558 0559 void KQuery::slotreadyReadStandardOutput() 0560 { 0561 bufferLocate += processLocate->readAllStandardOutput(); 0562 } 0563 0564 void KQuery::slotendProcessLocate(int code, QProcess::ExitStatus) 0565 { 0566 if (code == 0) { 0567 if (!bufferLocate.isEmpty()) { 0568 QString str = QString::fromLocal8Bit(bufferLocate); 0569 bufferLocate.clear(); 0570 slotListEntries(str.split(QLatin1Char('\n'), Qt::SkipEmptyParts)); 0571 } 0572 } 0573 Q_EMIT result(0); 0574 } 0575 0576 #include "moc_kquery.cpp"