File indexing completed on 2023-09-24 09:52:47

0001 /*
0002     kquery.cpp
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 
0006 */
0007 
0008 #include "kquery.h"
0009 #include "kfind_debug.h"
0010 #include <stdlib.h>
0011 
0012 #include <QCoreApplication>
0013 #include <QMimeDatabase>
0014 #include <QRegularExpression>
0015 #include <QStandardPaths>
0016 #include <QTextCodec>
0017 #include <QTextStream>
0018  
0019 #include <KFileItem>
0020 #include <KLocalizedString>
0021 #include <KMessageBox>
0022 #include <KZip>
0023 
0024 #include <KFileMetaData/Extractor>
0025 #include <KFileMetaData/ExtractorCollection>
0026 #include <KFileMetaData/PropertyInfo>
0027 #include <KFileMetaData/SimpleExtractionResult>
0028 
0029 KQuery::KQuery(QObject *parent)
0030     : QObject(parent)
0031     , m_filetype(0)
0032     , m_sizemode(0)
0033     , m_sizeboundary1(0)
0034     , m_sizeboundary2(0)
0035     , m_timeFrom(0)
0036     , m_timeTo(0)
0037     , m_recursive(false)
0038     , m_casesensitive(false)
0039     , m_search_binary(false)
0040     , m_useLocate(false)
0041     , m_showHiddenFiles(false)
0042     , job(nullptr)
0043     , m_insideCheckEntries(false)
0044     , m_result(0)
0045 {
0046     processLocate = new KProcess(this);
0047     connect(processLocate, &KProcess::readyReadStandardOutput, this, &KQuery::slotreadyReadStandardOutput);
0048     connect(processLocate, &KProcess::readyReadStandardError, this, &KQuery::slotreadyReadStandardError);
0049     connect(processLocate, QOverload<int, QProcess::ExitStatus>::of(&KProcess::finished), this, &KQuery::slotendProcessLocate);
0050 
0051     // Files with these mime types can be ignored, even if
0052     // findFormatByFileContent() in some cases may claim that
0053     // these are text files:
0054     ignore_mimetypes.append(QStringLiteral("application/pdf"));
0055     ignore_mimetypes.append(QStringLiteral("application/postscript"));
0056 
0057     // PLEASE update the documentation when you add another
0058     // file type here:
0059     ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.writer"));
0060     ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.calc"));
0061     ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.impress"));
0062     // OASIS mimetypes, used by OOo-2.x and KOffice >= 1.4
0063     //ooo_mimetypes.append("application/vnd.oasis.opendocument.chart");
0064     //ooo_mimetypes.append("application/vnd.oasis.opendocument.graphics");
0065     //ooo_mimetypes.append("application/vnd.oasis.opendocument.graphics-template");
0066     //ooo_mimetypes.append("application/vnd.oasis.opendocument.formula");
0067     //ooo_mimetypes.append("application/vnd.oasis.opendocument.image");
0068     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.presentation-template"));
0069     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.presentation"));
0070     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.spreadsheet-template"));
0071     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"));
0072     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.text-template"));
0073     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.text"));
0074     // KOffice-1.3 mimetypes
0075     koffice_mimetypes.append(QStringLiteral("application/x-kword"));
0076     koffice_mimetypes.append(QStringLiteral("application/x-kspread"));
0077     koffice_mimetypes.append(QStringLiteral("application/x-kpresenter"));
0078 }
0079 
0080 KQuery::~KQuery()
0081 {
0082     qDeleteAll(m_regexps);
0083     m_fileItems.clear();
0084     if (processLocate->state() == QProcess::Running) {
0085         disconnect(processLocate);
0086         processLocate->kill();
0087         processLocate->waitForFinished(5000);
0088         delete processLocate;
0089     }
0090 }
0091 
0092 void KQuery::kill()
0093 {
0094     if (job) {
0095         job->kill(KJob::EmitResult);
0096     }
0097     if (processLocate->state() == QProcess::Running) {
0098         processLocate->kill();
0099     }
0100     m_fileItems.clear();
0101 }
0102 
0103 void KQuery::start()
0104 {
0105     m_fileItems.clear();
0106     if (m_useLocate) { //Use "locate" instead of the internal search method
0107         bufferLocate.clear();
0108         m_url = m_url.adjusted(QUrl::NormalizePathSegments);
0109 
0110         processLocate->clearProgram();
0111         processLocate->setProgram(QStandardPaths::findExecutable(QStringLiteral("locate")), QStringList{m_url.toLocalFile()});
0112 
0113         processLocate->setOutputChannelMode(KProcess::SeparateChannels);
0114         processLocate->start();
0115     } else { //Use KIO
0116         if (m_recursive) {
0117             job = KIO::listRecursive(m_url, KIO::HideProgressInfo);
0118         } else {
0119             job = KIO::listDir(m_url, KIO::HideProgressInfo);
0120         }
0121 
0122         connect(job, &KIO::ListJob::entries, this, QOverload<KIO::Job*, const KIO::UDSEntryList&>::of(&KQuery::slotListEntries));
0123         connect(job, &KIO::ListJob::result, this, &KQuery::slotResult);
0124     }
0125 }
0126 
0127 void KQuery::slotResult(KJob *_job)
0128 {
0129     if (job != _job) {
0130         return;
0131     }
0132     job = nullptr;
0133 
0134     m_result = _job->error();
0135     if (m_result == KIO::ERR_USER_CANCELED) {
0136         m_fileItems.clear();
0137     }
0138     checkEntries();
0139 }
0140 
0141 void KQuery::slotListEntries(KIO::Job *, const KIO::UDSEntryList &list)
0142 {
0143     for (const KIO::UDSEntry &entry : list) {
0144         m_fileItems.enqueue(KFileItem(entry, m_url, true, true));
0145     }
0146 
0147     checkEntries();
0148 }
0149 
0150 void KQuery::checkEntries()
0151 {
0152     if (m_insideCheckEntries) {
0153         return;
0154     }
0155 
0156     m_insideCheckEntries = true;
0157 
0158     metaKeyRx = QRegExp(m_metainfokey);
0159     metaKeyRx.setPatternSyntax(QRegExp::Wildcard);
0160 
0161     m_foundFilesList.clear();
0162 
0163     int processingCount = 0;
0164     while (!m_fileItems.isEmpty())
0165     {
0166         processQuery(m_fileItems.dequeue());
0167         processingCount++;
0168 
0169         /* This is a workaround. As the qApp->processEvents() call inside processQuery
0170          * will bring more KIO entries, m_fileItems will increase even inside this loop
0171          * and that will lead to a big loop, it will take time to report found items to the GUI
0172          * so we are going to force Q_EMIT results every 100 files processed */
0173         if (processingCount == 100) {
0174             processingCount = 0;
0175             if (m_foundFilesList.size() > 0) {
0176                 Q_EMIT foundFileList(m_foundFilesList);
0177                 m_foundFilesList.clear();
0178             }
0179         }
0180     }
0181 
0182     if (m_foundFilesList.size() > 0) {
0183         Q_EMIT foundFileList(m_foundFilesList);
0184     }
0185 
0186     if (job == nullptr) {
0187         Q_EMIT result(m_result);
0188     }
0189 
0190     m_insideCheckEntries = false;
0191 }
0192 
0193 /* List of files found using slocate */
0194 void KQuery::slotListEntries(const QStringList &list)
0195 {
0196     metaKeyRx = QRegExp(m_metainfokey);
0197     metaKeyRx.setPatternSyntax(QRegExp::Wildcard);
0198 
0199     m_foundFilesList.clear();
0200     for (const auto &file : list) {
0201         processQuery(KFileItem(QUrl::fromLocalFile(file)));
0202     }
0203 
0204     if (!m_foundFilesList.isEmpty()) {
0205         Q_EMIT foundFileList(m_foundFilesList);
0206     }
0207 }
0208 
0209 /* Check if file meets the find's requirements*/
0210 void KQuery::processQuery(const KFileItem &file)
0211 {
0212     if (file.name() == QLatin1String(".") || file.name() == QLatin1String("..")) {
0213         return;
0214     }
0215 
0216     if (!m_showHiddenFiles && file.isHidden()) {
0217         return;
0218     }
0219 
0220     bool matched = false;
0221 
0222     for (const QRegExp *reg : std::as_const(m_regexps)) {
0223         matched = matched || (reg == nullptr) || (reg->exactMatch(file.url().adjusted(QUrl::StripTrailingSlash).fileName()));
0224     }
0225     if (!matched) {
0226         return;
0227     }
0228 
0229     // make sure the files are in the correct range
0230     switch (m_sizemode) {
0231     case 1: // "at least"
0232         if (file.size() < m_sizeboundary1) {
0233             return;
0234         }
0235         break;
0236     case 2: // "at most"
0237         if (file.size() > m_sizeboundary1) {
0238             return;
0239         }
0240         break;
0241     case 3: // "equal"
0242         if (file.size() != m_sizeboundary1) {
0243             return;
0244         }
0245         break;
0246     case 4: // "between"
0247         if ((file.size() < m_sizeboundary1)
0248             || (file.size() > m_sizeboundary2)) {
0249             return;
0250         }
0251         break;
0252     case 0: // "none" -> Fall to default
0253     default:
0254         break;
0255     }
0256 
0257     // make sure it's in the correct date range
0258     // what about 0 times?
0259     if (m_timeFrom && ((uint)m_timeFrom) > file.time(KFileItem::ModificationTime).toSecsSinceEpoch()) {
0260         return;
0261     }
0262     if (m_timeTo && ((uint)m_timeTo) < file.time(KFileItem::ModificationTime).toSecsSinceEpoch()) {
0263         return;
0264     }
0265 
0266     // username / group match
0267     if ((!m_username.isEmpty()) && (m_username != file.user())) {
0268         return;
0269     }
0270     if ((!m_groupname.isEmpty()) && (m_groupname != file.group())) {
0271         return;
0272     }
0273 
0274     // file type
0275     switch (m_filetype) {
0276     case 0:
0277         break;
0278     case 1: // plain file
0279         if (!S_ISREG(file.mode())) {
0280             return;
0281         }
0282         break;
0283     case 2:
0284         if (!file.isDir()) {
0285             return;
0286         }
0287         break;
0288     case 3:
0289         if (!file.isLink()) {
0290             return;
0291         }
0292         break;
0293     case 4:
0294         if (!S_ISCHR(file.mode()) && !S_ISBLK(file.mode())
0295             && !S_ISFIFO(file.mode()) && !S_ISSOCK(file.mode())) {
0296             return;
0297         }
0298         break;
0299     case 5: // binary
0300         if ((file.permissions() & 0111) != 0111 || file.isDir()) {
0301             return;
0302         }
0303         break;
0304     case 6: // suid
0305         if ((file.permissions() & 04000) != 04000) { // fixme
0306             return;
0307         }
0308         break;
0309     default:
0310         if (!m_mimetype.isEmpty() && !m_mimetype.contains(file.mimetype())) {
0311             return;
0312         }
0313     }
0314 
0315     // match data in metainfo...
0316     if ((!m_metainfo.isEmpty()) && (!m_metainfokey.isEmpty())) {
0317         //Avoid sequential files (fifo,char devices)
0318         if (!file.isRegularFile()) {
0319             return;
0320         }
0321 
0322         bool foundmeta = false;
0323         QString filename = file.url().path();
0324 
0325         if (filename.startsWith(QLatin1String("/dev/"))) {
0326             return;
0327         }
0328 
0329         QMimeDatabase mimeDb;
0330         QString mimetype = mimeDb.mimeTypeForFile(filename).name();
0331         QString strmetakeycontent;
0332 
0333         KFileMetaData::ExtractorCollection extractors;
0334         const QList<KFileMetaData::Extractor*> exList = extractors.fetchExtractors(mimetype);
0335 
0336         for (KFileMetaData::Extractor* ex : exList) {
0337             KFileMetaData::SimpleExtractionResult result(filename, mimetype,
0338                                                          KFileMetaData::ExtractionResult::ExtractMetaData);
0339             ex->extract(&result);
0340 
0341             const KFileMetaData::PropertyMultiMap properties = result.properties();
0342             for (auto it = properties.cbegin(); it != properties.cend(); ++it) {
0343                 if (!metaKeyRx.exactMatch(KFileMetaData::PropertyInfo(it.key()).displayName())) {
0344                     continue;
0345                 }
0346                 strmetakeycontent = it.value().toString();
0347                 if (strmetakeycontent.indexOf(m_metainfo) != -1) {
0348                     foundmeta = true;
0349                     break;
0350                 }
0351             }
0352         }
0353         if (!foundmeta) {
0354             return;
0355         }
0356     }
0357 
0358     // match contents...
0359     QString matchingLine;
0360     if (!m_context.isEmpty()) {
0361         //Avoid sequential files (fifo,char devices)
0362         if (!file.isRegularFile()) {
0363             return;
0364         }
0365 
0366         if (!m_search_binary && ignore_mimetypes.indexOf(file.mimetype()) != -1) {
0367             return;
0368         }
0369 
0370         bool found = false;
0371         bool isZippedOfficeDocument = false;
0372         int matchingLineNumber = 0;
0373 
0374         // FIXME: doesn't work with non local files
0375 
0376         const QRegularExpression xmlTags(QStringLiteral("<.*?>"));
0377         QString filename;
0378         QTextStream *stream = nullptr;
0379         QFile qf;
0380         QByteArray zippedXmlFileContent;
0381 
0382         // KWord's and OpenOffice.org's files are zipped...
0383         if (ooo_mimetypes.indexOf(file.mimetype()) != -1
0384             || koffice_mimetypes.indexOf(file.mimetype()) != -1) {
0385             KZip zipfile(file.url().path());
0386             KZipFileEntry *zipfileEntry;
0387 
0388             if (zipfile.open(QIODevice::ReadOnly)) {
0389                 const KArchiveDirectory *zipfileContent = zipfile.directory();
0390 
0391                 if (koffice_mimetypes.indexOf(file.mimetype()) != -1) {
0392                     zipfileEntry = (KZipFileEntry *)zipfileContent->entry(QStringLiteral("maindoc.xml"));
0393                 } else {
0394                     zipfileEntry = (KZipFileEntry *)zipfileContent->entry(QStringLiteral("content.xml")); //for OpenOffice.org
0395                 }
0396                 if (!zipfileEntry) {
0397                     qCWarning(KFING_LOG) << "Expected XML file not found in ZIP archive " << file.url();
0398                     return;
0399                 }
0400 
0401                 zippedXmlFileContent = zipfileEntry->data();
0402                 stream = new QTextStream(zippedXmlFileContent, QIODevice::ReadOnly);
0403 
0404                 // QTextStream default encoding is UTF-8 in Qt6
0405                 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0406                 stream->setCodec("UTF-8");
0407                 #endif
0408 
0409                 isZippedOfficeDocument = true;
0410             } else {
0411                 qCWarning(KFING_LOG) << "Cannot open supposed ZIP file " << file.url();
0412             }
0413         } else if (!m_search_binary && !file.mimetype().startsWith(QLatin1String("text/"))
0414                    && file.url().isLocalFile() && !file.url().path().startsWith(QLatin1String("/dev"))) {
0415             QFile binfile(file.url().toLocalFile());
0416             if (!binfile.open(QIODevice::ReadOnly)) {
0417                 return; // err, whatever
0418             }
0419             // Check the first 128 bytes (see shared-mime spec)
0420             const QByteArray bindata = binfile.read(128);
0421             const char* pbin = bindata.data();
0422             const int end = qMin(128, bindata.size());
0423             for (int i = 0; i < end; ++i) {
0424                 if ((unsigned char)(pbin[i]) < 32 && pbin[i] != 9 && pbin[i] != 10 && pbin[i] != 13) // ASCII control character
0425                     return;
0426             }
0427         }
0428 
0429         if (!isZippedOfficeDocument) { //any other file or non-compressed KWord
0430             filename = file.url().path();
0431             if (filename.startsWith(QLatin1String("/dev/"))) {
0432                 return;
0433             }
0434             qf.setFileName(filename);
0435             qf.open(QIODevice::ReadOnly);
0436             stream = new QTextStream(&qf);
0437             #if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
0438             stream->setEncoding(QStringConverter::System);
0439             #else
0440             stream->setCodec(QTextCodec::codecForLocale());
0441             #endif
0442         }
0443 
0444         while (!stream->atEnd())
0445         {
0446             QString str = stream->readLine();
0447             matchingLineNumber++;
0448 
0449             //If the stream ended (readLine().isNull() is true) the file was read completely
0450             //Do *not* use isEmpty() because that will exit if there is an empty line in the file
0451             if (str.isNull()) {
0452                 break;
0453             }
0454             if (isZippedOfficeDocument) {
0455                 str.remove(xmlTags);
0456             }
0457 
0458             if (str.indexOf(m_context, 0, m_casesensitive ? Qt::CaseSensitive : Qt::CaseInsensitive) != -1) {
0459                 matchingLine = QString::number(matchingLineNumber) + QStringLiteral(": ") + str.trimmed();
0460                 found = true;
0461                 break;
0462             }
0463             qApp->processEvents();
0464         }
0465 
0466         delete stream;
0467 
0468         if (!found) {
0469             return;
0470         }
0471     }
0472 
0473     m_foundFilesList.append(QPair<KFileItem, QString>(file, matchingLine));
0474 }
0475 
0476 void KQuery::setContext(const QString &context, bool casesensitive, bool search_binary)
0477 {
0478     m_context = context;
0479     m_casesensitive = casesensitive;
0480     m_search_binary = search_binary;
0481     m_regexp.setPatternSyntax(QRegExp::Wildcard);
0482     if (casesensitive) {
0483         m_regexp.setCaseSensitivity(Qt::CaseSensitive);
0484     } else {
0485         m_regexp.setCaseSensitivity(Qt::CaseInsensitive);
0486     }
0487 }
0488 
0489 void KQuery::setMetaInfo(const QString &metainfo, const QString &metainfokey)
0490 {
0491     m_metainfo = metainfo;
0492     m_metainfokey = metainfokey;
0493 }
0494 
0495 void KQuery::setMimeType(const QStringList &mimetype)
0496 {
0497     m_mimetype = mimetype;
0498 }
0499 
0500 void KQuery::setFileType(int filetype)
0501 {
0502     m_filetype = filetype;
0503 }
0504 
0505 void KQuery::setSizeRange(int mode, KIO::filesize_t value1, KIO::filesize_t value2)
0506 {
0507     m_sizemode = mode;
0508     m_sizeboundary1 = value1;
0509     m_sizeboundary2 = value2;
0510 }
0511 
0512 void KQuery::setTimeRange(time_t from, time_t to)
0513 {
0514     m_timeFrom = from;
0515     m_timeTo = to;
0516 }
0517 
0518 void KQuery::setUsername(const QString &username)
0519 {
0520     m_username = username;
0521 }
0522 
0523 void KQuery::setGroupname(const QString &groupname)
0524 {
0525     m_groupname = groupname;
0526 }
0527 
0528 void KQuery::setRegExp(const QString &regexp, bool caseSensitive)
0529 {
0530     const QStringList strList = regexp.split(QLatin1Char(';'), Qt::SkipEmptyParts);
0531     //  QRegExp globChars ("[\\*\\?\\[\\]]", TRUE, FALSE);
0532     qDeleteAll(m_regexps);
0533     m_regexps.clear();
0534     m_regexps.reserve(strList.size());
0535 
0536     //  m_regexpsContainsGlobs.clear();
0537     for (const auto &str : strList) {
0538         //m_regexpsContainsGlobs.append(regExp->pattern().contains(globChars));
0539         m_regexps.append(new QRegExp(str, (caseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive), QRegExp::Wildcard));
0540     }
0541 }
0542 
0543 void KQuery::setRecursive(bool recursive)
0544 {
0545     m_recursive = recursive;
0546 }
0547 
0548 void KQuery::setPath(const QUrl &url)
0549 {
0550     m_url = url;
0551 }
0552 
0553 void KQuery::setUseFileIndex(bool useLocate)
0554 {
0555     m_useLocate = useLocate;
0556 }
0557 
0558 void KQuery::setShowHiddenFiles(bool showHidden)
0559 {
0560     m_showHiddenFiles = showHidden;
0561 }
0562 
0563 void KQuery::slotreadyReadStandardError()
0564 {
0565     KMessageBox::error(nullptr, QString::fromLocal8Bit(processLocate->readAllStandardOutput()), i18nc("@title:window", "Error while using locate"));
0566 }
0567 
0568 void KQuery::slotreadyReadStandardOutput()
0569 {
0570     bufferLocate += processLocate->readAllStandardOutput();
0571 }
0572 
0573 void KQuery::slotendProcessLocate(int code, QProcess::ExitStatus)
0574 {
0575     if (code == 0) {
0576         if (!bufferLocate.isEmpty()) {
0577             QString str = QString::fromLocal8Bit(bufferLocate);
0578             bufferLocate.clear();
0579             slotListEntries(str.split(QLatin1Char('\n'), Qt::SkipEmptyParts));
0580         }
0581     }
0582     Q_EMIT result(0);
0583 }
0584 
0585 #include "moc_kquery.cpp"