File indexing completed on 2024-04-14 05:43:22

0001 /*
0002     kquery.cpp
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 
0006 */
0007 
0008 #include "kquery.h"
0009 #include "kfind_debug.h"
0010 #include <stdlib.h>
0011 
0012 #include <QCoreApplication>
0013 #include <QMimeDatabase>
0014 #include <QRegularExpression>
0015 #include <QStandardPaths>
0016 #include <QTextCodec>
0017 #include <QTextStream>
0018  
0019 #include <KFileItem>
0020 #include <KLocalizedString>
0021 #include <KMessageBox>
0022 #include <KZip>
0023 
0024 #include <KFileMetaData/Extractor>
0025 #include <KFileMetaData/ExtractorCollection>
0026 #include <KFileMetaData/PropertyInfo>
0027 #include <KFileMetaData/SimpleExtractionResult>
0028 
0029 KQuery::KQuery(QObject *parent)
0030     : QObject(parent)
0031     , m_filetype(0)
0032     , m_sizemode(0)
0033     , m_sizeboundary1(0)
0034     , m_sizeboundary2(0)
0035     , m_timeFrom(0)
0036     , m_timeTo(0)
0037     , m_recursive(false)
0038     , m_casesensitive(false)
0039     , m_search_binary(false)
0040     , m_useLocate(false)
0041     , m_showHiddenFiles(false)
0042     , job(nullptr)
0043     , m_insideCheckEntries(false)
0044     , m_result(0)
0045 {
0046     processLocate = new KProcess(this);
0047     connect(processLocate, &KProcess::readyReadStandardOutput, this, &KQuery::slotreadyReadStandardOutput);
0048     connect(processLocate, &KProcess::readyReadStandardError, this, &KQuery::slotreadyReadStandardError);
0049     connect(processLocate, QOverload<int, QProcess::ExitStatus>::of(&KProcess::finished), this, &KQuery::slotendProcessLocate);
0050 
0051     // Files with these mime types can be ignored, even if
0052     // findFormatByFileContent() in some cases may claim that
0053     // these are text files:
0054     ignore_mimetypes.append(QStringLiteral("application/pdf"));
0055     ignore_mimetypes.append(QStringLiteral("application/postscript"));
0056 
0057     // PLEASE update the documentation when you add another
0058     // file type here:
0059     ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.writer"));
0060     ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.calc"));
0061     ooo_mimetypes.append(QStringLiteral("application/vnd.sun.xml.impress"));
0062     // OASIS mimetypes, used by OOo-2.x and KOffice >= 1.4
0063     //ooo_mimetypes.append("application/vnd.oasis.opendocument.chart");
0064     //ooo_mimetypes.append("application/vnd.oasis.opendocument.graphics");
0065     //ooo_mimetypes.append("application/vnd.oasis.opendocument.graphics-template");
0066     //ooo_mimetypes.append("application/vnd.oasis.opendocument.formula");
0067     //ooo_mimetypes.append("application/vnd.oasis.opendocument.image");
0068     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.presentation-template"));
0069     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.presentation"));
0070     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.spreadsheet-template"));
0071     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"));
0072     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.text-template"));
0073     ooo_mimetypes.append(QStringLiteral("application/vnd.oasis.opendocument.text"));
0074     // KOffice-1.3 mimetypes
0075     koffice_mimetypes.append(QStringLiteral("application/x-kword"));
0076     koffice_mimetypes.append(QStringLiteral("application/x-kspread"));
0077     koffice_mimetypes.append(QStringLiteral("application/x-kpresenter"));
0078 }
0079 
0080 KQuery::~KQuery()
0081 {
0082     qDeleteAll(m_regexps);
0083     m_fileItems.clear();
0084     if (processLocate->state() == QProcess::Running) {
0085         disconnect(processLocate);
0086         processLocate->kill();
0087         processLocate->waitForFinished(5000);
0088         delete processLocate;
0089     }
0090 }
0091 
0092 void KQuery::kill()
0093 {
0094     if (job) {
0095         job->kill(KJob::EmitResult);
0096     }
0097     if (processLocate->state() == QProcess::Running) {
0098         processLocate->kill();
0099     }
0100     m_fileItems.clear();
0101 }
0102 
0103 void KQuery::start()
0104 {
0105     m_fileItems.clear();
0106     if (m_useLocate) { //Use "locate" instead of the internal search method
0107         bufferLocate.clear();
0108         m_url = m_url.adjusted(QUrl::NormalizePathSegments);
0109 
0110         processLocate->clearProgram();
0111         processLocate->setProgram(QStandardPaths::findExecutable(QStringLiteral("locate")), QStringList{m_url.toLocalFile()});
0112 
0113         processLocate->setOutputChannelMode(KProcess::SeparateChannels);
0114         processLocate->start();
0115     } else { //Use KIO
0116         if (m_recursive) {
0117             job = KIO::listRecursive(m_url, KIO::HideProgressInfo);
0118         } else {
0119             job = KIO::listDir(m_url, KIO::HideProgressInfo);
0120         }
0121 
0122         connect(job, &KIO::ListJob::entries, this, QOverload<KIO::Job*, const KIO::UDSEntryList&>::of(&KQuery::slotListEntries));
0123         connect(job, &KIO::ListJob::result, this, &KQuery::slotResult);
0124     }
0125 }
0126 
0127 void KQuery::slotResult(KJob *_job)
0128 {
0129     if (job != _job) {
0130         return;
0131     }
0132     job = nullptr;
0133 
0134     m_result = _job->error();
0135     if (m_result == KIO::ERR_USER_CANCELED) {
0136         m_fileItems.clear();
0137     }
0138     checkEntries();
0139 }
0140 
0141 void KQuery::slotListEntries(KIO::Job *, const KIO::UDSEntryList &list)
0142 {
0143     for (const KIO::UDSEntry &entry : list) {
0144         m_fileItems.enqueue(KFileItem(entry, m_url, true, true));
0145     }
0146 
0147     checkEntries();
0148 }
0149 
0150 void KQuery::checkEntries()
0151 {
0152     if (m_insideCheckEntries) {
0153         return;
0154     }
0155 
0156     m_insideCheckEntries = true;
0157 
0158     metaKeyRx = QRegExp(m_metainfokey);
0159     metaKeyRx.setPatternSyntax(QRegExp::Wildcard);
0160 
0161     m_foundFilesList.clear();
0162 
0163     int processingCount = 0;
0164     while (!m_fileItems.isEmpty())
0165     {
0166         processQuery(m_fileItems.dequeue());
0167         processingCount++;
0168 
0169         /* This is a workaround. As the qApp->processEvents() call inside processQuery
0170          * will bring more KIO entries, m_fileItems will increase even inside this loop
0171          * and that will lead to a big loop, it will take time to report found items to the GUI
0172          * so we are going to force Q_EMIT results every 100 files processed */
0173         if (processingCount == 100) {
0174             processingCount = 0;
0175             if (m_foundFilesList.size() > 0) {
0176                 Q_EMIT foundFileList(m_foundFilesList);
0177                 m_foundFilesList.clear();
0178             }
0179         }
0180     }
0181 
0182     if (m_foundFilesList.size() > 0) {
0183         Q_EMIT foundFileList(m_foundFilesList);
0184     }
0185 
0186     if (job == nullptr) {
0187         Q_EMIT result(m_result);
0188     }
0189 
0190     m_insideCheckEntries = false;
0191 }
0192 
0193 /* List of files found using slocate */
0194 void KQuery::slotListEntries(const QStringList &list)
0195 {
0196     metaKeyRx = QRegExp(m_metainfokey);
0197     metaKeyRx.setPatternSyntax(QRegExp::Wildcard);
0198 
0199     m_foundFilesList.clear();
0200     for (const auto &file : list) {
0201         processQuery(KFileItem(QUrl::fromLocalFile(file)));
0202     }
0203 
0204     if (!m_foundFilesList.isEmpty()) {
0205         Q_EMIT foundFileList(m_foundFilesList);
0206     }
0207 }
0208 
0209 /* Check if file meets the find's requirements*/
0210 void KQuery::processQuery(const KFileItem &file)
0211 {
0212     if (file.name() == QLatin1String(".") || file.name() == QLatin1String("..")) {
0213         return;
0214     }
0215 
0216     if (!m_showHiddenFiles && file.isHidden()) {
0217         return;
0218     }
0219 
0220     bool matched = false;
0221 
0222     for (const QRegExp *reg : std::as_const(m_regexps)) {
0223         matched = matched || (reg == nullptr) || (reg->exactMatch(file.url().adjusted(QUrl::StripTrailingSlash).fileName()));
0224     }
0225     if (!matched) {
0226         return;
0227     }
0228 
0229     // make sure the files are in the correct range
0230     switch (m_sizemode) {
0231     case 1: // "at least"
0232         if (file.size() < m_sizeboundary1) {
0233             return;
0234         }
0235         break;
0236     case 2: // "at most"
0237         if (file.size() > m_sizeboundary1) {
0238             return;
0239         }
0240         break;
0241     case 3: // "equal"
0242         if (file.size() != m_sizeboundary1) {
0243             return;
0244         }
0245         break;
0246     case 4: // "between"
0247         if ((file.size() < m_sizeboundary1)
0248             || (file.size() > m_sizeboundary2)) {
0249             return;
0250         }
0251         break;
0252     case 0: // "none" -> Fall to default
0253     default:
0254         break;
0255     }
0256 
0257     // make sure it's in the correct date range
0258     // what about 0 times?
0259     if (m_timeFrom && ((uint)m_timeFrom) > file.time(KFileItem::ModificationTime).toSecsSinceEpoch()) {
0260         return;
0261     }
0262     if (m_timeTo && ((uint)m_timeTo) < file.time(KFileItem::ModificationTime).toSecsSinceEpoch()) {
0263         return;
0264     }
0265 
0266     // username / group match
0267     if ((!m_username.isEmpty()) && (m_username != file.user())) {
0268         return;
0269     }
0270     if ((!m_groupname.isEmpty()) && (m_groupname != file.group())) {
0271         return;
0272     }
0273 
0274     // file type
0275     switch (m_filetype) {
0276     case 0:
0277         break;
0278     case 1: // plain file
0279         if (!S_ISREG(file.mode())) {
0280             return;
0281         }
0282         break;
0283     case 2:
0284         if (!file.isDir()) {
0285             return;
0286         }
0287         break;
0288     case 3:
0289         if (!file.isLink()) {
0290             return;
0291         }
0292         break;
0293     case 4:
0294         if (!S_ISCHR(file.mode()) && !S_ISBLK(file.mode())
0295             && !S_ISFIFO(file.mode()) && !S_ISSOCK(file.mode())) {
0296             return;
0297         }
0298         break;
0299     case 5: // binary
0300         if ((file.permissions() & 0111) != 0111 || file.isDir()) {
0301             return;
0302         }
0303         break;
0304     case 6: // suid
0305         if ((file.permissions() & 04000) != 04000) { // fixme
0306             return;
0307         }
0308         break;
0309     default:
0310         if (!m_mimetype.isEmpty() && !m_mimetype.contains(file.mimetype())) {
0311             return;
0312         }
0313     }
0314 
0315     // match data in metainfo...
0316     if ((!m_metainfo.isEmpty()) && (!m_metainfokey.isEmpty())) {
0317         //Avoid sequential files (fifo,char devices)
0318         if (!file.isRegularFile()) {
0319             return;
0320         }
0321 
0322         bool foundmeta = false;
0323         QString filename = file.url().path();
0324 
0325         if (filename.startsWith(QLatin1String("/dev/"))) {
0326             return;
0327         }
0328 
0329         QMimeDatabase mimeDb;
0330         QString mimetype = mimeDb.mimeTypeForFile(filename).name();
0331         QString strmetakeycontent;
0332 
0333         KFileMetaData::ExtractorCollection extractors;
0334         const QList<KFileMetaData::Extractor*> exList = extractors.fetchExtractors(mimetype);
0335 
0336         for (KFileMetaData::Extractor* ex : exList) {
0337             KFileMetaData::SimpleExtractionResult result(filename, mimetype,
0338                                                          KFileMetaData::ExtractionResult::ExtractMetaData);
0339             ex->extract(&result);
0340 
0341             const KFileMetaData::PropertyMultiMap properties = result.properties();
0342             for (auto it = properties.cbegin(); it != properties.cend(); ++it) {
0343                 if (!metaKeyRx.exactMatch(KFileMetaData::PropertyInfo(it.key()).displayName())) {
0344                     continue;
0345                 }
0346                 strmetakeycontent = it.value().toString();
0347                 if (strmetakeycontent.indexOf(m_metainfo) != -1) {
0348                     foundmeta = true;
0349                     break;
0350                 }
0351             }
0352         }
0353         if (!foundmeta) {
0354             return;
0355         }
0356     }
0357 
0358     // match contents...
0359     QString matchingLine;
0360     if (!m_context.isEmpty()) {
0361         //Avoid sequential files (fifo,char devices)
0362         if (!file.isRegularFile()) {
0363             return;
0364         }
0365 
0366         if (!m_search_binary && ignore_mimetypes.indexOf(file.mimetype()) != -1) {
0367             return;
0368         }
0369 
0370         bool found = false;
0371         bool isZippedOfficeDocument = false;
0372         int matchingLineNumber = 0;
0373 
0374         // FIXME: doesn't work with non local files
0375 
0376         const QRegularExpression xmlTags(QStringLiteral("<.*?>"));
0377         QString filename;
0378         QTextStream *stream = nullptr;
0379         QFile qf;
0380         QByteArray zippedXmlFileContent;
0381 
0382         // KWord's and OpenOffice.org's files are zipped...
0383         if (ooo_mimetypes.indexOf(file.mimetype()) != -1
0384             || koffice_mimetypes.indexOf(file.mimetype()) != -1) {
0385             KZip zipfile(file.url().path());
0386             KZipFileEntry *zipfileEntry;
0387 
0388             if (zipfile.open(QIODevice::ReadOnly)) {
0389                 const KArchiveDirectory *zipfileContent = zipfile.directory();
0390 
0391                 if (koffice_mimetypes.indexOf(file.mimetype()) != -1) {
0392                     zipfileEntry = (KZipFileEntry *)zipfileContent->entry(QStringLiteral("maindoc.xml"));
0393                 } else {
0394                     zipfileEntry = (KZipFileEntry *)zipfileContent->entry(QStringLiteral("content.xml")); //for OpenOffice.org
0395                 }
0396                 if (!zipfileEntry) {
0397                     qCWarning(KFING_LOG) << "Expected XML file not found in ZIP archive " << file.url();
0398                     return;
0399                 }
0400 
0401                 zippedXmlFileContent = zipfileEntry->data();
0402                 stream = new QTextStream(zippedXmlFileContent, QIODevice::ReadOnly);
0403 
0404                 isZippedOfficeDocument = true;
0405             } else {
0406                 qCWarning(KFING_LOG) << "Cannot open supposed ZIP file " << file.url();
0407             }
0408         } else if (!m_search_binary && !file.mimetype().startsWith(QLatin1String("text/"))
0409                    && file.url().isLocalFile() && !file.url().path().startsWith(QLatin1String("/dev"))) {
0410             QFile binfile(file.url().toLocalFile());
0411             if (!binfile.open(QIODevice::ReadOnly)) {
0412                 return; // err, whatever
0413             }
0414             // Check the first 128 bytes (see shared-mime spec)
0415             const QByteArray bindata = binfile.read(128);
0416             const char* pbin = bindata.data();
0417             const int end = qMin(128, bindata.size());
0418             for (int i = 0; i < end; ++i) {
0419                 if ((unsigned char)(pbin[i]) < 32 && pbin[i] != 9 && pbin[i] != 10 && pbin[i] != 13) // ASCII control character
0420                     return;
0421             }
0422         }
0423 
0424         if (!isZippedOfficeDocument) { //any other file or non-compressed KWord
0425             filename = file.url().path();
0426             if (filename.startsWith(QLatin1String("/dev/"))) {
0427                 return;
0428             }
0429             qf.setFileName(filename);
0430             qf.open(QIODevice::ReadOnly);
0431             stream = new QTextStream(&qf);
0432             stream->setEncoding(QStringConverter::System);
0433         }
0434 
0435         while (!stream->atEnd())
0436         {
0437             QString str = stream->readLine();
0438             matchingLineNumber++;
0439 
0440             //If the stream ended (readLine().isNull() is true) the file was read completely
0441             //Do *not* use isEmpty() because that will exit if there is an empty line in the file
0442             if (str.isNull()) {
0443                 break;
0444             }
0445             if (isZippedOfficeDocument) {
0446                 str.remove(xmlTags);
0447             }
0448 
0449             if (str.indexOf(m_context, 0, m_casesensitive ? Qt::CaseSensitive : Qt::CaseInsensitive) != -1) {
0450                 matchingLine = QString::number(matchingLineNumber) + QStringLiteral(": ") + str.trimmed();
0451                 found = true;
0452                 break;
0453             }
0454             qApp->processEvents();
0455         }
0456 
0457         delete stream;
0458 
0459         if (!found) {
0460             return;
0461         }
0462     }
0463 
0464     m_foundFilesList.append(QPair<KFileItem, QString>(file, matchingLine));
0465 }
0466 
0467 void KQuery::setContext(const QString &context, bool casesensitive, bool search_binary)
0468 {
0469     m_context = context;
0470     m_casesensitive = casesensitive;
0471     m_search_binary = search_binary;
0472     m_regexp.setPatternSyntax(QRegExp::Wildcard);
0473     if (casesensitive) {
0474         m_regexp.setCaseSensitivity(Qt::CaseSensitive);
0475     } else {
0476         m_regexp.setCaseSensitivity(Qt::CaseInsensitive);
0477     }
0478 }
0479 
0480 void KQuery::setMetaInfo(const QString &metainfo, const QString &metainfokey)
0481 {
0482     m_metainfo = metainfo;
0483     m_metainfokey = metainfokey;
0484 }
0485 
0486 void KQuery::setMimeType(const QStringList &mimetype)
0487 {
0488     m_mimetype = mimetype;
0489 }
0490 
0491 void KQuery::setFileType(int filetype)
0492 {
0493     m_filetype = filetype;
0494 }
0495 
0496 void KQuery::setSizeRange(int mode, KIO::filesize_t value1, KIO::filesize_t value2)
0497 {
0498     m_sizemode = mode;
0499     m_sizeboundary1 = value1;
0500     m_sizeboundary2 = value2;
0501 }
0502 
0503 void KQuery::setTimeRange(time_t from, time_t to)
0504 {
0505     m_timeFrom = from;
0506     m_timeTo = to;
0507 }
0508 
0509 void KQuery::setUsername(const QString &username)
0510 {
0511     m_username = username;
0512 }
0513 
0514 void KQuery::setGroupname(const QString &groupname)
0515 {
0516     m_groupname = groupname;
0517 }
0518 
0519 void KQuery::setRegExp(const QString &regexp, bool caseSensitive)
0520 {
0521     const QStringList strList = regexp.split(QLatin1Char(';'), Qt::SkipEmptyParts);
0522     //  QRegExp globChars ("[\\*\\?\\[\\]]", TRUE, FALSE);
0523     qDeleteAll(m_regexps);
0524     m_regexps.clear();
0525     m_regexps.reserve(strList.size());
0526 
0527     //  m_regexpsContainsGlobs.clear();
0528     for (const auto &str : strList) {
0529         //m_regexpsContainsGlobs.append(regExp->pattern().contains(globChars));
0530         m_regexps.append(new QRegExp(str, (caseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive), QRegExp::Wildcard));
0531     }
0532 }
0533 
0534 void KQuery::setRecursive(bool recursive)
0535 {
0536     m_recursive = recursive;
0537 }
0538 
0539 void KQuery::setPath(const QUrl &url)
0540 {
0541     m_url = url;
0542 }
0543 
0544 void KQuery::setUseFileIndex(bool useLocate)
0545 {
0546     m_useLocate = useLocate;
0547 }
0548 
0549 void KQuery::setShowHiddenFiles(bool showHidden)
0550 {
0551     m_showHiddenFiles = showHidden;
0552 }
0553 
0554 void KQuery::slotreadyReadStandardError()
0555 {
0556     KMessageBox::error(nullptr, QString::fromLocal8Bit(processLocate->readAllStandardOutput()), i18nc("@title:window", "Error while using locate"));
0557 }
0558 
0559 void KQuery::slotreadyReadStandardOutput()
0560 {
0561     bufferLocate += processLocate->readAllStandardOutput();
0562 }
0563 
0564 void KQuery::slotendProcessLocate(int code, QProcess::ExitStatus)
0565 {
0566     if (code == 0) {
0567         if (!bufferLocate.isEmpty()) {
0568             QString str = QString::fromLocal8Bit(bufferLocate);
0569             bufferLocate.clear();
0570             slotListEntries(str.split(QLatin1Char('\n'), Qt::SkipEmptyParts));
0571         }
0572     }
0573     Q_EMIT result(0);
0574 }
0575 
0576 #include "moc_kquery.cpp"