File indexing completed on 2024-04-28 05:49:12

0001 /*
0002     SPDX-FileCopyrightText: 2011-21 Kåre Särs <kare.sars@iki.fi>
0003     SPDX-FileCopyrightText: 2021 Christoph Cullmann <cullmann@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "SearchDiskFiles.h"
0009 
0010 #include <QDir>
0011 #include <QElapsedTimer>
0012 #include <QTextStream>
0013 #include <QUrl>
0014 
0015 SearchDiskFiles::SearchDiskFiles(SearchDiskFilesWorkList &worklist, const QRegularExpression &regexp, const bool includeBinaryFiles)
0016     : m_worklist(worklist)
0017     , m_regExp(regexp.pattern(), regexp.patternOptions()) // we WANT to kill the sharing, ELSE WE LOCK US DEAD!
0018     , m_includeBinaryFiles(includeBinaryFiles)
0019 {
0020     // ensure we have a proper thread name during e.g. perf profiling
0021     setObjectName(QStringLiteral("SearchDiskFiles"));
0022 }
0023 
0024 void SearchDiskFiles::run()
0025 {
0026     // do we need to search multiple lines?
0027     const bool multiLineSearch = m_regExp.patternOptions().testFlag(QRegularExpression::MultilineOption) && m_regExp.pattern().contains(QLatin1String("\\n"));
0028 
0029     // timer to emit matchesFound once in a time even for files without matches
0030     // this triggers process in the UI
0031     QElapsedTimer emitTimer;
0032     emitTimer.start();
0033 
0034     // search, pulls work from the shared work list for all workers
0035     while (true) {
0036         // get next file, we get empty string if all done or search canceled!
0037         const auto fileName = m_worklist.nextFileToSearch();
0038         if (fileName.isEmpty()) {
0039             break;
0040         }
0041 
0042         // open file early, this allows mime-type detection & search to use same io device
0043         QFile file(fileName);
0044         if (!file.open(QFile::ReadOnly)) {
0045             continue;
0046         }
0047 
0048         // let the right search algorithm compute the matches for this file
0049         QList<KateSearchMatch> matches;
0050         if (multiLineSearch) {
0051             matches = searchMultiLineRegExp(file);
0052         } else {
0053             matches = searchSingleLineRegExp(file);
0054         }
0055 
0056         // if we have matches or didn't emit something long enough, do so
0057         // we don't emit for all file to not stall get GUI and lock us a lot ;)
0058         if (!matches.isEmpty() || emitTimer.hasExpired(100)) {
0059             Q_EMIT matchesFound(QUrl::fromLocalFile(file.fileName()), matches);
0060             emitTimer.restart();
0061         }
0062     }
0063 }
0064 
0065 QList<KateSearchMatch> SearchDiskFiles::searchSingleLineRegExp(QFile &file)
0066 {
0067     QTextStream stream(&file);
0068     QList<KateSearchMatch> matches;
0069     QString line;
0070     int currentLineNumber = 0;
0071     while (stream.readLineInto(&line)) {
0072         // check if not binary data....
0073         // bad, but stuff better than asking QMimeDatabase which is a performance & threading disaster...
0074         if (!m_includeBinaryFiles && line.contains(QLatin1Char('\0'))) {
0075             // kill all seen matches and be done
0076             matches.clear();
0077             return matches;
0078         }
0079 
0080         // match all occurrences in the current line
0081         int columnToStartMatch = 0;
0082         bool canceled = false;
0083         while (true) {
0084             // handle canceling
0085             if (m_worklist.isCanceled()) {
0086                 canceled = true;
0087                 break;
0088             }
0089 
0090             // try match at the current interesting column, abort search loop if nothing found!
0091             const QRegularExpressionMatch match = m_regExp.match(line, columnToStartMatch);
0092             const int column = match.capturedStart();
0093             if (column == -1 || match.capturedLength() == 0)
0094                 break;
0095 
0096             // remember match
0097             const int endColumn = column + match.capturedLength();
0098             const auto [preContextStart, postContextLen] = MatchModel::contextLengths(line.size(), column, endColumn);
0099             const QString preContext = line.mid(preContextStart, column - preContextStart);
0100             const QString postContext = line.mid(endColumn, postContextLen);
0101             matches.push_back(KateSearchMatch{preContext,
0102                                               match.captured(),
0103                                               postContext,
0104                                               QString(),
0105                                               KTextEditor::Range{currentLineNumber, column, currentLineNumber, int(column + match.capturedLength())},
0106                                               true,
0107                                               true});
0108 
0109             // advance match column
0110             columnToStartMatch = column + match.capturedLength();
0111         }
0112 
0113         // handle canceling => above we only did break out of the matching loop!
0114         if (canceled) {
0115             break;
0116         }
0117 
0118         // advance to next line
0119         ++currentLineNumber;
0120     }
0121     return matches;
0122 }
0123 
0124 QList<KateSearchMatch> SearchDiskFiles::searchMultiLineRegExp(QFile &file)
0125 {
0126     int column = 0;
0127     int line = 0;
0128     QString fullDoc;
0129     QList<int> lineStart;
0130     QRegularExpression tmpRegExp = m_regExp;
0131 
0132     QList<KateSearchMatch> matches;
0133     QTextStream stream(&file);
0134     fullDoc = stream.readAll();
0135 
0136     // check if not binary data....
0137     // bad, but stuff better than asking QMimeDatabase which is a performance & threading disaster...
0138     if (!m_includeBinaryFiles && fullDoc.contains(QLatin1Char('\0'))) {
0139         // kill all seen matches and be done
0140         matches.clear();
0141         return matches;
0142     }
0143 
0144     fullDoc.remove(QLatin1Char('\r'));
0145 
0146     lineStart.clear();
0147     lineStart << 0;
0148     for (int i = 0; i < fullDoc.size() - 1; i++) {
0149         if (fullDoc[i] == QLatin1Char('\n')) {
0150             lineStart << i + 1;
0151         }
0152     }
0153     if (tmpRegExp.pattern().endsWith(QLatin1Char('$'))) {
0154         fullDoc += QLatin1Char('\n');
0155         QString newPatern = tmpRegExp.pattern();
0156         newPatern.replace(QStringLiteral("$"), QStringLiteral("(?=\\n)"));
0157         tmpRegExp.setPattern(newPatern);
0158     }
0159 
0160     QRegularExpressionMatch match;
0161     match = tmpRegExp.match(fullDoc);
0162     column = match.capturedStart();
0163     while (column != -1 && !match.captured().isEmpty()) {
0164         if (m_worklist.isCanceled()) {
0165             break;
0166         }
0167         // search for the line number of the match
0168         int i;
0169         line = -1;
0170         for (i = 1; i < lineStart.size(); i++) {
0171             if (lineStart[i] > column) {
0172                 line = i - 1;
0173                 break;
0174             }
0175         }
0176         if (line == -1) {
0177             break;
0178         }
0179         int startColumn = (column - lineStart[line]);
0180         int endLine = line + match.captured().count(QLatin1Char('\n'));
0181         int lastNL = match.captured().lastIndexOf(QLatin1Char('\n'));
0182         int endColumn = lastNL == -1 ? startColumn + match.captured().length() : match.captured().length() - lastNL - 1;
0183 
0184         int preContextStart = qMax(lineStart[line], column - MatchModel::PreContextLen);
0185         QString preContext = fullDoc.mid(preContextStart, column - preContextStart);
0186         QString postContext = fullDoc.mid(column + match.captured().length(), MatchModel::PostContextLen);
0187 
0188         matches.push_back(
0189             KateSearchMatch{preContext, match.captured(), postContext, QString(), KTextEditor::Range{line, startColumn, endLine, endColumn}, true, true});
0190 
0191         match = tmpRegExp.match(fullDoc, column + match.capturedLength());
0192         column = match.capturedStart();
0193     }
0194     return matches;
0195 }
0196 
0197 #include "moc_SearchDiskFiles.cpp"