// File indexing completed on 2024-04-28 05:49:12
0001 /* 0002 SPDX-FileCopyrightText: 2011-21 Kåre Särs <kare.sars@iki.fi> 0003 SPDX-FileCopyrightText: 2021 Christoph Cullmann <cullmann@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "SearchDiskFiles.h" 0009 0010 #include <QDir> 0011 #include <QElapsedTimer> 0012 #include <QTextStream> 0013 #include <QUrl> 0014 0015 SearchDiskFiles::SearchDiskFiles(SearchDiskFilesWorkList &worklist, const QRegularExpression ®exp, const bool includeBinaryFiles) 0016 : m_worklist(worklist) 0017 , m_regExp(regexp.pattern(), regexp.patternOptions()) // we WANT to kill the sharing, ELSE WE LOCK US DEAD! 0018 , m_includeBinaryFiles(includeBinaryFiles) 0019 { 0020 // ensure we have a proper thread name during e.g. perf profiling 0021 setObjectName(QStringLiteral("SearchDiskFiles")); 0022 } 0023 0024 void SearchDiskFiles::run() 0025 { 0026 // do we need to search multiple lines? 0027 const bool multiLineSearch = m_regExp.patternOptions().testFlag(QRegularExpression::MultilineOption) && m_regExp.pattern().contains(QLatin1String("\\n")); 0028 0029 // timer to emit matchesFound once in a time even for files without matches 0030 // this triggers process in the UI 0031 QElapsedTimer emitTimer; 0032 emitTimer.start(); 0033 0034 // search, pulls work from the shared work list for all workers 0035 while (true) { 0036 // get next file, we get empty string if all done or search canceled! 
0037 const auto fileName = m_worklist.nextFileToSearch(); 0038 if (fileName.isEmpty()) { 0039 break; 0040 } 0041 0042 // open file early, this allows mime-type detection & search to use same io device 0043 QFile file(fileName); 0044 if (!file.open(QFile::ReadOnly)) { 0045 continue; 0046 } 0047 0048 // let the right search algorithm compute the matches for this file 0049 QList<KateSearchMatch> matches; 0050 if (multiLineSearch) { 0051 matches = searchMultiLineRegExp(file); 0052 } else { 0053 matches = searchSingleLineRegExp(file); 0054 } 0055 0056 // if we have matches or didn't emit something long enough, do so 0057 // we don't emit for all file to not stall get GUI and lock us a lot ;) 0058 if (!matches.isEmpty() || emitTimer.hasExpired(100)) { 0059 Q_EMIT matchesFound(QUrl::fromLocalFile(file.fileName()), matches); 0060 emitTimer.restart(); 0061 } 0062 } 0063 } 0064 0065 QList<KateSearchMatch> SearchDiskFiles::searchSingleLineRegExp(QFile &file) 0066 { 0067 QTextStream stream(&file); 0068 QList<KateSearchMatch> matches; 0069 QString line; 0070 int currentLineNumber = 0; 0071 while (stream.readLineInto(&line)) { 0072 // check if not binary data.... 0073 // bad, but stuff better than asking QMimeDatabase which is a performance & threading disaster... 0074 if (!m_includeBinaryFiles && line.contains(QLatin1Char('\0'))) { 0075 // kill all seen matches and be done 0076 matches.clear(); 0077 return matches; 0078 } 0079 0080 // match all occurrences in the current line 0081 int columnToStartMatch = 0; 0082 bool canceled = false; 0083 while (true) { 0084 // handle canceling 0085 if (m_worklist.isCanceled()) { 0086 canceled = true; 0087 break; 0088 } 0089 0090 // try match at the current interesting column, abort search loop if nothing found! 
0091 const QRegularExpressionMatch match = m_regExp.match(line, columnToStartMatch); 0092 const int column = match.capturedStart(); 0093 if (column == -1 || match.capturedLength() == 0) 0094 break; 0095 0096 // remember match 0097 const int endColumn = column + match.capturedLength(); 0098 const auto [preContextStart, postContextLen] = MatchModel::contextLengths(line.size(), column, endColumn); 0099 const QString preContext = line.mid(preContextStart, column - preContextStart); 0100 const QString postContext = line.mid(endColumn, postContextLen); 0101 matches.push_back(KateSearchMatch{preContext, 0102 match.captured(), 0103 postContext, 0104 QString(), 0105 KTextEditor::Range{currentLineNumber, column, currentLineNumber, int(column + match.capturedLength())}, 0106 true, 0107 true}); 0108 0109 // advance match column 0110 columnToStartMatch = column + match.capturedLength(); 0111 } 0112 0113 // handle canceling => above we only did break out of the matching loop! 0114 if (canceled) { 0115 break; 0116 } 0117 0118 // advance to next line 0119 ++currentLineNumber; 0120 } 0121 return matches; 0122 } 0123 0124 QList<KateSearchMatch> SearchDiskFiles::searchMultiLineRegExp(QFile &file) 0125 { 0126 int column = 0; 0127 int line = 0; 0128 QString fullDoc; 0129 QList<int> lineStart; 0130 QRegularExpression tmpRegExp = m_regExp; 0131 0132 QList<KateSearchMatch> matches; 0133 QTextStream stream(&file); 0134 fullDoc = stream.readAll(); 0135 0136 // check if not binary data.... 0137 // bad, but stuff better than asking QMimeDatabase which is a performance & threading disaster... 
0138 if (!m_includeBinaryFiles && fullDoc.contains(QLatin1Char('\0'))) { 0139 // kill all seen matches and be done 0140 matches.clear(); 0141 return matches; 0142 } 0143 0144 fullDoc.remove(QLatin1Char('\r')); 0145 0146 lineStart.clear(); 0147 lineStart << 0; 0148 for (int i = 0; i < fullDoc.size() - 1; i++) { 0149 if (fullDoc[i] == QLatin1Char('\n')) { 0150 lineStart << i + 1; 0151 } 0152 } 0153 if (tmpRegExp.pattern().endsWith(QLatin1Char('$'))) { 0154 fullDoc += QLatin1Char('\n'); 0155 QString newPatern = tmpRegExp.pattern(); 0156 newPatern.replace(QStringLiteral("$"), QStringLiteral("(?=\\n)")); 0157 tmpRegExp.setPattern(newPatern); 0158 } 0159 0160 QRegularExpressionMatch match; 0161 match = tmpRegExp.match(fullDoc); 0162 column = match.capturedStart(); 0163 while (column != -1 && !match.captured().isEmpty()) { 0164 if (m_worklist.isCanceled()) { 0165 break; 0166 } 0167 // search for the line number of the match 0168 int i; 0169 line = -1; 0170 for (i = 1; i < lineStart.size(); i++) { 0171 if (lineStart[i] > column) { 0172 line = i - 1; 0173 break; 0174 } 0175 } 0176 if (line == -1) { 0177 break; 0178 } 0179 int startColumn = (column - lineStart[line]); 0180 int endLine = line + match.captured().count(QLatin1Char('\n')); 0181 int lastNL = match.captured().lastIndexOf(QLatin1Char('\n')); 0182 int endColumn = lastNL == -1 ? 
startColumn + match.captured().length() : match.captured().length() - lastNL - 1; 0183 0184 int preContextStart = qMax(lineStart[line], column - MatchModel::PreContextLen); 0185 QString preContext = fullDoc.mid(preContextStart, column - preContextStart); 0186 QString postContext = fullDoc.mid(column + match.captured().length(), MatchModel::PostContextLen); 0187 0188 matches.push_back( 0189 KateSearchMatch{preContext, match.captured(), postContext, QString(), KTextEditor::Range{line, startColumn, endLine, endColumn}, true, true}); 0190 0191 match = tmpRegExp.match(fullDoc, column + match.capturedLength()); 0192 column = match.capturedStart(); 0193 } 0194 return matches; 0195 } 0196 0197 #include "moc_SearchDiskFiles.cpp"