Warning, file /pim/akonadi-search/xapian/xapianqueryparser.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 * SPDX-FileCopyrightText: 2014 Vishesh Handa <me@vhanda.in> 0003 * 0004 * SPDX-License-Identifier: LGPL-2.1-or-later 0005 * 0006 */ 0007 0008 #include "xapianqueryparser.h" 0009 0010 #include "akonadi_search_xapian_debug.h" 0011 #include <QStringList> 0012 #include <QTextBoundaryFinder> 0013 0014 using namespace Akonadi::Search; 0015 0016 XapianQueryParser::XapianQueryParser() = default; 0017 0018 void XapianQueryParser::setDatabase(Xapian::Database *db) 0019 { 0020 m_db = db; 0021 } 0022 0023 namespace 0024 { 0025 struct Term { 0026 std::string t; 0027 uint count; 0028 0029 // pop_heap pops the largest element, we want the smallest to be popped 0030 bool operator<(const Term &rhs) const 0031 { 0032 return count > rhs.count; 0033 } 0034 }; 0035 0036 Xapian::Query makeQuery(const QString &string, int position, Xapian::Database *db) 0037 { 0038 if (!db) { 0039 const QByteArray arr = string.toUtf8(); 0040 const std::string stdString(arr.constData(), arr.size()); 0041 return Xapian::Query(stdString, 1, position); 0042 } 0043 0044 // Lets just keep the top x (+1 for push_heap) 0045 static const int MaxTerms = 100; 0046 QList<Term> topTerms; 0047 topTerms.reserve(MaxTerms + 1); 0048 0049 const std::string stdString(string.toStdString()); 0050 Xapian::TermIterator it = db->allterms_begin(stdString); 0051 Xapian::TermIterator end = db->allterms_end(stdString); 0052 for (; it != end; ++it) { 0053 Term term; 0054 term.t = *it; 0055 term.count = db->get_collection_freq(term.t); 0056 0057 if (topTerms.size() < MaxTerms) { 0058 topTerms.push_back(term); 0059 std::push_heap(topTerms.begin(), topTerms.end()); 0060 } else { 0061 // Remove the term with the min count 0062 topTerms.push_back(term); 0063 std::push_heap(topTerms.begin(), topTerms.end()); 0064 0065 std::pop_heap(topTerms.begin(), topTerms.end()); 0066 topTerms.pop_back(); 0067 } 0068 } 0069 0070 QList<Xapian::Query> queries; 0071 queries.reserve(topTerms.size()); 0072 0073 for (const Term &term : std::as_const(topTerms)) { 0074 queries << Xapian::Query(term.t, 1, position); 0075 } 0076 0077 if (queries.isEmpty()) { 0078 return Xapian::Query(string.toStdString(), 1, position); 0079 } 0080 Xapian::Query finalQ(Xapian::Query::OP_SYNONYM, queries.begin(), queries.end()); 0081 return finalQ; 0082 } 0083 0084 bool containsSpace(const QString &string) 0085 { 0086 for (const QChar &ch : string) { 0087 if (ch.isSpace()) { 0088 return true; 0089 } 0090 } 0091 0092 return false; 0093 } 0094 } 0095 0096 Xapian::Query XapianQueryParser::parseQuery(const QString &text, const QString &prefix) 0097 { 0098 /* 0099 Xapian::QueryParser parser; 0100 parser.set_default_op(Xapian::Query::OP_AND); 0101 0102 if (m_db) 0103 parser.set_database(*m_db); 0104 0105 int flags = Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_PARTIAL; 0106 0107 std::string stdString(text.toStdString()); 0108 return parser.parse_query(stdString, flags); 0109 */ 0110 0111 if (text.isEmpty()) { 0112 return {}; 0113 } 0114 0115 QList<Xapian::Query> queries; 0116 QList<Xapian::Query> phraseQueries; 0117 0118 int start = 0; 0119 int end = 0; 0120 int position = 0; 0121 0122 bool inDoubleQuotes = false; 0123 bool inSingleQuotes = false; 0124 bool inPhrase = false; 0125 0126 QTextBoundaryFinder bf(QTextBoundaryFinder::Word, text); 0127 for (; bf.position() != -1; bf.toNextBoundary()) { 0128 if (bf.boundaryReasons() & QTextBoundaryFinder::StartOfItem) { 0129 // 0130 // Check the previous delimiter 0131 int pos = bf.position(); 0132 if (pos != end) { 0133 QString delim = text.mid(end, pos - end); 0134 if (delim.contains(QLatin1Char('"'))) { 0135 if (inDoubleQuotes) { 0136 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end()); 0137 phraseQueries.clear(); 0138 inDoubleQuotes = false; 0139 } else { 0140 inDoubleQuotes = true; 0141 } 0142 } else if (delim.contains(QLatin1Char('\''))) { 0143 if (inSingleQuotes) { 0144 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end()); 0145 phraseQueries.clear(); 0146 inSingleQuotes = false; 0147 } else { 0148 inSingleQuotes = true; 0149 } 0150 } else if (!containsSpace(delim)) { 0151 if (!inPhrase && !queries.isEmpty()) { 0152 phraseQueries << queries.takeLast(); 0153 } 0154 inPhrase = true; 0155 } else if (inPhrase && !phraseQueries.isEmpty()) { 0156 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end()); 0157 phraseQueries.clear(); 0158 inPhrase = false; 0159 } 0160 } 0161 0162 start = bf.position(); 0163 continue; 0164 } else if (bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) { 0165 end = bf.position(); 0166 0167 QString str = text.mid(start, end - start); 0168 0169 // Get the string ready for saving 0170 str = str.toLower(); 0171 0172 // Remove all accents 0173 const QString denormalized = str.normalized(QString::NormalizationForm_KD); 0174 QString cleanString; 0175 for (const QChar &ch : denormalized) { 0176 auto cat = ch.category(); 0177 if (cat != QChar::Mark_NonSpacing && cat != QChar::Mark_SpacingCombining && cat != QChar::Mark_Enclosing) { 0178 cleanString.append(ch); 0179 } 0180 } 0181 0182 str = cleanString.normalized(QString::NormalizationForm_KC); 0183 const QStringList lst = str.split(QLatin1Char('_'), Qt::SkipEmptyParts); 0184 for (const QString &t : lst) { 0185 const QString term = prefix + t; 0186 0187 position++; 0188 if (inDoubleQuotes || inSingleQuotes || inPhrase) { 0189 const QByteArray arr = term.toUtf8(); 0190 const std::string str(arr.constData(), arr.length()); 0191 phraseQueries << Xapian::Query(str, 1, position); 0192 } else { 0193 if (m_autoExpand) { 0194 queries << makeQuery(term, position, m_db); 0195 } else { 0196 queries << Xapian::Query(term.toStdString(), 1, position); 0197 } 0198 } 0199 } 0200 } 0201 } 0202 0203 if (inPhrase) { 0204 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end()); 0205 phraseQueries.clear(); 0206 } 0207 0208 if (!phraseQueries.isEmpty()) { 0209 queries << phraseQueries; 0210 phraseQueries.clear(); 0211 } 0212 0213 if (queries.size() == 1) { 0214 return queries.first(); 0215 } 0216 return {Xapian::Query::OP_AND, queries.begin(), queries.end()}; 0217 } 0218 0219 void XapianQueryParser::setAutoExapand(bool autoexpand) 0220 { 0221 m_autoExpand = autoexpand; 0222 } 0223 0224 Xapian::Query XapianQueryParser::expandWord(const QString &word, const QString &prefix) 0225 { 0226 const std::string stdString((prefix + word).toUtf8().constData()); 0227 Xapian::TermIterator it = m_db->allterms_begin(stdString); 0228 Xapian::TermIterator end = m_db->allterms_end(stdString); 0229 0230 QList<Xapian::Query> queries; 0231 for (; it != end; ++it) { 0232 queries << Xapian::Query(*it); 0233 } 0234 0235 if (queries.isEmpty()) { 0236 return Xapian::Query(stdString); 0237 } 0238 Xapian::Query finalQ(Xapian::Query::OP_SYNONYM, queries.begin(), queries.end()); 0239 return finalQ; 0240 }