File indexing completed on 2024-05-12 05:11:21
0001 /* 0002 * SPDX-FileCopyrightText: 2014 Vishesh Handa <me@vhanda.in> 0003 * 0004 * SPDX-License-Identifier: LGPL-2.1-or-later 0005 * 0006 */ 0007 0008 #include "xapiantermgenerator.h" 0009 0010 #include "akonadi_search_xapian_debug.h" 0011 #include <QTextBoundaryFinder> 0012 0013 using namespace Akonadi::Search; 0014 0015 XapianTermGenerator::XapianTermGenerator(Xapian::Document *doc) 0016 : m_doc(doc) 0017 { 0018 if (doc) { 0019 m_termGen.set_document(*doc); 0020 } 0021 } 0022 0023 void XapianTermGenerator::indexText(const QString &text) 0024 { 0025 indexText(text, QString()); 0026 } 0027 0028 void XapianTermGenerator::setDocument(Xapian::Document *doc) 0029 { 0030 m_doc = doc; 0031 } 0032 0033 QStringList XapianTermGenerator::termList(const QString &text) 0034 { 0035 int start = 0; 0036 int end = 0; 0037 0038 QStringList list; 0039 QTextBoundaryFinder bf(QTextBoundaryFinder::Word, text); 0040 for (; bf.position() != -1; bf.toNextBoundary()) { 0041 if (bf.boundaryReasons() & QTextBoundaryFinder::StartOfItem) { 0042 start = bf.position(); 0043 continue; 0044 } else if (bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) { 0045 end = bf.position(); 0046 0047 QString str = text.mid(start, end - start); 0048 0049 // Get the string ready for saving 0050 str = str.toLower(); 0051 0052 // Remove all accents 0053 const QString denormalized = str.normalized(QString::NormalizationForm_KD); 0054 0055 QString cleanString; 0056 cleanString.reserve(denormalized.size()); 0057 for (const QChar &ch : denormalized) { 0058 const auto cat = ch.category(); 0059 if (cat != QChar::Mark_NonSpacing && cat != QChar::Mark_SpacingCombining && cat != QChar::Mark_Enclosing) { 0060 cleanString.append(ch); 0061 } 0062 } 0063 0064 str = cleanString.normalized(QString::NormalizationForm_KC); 0065 list << str.split(QLatin1Char('_'), Qt::SkipEmptyParts); 0066 } 0067 } 0068 0069 return list; 0070 } 0071 0072 void XapianTermGenerator::indexText(const QString &text, const QString &prefix, int wdfInc) 0073 { 0074 const QByteArray par = prefix.toUtf8(); 0075 const QByteArray ta = text.toUtf8(); 0076 m_termGen.index_text(ta.constData(), wdfInc, par.constData()); 0077 0078 const QStringList terms = termList(text); 0079 for (const QString &term : terms) { 0080 const QByteArray arr = term.toUtf8(); 0081 0082 const QByteArray finalArr = par + arr; 0083 const std::string stdString(finalArr.constData(), finalArr.size()); 0084 m_doc->add_posting(stdString, m_position, wdfInc); 0085 0086 m_position++; 0087 } 0088 } 0089 0090 int XapianTermGenerator::position() const 0091 { 0092 return m_position; 0093 } 0094 0095 void XapianTermGenerator::setPosition(int position) 0096 { 0097 m_position = position; 0098 }