File indexing completed on 2024-04-21 03:51:40

0001 /*
0002     This file is part of the KDE Baloo project.
0003     SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-or-later
0006 */
0007 
0008 #include "phraseanditerator.h"
0009 #include "positioninfo.h"
0010 
0011 using namespace Baloo;
0012 
0013 PhraseAndIterator::PhraseAndIterator(const QVector<VectorPositionInfoIterator*>& iterators)
0014     : m_iterators(iterators)
0015     , m_docId(0)
0016 {
0017     if (m_iterators.contains(nullptr)) {
0018         qDeleteAll(m_iterators);
0019         m_iterators.clear();
0020     }
0021 }
0022 
0023 PhraseAndIterator::~PhraseAndIterator()
0024 {
0025     qDeleteAll(m_iterators);
0026 }
0027 
0028 quint64 PhraseAndIterator::docId() const
0029 {
0030     return m_docId;
0031 }
0032 
0033 bool PhraseAndIterator::checkIfPositionsMatch()
0034 {
0035     using Offset = decltype(m_iterators[0]->positions().size());
0036     using Position = std::remove_reference<decltype(m_iterators[0]->positions()[0])>::type;
0037 
0038     std::vector<Offset> offsets;
0039     offsets.resize(m_iterators.size());
0040 
0041     const auto firstPositions = m_iterators[0]->positions();
0042     Position lower_bound = 0;
0043 
0044     while (offsets[0] < firstPositions.size()) {
0045         for (int i = 0; i < m_iterators.size(); i++) {
0046             const auto positions = m_iterators[i]->positions();
0047             Offset off = offsets[i];
0048 
0049             for (; off < positions.size(); ++off) {
0050                 Position pos = positions[off];
0051                 // Adjust the position. We have a match iff
0052                 // term0 is at pos N, term1 at N+1, term2 at N+2 ...
0053                 if (pos >= (lower_bound + i)) {
0054                     lower_bound = pos - i;
0055                     break;
0056                 }
0057             }
0058             if (off >= positions.size()) {
0059                 return false;
0060             }
0061             offsets[i] = off;
0062         }
0063 
0064         if (lower_bound == firstPositions[offsets[0]]) {
0065             // lower_bound has not changed, i.e. all offsets are aligned
0066             for (int i = 0; i < m_iterators.size(); i++) {
0067                 auto positions = m_iterators[i]->positions();
0068             }
0069             return true;
0070         } else {
0071             offsets[0]++;
0072         }
0073     }
0074     return false;
0075 }
0076 
0077 quint64 PhraseAndIterator::skipTo(quint64 id)
0078 {
0079     if (m_iterators.isEmpty()) {
0080         m_docId = 0;
0081         return 0;
0082     }
0083 
0084     while (true) {
0085         quint64 lower_bound = id;
0086         for (PostingIterator* iter : std::as_const(m_iterators)) {
0087             lower_bound = iter->skipTo(lower_bound);
0088 
0089             if (lower_bound == 0) {
0090                 m_docId = 0;
0091                 return 0;
0092             }
0093         }
0094 
0095         if (lower_bound == id) {
0096             if (checkIfPositionsMatch()) {
0097                 m_docId = lower_bound;
0098                 return lower_bound;
0099             } else {
0100                 lower_bound = m_iterators[0]->next();
0101             }
0102         }
0103         id = lower_bound;
0104     }
0105 }
0106 
0107 quint64 PhraseAndIterator::next()
0108 {
0109     if (m_iterators.isEmpty()) {
0110         m_docId = 0;
0111         return 0;
0112     }
0113 
0114     m_docId = m_iterators[0]->next();
0115     m_docId = skipTo(m_docId);
0116 
0117     return m_docId;
0118 }