File indexing completed on 2024-04-21 14:52:26

0001 /*
0002     SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.1-or-later
0005 */
0006 
0007 #include "doctermscodec.h"
0008 
0009 using namespace Baloo;
0010 
0011 QByteArray DocTermsCodec::encode(const QVector<QByteArray>& terms)
0012 {
0013     Q_ASSERT(!terms.isEmpty());
0014 
0015     QByteArray full;
0016     full.append(terms.first());
0017     full.append('\0');
0018 
0019     for (int i = 1; i < terms.size(); i++) {
0020         const QByteArray term = terms[i];
0021         const QByteArray prevTerm = terms[i-1];
0022 
0023         if (term.startsWith(prevTerm)) {
0024             full.append(term.mid(prevTerm.size()));
0025             full.append(static_cast<char>(1));
0026         } else {
0027             full.append(term);
0028             full.append('\0');
0029         }
0030     }
0031 
0032     return full;
0033 }
0034 
0035 QVector<QByteArray> DocTermsCodec::decode(const QByteArray& full)
0036 {
0037     Q_ASSERT(full.size());
0038 
0039     QVector<QByteArray> list;
0040 
0041     int prevWordBoundary = 0;
0042     for (int i = 0; i < full.size(); i++) {
0043         if (full[i] == 1) {
0044             if (list.isEmpty()) {
0045                 // corrupted entry - no way to recover
0046                 return list;
0047             }
0048 
0049             QByteArray arr(full.constData() + prevWordBoundary, i - prevWordBoundary);
0050 
0051             list << list.last() + arr;
0052             prevWordBoundary = i + 1;
0053             continue;
0054         }
0055 
0056         if (full[i] == '\0') {
0057             QByteArray arr(full.constData() + prevWordBoundary, i - prevWordBoundary);
0058 
0059             list << arr;
0060             prevWordBoundary = i + 1;
0061             continue;
0062         }
0063     }
0064 
0065     return list;
0066 }