File indexing completed on 2024-06-16 04:20:01

0001 /*
0002     Kchmviewer - a CHM and EPUB file viewer with broad language support
0003     SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com
0004 
0005     SPDX-License-Identifier: GPL-3.0-or-later
0006 */
0007 
0008 #ifndef EBOOK_SEARCH_INDEX_H
0009 #define EBOOK_SEARCH_INDEX_H
0010 
0011 #include <QDataStream>
0012 #include <QHash>
0013 #include <QStringList>
0014 #include <QUrl>
0015 #include <QVector>
0016 
0017 #include "helper_entitydecoder.h"
0018 
0019 class EBook;
0020 
0021 // This code is based on some pretty old version of Qt Assistant
0022 namespace QtAs
0023 {
0024 struct Document {
0025     Document(int d, int f)
0026         : docNumber(d)
0027         , frequency(f)
0028     {
0029     }
0030     Document()
0031         : docNumber(-1)
0032         , frequency(0)
0033     {
0034     }
0035     bool operator==(const Document doc) const
0036     {
0037         return docNumber == doc.docNumber;
0038     }
0039 
0040     bool operator<(const Document doc) const
0041     {
0042         return frequency > doc.frequency;
0043     }
0044 
0045     bool operator<=(const Document doc) const
0046     {
0047         return frequency >= doc.frequency;
0048     }
0049 
0050     bool operator>(const Document doc) const
0051     {
0052         return frequency < doc.frequency;
0053     }
0054 
0055     qint16 docNumber;
0056     qint16 frequency;
0057 };
0058 
0059 QDataStream &operator>>(QDataStream &s, Document &l);
0060 QDataStream &operator<<(QDataStream &s, const Document l);
0061 
0062 class Index : public QObject
0063 {
0064     Q_OBJECT
0065 public:
0066     Index();
0067 
0068     void writeDict(QDataStream &stream);
0069     bool readDict(QDataStream &stream);
0070     bool makeIndex(const QList<QUrl> &docs, EBook *chmFile);
0071     QList<QUrl> query(const QStringList &, const QStringList &, const QStringList &, EBook *chmFile);
0072     QString getCharsSplit() const
0073     {
0074         return m_charssplit;
0075     }
0076     QString getCharsPartOfWord() const
0077     {
0078         return m_charsword;
0079     }
0080 
0081 Q_SIGNALS:
0082     void indexingProgress(int, const QString &);
0083 
0084 public Q_SLOTS:
0085     void setLastWinClosed();
0086 
0087 private:
0088     struct Entry {
0089         explicit Entry(int d)
0090         {
0091             documents.append(Document(d, 1));
0092         }
0093         explicit Entry(const QVector<Document> &l)
0094             : documents(l)
0095         {
0096         }
0097         QVector<Document> documents;
0098     };
0099 
0100     struct PosEntry {
0101         explicit PosEntry(int p)
0102         {
0103             positions.append(p);
0104         }
0105         QList<uint> positions;
0106     };
0107 
0108     bool parseDocumentToStringlist(EBook *chmFile, const QUrl &filename, QStringList &tokenlist);
0109     void insertInDict(const QString &, int);
0110 
0111     QStringList getWildcardTerms(const QString &);
0112     QStringList split(const QString &);
0113     QList<Document> setupDummyTerm(const QStringList &);
0114     bool searchForPhrases(const QStringList &phrases, const QStringList &words, const QUrl &filename, EBook *chmFile);
0115 
0116     QList<QUrl> docList;
0117     QHash<QString, Entry *> dict;
0118     QHash<QString, PosEntry *> miniDict;
0119     bool lastWindowClosed;
0120     HelperEntityDecoder entityDecoder;
0121 
0122     // Those characters are splitters (i.e. split the word), but added themselves into dictionary too.
0123     // This makes the dictionary MUCH larger, but ensure that for the piece of "window->print" both
0124     // search for "print" and "->print" will find it.
0125     QString m_charssplit;
0126 
0127     // Those characters are parts of word - for example, '_' is here, and search for _debug will find only _debug.
0128     QString m_charsword;
0129 };
0130 
0131 };
0132 
0133 Q_DECLARE_TYPEINFO(QtAs::Document, Q_MOVABLE_TYPE);
0134 
0135 #endif // EBOOK_SEARCH_INDEX_H