// File indexing completed on 2024-06-16 04:20:01
0001 /* 0002 Kchmviewer - a CHM and EPUB file viewer with broad language support 0003 SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com 0004 0005 SPDX-License-Identifier: GPL-3.0-or-later 0006 */ 0007 0008 #ifndef EBOOK_SEARCH_INDEX_H 0009 #define EBOOK_SEARCH_INDEX_H 0010 0011 #include <QDataStream> 0012 #include <QHash> 0013 #include <QStringList> 0014 #include <QUrl> 0015 #include <QVector> 0016 0017 #include "helper_entitydecoder.h" 0018 0019 class EBook; 0020 0021 // This code is based on some pretty old version of Qt Assistant 0022 namespace QtAs 0023 { 0024 struct Document { 0025 Document(int d, int f) 0026 : docNumber(d) 0027 , frequency(f) 0028 { 0029 } 0030 Document() 0031 : docNumber(-1) 0032 , frequency(0) 0033 { 0034 } 0035 bool operator==(const Document doc) const 0036 { 0037 return docNumber == doc.docNumber; 0038 } 0039 0040 bool operator<(const Document doc) const 0041 { 0042 return frequency > doc.frequency; 0043 } 0044 0045 bool operator<=(const Document doc) const 0046 { 0047 return frequency >= doc.frequency; 0048 } 0049 0050 bool operator>(const Document doc) const 0051 { 0052 return frequency < doc.frequency; 0053 } 0054 0055 qint16 docNumber; 0056 qint16 frequency; 0057 }; 0058 0059 QDataStream &operator>>(QDataStream &s, Document &l); 0060 QDataStream &operator<<(QDataStream &s, const Document l); 0061 0062 class Index : public QObject 0063 { 0064 Q_OBJECT 0065 public: 0066 Index(); 0067 0068 void writeDict(QDataStream &stream); 0069 bool readDict(QDataStream &stream); 0070 bool makeIndex(const QList<QUrl> &docs, EBook *chmFile); 0071 QList<QUrl> query(const QStringList &, const QStringList &, const QStringList &, EBook *chmFile); 0072 QString getCharsSplit() const 0073 { 0074 return m_charssplit; 0075 } 0076 QString getCharsPartOfWord() const 0077 { 0078 return m_charsword; 0079 } 0080 0081 Q_SIGNALS: 0082 void indexingProgress(int, const QString &); 0083 0084 public Q_SLOTS: 0085 void setLastWinClosed(); 0086 0087 
private: 0088 struct Entry { 0089 explicit Entry(int d) 0090 { 0091 documents.append(Document(d, 1)); 0092 } 0093 explicit Entry(const QVector<Document> &l) 0094 : documents(l) 0095 { 0096 } 0097 QVector<Document> documents; 0098 }; 0099 0100 struct PosEntry { 0101 explicit PosEntry(int p) 0102 { 0103 positions.append(p); 0104 } 0105 QList<uint> positions; 0106 }; 0107 0108 bool parseDocumentToStringlist(EBook *chmFile, const QUrl &filename, QStringList &tokenlist); 0109 void insertInDict(const QString &, int); 0110 0111 QStringList getWildcardTerms(const QString &); 0112 QStringList split(const QString &); 0113 QList<Document> setupDummyTerm(const QStringList &); 0114 bool searchForPhrases(const QStringList &phrases, const QStringList &words, const QUrl &filename, EBook *chmFile); 0115 0116 QList<QUrl> docList; 0117 QHash<QString, Entry *> dict; 0118 QHash<QString, PosEntry *> miniDict; 0119 bool lastWindowClosed; 0120 HelperEntityDecoder entityDecoder; 0121 0122 // Those characters are splitters (i.e. split the word), but added themselves into dictionary too. 0123 // This makes the dictionary MUCH larger, but ensure that for the piece of "window->print" both 0124 // search for "print" and "->print" will find it. 0125 QString m_charssplit; 0126 0127 // Those characters are parts of word - for example, '_' is here, and search for _debug will find only _debug. 0128 QString m_charsword; 0129 }; 0130 0131 }; 0132 0133 Q_DECLARE_TYPEINFO(QtAs::Document, Q_MOVABLE_TYPE); 0134 0135 #endif // EBOOK_SEARCH_INDEX_H