// File indexing completed on 2024-06-16 04:20:01
0001 /* 0002 Kchmviewer - a CHM and EPUB file viewer with broad language support 0003 SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com 0004 0005 SPDX-License-Identifier: GPL-3.0-or-later 0006 */ 0007 0008 #ifndef EBOOK_EPUB_H 0009 #define EBOOK_EPUB_H 0010 0011 #include <QFile> 0012 #include <QMap> 0013 #include <QString> 0014 #include <QStringList> 0015 #include <QUrl> 0016 0017 #include "ebook.h" 0018 #include "zip.h" 0019 0020 class QXmlDefaultHandler; 0021 0022 class EBook_EPUB : public EBook 0023 { 0024 public: 0025 EBook_EPUB(); 0026 ~EBook_EPUB() override; 0027 0028 /*! 0029 * \brief Attempts to load epub file. 0030 * \param archiveName filename. 0031 * \return EBook object on success, NULL on failure. 0032 * 0033 * Loads a epub file. 0034 * \ingroup init 0035 */ 0036 bool load(const QString &archiveName) override; 0037 0038 /*! 0039 * \brief Closes all the files, and frees the appropriate data. 0040 * \ingroup init 0041 */ 0042 void close() override; 0043 0044 /*! 0045 * \brief Gets the title name of the opened ebook. 0046 * \return The name of the opened document, or an empty string if no ebook has been loaded. 0047 * \ingroup information 0048 */ 0049 QString title() const override; 0050 0051 /*! 0052 * \brief Gets the default URL of the e-book which should be opened when the book it first open 0053 * 0054 * \return The home page name, with a '/' added in front and relative to 0055 * the root of the archive filesystem. If no book has been opened, returns "/". 0056 * \ingroup information 0057 */ 0058 QUrl homeUrl() const override; 0059 0060 /*! 0061 * \brief Checks whether the specific feature is present in this file. 0062 * \return true if it is available; false otherwise. 0063 * \ingroup information 0064 */ 0065 bool hasFeature(Feature code) const override; 0066 0067 /*! 0068 * \brief Parses and fills up the Table of Contents (TOC) 0069 * \param topics A pointer to the container which will store the parsed results. 
0070 * Will be cleaned before parsing. 0071 * \return true if the tree is present and parsed successfully, false otherwise. 0072 * The parser is built to be error-prone, however it still can abort with qFatal() 0073 * by really buggy files; please report a bug if the file is opened ok under Windows. 0074 * \ingroup fileparsing 0075 */ 0076 bool getTableOfContents(QList<EBookTocEntry> &toc) const override; 0077 0078 /*! 0079 * \brief Parses the index table 0080 * \param indexes A pointer to the container which will store the parsed results. 0081 * Will be cleaned before parsing. 0082 * \return true if the tree is present and parsed successfully, false otherwise. 0083 * The parser is built to be error-prone, however it still can abort with qFatal() 0084 * by really buggy chm file; so far it never happened on indexes. 0085 * \ingroup fileparsing 0086 */ 0087 bool getIndex(QList<EBookIndexEntry> &index) const override; 0088 0089 /*! 0090 * \brief Retrieves the content associated with the url from the current ebook as QString. 0091 * \param str A string where the retreived content should be stored. 0092 * \param url An URL in chm file to retreive content from. Must be absolute. 0093 * \return true if the content is successfully received; false otherwise. Note content may be an empty string. 0094 * 0095 * This function retreives the file content (mostly for HTML pages) from the ebook. Because the content 0096 * in chm file might not be stored in Unicode, it will be recoded according to current encoding. 0097 * Do not use for binary data. 0098 * 0099 * \sa setCurrentEncoding() currentEncoding() getFileContentAsBinary() 0100 * \ingroup dataretrieve 0101 */ 0102 bool getFileContentAsString(QString &str, const QUrl &url) const override; 0103 0104 /*! 0105 * \brief Retrieves the content from url in current chm file to QByteArray. 0106 * \param data A data array where the retreived content should be stored. 0107 * \param url An URL in chm file to retreive content from. 
Must be absolute. 0108 * \return true if the content is successfully received; false otherwise. 0109 * 0110 * This function retreives the file content from the chm archive opened by load() 0111 * function. The content is not encoded. 0112 * 0113 * \sa getFileContentAsString() 0114 * \ingroup dataretrieve 0115 */ 0116 bool getFileContentAsBinary(QByteArray &data, const QUrl &url) const override; 0117 0118 /*! 0119 * \brief Obtains the list of all the files (URLs) in current ebook archive. This is used in search 0120 * and to dump the e-book content. 0121 * \param files An array to store list of URLs (file names) present in chm archive. 0122 * \return true if the enumeration succeed; false otherwise (I could hardly imagine a reason). 0123 * 0124 * \ingroup dataretrieve 0125 */ 0126 bool enumerateFiles(QList<QUrl> &files) override; 0127 0128 /*! 0129 * \brief Gets the Title of the page referenced by url. 0130 * \param url An URL in ebook file to get title from. Must be absolute. 0131 * \return The title, or QString() if the URL cannot be found or not a HTML page. 0132 * 0133 * \ingroup dataretrieve 0134 */ 0135 QString getTopicByUrl(const QUrl &url) override; 0136 0137 /*! 0138 * \brief Gets the current ebook encoding (set or autodetected) as qtcodec 0139 * \return The current encoding. 0140 * 0141 * \ingroup encoding 0142 */ 0143 QString currentEncoding() const override; 0144 0145 /*! 0146 * \brief Sets the ebook encoding to use for TOC and content 0147 * \param encoding An encoding to use. 0148 * 0149 * \ingroup encoding 0150 */ 0151 bool setCurrentEncoding(const char *encoding) override; 0152 0153 /*! 0154 * \brief Checks if this kind of URL is supported by the ebook format (i.e. 
could be passed to ebook functions) 0155 * \param url The url to check 0156 */ 0157 bool isSupportedUrl(const QUrl &url) override; 0158 0159 // Converts the string to the ebook-specific URL format 0160 QUrl pathToUrl(const QString &link) const override; 0161 0162 // Extracts the path component from the URL 0163 QString urlToPath(const QUrl &link) const override; 0164 0165 private: 0166 // Parses the XML file using a specified parser 0167 bool parseXML(const QString &uri, QXmlDefaultHandler *parser); 0168 0169 // Parses the book description file. Fills up the ebook info 0170 bool parseBookinfo(); 0171 0172 // Get file content from path 0173 bool getFileAsString(QString &str, const QString &path) const; 0174 bool getFileAsBinary(QByteArray &data, const QString &path) const; 0175 0176 // ZIP archive fd and structs 0177 QFile m_epubFile; 0178 struct zip *m_zipFile; 0179 0180 // Ebook info 0181 QString m_title; 0182 QString m_documentRoot; 0183 0184 // List of files in the ebook 0185 QList<QUrl> m_ebookManifest; 0186 0187 // Table of contents 0188 QList<EBookTocEntry> m_tocEntries; 0189 0190 // Map of URL-Title 0191 QMap<QUrl, QString> m_urlTitleMap; 0192 }; 0193 0194 #endif // EBOOK_EPUB_H