File indexing completed on 2024-06-16 04:20:01

0001 /*
0002     Kchmviewer - a CHM and EPUB file viewer with broad language support
0003     SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com
0004 
0005     SPDX-License-Identifier: GPL-3.0-or-later
0006 */
0007 
0008 #ifndef EBOOK_EPUB_H
0009 #define EBOOK_EPUB_H
0010 
0011 #include <QFile>
0012 #include <QMap>
0013 #include <QString>
0014 #include <QStringList>
0015 #include <QUrl>
0016 
0017 #include "ebook.h"
0018 #include "zip.h"
0019 
0020 class QXmlDefaultHandler;
0021 
0022 class EBook_EPUB : public EBook
0023 {
0024 public:
0025     EBook_EPUB();
0026     ~EBook_EPUB() override;
0027 
0028     /*!
0029      * \brief Attempts to load epub file.
0030      * \param archiveName filename.
0031      * \return EBook object on success, NULL on failure.
0032      *
0033      * Loads a epub file.
0034      * \ingroup init
0035      */
0036     bool load(const QString &archiveName) override;
0037 
0038     /*!
0039      * \brief Closes all the files, and frees the appropriate data.
0040      * \ingroup init
0041      */
0042     void close() override;
0043 
0044     /*!
0045      * \brief Gets the title name of the opened ebook.
0046      * \return The name of the opened document, or an empty string if no ebook has been loaded.
0047      * \ingroup information
0048      */
0049     QString title() const override;
0050 
0051     /*!
0052      * \brief Gets the default URL of the e-book which should be opened when the book it first open
0053      *
0054      * \return The home page name, with a '/' added in front and relative to
0055      *         the root of the archive filesystem. If no book has been opened, returns "/".
0056      * \ingroup information
0057      */
0058     QUrl homeUrl() const override;
0059 
0060     /*!
0061      * \brief Checks whether the specific feature is present in this file.
0062      * \return true if it is available; false otherwise.
0063      * \ingroup information
0064      */
0065     bool hasFeature(Feature code) const override;
0066 
0067     /*!
0068      * \brief Parses and fills up the Table of Contents (TOC)
0069      * \param topics A pointer to the container which will store the parsed results.
0070      *               Will be cleaned before parsing.
0071      * \return true if the tree is present and parsed successfully, false otherwise.
0072      *         The parser is built to be error-prone, however it still can abort with qFatal()
0073      *         by really buggy files; please report a bug if the file is opened ok under Windows.
0074      * \ingroup fileparsing
0075      */
0076     bool getTableOfContents(QList<EBookTocEntry> &toc) const override;
0077 
0078     /*!
0079      * \brief Parses the index table
0080      * \param indexes A pointer to the container which will store the parsed results.
0081      *               Will be cleaned before parsing.
0082      * \return true if the tree is present and parsed successfully, false otherwise.
0083      *         The parser is built to be error-prone, however it still can abort with qFatal()
0084      *         by really buggy chm file; so far it never happened on indexes.
0085      * \ingroup fileparsing
0086      */
0087     bool getIndex(QList<EBookIndexEntry> &index) const override;
0088 
0089     /*!
0090      * \brief Retrieves the content associated with the url from the current ebook as QString.
0091      * \param str A string where the retreived content should be stored.
0092      * \param url An URL in chm file to retreive content from. Must be absolute.
0093      * \return true if the content is successfully received; false otherwise. Note content may be an empty string.
0094      *
0095      * This function retreives the file content (mostly for HTML pages) from the ebook. Because the content
0096      * in chm file might not be stored in Unicode, it will be recoded according to current encoding.
0097      * Do not use for binary data.
0098      *
0099      * \sa setCurrentEncoding() currentEncoding() getFileContentAsBinary()
0100      * \ingroup dataretrieve
0101      */
0102     bool getFileContentAsString(QString &str, const QUrl &url) const override;
0103 
0104     /*!
0105      * \brief Retrieves the content from url in current chm file to QByteArray.
0106      * \param data A data array where the retreived content should be stored.
0107      * \param url An URL in chm file to retreive content from. Must be absolute.
0108      * \return true if the content is successfully received; false otherwise.
0109      *
0110      * This function retreives the file content from the chm archive opened by load()
0111      * function. The content is not encoded.
0112      *
0113      * \sa getFileContentAsString()
0114      * \ingroup dataretrieve
0115      */
0116     bool getFileContentAsBinary(QByteArray &data, const QUrl &url) const override;
0117 
0118     /*!
0119      * \brief Obtains the list of all the files (URLs) in current ebook archive. This is used in search
0120      * and to dump the e-book content.
0121      * \param files An array to store list of URLs (file names) present in chm archive.
0122      * \return true if the enumeration succeed; false otherwise (I could hardly imagine a reason).
0123      *
0124      * \ingroup dataretrieve
0125      */
0126     bool enumerateFiles(QList<QUrl> &files) override;
0127 
0128     /*!
0129      * \brief Gets the Title of the page referenced by url.
0130      * \param url An URL in ebook file to get title from. Must be absolute.
0131      * \return The title, or QString() if the URL cannot be found or not a HTML page.
0132      *
0133      * \ingroup dataretrieve
0134      */
0135     QString getTopicByUrl(const QUrl &url) override;
0136 
0137     /*!
0138      * \brief Gets the current ebook encoding (set or autodetected) as qtcodec
0139      * \return The current encoding.
0140      *
0141      * \ingroup encoding
0142      */
0143     QString currentEncoding() const override;
0144 
0145     /*!
0146      * \brief Sets the ebook encoding to use for TOC and content
0147      * \param encoding An encoding to use.
0148      *
0149      * \ingroup encoding
0150      */
0151     bool setCurrentEncoding(const char *encoding) override;
0152 
0153     /*!
0154      * \brief Checks if this kind of URL is supported by the ebook format (i.e. could be passed to ebook functions)
0155      * \param url The url to check
0156      */
0157     bool isSupportedUrl(const QUrl &url) override;
0158 
0159     // Converts the string to the ebook-specific URL format
0160     QUrl pathToUrl(const QString &link) const override;
0161 
0162     // Extracts the path component from the URL
0163     QString urlToPath(const QUrl &link) const override;
0164 
0165 private:
0166     // Parses the XML file using a specified parser
0167     bool parseXML(const QString &uri, QXmlDefaultHandler *parser);
0168 
0169     // Parses the book description file. Fills up the ebook info
0170     bool parseBookinfo();
0171 
0172     // Get file content from path
0173     bool getFileAsString(QString &str, const QString &path) const;
0174     bool getFileAsBinary(QByteArray &data, const QString &path) const;
0175 
0176     // ZIP archive fd and structs
0177     QFile m_epubFile;
0178     struct zip *m_zipFile;
0179 
0180     // Ebook info
0181     QString m_title;
0182     QString m_documentRoot;
0183 
0184     // List of files in the ebook
0185     QList<QUrl> m_ebookManifest;
0186 
0187     // Table of contents
0188     QList<EBookTocEntry> m_tocEntries;
0189 
0190     // Map of URL-Title
0191     QMap<QUrl, QString> m_urlTitleMap;
0192 };
0193 
0194 #endif // EBOOK_EPUB_H