File indexing completed on 2024-06-16 04:20:01

0001 /*
0002     Kchmviewer - a CHM and EPUB file viewer with broad language support
0003     SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com
0004 
0005     SPDX-License-Identifier: GPL-3.0-or-later
0006 */
0007 
0008 #if defined(WIN32)
0009 #include <io.h> // dup
0010 #else
0011 #include <unistd.h>
0012 #endif
0013 
0014 #include <KLocalizedString>
0015 #include <QMessageBox>
0016 #include <QXmlSimpleReader>
0017 
0018 #include "ebook_epub.h"
0019 #include "helperxmlhandler_epubcontainer.h"
0020 #include "helperxmlhandler_epubcontent.h"
0021 #include "helperxmlhandler_epubtoc.h"
0022 
0023 #define URL_SCHEME_EPUB QStringLiteral("epub")
0024 
0025 EBook_EPUB::EBook_EPUB()
0026     : EBook()
0027 {
0028     m_zipFile = nullptr;
0029 }
0030 
0031 EBook_EPUB::~EBook_EPUB()
0032 {
0033     close();
0034 }
0035 
0036 bool EBook_EPUB::load(const QString &archiveName)
0037 {
0038     close();
0039 
0040     // We use QFile and zip_fdopen instead of zip_open because latter does not support Unicode file names
0041     m_epubFile.setFileName(archiveName);
0042 
0043     if (!m_epubFile.open(QIODevice::ReadOnly)) {
0044         qWarning("Could not open file %s: %s", qPrintable(archiveName), qPrintable(m_epubFile.errorString()));
0045         return false;
0046     }
0047 
0048     // Open the ZIP archive: http://www.nih.at/libzip/zip_fdopen.html
0049     // Note that zip_fdopen takes control over the passed descriptor,
0050     // so we need to pass a duplicate of it for this to work correctly
0051     int fdcopy = dup(m_epubFile.handle());
0052 
0053     if (fdcopy < 0) {
0054         qWarning("Could not duplicate descriptor");
0055         return false;
0056     }
0057 
0058     int errcode;
0059     m_zipFile = zip_fdopen(fdcopy, 0, &errcode);
0060 
0061     if (!m_zipFile) {
0062         qWarning("Could not open file %s: error %d", qPrintable(archiveName), errcode);
0063         return false;
0064     }
0065 
0066     // Parse the book descriptor file
0067     if (!parseBookinfo()) {
0068         return false;
0069     }
0070 
0071     return true;
0072 }
0073 
0074 void EBook_EPUB::close()
0075 {
0076     if (m_zipFile) {
0077         zip_close(m_zipFile);
0078         m_zipFile = nullptr;
0079     }
0080 
0081     // if ( m_epubFile.isOpen() )
0082     //  m_epubFile.close();
0083 }
0084 
0085 bool EBook_EPUB::getFileContentAsString(QString &str, const QUrl &url) const
0086 {
0087     return getFileAsString(str, urlToPath(url));
0088 }
0089 
0090 bool EBook_EPUB::getFileContentAsBinary(QByteArray &data, const QUrl &url) const
0091 {
0092     return getFileAsBinary(data, urlToPath(url));
0093 }
0094 
0095 bool EBook_EPUB::enumerateFiles(QList<QUrl> &files)
0096 {
0097     files = m_ebookManifest;
0098     return true;
0099 }
0100 
0101 QString EBook_EPUB::title() const
0102 {
0103     return m_title;
0104 }
0105 
0106 QUrl EBook_EPUB::homeUrl() const
0107 {
0108     return m_tocEntries[0].url;
0109 }
0110 
0111 bool EBook_EPUB::hasFeature(EBook::Feature code) const
0112 {
0113     switch (code) {
0114     case FEATURE_TOC:
0115         return true;
0116 
0117     case FEATURE_INDEX:
0118         return false;
0119 
0120     case FEATURE_ENCODING:
0121         return false;
0122     }
0123 
0124     return false;
0125 }
0126 
0127 bool EBook_EPUB::getTableOfContents(QList<EBookTocEntry> &toc) const
0128 {
0129     toc = m_tocEntries;
0130     return true;
0131 }
0132 
0133 bool EBook_EPUB::getIndex(QList<EBookIndexEntry> &) const
0134 {
0135     return false;
0136 }
0137 
0138 QString EBook_EPUB::getTopicByUrl(const QUrl &url)
0139 {
0140     if (m_urlTitleMap.contains(url)) {
0141         return m_urlTitleMap[url];
0142     }
0143 
0144     return QLatin1String("");
0145 }
0146 
0147 QString EBook_EPUB::currentEncoding() const
0148 {
0149     return QStringLiteral("UTF-8");
0150 }
0151 
0152 bool EBook_EPUB::setCurrentEncoding(const char *)
0153 {
0154     abort();
0155 }
0156 
0157 bool EBook_EPUB::isSupportedUrl(const QUrl &url)
0158 {
0159     return url.scheme() == URL_SCHEME_EPUB;
0160 }
0161 
0162 bool EBook_EPUB::parseXML(const QString &uri, QXmlDefaultHandler *parser)
0163 {
0164     QByteArray container;
0165 
0166     if (!getFileAsBinary(container, uri)) {
0167         qDebug("Failed to retrieve XML file %s", qPrintable(uri));
0168         return false;
0169     }
0170 
0171     // Use it as XML source
0172     QXmlInputSource source;
0173     source.setData(container);
0174 
0175     // Init the reader
0176     QXmlSimpleReader reader;
0177     reader.setContentHandler(parser);
0178     reader.setErrorHandler(parser);
0179 
0180     return reader.parse(source);
0181 }
0182 
0183 bool EBook_EPUB::parseBookinfo()
0184 {
0185     // Parse the container.xml to find the content descriptor
0186     HelperXmlHandler_EpubContainer container_parser;
0187 
0188     if (!parseXML(QStringLiteral("META-INF/container.xml"), &container_parser) || container_parser.contentPath.isEmpty()) {
0189         return false;
0190     }
0191 
0192     // Parse the content.opf
0193     HelperXmlHandler_EpubContent content_parser;
0194 
0195     if (!parseXML(container_parser.contentPath, &content_parser)) {
0196         return false;
0197     }
0198 
0199     // At least title and the TOC must be present
0200     if (!content_parser.metadata.contains(QStringLiteral("title")) || content_parser.tocname.isEmpty()) {
0201         return false;
0202     }
0203 
0204     // All the files, including TOC, are relative to the container_parser.contentPath
0205     m_documentRoot.clear();
0206     int sep = container_parser.contentPath.lastIndexOf(QLatin1Char('/'));
0207 
0208     if (sep != -1) {
0209         m_documentRoot = container_parser.contentPath.left(sep + 1); // Keep the trailing slash
0210     }
0211 
0212     // Parse the TOC
0213     HelperXmlHandler_EpubTOC toc_parser(this);
0214 
0215     if (!parseXML(content_parser.tocname, &toc_parser)) {
0216         return false;
0217     }
0218 
0219     // Get the data
0220     m_title = content_parser.metadata[QStringLiteral("title")];
0221 
0222     // Move the manifest entries into the list
0223     for (const QString &f : std::as_const(content_parser.manifest)) {
0224         m_ebookManifest.push_back(pathToUrl(f));
0225     }
0226 
0227     // Copy the manifest information and fill up the other maps if we have it
0228     if (!toc_parser.entries.isEmpty()) {
0229         for (const EBookTocEntry &e : std::as_const(toc_parser.entries)) {
0230             // Add into url-title map
0231             m_urlTitleMap[e.url] = e.name;
0232             m_tocEntries.push_back(e);
0233         }
0234     } else {
0235         // Copy them from spine
0236         for (QString url : std::as_const(content_parser.spine)) {
0237             EBookTocEntry e;
0238 
0239             if (content_parser.manifest.contains(url)) {
0240                 url = content_parser.manifest[url];
0241             }
0242 
0243             e.name = url;
0244             e.url = pathToUrl(url);
0245             e.iconid = EBookTocEntry::IMAGE_NONE;
0246             e.indent = 0;
0247 
0248             // Add into url-title map
0249             m_urlTitleMap[pathToUrl(url)] = url;
0250             m_tocEntries.push_back(e);
0251         }
0252     }
0253 
0254     // EPub with an empty TOC is not valid
0255     if (m_tocEntries.isEmpty()) {
0256         return false;
0257     }
0258 
0259     return true;
0260 }
0261 
0262 QUrl EBook_EPUB::pathToUrl(const QString &link) const
0263 {
0264     QUrl url;
0265     url.setScheme(URL_SCHEME_EPUB);
0266     url.setHost(URL_SCHEME_EPUB);
0267 
0268     // Does the link contain the fragment as well?
0269     int off = link.indexOf(QLatin1Char('#'));
0270     QString path;
0271 
0272     if (off != -1) {
0273         path = link.left(off);
0274         url.setFragment(link.mid(off + 1));
0275     } else {
0276         path = link;
0277     }
0278 
0279     if (!path.startsWith(QLatin1Char('/'))) {
0280         path.prepend(QLatin1Char('/'));
0281     }
0282 
0283     url.setPath(QUrl::fromPercentEncoding(path.toUtf8()));
0284 
0285     return url;
0286 }
0287 
0288 QString EBook_EPUB::urlToPath(const QUrl &link) const
0289 {
0290     if (link.scheme() == URL_SCHEME_EPUB) {
0291         return link.path();
0292     }
0293 
0294     return QLatin1String("");
0295 }
0296 
0297 bool EBook_EPUB::getFileAsString(QString &str, const QString &path) const
0298 {
0299     QByteArray data;
0300 
0301     if (!getFileAsBinary(data, path)) {
0302         return false;
0303     }
0304 
0305     // I have never seen yet an UTF16 epub
0306     if (data.startsWith("<?xml")) {
0307         int endxmltag = data.indexOf("?>");
0308         int utf16 = data.indexOf("UTF-16");
0309 
0310         if (utf16 > 0 && utf16 < endxmltag) {
0311             QMessageBox::critical(nullptr, i18n("Unsupported encoding"), i18n("The encoding of this ebook is not supported yet. Please open a bug in https://bugs.kde.org for support to be added"));
0312             return false;
0313         }
0314     }
0315 
0316     str = QString::fromUtf8(data);
0317     return true;
0318 }
0319 
0320 bool EBook_EPUB::getFileAsBinary(QByteArray &data, const QString &path) const
0321 {
0322     // Retrieve the file size
0323     struct zip_stat fileinfo;
0324     QString completeUrl;
0325 
0326     if (!path.isEmpty() && path[0] == QLatin1Char('/')) {
0327         completeUrl = m_documentRoot + path.mid(1);
0328     } else {
0329         completeUrl = m_documentRoot + path;
0330     }
0331 
0332     // qDebug("URL requested: %s (%s)", qPrintable(path), qPrintable(completeUrl));
0333 
0334     // http://www.nih.at/libzip/zip_stat.html
0335     if (zip_stat(m_zipFile, completeUrl.toUtf8().constData(), 0, &fileinfo) != 0) {
0336         qDebug("File %s is not found in the archive", qPrintable(completeUrl));
0337         return false;
0338     }
0339 
0340     // Make sure the size field is valid
0341     if ((fileinfo.valid & ZIP_STAT_SIZE) == 0 || (fileinfo.valid & ZIP_STAT_INDEX) == 0) {
0342         return false;
0343     }
0344 
0345     // Open the file
0346     struct zip_file *file = zip_fopen_index(m_zipFile, fileinfo.index, 0);
0347 
0348     if (!file) {
0349         return false;
0350     }
0351 
0352     // Allocate the memory and read the file
0353     data.resize(fileinfo.size);
0354 
0355     // Could it return a positive number but not fileinfo.size???
0356     int ret = zip_fread(file, data.data(), fileinfo.size);
0357     if (ret != (int)fileinfo.size) {
0358         zip_fclose(file);
0359         return false;
0360     }
0361 
0362     zip_fclose(file);
0363     return true;
0364 }