// File indexing completed on 2024-06-16 04:20:01
0001 /* 0002 Kchmviewer - a CHM and EPUB file viewer with broad language support 0003 SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com 0004 0005 SPDX-License-Identifier: GPL-3.0-or-later 0006 */ 0007 0008 #if defined(WIN32) 0009 #include <io.h> // dup 0010 #else 0011 #include <unistd.h> 0012 #endif 0013 0014 #include <KLocalizedString> 0015 #include <QMessageBox> 0016 #include <QXmlSimpleReader> 0017 0018 #include "ebook_epub.h" 0019 #include "helperxmlhandler_epubcontainer.h" 0020 #include "helperxmlhandler_epubcontent.h" 0021 #include "helperxmlhandler_epubtoc.h" 0022 0023 #define URL_SCHEME_EPUB QStringLiteral("epub") 0024 0025 EBook_EPUB::EBook_EPUB() 0026 : EBook() 0027 { 0028 m_zipFile = nullptr; 0029 } 0030 0031 EBook_EPUB::~EBook_EPUB() 0032 { 0033 close(); 0034 } 0035 0036 bool EBook_EPUB::load(const QString &archiveName) 0037 { 0038 close(); 0039 0040 // We use QFile and zip_fdopen instead of zip_open because latter does not support Unicode file names 0041 m_epubFile.setFileName(archiveName); 0042 0043 if (!m_epubFile.open(QIODevice::ReadOnly)) { 0044 qWarning("Could not open file %s: %s", qPrintable(archiveName), qPrintable(m_epubFile.errorString())); 0045 return false; 0046 } 0047 0048 // Open the ZIP archive: http://www.nih.at/libzip/zip_fdopen.html 0049 // Note that zip_fdopen takes control over the passed descriptor, 0050 // so we need to pass a duplicate of it for this to work correctly 0051 int fdcopy = dup(m_epubFile.handle()); 0052 0053 if (fdcopy < 0) { 0054 qWarning("Could not duplicate descriptor"); 0055 return false; 0056 } 0057 0058 int errcode; 0059 m_zipFile = zip_fdopen(fdcopy, 0, &errcode); 0060 0061 if (!m_zipFile) { 0062 qWarning("Could not open file %s: error %d", qPrintable(archiveName), errcode); 0063 return false; 0064 } 0065 0066 // Parse the book descriptor file 0067 if (!parseBookinfo()) { 0068 return false; 0069 } 0070 0071 return true; 0072 } 0073 0074 void EBook_EPUB::close() 0075 { 0076 if (m_zipFile) { 
0077 zip_close(m_zipFile); 0078 m_zipFile = nullptr; 0079 } 0080 0081 // if ( m_epubFile.isOpen() ) 0082 // m_epubFile.close(); 0083 } 0084 0085 bool EBook_EPUB::getFileContentAsString(QString &str, const QUrl &url) const 0086 { 0087 return getFileAsString(str, urlToPath(url)); 0088 } 0089 0090 bool EBook_EPUB::getFileContentAsBinary(QByteArray &data, const QUrl &url) const 0091 { 0092 return getFileAsBinary(data, urlToPath(url)); 0093 } 0094 0095 bool EBook_EPUB::enumerateFiles(QList<QUrl> &files) 0096 { 0097 files = m_ebookManifest; 0098 return true; 0099 } 0100 0101 QString EBook_EPUB::title() const 0102 { 0103 return m_title; 0104 } 0105 0106 QUrl EBook_EPUB::homeUrl() const 0107 { 0108 return m_tocEntries[0].url; 0109 } 0110 0111 bool EBook_EPUB::hasFeature(EBook::Feature code) const 0112 { 0113 switch (code) { 0114 case FEATURE_TOC: 0115 return true; 0116 0117 case FEATURE_INDEX: 0118 return false; 0119 0120 case FEATURE_ENCODING: 0121 return false; 0122 } 0123 0124 return false; 0125 } 0126 0127 bool EBook_EPUB::getTableOfContents(QList<EBookTocEntry> &toc) const 0128 { 0129 toc = m_tocEntries; 0130 return true; 0131 } 0132 0133 bool EBook_EPUB::getIndex(QList<EBookIndexEntry> &) const 0134 { 0135 return false; 0136 } 0137 0138 QString EBook_EPUB::getTopicByUrl(const QUrl &url) 0139 { 0140 if (m_urlTitleMap.contains(url)) { 0141 return m_urlTitleMap[url]; 0142 } 0143 0144 return QLatin1String(""); 0145 } 0146 0147 QString EBook_EPUB::currentEncoding() const 0148 { 0149 return QStringLiteral("UTF-8"); 0150 } 0151 0152 bool EBook_EPUB::setCurrentEncoding(const char *) 0153 { 0154 abort(); 0155 } 0156 0157 bool EBook_EPUB::isSupportedUrl(const QUrl &url) 0158 { 0159 return url.scheme() == URL_SCHEME_EPUB; 0160 } 0161 0162 bool EBook_EPUB::parseXML(const QString &uri, QXmlDefaultHandler *parser) 0163 { 0164 QByteArray container; 0165 0166 if (!getFileAsBinary(container, uri)) { 0167 qDebug("Failed to retrieve XML file %s", qPrintable(uri)); 0168 return false; 
0169 } 0170 0171 // Use it as XML source 0172 QXmlInputSource source; 0173 source.setData(container); 0174 0175 // Init the reader 0176 QXmlSimpleReader reader; 0177 reader.setContentHandler(parser); 0178 reader.setErrorHandler(parser); 0179 0180 return reader.parse(source); 0181 } 0182 0183 bool EBook_EPUB::parseBookinfo() 0184 { 0185 // Parse the container.xml to find the content descriptor 0186 HelperXmlHandler_EpubContainer container_parser; 0187 0188 if (!parseXML(QStringLiteral("META-INF/container.xml"), &container_parser) || container_parser.contentPath.isEmpty()) { 0189 return false; 0190 } 0191 0192 // Parse the content.opf 0193 HelperXmlHandler_EpubContent content_parser; 0194 0195 if (!parseXML(container_parser.contentPath, &content_parser)) { 0196 return false; 0197 } 0198 0199 // At least title and the TOC must be present 0200 if (!content_parser.metadata.contains(QStringLiteral("title")) || content_parser.tocname.isEmpty()) { 0201 return false; 0202 } 0203 0204 // All the files, including TOC, are relative to the container_parser.contentPath 0205 m_documentRoot.clear(); 0206 int sep = container_parser.contentPath.lastIndexOf(QLatin1Char('/')); 0207 0208 if (sep != -1) { 0209 m_documentRoot = container_parser.contentPath.left(sep + 1); // Keep the trailing slash 0210 } 0211 0212 // Parse the TOC 0213 HelperXmlHandler_EpubTOC toc_parser(this); 0214 0215 if (!parseXML(content_parser.tocname, &toc_parser)) { 0216 return false; 0217 } 0218 0219 // Get the data 0220 m_title = content_parser.metadata[QStringLiteral("title")]; 0221 0222 // Move the manifest entries into the list 0223 for (const QString &f : std::as_const(content_parser.manifest)) { 0224 m_ebookManifest.push_back(pathToUrl(f)); 0225 } 0226 0227 // Copy the manifest information and fill up the other maps if we have it 0228 if (!toc_parser.entries.isEmpty()) { 0229 for (const EBookTocEntry &e : std::as_const(toc_parser.entries)) { 0230 // Add into url-title map 0231 m_urlTitleMap[e.url] = 
e.name; 0232 m_tocEntries.push_back(e); 0233 } 0234 } else { 0235 // Copy them from spine 0236 for (QString url : std::as_const(content_parser.spine)) { 0237 EBookTocEntry e; 0238 0239 if (content_parser.manifest.contains(url)) { 0240 url = content_parser.manifest[url]; 0241 } 0242 0243 e.name = url; 0244 e.url = pathToUrl(url); 0245 e.iconid = EBookTocEntry::IMAGE_NONE; 0246 e.indent = 0; 0247 0248 // Add into url-title map 0249 m_urlTitleMap[pathToUrl(url)] = url; 0250 m_tocEntries.push_back(e); 0251 } 0252 } 0253 0254 // EPub with an empty TOC is not valid 0255 if (m_tocEntries.isEmpty()) { 0256 return false; 0257 } 0258 0259 return true; 0260 } 0261 0262 QUrl EBook_EPUB::pathToUrl(const QString &link) const 0263 { 0264 QUrl url; 0265 url.setScheme(URL_SCHEME_EPUB); 0266 url.setHost(URL_SCHEME_EPUB); 0267 0268 // Does the link contain the fragment as well? 0269 int off = link.indexOf(QLatin1Char('#')); 0270 QString path; 0271 0272 if (off != -1) { 0273 path = link.left(off); 0274 url.setFragment(link.mid(off + 1)); 0275 } else { 0276 path = link; 0277 } 0278 0279 if (!path.startsWith(QLatin1Char('/'))) { 0280 path.prepend(QLatin1Char('/')); 0281 } 0282 0283 url.setPath(QUrl::fromPercentEncoding(path.toUtf8())); 0284 0285 return url; 0286 } 0287 0288 QString EBook_EPUB::urlToPath(const QUrl &link) const 0289 { 0290 if (link.scheme() == URL_SCHEME_EPUB) { 0291 return link.path(); 0292 } 0293 0294 return QLatin1String(""); 0295 } 0296 0297 bool EBook_EPUB::getFileAsString(QString &str, const QString &path) const 0298 { 0299 QByteArray data; 0300 0301 if (!getFileAsBinary(data, path)) { 0302 return false; 0303 } 0304 0305 // I have never seen yet an UTF16 epub 0306 if (data.startsWith("<?xml")) { 0307 int endxmltag = data.indexOf("?>"); 0308 int utf16 = data.indexOf("UTF-16"); 0309 0310 if (utf16 > 0 && utf16 < endxmltag) { 0311 QMessageBox::critical(nullptr, i18n("Unsupported encoding"), i18n("The encoding of this ebook is not supported yet. 
Please open a bug in https://bugs.kde.org for support to be added")); 0312 return false; 0313 } 0314 } 0315 0316 str = QString::fromUtf8(data); 0317 return true; 0318 } 0319 0320 bool EBook_EPUB::getFileAsBinary(QByteArray &data, const QString &path) const 0321 { 0322 // Retrieve the file size 0323 struct zip_stat fileinfo; 0324 QString completeUrl; 0325 0326 if (!path.isEmpty() && path[0] == QLatin1Char('/')) { 0327 completeUrl = m_documentRoot + path.mid(1); 0328 } else { 0329 completeUrl = m_documentRoot + path; 0330 } 0331 0332 // qDebug("URL requested: %s (%s)", qPrintable(path), qPrintable(completeUrl)); 0333 0334 // http://www.nih.at/libzip/zip_stat.html 0335 if (zip_stat(m_zipFile, completeUrl.toUtf8().constData(), 0, &fileinfo) != 0) { 0336 qDebug("File %s is not found in the archive", qPrintable(completeUrl)); 0337 return false; 0338 } 0339 0340 // Make sure the size field is valid 0341 if ((fileinfo.valid & ZIP_STAT_SIZE) == 0 || (fileinfo.valid & ZIP_STAT_INDEX) == 0) { 0342 return false; 0343 } 0344 0345 // Open the file 0346 struct zip_file *file = zip_fopen_index(m_zipFile, fileinfo.index, 0); 0347 0348 if (!file) { 0349 return false; 0350 } 0351 0352 // Allocate the memory and read the file 0353 data.resize(fileinfo.size); 0354 0355 // Could it return a positive number but not fileinfo.size??? 0356 int ret = zip_fread(file, data.data(), fileinfo.size); 0357 if (ret != (int)fileinfo.size) { 0358 zip_fclose(file); 0359 return false; 0360 } 0361 0362 zip_fclose(file); 0363 return true; 0364 }