File indexing completed on 2025-01-26 04:24:56
0001 /* Copyright 2013-2014 Robert Schroll 0002 * 0003 * This file is part of Beru and is distributed under the terms of 0004 * the GPL. See the file COPYING for full details. 0005 */ 0006 0007 #include "epubreader.h" 0008 #include <QJsonObject> 0009 #include <QJsonArray> 0010 #include <QJsonDocument> 0011 #include <QtGui/QImage> 0012 #include <QBuffer> 0013 #include <QDir> 0014 #include <QCryptographicHash> 0015 #include "quazip/quazip.h" 0016 #include "quazip/quazipfile.h" 0017 #include <QDebug> 0018 //#include "../qhttpserver/qhttpresponse.h" 0019 //#include "../mimetype/mimetype.h" 0020 0021 QString resolveRelativePath(QString relto, QString path) 0022 { 0023 int reldirlen = relto.lastIndexOf('/'); 0024 QString reldir = (reldirlen > 0) ? relto.left(reldirlen+1) : ""; 0025 return QDir::cleanPath(reldir + path); 0026 } 0027 0028 EpubReader::EpubReader(QObject *parent) : 0029 QObject(parent) 0030 { 0031 this->zip = nullptr; 0032 } 0033 0034 bool EpubReader::load(const QString &filename) 0035 { 0036 qDebug()<< "trying to open epub file <<" << filename; 0037 if (this->zip != nullptr) { 0038 delete this->zip; 0039 this->zip = nullptr; 0040 } 0041 this->_hash = ""; 0042 this->navhref = ""; 0043 this->ncxhref = ""; 0044 this->coverhtml = ""; 0045 this->spine.clear(); 0046 this->metadata.clear(); 0047 this->sortmetadata.clear(); 0048 0049 this->zip = new QuaZip(filename); 0050 if (!this->zip->open(QuaZip::mdUnzip)) { 0051 delete this->zip; 0052 this->zip = nullptr; 0053 0054 return false; 0055 } 0056 if (!this->parseOPF()) { 0057 delete this->zip; 0058 this->zip = nullptr; 0059 return false; 0060 } 0061 0062 this->getContents(); 0063 return true; 0064 } 0065 0066 QString EpubReader::hash() { 0067 if (this->_hash != "") 0068 return this->_hash; 0069 0070 if (!this->zip || !this->zip->isOpen()) 0071 return this->_hash; 0072 0073 QByteArray CRCarray; 0074 QDataStream CRCstream(&CRCarray, QIODevice::WriteOnly); 0075 QList<QuaZipFileInfo> fileList = this->zip->getFileInfoList(); 0076 foreach (const QuaZipFileInfo info, fileList) { 0077 CRCstream << info.crc; 0078 } 0079 this->_hash = QCryptographicHash::hash(CRCarray, QCryptographicHash::Md5).toHex(); 0080 return this->_hash; 0081 } 0082 0083 QString EpubReader::title() { 0084 return this->metadata.contains("title") ? this->metadata["title"].toString() : ""; 0085 } 0086 0087 QDomDocument* EpubReader::getFileAsDom(const QString &filename) 0088 { 0089 if (!this->zip || !this->zip->isOpen()) 0090 return NULL; 0091 0092 this->zip->setCurrentFile(filename); 0093 QuaZipFile zfile(this->zip); 0094 if (!zfile.open(QIODevice::ReadOnly)) 0095 return NULL; 0096 0097 QDomDocument* doc = new QDomDocument(); 0098 if (!doc->setContent(&zfile)) { 0099 delete doc; 0100 zfile.close(); 0101 return NULL; 0102 } 0103 zfile.close(); 0104 return doc; 0105 } 0106 0107 //void EpubReader::serveComponent(const QString &filename, QHttpResponse *response) 0108 //{ 0109 // if (!this->zip || !this->zip->isOpen()) { 0110 // response->writeHead(500); 0111 // response->end("Epub file not open for reading"); 0112 // return; 0113 // } 0114 0115 // this->zip->setCurrentFile(filename); 0116 // QuaZipFile zfile(this->zip); 0117 // if (!zfile.open(QIODevice::ReadOnly)) { 0118 // response->writeHead(404); 0119 // response->end("Could not find \"" + filename + "\" in epub file"); 0120 // return; 0121 // } 0122 0123 // response->setHeader("Content-Type", guessMimeType(filename)); 0124 // response->writeHead(200); 0125 // // Important -- use write instead of end, so binary data doesn't get messed up! 0126 // response->write(zfile.readAll()); 0127 // response->end(); 0128 // zfile.close(); 0129 //} 0130 0131 bool EpubReader::parseOPF() 0132 { 0133 // Get the container.xml file. 0134 QDomDocument* container = this->getFileAsDom("META-INF/container.xml"); 0135 if (container == NULL) 0136 return false; 0137 0138 // Find out where the OPF file lives. 0139 QString contentsfn; 0140 QDomNodeList nodes = container->elementsByTagName("rootfile"); 0141 for (int i=0; i<nodes.length(); i++) { 0142 QDomElement element = nodes.item(i).toElement(); 0143 if (element.attribute("media-type") == "application/oebps-package+xml") { 0144 contentsfn = element.attribute("full-path"); 0145 break; 0146 } 0147 } 0148 0149 // Open the OPF file. 0150 QDomDocument* contents = this->getFileAsDom(contentsfn); 0151 if (contents == NULL) 0152 return false; 0153 0154 // Read the manifest. 0155 nodes = contents->elementsByTagName("manifest"); 0156 if (nodes.isEmpty()) 0157 return false; 0158 QDomElement manifest = nodes.item(0).toElement(); 0159 QHash<QString, QString> idmap; 0160 nodes = manifest.elementsByTagName("item"); 0161 for (int i=0; i<nodes.length(); i++) { 0162 QDomElement item = nodes.item(i).toElement(); 0163 idmap[item.attribute("id")] = resolveRelativePath(contentsfn, item.attribute("href")); 0164 if (item.attribute("properties").split(" ").contains("nav")) 0165 this->navhref = idmap[item.attribute("id")]; 0166 } 0167 0168 // Read the spine. 0169 nodes = contents->elementsByTagName("spine"); 0170 if (nodes.isEmpty()) 0171 return false; 0172 QDomElement spine = nodes.item(0).toElement(); 0173 nodes = spine.elementsByTagName("itemref"); 0174 for (int i=0; i<nodes.length(); i++) { 0175 QDomElement item = nodes.item(i).toElement(); 0176 this->spine.append(idmap[item.attribute("idref")]); 0177 } 0178 0179 // Read the metadata. 0180 nodes = contents->elementsByTagName("metadata"); 0181 if (nodes.isEmpty()) 0182 return false; 0183 QDomElement metadata = nodes.item(0).toElement(); 0184 nodes = metadata.childNodes(); 0185 for (int i=0; i<nodes.length(); i++) { 0186 QDomElement item = nodes.item(i).toElement(); 0187 if (!item.isNull() && !item.firstChild().isNull()) { 0188 QString name = item.nodeName().split(":").last(); 0189 this->metadata[name] = item.firstChild().nodeValue(); 0190 // This should work, but doesn't: 0191 //QString fileas = item.attributeNS("http://www.idpf.org/2007/opf", "file-as"); 0192 QString fileas = item.attribute("opf:file-as"); 0193 if (!fileas.isEmpty()) 0194 this->sortmetadata[name] = fileas; 0195 } 0196 } 0197 0198 // If this is an Epub3, we've already found the table of contents. If not, 0199 // we'll get the Epub2 table of contents. 0200 if (this->navhref == "") 0201 this->ncxhref = idmap[spine.attribute("toc")]; 0202 0203 // Look for the HTML file that contains the cover image 0204 nodes = contents->elementsByTagName("guide"); 0205 if (!nodes.isEmpty()) { 0206 QDomElement guide = nodes.item(0).toElement(); 0207 nodes = guide.childNodes(); 0208 for (int i=0; i<nodes.length(); i++) { 0209 QDomElement reference = nodes.item(i).toElement(); 0210 if (!reference.isNull() && reference.attribute("type") == "cover") { 0211 this->coverhtml = resolveRelativePath(contentsfn, reference.attribute("href")); 0212 break; 0213 } 0214 } 0215 } 0216 // If it's not in the guide, guess the first element of the spine 0217 if (this->coverhtml == "") 0218 this->coverhtml = this->spine.first(); 0219 0220 return true; 0221 } 0222 0223 QVariantList EpubReader::getContents() 0224 { 0225 QVariantList res = (this->navhref != "") ? this->parseNav() : this->parseNCX(); 0226 0227 qDebug()<< res; 0228 Q_EMIT contentsReady(res); 0229 return res; 0230 } 0231 0232 QVariantList EpubReader::parseNav() 0233 { 0234 QDomDocument* navdoc = this->getFileAsDom(this->navhref); 0235 QDomNodeList nodes = navdoc->elementsByTagName("nav"); 0236 for (int i=0; i<nodes.length(); i++) { 0237 QDomElement nav = nodes.item(i).toElement(); 0238 if (nav.attribute("epub:type") == "toc") { 0239 QDomNodeList ols = nav.elementsByTagName("ol"); 0240 if (!ols.isEmpty()) 0241 return this->parseNavList(ols.item(0).toElement()); 0242 } 0243 } 0244 return QVariantList(); 0245 } 0246 0247 QVariantList EpubReader::parseNavList(QDomElement element) 0248 { 0249 QVariantList children; 0250 QDomNodeList nodes = element.childNodes(); 0251 for (int i=0; i<nodes.length(); i++) { 0252 QDomElement item = nodes.item(i).toElement(); 0253 if (!item.isNull() && item.nodeName() == "li") { 0254 QDomNodeList links = item.elementsByTagName("a"); 0255 if (links.isEmpty()) 0256 continue; 0257 QDomElement link = links.item(0).toElement(); 0258 QVariantMap entry; 0259 entry["title"] = link.firstChild().nodeValue(); 0260 entry["src"] = resolveRelativePath(this->navhref, link.attribute("href")); 0261 QDomNodeList olist = item.elementsByTagName("ol"); 0262 if (!olist.isEmpty()) 0263 entry["children"] = this->parseNavList(olist.item(0).toElement()); 0264 children.append(entry); 0265 } 0266 } 0267 return children; 0268 } 0269 0270 QVariantList EpubReader::parseNCX() 0271 { 0272 QDomDocument* ncxdoc = this->getFileAsDom(this->ncxhref); 0273 QDomNodeList nodes = ncxdoc->elementsByTagName("navMap"); 0274 if (nodes.isEmpty()) 0275 return QVariantList(); 0276 return this->parseNCXChildren(nodes.item(0).toElement()); 0277 } 0278 0279 QVariantList EpubReader::parseNCXChildren(QDomElement element) 0280 { 0281 QVariantList children; 0282 QDomNodeList nodes = element.childNodes(); 0283 for (int i=0; i<nodes.length(); i++) { 0284 QDomElement node = nodes.item(i).toElement(); 0285 if (!node.isNull() && node.nodeName() == "navPoint") { 0286 QVariantMap entry; 0287 QDomNodeList labels = node.elementsByTagName("text"); 0288 if (!labels.isEmpty()) 0289 entry["title"] = labels.item(0).firstChild().nodeValue(); 0290 QDomNodeList contents = node.elementsByTagName("content"); 0291 if (!contents.isEmpty()) 0292 entry["src"] = resolveRelativePath(this->ncxhref, 0293 contents.item(0).toElement().attribute("src")); 0294 QVariantList child_nav = this->parseNCXChildren(node); 0295 if (!child_nav.isEmpty()) 0296 entry["children"] = child_nav; 0297 children.append(entry); 0298 } 0299 } 0300 return children; 0301 } 0302 0303 //void EpubReader::serveBookData(QHttpResponse *response) 0304 //{ 0305 // if (!this->zip || !this->zip->isOpen()) { 0306 // response->writeHead(500); 0307 // response->end("Epub file not open for reading"); 0308 // return; 0309 // } 0310 0311 // response->setHeader("Content-Type", guessMimeType("js")); 0312 // response->writeHead(200); 0313 // QJsonDocument spine(QJsonArray::fromStringList(this->spine)); 0314 // QJsonDocument contents(QJsonArray::fromVariantList(this->getContents())); 0315 // QJsonDocument metadata(QJsonObject::fromVariantMap(this->metadata)); 0316 // QString res = "var bookData = {" \ 0317 // "getComponents: function () { return %1; }, " \ 0318 // "getContents: function () { return %2; }, " \ 0319 // "getComponent: function (component) { return { url: component }; }, " \ 0320 // "getMetaData: function (key) { return %3[key]; } }"; 0321 // response->write(res.arg(QString(spine.toJson()), QString(contents.toJson()), 0322 // QString(metadata.toJson()))); 0323 // response->end(); 0324 //} 0325 0326 QVariantMap EpubReader::getCoverInfo(int thumbsize, int fullsize) 0327 { 0328 QVariantMap res; 0329 if (!this->zip || !this->zip->isOpen()) 0330 return res; 0331 0332 res["title"] = this->metadata.contains("title") ? this->metadata["title"] : "ZZZnone"; 0333 res["author"] = this->metadata.contains("creator") ? this->metadata["creator"] : ""; 0334 res["authorsort"] = this->sortmetadata.contains("creator") ? this->sortmetadata["creator"] : "zzznone"; 0335 res["cover"] = "ZZZnone"; 0336 0337 QDomDocument* coverdoc = this->getFileAsDom(this->coverhtml); 0338 if (coverdoc == NULL) 0339 return res; 0340 0341 QString coversrc; 0342 QDomNodeList images = coverdoc->elementsByTagName("img"); 0343 if (!images.isEmpty()) { 0344 coversrc = images.item(0).toElement().attribute("src"); 0345 } else { 0346 // Image inside a SVG element 0347 images = coverdoc->elementsByTagName("image"); 0348 if (!images.isEmpty()) 0349 coversrc = images.item(0).toElement().attribute("xlink:href"); 0350 } 0351 if (coversrc.isEmpty()) 0352 return res; 0353 0354 this->zip->setCurrentFile(resolveRelativePath(this->coverhtml, coversrc)); 0355 QuaZipFile zfile(this->zip); 0356 if (!zfile.open(QIODevice::ReadOnly)) 0357 return res; 0358 0359 QImage coverimg; 0360 if (!coverimg.loadFromData(zfile.readAll())) { 0361 zfile.close(); 0362 return res; 0363 } 0364 zfile.close(); 0365 QByteArray byteArray; 0366 QBuffer buffer(&byteArray); 0367 coverimg.scaledToWidth(thumbsize, Qt::SmoothTransformation).save(&buffer, "PNG"); 0368 res["cover"] = "data:image/png;base64," + QString(byteArray.toBase64()); 0369 QByteArray byteArrayf; 0370 QBuffer bufferf(&byteArrayf); 0371 coverimg.scaledToWidth(fullsize, Qt::SmoothTransformation).save(&bufferf, "PNG"); 0372 res["fullcover"] = "data:image/png;base64," + QString(byteArrayf.toBase64()); 0373 return res; 0374 }