0001 /* Copyright 2013-2014 Robert Schroll
0002  *
0003  * This file is part of Beru and is distributed under the terms of
0004  * the GPL. See the file COPYING for full details.
0005  */
0007 #include "epubreader.h"
0008 #include <QJsonObject>
0009 #include <QJsonArray>
0010 #include <QJsonDocument>
0011 #include <QtGui/QImage>
0012 #include <QBuffer>
0013 #include <QDir>
0014 #include <QCryptographicHash>
0015 #include "quazip/quazip.h"
0016 #include "quazip/quazipfile.h"
0017 #include <QDebug>
0018 //#include "../qhttpserver/qhttpresponse.h"
0019 //#include "../mimetype/mimetype.h"
0021 QString resolveRelativePath(QString relto, QString path)
0022 {
0023     int reldirlen = relto.lastIndexOf('/');
0024     QString reldir = (reldirlen > 0) ? relto.left(reldirlen+1) : "";
0025     return QDir::cleanPath(reldir + path);
0026 }
0028 EpubReader::EpubReader(QObject *parent) :
0029     QObject(parent)
0030 {
0031     this->zip = nullptr;
0032 }
0034 bool EpubReader::load(const QString &filename)
0035 {
0036     qDebug()<< "trying to open epub file <<" << filename;
0037     if (this->zip != nullptr) {
0038         delete this->zip;
0039         this->zip = nullptr;
0040     }
0041     this->_hash = "";
0042     this->navhref = "";
0043     this->ncxhref = "";
0044     this->coverhtml = "";
0045     this->spine.clear();
0046     this->metadata.clear();
0047     this->sortmetadata.clear();
0049     this->zip = new QuaZip(filename);
0050     if (!this->zip->open(QuaZip::mdUnzip)) {
0051         delete this->zip;
0052         this->zip = nullptr;
0054         return false;
0055     }
0056     if (!this->parseOPF()) {
0057         delete this->zip;
0058         this->zip = nullptr;
0059         return false;
0060     }
0062     this->getContents();
0063     return true;
0064 }
0066 QString EpubReader::hash() {
0067     if (this->_hash != "")
0068         return this->_hash;
0070     if (!this->zip || !this->zip->isOpen())
0071         return this->_hash;
0073     QByteArray CRCarray;
0074     QDataStream CRCstream(&CRCarray, QIODevice::WriteOnly);
0075     QList<QuaZipFileInfo> fileList = this->zip->getFileInfoList();
0076     foreach (const QuaZipFileInfo info, fileList) {
0077         CRCstream << info.crc;
0078     }
0079     this->_hash = QCryptographicHash::hash(CRCarray, QCryptographicHash::Md5).toHex();
0080     return this->_hash;
0081 }
0083 QString EpubReader::title() {
0084     return this->metadata.contains("title") ? this->metadata["title"].toString() : "";
0085 }
0087 QDomDocument* EpubReader::getFileAsDom(const QString &filename)
0088 {
0089     if (!this->zip || !this->zip->isOpen())
0090         return NULL;
0092     this->zip->setCurrentFile(filename);
0093     QuaZipFile zfile(this->zip);
0094     if (!
0095         return NULL;
0097     QDomDocument* doc = new QDomDocument();
0098     if (!doc->setContent(&zfile)) {
0099         delete doc;
0100         zfile.close();
0101         return NULL;
0102     }
0103     zfile.close();
0104     return doc;
0105 }
0107 //void EpubReader::serveComponent(const QString &filename, QHttpResponse *response)
0108 //{
0109 //    if (!this->zip || !this->zip->isOpen()) {
0110 //        response->writeHead(500);
0111 //        response->end("Epub file not open for reading");
0112 //        return;
0113 //    }
0115 //    this->zip->setCurrentFile(filename);
0116 //    QuaZipFile zfile(this->zip);
//    if (!zfile.open(QIODevice::ReadOnly)) {
0118 //        response->writeHead(404);
0119 //        response->end("Could not find \"" + filename + "\" in epub file");
0120 //        return;
0121 //    }
0123 //    response->setHeader("Content-Type", guessMimeType(filename));
0124 //    response->writeHead(200);
0125 //    // Important -- use write instead of end, so binary data doesn't get messed up!
0126 //    response->write(zfile.readAll());
0127 //    response->end();
0128 //    zfile.close();
0129 //}
0131 bool EpubReader::parseOPF()
0132 {
0133     // Get the container.xml file.
0134     QDomDocument* container = this->getFileAsDom("META-INF/container.xml");
0135     if (container == NULL)
0136         return false;
0138     // Find out where the OPF file lives.
0139     QString contentsfn;
0140     QDomNodeList nodes = container->elementsByTagName("rootfile");
0141     for (int i=0; i<nodes.length(); i++) {
0142         QDomElement element = nodes.item(i).toElement();
0143         if (element.attribute("media-type") == "application/oebps-package+xml") {
0144             contentsfn = element.attribute("full-path");
0145             break;
0146         }
0147     }
0149     // Open the OPF file.
0150     QDomDocument* contents = this->getFileAsDom(contentsfn);
0151     if (contents == NULL)
0152         return false;
0154     // Read the manifest.
0155     nodes = contents->elementsByTagName("manifest");
0156     if (nodes.isEmpty())
0157         return false;
0158     QDomElement manifest = nodes.item(0).toElement();
0159     QHash<QString, QString> idmap;
0160     nodes = manifest.elementsByTagName("item");
0161     for (int i=0; i<nodes.length(); i++) {
0162         QDomElement item = nodes.item(i).toElement();
0163         idmap[item.attribute("id")] = resolveRelativePath(contentsfn, item.attribute("href"));
0164         if (item.attribute("properties").split(" ").contains("nav"))
0165             this->navhref = idmap[item.attribute("id")];
0166     }
0168     // Read the spine.
0169     nodes = contents->elementsByTagName("spine");
0170     if (nodes.isEmpty())
0171         return false;
0172     QDomElement spine = nodes.item(0).toElement();
0173     nodes = spine.elementsByTagName("itemref");
0174     for (int i=0; i<nodes.length(); i++) {
0175         QDomElement item = nodes.item(i).toElement();
0176         this->spine.append(idmap[item.attribute("idref")]);
0177     }
0179     // Read the metadata.
0180     nodes = contents->elementsByTagName("metadata");
0181     if (nodes.isEmpty())
0182         return false;
0183     QDomElement metadata = nodes.item(0).toElement();
0184     nodes = metadata.childNodes();
0185     for (int i=0; i<nodes.length(); i++) {
0186         QDomElement item = nodes.item(i).toElement();
0187         if (!item.isNull() && !item.firstChild().isNull()) {
0188             QString name = item.nodeName().split(":").last();
0189             this->metadata[name] = item.firstChild().nodeValue();
0190             // This should work, but doesn't:
0191             //QString fileas = item.attributeNS("", "file-as");
0192             QString fileas = item.attribute("opf:file-as");
0193             if (!fileas.isEmpty())
0194                 this->sortmetadata[name] = fileas;
0195         }
0196     }
0198     // If this is an Epub3, we've already found the table of contents.  If not,
0199     // we'll get the Epub2 table of contents.
0200     if (this->navhref == "")
0201         this->ncxhref = idmap[spine.attribute("toc")];
0203     // Look for the HTML file that contains the cover image
0204     nodes = contents->elementsByTagName("guide");
0205     if (!nodes.isEmpty()) {
0206         QDomElement guide = nodes.item(0).toElement();
0207         nodes = guide.childNodes();
0208         for (int i=0; i<nodes.length(); i++) {
0209             QDomElement reference = nodes.item(i).toElement();
0210             if (!reference.isNull() && reference.attribute("type") == "cover") {
0211                 this->coverhtml = resolveRelativePath(contentsfn, reference.attribute("href"));
0212                 break;
0213             }
0214         }
0215     }
0216     // If it's not in the guide, guess the first element of the spine
0217     if (this->coverhtml == "")
0218         this->coverhtml = this->spine.first();
0220     return true;
0221 }
0223 QVariantList EpubReader::getContents()
0224 {
0225     QVariantList res = (this->navhref != "") ? this->parseNav() : this->parseNCX();
0227     qDebug()<< res;
0228     Q_EMIT contentsReady(res);
0229     return res;
0230 }
0232 QVariantList EpubReader::parseNav()
0233 {
0234     QDomDocument* navdoc = this->getFileAsDom(this->navhref);
0235     QDomNodeList nodes = navdoc->elementsByTagName("nav");
0236     for (int i=0; i<nodes.length(); i++) {
0237         QDomElement nav = nodes.item(i).toElement();
0238         if (nav.attribute("epub:type") == "toc") {
0239             QDomNodeList ols = nav.elementsByTagName("ol");
0240             if (!ols.isEmpty())
0241                 return this->parseNavList(ols.item(0).toElement());
0242         }
0243     }
0244     return QVariantList();
0245 }
0247 QVariantList EpubReader::parseNavList(QDomElement element)
0248 {
0249     QVariantList children;
0250     QDomNodeList nodes = element.childNodes();
0251     for (int i=0; i<nodes.length(); i++) {
0252         QDomElement item = nodes.item(i).toElement();
0253         if (!item.isNull() && item.nodeName() == "li") {
0254             QDomNodeList links = item.elementsByTagName("a");
0255             if (links.isEmpty())
0256                 continue;
0257             QDomElement link = links.item(0).toElement();
0258             QVariantMap entry;
0259             entry["title"] = link.firstChild().nodeValue();
0260             entry["src"] = resolveRelativePath(this->navhref, link.attribute("href"));
0261             QDomNodeList olist = item.elementsByTagName("ol");
0262             if (!olist.isEmpty())
0263                 entry["children"] = this->parseNavList(olist.item(0).toElement());
0264             children.append(entry);
0265         }
0266     }
0267     return children;
0268 }
0270 QVariantList EpubReader::parseNCX()
0271 {
0272     QDomDocument* ncxdoc = this->getFileAsDom(this->ncxhref);
0273     QDomNodeList nodes = ncxdoc->elementsByTagName("navMap");
0274     if (nodes.isEmpty())
0275         return QVariantList();
0276     return this->parseNCXChildren(nodes.item(0).toElement());
0277 }
0279 QVariantList EpubReader::parseNCXChildren(QDomElement element)
0280 {
0281     QVariantList children;
0282     QDomNodeList nodes = element.childNodes();
0283     for (int i=0; i<nodes.length(); i++) {
0284         QDomElement node = nodes.item(i).toElement();
0285         if (!node.isNull() && node.nodeName() == "navPoint") {
0286             QVariantMap entry;
0287             QDomNodeList labels = node.elementsByTagName("text");
0288             if (!labels.isEmpty())
0289                 entry["title"] = labels.item(0).firstChild().nodeValue();
0290             QDomNodeList contents = node.elementsByTagName("content");
0291             if (!contents.isEmpty())
0292                 entry["src"] = resolveRelativePath(this->ncxhref,
0293                                                    contents.item(0).toElement().attribute("src"));
0294             QVariantList child_nav = this->parseNCXChildren(node);
0295             if (!child_nav.isEmpty())
0296                 entry["children"] = child_nav;
0297             children.append(entry);
0298         }
0299     }
0300     return children;
0301 }
0303 //void EpubReader::serveBookData(QHttpResponse *response)
0304 //{
0305 //    if (!this->zip || !this->zip->isOpen()) {
0306 //        response->writeHead(500);
0307 //        response->end("Epub file not open for reading");
0308 //        return;
0309 //    }
0311 //    response->setHeader("Content-Type", guessMimeType("js"));
0312 //    response->writeHead(200);
0313 //    QJsonDocument spine(QJsonArray::fromStringList(this->spine));
0314 //    QJsonDocument contents(QJsonArray::fromVariantList(this->getContents()));
0315 //    QJsonDocument metadata(QJsonObject::fromVariantMap(this->metadata));
0316 //    QString res = "var bookData = {" \
0317 //            "getComponents: function () { return %1; }, " \
0318 //            "getContents:   function () { return %2; }, " \
0319 //            "getComponent:  function (component) { return { url: component }; }, " \
0320 //            "getMetaData:   function (key) { return %3[key]; } }";
0321 //    response->write(res.arg(QString(spine.toJson()), QString(contents.toJson()),
0322 //                            QString(metadata.toJson())));
0323 //    response->end();
0324 //}
0326 QVariantMap EpubReader::getCoverInfo(int thumbsize, int fullsize)
0327 {
0328     QVariantMap res;
0329     if (!this->zip || !this->zip->isOpen())
0330         return res;
0332     res["title"] = this->metadata.contains("title") ? this->metadata["title"] : "ZZZnone";
0333     res["author"] = this->metadata.contains("creator") ? this->metadata["creator"] : "";
0334     res["authorsort"] = this->sortmetadata.contains("creator") ? this->sortmetadata["creator"] : "zzznone";
0335     res["cover"] = "ZZZnone";
0337     QDomDocument* coverdoc = this->getFileAsDom(this->coverhtml);
0338     if (coverdoc == NULL)
0339         return res;
0341     QString coversrc;
0342     QDomNodeList images = coverdoc->elementsByTagName("img");
0343     if (!images.isEmpty()) {
0344         coversrc = images.item(0).toElement().attribute("src");
0345     } else {
0346         // Image inside a SVG element
0347         images = coverdoc->elementsByTagName("image");
0348         if (!images.isEmpty())
0349             coversrc = images.item(0).toElement().attribute("xlink:href");
0350     }
0351     if (coversrc.isEmpty())
0352         return res;
0354     this->zip->setCurrentFile(resolveRelativePath(this->coverhtml, coversrc));
0355     QuaZipFile zfile(this->zip);
0356     if (!
0357         return res;
0359     QImage coverimg;
0360     if (!coverimg.loadFromData(zfile.readAll())) {
0361         zfile.close();
0362         return res;
0363     }
0364     zfile.close();
0365     QByteArray byteArray;
0366     QBuffer buffer(&byteArray);
0367     coverimg.scaledToWidth(thumbsize, Qt::SmoothTransformation).save(&buffer, "PNG");
0368     res["cover"] = "data:image/png;base64," + QString(byteArray.toBase64());
0369     QByteArray byteArrayf;
0370     QBuffer bufferf(&byteArrayf);
0371     coverimg.scaledToWidth(fullsize, Qt::SmoothTransformation).save(&bufferf, "PNG");
0372     res["fullcover"] = "data:image/png;base64," + QString(byteArrayf.toBase64());
0373     return res;
0374 }