File indexing completed on 2024-05-12 16:06:41

0001 /*
0002     SPDX-FileCopyrightText: 2008 Ely Levy <elylevy@cs.huji.ac.il>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #include "converter.h"
0008 
0009 #include <QAbstractTextDocumentLayout>
0010 #include <QApplication> // Because of the HACK
0011 #include <QFileInfo>
0012 #include <QRegularExpression>
0013 #include <QTextDocument>
0014 #include <QTextDocumentFragment>
0015 #include <QTextFrame>
0016 
0017 #include <KLocalizedString>
0018 #include <QDebug>
0019 
0020 #include <QStandardPaths>
0021 #include <core/action.h>
0022 #include <core/annotations.h>
0023 #include <core/movie.h>
0024 #include <core/sound.h>
0025 
0026 using namespace Epub;
0027 
0028 Converter::Converter()
0029     : mTextDocument(nullptr)
0030 {
0031 }
0032 
0033 Converter::~Converter()
0034 {
0035 }
0036 
0037 // join the char * array into one QString
0038 QString _strPack(char **str, int size)
0039 {
0040     QString res;
0041 
0042     res = QString::fromUtf8(str[0]);
0043 
0044     for (int i = 1; i < size; i++) {
0045         res += QLatin1String(", ");
0046         res += QString::fromUtf8(str[i]);
0047     }
0048 
0049     return res;
0050 }
0051 
0052 // Q_EMIT data wrap function that map between epub metadata to okular's
0053 void Converter::_emitData(Okular::DocumentInfo::Key key, enum epub_metadata type)
0054 {
0055     int size;
0056     unsigned char **data;
0057 
0058     data = epub_get_metadata(mTextDocument->getEpub(), type, &size);
0059 
0060     if (data) {
0061         Q_EMIT addMetaData(key, _strPack((char **)data, size));
0062         for (int i = 0; i < size; i++) {
0063             free(data[i]);
0064         }
0065         free(data);
0066     }
0067 }
0068 
0069 // Got over the blocks from start and add them to hashes use name as the
0070 // prefix for local links
0071 void Converter::_handle_anchors(const QTextBlock &start, const QString &name)
0072 {
0073     const QString curDir = QFileInfo(name).path();
0074 
0075     for (QTextBlock bit = start; bit != mTextDocument->end(); bit = bit.next()) {
0076         for (QTextBlock::iterator fit = bit.begin(); !(fit.atEnd()); ++fit) {
0077             QTextFragment frag = fit.fragment();
0078 
0079             if (frag.isValid() && frag.charFormat().isAnchor()) {
0080                 QString hrefString = frag.charFormat().anchorHref();
0081 
0082                 // remove ./ or ../
0083                 // making it easier to compare, with links
0084                 while (!hrefString.isNull() && (hrefString.at(0) == QLatin1Char('.') || hrefString.at(0) == QLatin1Char('/'))) {
0085                     hrefString.remove(0, 1);
0086                 }
0087 
0088                 QUrl href(hrefString);
0089                 if (href.isValid() && !href.isEmpty()) {
0090                     if (href.isRelative()) { // Inside document link
0091                         if (!hrefString.indexOf(QLatin1Char('#'))) {
0092                             hrefString = name + hrefString;
0093                         } else if (QFileInfo(hrefString).path() == QLatin1String(".") && curDir != QLatin1String(".")) {
0094                             hrefString = curDir + QLatin1Char('/') + hrefString;
0095                         }
0096 
0097                         // QTextCharFormat sometimes splits a link in two
0098                         // if there's no white space between words & the first one is an anchor
0099                         // consider whole word to be an anchor
0100                         ++fit;
0101                         int fragLen = frag.length();
0102                         if (!fit.atEnd() && ((fit.fragment().position() - frag.position()) == 1)) {
0103                             fragLen += fit.fragment().length();
0104                         }
0105                         --fit;
0106 
0107                         _insert_local_links(hrefString, QPair<int, int>(frag.position(), frag.position() + fragLen));
0108                     } else { // Outside document link
0109                         Okular::BrowseAction *action = new Okular::BrowseAction(QUrl(href.toString()));
0110 
0111                         Q_EMIT addAction(action, frag.position(), frag.position() + frag.length());
0112                     }
0113                 }
0114 
0115                 const QStringList &names = frag.charFormat().anchorNames();
0116                 if (!names.empty()) {
0117                     for (QStringList::const_iterator lit = names.constBegin(); lit != names.constEnd(); ++lit) {
0118                         mSectionMap.insert(name + QLatin1Char('#') + *lit, bit);
0119                     }
0120                 }
0121 
0122             } // end anchor case
0123         }
0124     }
0125 }
0126 
0127 void Converter::_insert_local_links(const QString &key, const QPair<int, int> value)
0128 {
0129     if (mLocalLinks.contains(key)) {
0130         mLocalLinks[key].append(value);
0131     } else {
0132         QVector<QPair<int, int>> vec;
0133         vec.append(value);
0134         mLocalLinks.insert(key, vec);
0135     }
0136 }
0137 
0138 static QPoint calculateXYPosition(QTextDocument *document, int startPosition)
0139 {
0140     const QTextBlock startBlock = document->findBlock(startPosition);
0141     const QRectF startBoundingRect = document->documentLayout()->blockBoundingRect(startBlock);
0142 
0143     QTextLayout *startLayout = startBlock.layout();
0144     if (!startLayout) {
0145         qWarning() << "Start layout not found" << startLayout;
0146         return QPoint();
0147     }
0148 
0149     int startPos = startPosition - startBlock.position();
0150     const QTextLine startLine = startLayout->lineForTextPosition(startPos);
0151 
0152     double x = startBoundingRect.x();
0153     double y = startBoundingRect.y() + startLine.y();
0154 
0155     y = (int)y % 800;
0156 
0157     return QPoint(x, y);
0158 }
0159 
0160 QTextDocument *Converter::convert(const QString &fileName)
0161 {
0162     EpubDocument *newDocument = new EpubDocument(fileName, generator()->generalSettings()->font());
0163     if (!newDocument->isValid()) {
0164         Q_EMIT error(i18n("Error while opening the EPub document."), -1);
0165         delete newDocument;
0166         return nullptr;
0167     }
0168     mTextDocument = newDocument;
0169 
0170     QTextCursor *_cursor = new QTextCursor(mTextDocument);
0171 
0172     mLocalLinks.clear();
0173     mSectionMap.clear();
0174 
0175     // Emit the document meta data
0176     _emitData(Okular::DocumentInfo::Title, EPUB_TITLE);
0177     _emitData(Okular::DocumentInfo::Author, EPUB_CREATOR);
0178     _emitData(Okular::DocumentInfo::Subject, EPUB_SUBJECT);
0179     _emitData(Okular::DocumentInfo::Creator, EPUB_PUBLISHER);
0180 
0181     _emitData(Okular::DocumentInfo::Description, EPUB_DESCRIPTION);
0182 
0183     _emitData(Okular::DocumentInfo::CreationDate, EPUB_DATE);
0184     _emitData(Okular::DocumentInfo::Category, EPUB_TYPE);
0185     _emitData(Okular::DocumentInfo::Copyright, EPUB_RIGHTS);
0186     Q_EMIT addMetaData(Okular::DocumentInfo::MimeType, QStringLiteral("application/epub+zip"));
0187 
0188     struct eiterator *it;
0189 
0190     // iterate over the book
0191     it = epub_get_iterator(mTextDocument->getEpub(), EITERATOR_SPINE, 0);
0192 
0193     // if the background color of the document is non-white it will be handled by QTextDocument::setHtml()
0194     bool firstPage = true;
0195     QVector<Okular::MovieAnnotation *> movieAnnots;
0196     QVector<Okular::SoundAction *> soundActions;
0197 
0198     // HACK BEGIN Get the links without CSS to be blue
0199     //            Remove if Qt ever gets fixed and the code in textdocumentgenerator.cpp works
0200     const QPalette orig = qApp->palette();
0201     QPalette p = orig;
0202     p.setColor(QPalette::Link, Qt::blue);
0203     // HACK END
0204 
0205     const QSize videoSize(320, 240);
0206     do {
0207         if (!epub_it_get_curr(it)) {
0208             continue;
0209         }
0210 
0211         movieAnnots.clear();
0212         soundActions.clear();
0213 
0214         const QString link = QString::fromUtf8(epub_it_get_curr_url(it));
0215         mTextDocument->setCurrentSubDocument(link);
0216         QString htmlContent = QString::fromUtf8(epub_it_get_curr(it));
0217 
0218         // as QTextCharFormat::anchorNames() ignores sections, replace it with <p>
0219         static const QRegularExpression sectionStart {QStringLiteral("< *section")};
0220         htmlContent.replace(sectionStart, QStringLiteral("<p"));
0221         static const QRegularExpression sectionEnd {QStringLiteral("< */ *section")};
0222         htmlContent.replace(sectionEnd, QStringLiteral("</p"));
0223 
0224         // convert svg tags to img
0225         const int maxHeight = mTextDocument->maxContentHeight();
0226         const int maxWidth = mTextDocument->maxContentWidth();
0227         QDomDocument dom;
0228         if (dom.setContent(htmlContent)) {
0229             QDomNodeList svgs = dom.elementsByTagName(QStringLiteral("svg"));
0230             if (!svgs.isEmpty()) {
0231                 QList<QDomNode> imgNodes;
0232                 for (int i = 0; i < svgs.length(); ++i) {
0233                     QDomNodeList images = svgs.at(i).toElement().elementsByTagName(QStringLiteral("image"));
0234                     for (int j = 0; j < images.length(); ++j) {
0235                         QString lnk = images.at(i).toElement().attribute(QStringLiteral("xlink:href"));
0236                         int ht = images.at(i).toElement().attribute(QStringLiteral("height")).toInt();
0237                         int wd = images.at(i).toElement().attribute(QStringLiteral("width")).toInt();
0238                         QImage img = mTextDocument->loadResource(QTextDocument::ImageResource, QUrl(lnk)).value<QImage>();
0239                         if (ht == 0) {
0240                             ht = img.height();
0241                         }
0242                         if (wd == 0) {
0243                             wd = img.width();
0244                         }
0245                         if (ht > maxHeight) {
0246                             ht = maxHeight;
0247                         }
0248                         if (wd > maxWidth) {
0249                             wd = maxWidth;
0250                         }
0251                         mTextDocument->addResource(QTextDocument::ImageResource, QUrl(lnk), img);
0252                         QDomDocument newDoc;
0253                         newDoc.setContent(QStringLiteral("<img src=\"%1\" height=\"%2\" width=\"%3\" />").arg(lnk).arg(ht).arg(wd));
0254                         imgNodes.append(newDoc.documentElement());
0255                     }
0256                     for (const QDomNode &nd : std::as_const(imgNodes)) {
0257                         svgs.at(i).parentNode().replaceChild(nd, svgs.at(i));
0258                     }
0259                 }
0260             }
0261 
0262             // handle embedded videos
0263             QDomNodeList videoTags = dom.elementsByTagName(QStringLiteral("video"));
0264             while (!videoTags.isEmpty()) {
0265                 QDomNodeList sourceTags = videoTags.at(0).toElement().elementsByTagName(QStringLiteral("source"));
0266                 if (!sourceTags.isEmpty()) {
0267                     QString lnk = sourceTags.at(0).toElement().attribute(QStringLiteral("src"));
0268 
0269                     Okular::Movie *movie = new Okular::Movie(mTextDocument->loadResource(EpubDocument::MovieResource, QUrl(lnk)).toString());
0270                     movie->setSize(videoSize);
0271                     movie->setShowControls(true);
0272 
0273                     Okular::MovieAnnotation *annot = new Okular::MovieAnnotation;
0274                     annot->setMovie(movie);
0275 
0276                     movieAnnots.push_back(annot);
0277                     QDomDocument tempDoc;
0278                     tempDoc.setContent(QStringLiteral("<pre>&lt;video&gt;&lt;/video&gt;</pre>"));
0279                     videoTags.at(0).parentNode().replaceChild(tempDoc.documentElement(), videoTags.at(0));
0280                 }
0281             }
0282 
0283             // handle embedded audio
0284             QDomNodeList audioTags = dom.elementsByTagName(QStringLiteral("audio"));
0285             while (!audioTags.isEmpty()) {
0286                 QDomElement element = audioTags.at(0).toElement();
0287                 bool repeat = element.hasAttribute(QStringLiteral("loop"));
0288                 QString lnk = element.attribute(QStringLiteral("src"));
0289 
0290                 Okular::Sound *sound = new Okular::Sound(mTextDocument->loadResource(EpubDocument::AudioResource, QUrl(lnk)).toByteArray());
0291 
0292                 Okular::SoundAction *soundAction = new Okular::SoundAction(1.0, true, repeat, false, sound);
0293                 soundActions.push_back(soundAction);
0294 
0295                 QDomDocument tempDoc;
0296                 tempDoc.setContent(QStringLiteral("<pre>&lt;audio&gt;&lt;/audio&gt;</pre>"));
0297                 audioTags.at(0).parentNode().replaceChild(tempDoc.documentElement(), audioTags.at(0));
0298             }
0299             htmlContent = dom.toString();
0300         }
0301 
0302         // HACK BEGIN
0303         qApp->setPalette(p);
0304         // HACK END
0305 
0306         QTextBlock before;
0307         if (firstPage) {
0308             mTextDocument->setHtml(htmlContent);
0309             firstPage = false;
0310             before = mTextDocument->begin();
0311         } else {
0312             before = _cursor->block();
0313             _cursor->insertHtml(htmlContent);
0314         }
0315         // HACK BEGIN
0316         qApp->setPalette(orig);
0317         // HACK END
0318 
0319         QTextCursor csr(before); // a temporary cursor pointing at the begin of the last inserted block
0320         int index = 0;
0321 
0322         while (!movieAnnots.isEmpty() && !(csr = mTextDocument->find(QStringLiteral("<video></video>"), csr)).isNull()) {
0323             const int posStart = csr.position();
0324             const QPoint startPoint = calculateXYPosition(mTextDocument, posStart);
0325             QImage img(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("okular/pics/okular-epub-movie.png")));
0326             img = img.scaled(videoSize);
0327             csr.insertImage(img);
0328             const int posEnd = csr.position();
0329             const QRect videoRect(startPoint, videoSize);
0330             movieAnnots[index]->setBoundingRectangle(Okular::NormalizedRect(videoRect, mTextDocument->pageSize().width(), mTextDocument->pageSize().height()));
0331             Q_EMIT addAnnotation(movieAnnots[index++], posStart, posEnd);
0332             csr.movePosition(QTextCursor::NextWord);
0333         }
0334 
0335         csr = QTextCursor(before);
0336         index = 0;
0337         const QString keyToSearch(QStringLiteral("<audio></audio>"));
0338         while (!soundActions.isEmpty() && !(csr = mTextDocument->find(keyToSearch, csr)).isNull()) {
0339             const int posStart = csr.position() - keyToSearch.size();
0340             const QImage img(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("okular/pics/okular-epub-sound-icon.png")));
0341             csr.insertImage(img);
0342             const int posEnd = csr.position();
0343             qDebug() << posStart << posEnd;
0344             ;
0345             Q_EMIT addAction(soundActions[index++], posStart, posEnd);
0346             csr.movePosition(QTextCursor::NextWord);
0347         }
0348 
0349         mSectionMap.insert(link, before);
0350 
0351         _handle_anchors(before, link);
0352 
0353         const int page = mTextDocument->pageCount();
0354 
0355         // it will clear the previous format
0356         // useful when the last line had a bullet
0357         _cursor->insertBlock(QTextBlockFormat());
0358 
0359         while (mTextDocument->pageCount() == page) {
0360             _cursor->insertText(QStringLiteral("\n"));
0361         }
0362 
0363     } while (epub_it_get_next(it));
0364 
0365     epub_free_iterator(it);
0366 
0367     // handle toc
0368     struct titerator *tit;
0369 
0370     // FIXME: support other method beside NAVMAP and GUIDE
0371     tit = epub_get_titerator(mTextDocument->getEpub(), TITERATOR_NAVMAP, 0);
0372     if (!tit) {
0373         tit = epub_get_titerator(mTextDocument->getEpub(), TITERATOR_GUIDE, 0);
0374     }
0375 
0376     if (tit) {
0377         do {
0378             if (epub_tit_curr_valid(tit)) {
0379                 char *clink = epub_tit_get_curr_link(tit);
0380                 QString link = QString::fromUtf8(clink);
0381                 char *label = epub_tit_get_curr_label(tit);
0382                 QTextBlock block = mTextDocument->begin(); // must point somewhere
0383 
0384                 if (mSectionMap.contains(link)) {
0385                     block = mSectionMap.value(link);
0386                 } else {
0387                     const QString percentDecodedLink = QUrl::fromPercentEncoding(link.toUtf8());
0388                     if (mSectionMap.contains(percentDecodedLink)) {
0389                         block = mSectionMap.value(percentDecodedLink);
0390                     } else { // load missing resource
0391                         char *data = nullptr;
0392                         // epub_get_data can't handle whitespace url encodings
0393                         QByteArray ba = link.replace(QLatin1String("%20"), QLatin1String(" ")).toLatin1();
0394                         const char *clinkClean = ba.data();
0395                         int size = epub_get_data(mTextDocument->getEpub(), clinkClean, &data);
0396 
0397                         if (data) {
0398                             _cursor->insertBlock();
0399 
0400                             // try to load as image and if not load as html
0401                             block = _cursor->block();
0402                             QImage image;
0403                             mSectionMap.insert(link, block);
0404                             if (image.loadFromData((unsigned char *)data, size)) {
0405                                 mTextDocument->addResource(QTextDocument::ImageResource, QUrl(link), image);
0406                                 _cursor->insertImage(link);
0407                             } else {
0408                                 _cursor->insertHtml(QString::fromUtf8(data));
0409                                 // Add anchors to hashes
0410                                 _handle_anchors(block, link);
0411                             }
0412 
0413                             // Start new file in a new page
0414                             int page = mTextDocument->pageCount();
0415                             while (mTextDocument->pageCount() == page) {
0416                                 _cursor->insertText(QStringLiteral("\n"));
0417                             }
0418                         }
0419 
0420                         free(data);
0421                     }
0422                 }
0423 
0424                 if (block.isValid()) { // be sure we actually got a block
0425                     Q_EMIT addTitle(epub_tit_get_curr_depth(tit), QString::fromUtf8(label), block);
0426                 } else {
0427                     qDebug() << "Error: no block found for" << link;
0428                 }
0429 
0430                 if (clink) {
0431                     free(clink);
0432                 }
0433                 if (label) {
0434                     free(label);
0435                 }
0436             }
0437         } while (epub_tit_next(tit));
0438 
0439         epub_free_titerator(tit);
0440     } else {
0441         qDebug() << "no toc found";
0442     }
0443 
0444     // adding link actions
0445     QHashIterator<QString, QVector<QPair<int, int>>> hit(mLocalLinks);
0446     while (hit.hasNext()) {
0447         hit.next();
0448 
0449         const QTextBlock block = mSectionMap.value(hit.key());
0450 
0451         for (int i = 0; i < hit.value().size(); ++i) {
0452             if (block.isValid()) { // be sure we actually got a block
0453                 Okular::DocumentViewport viewport = calculateViewport(mTextDocument, block);
0454 
0455                 Okular::GotoAction *action = new Okular::GotoAction(QString(), viewport);
0456 
0457                 Q_EMIT addAction(action, hit.value()[i].first, hit.value()[i].second);
0458             } else {
0459                 qDebug() << "Error: no block found for " << hit.key();
0460             }
0461         }
0462     }
0463 
0464     delete _cursor;
0465 
0466     return mTextDocument;
0467 }