File indexing completed on 2024-05-12 16:06:41
0001 /* 0002 SPDX-FileCopyrightText: 2008 Ely Levy <elylevy@cs.huji.ac.il> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #include "converter.h" 0008 0009 #include <QAbstractTextDocumentLayout> 0010 #include <QApplication> // Because of the HACK 0011 #include <QFileInfo> 0012 #include <QRegularExpression> 0013 #include <QTextDocument> 0014 #include <QTextDocumentFragment> 0015 #include <QTextFrame> 0016 0017 #include <KLocalizedString> 0018 #include <QDebug> 0019 0020 #include <QStandardPaths> 0021 #include <core/action.h> 0022 #include <core/annotations.h> 0023 #include <core/movie.h> 0024 #include <core/sound.h> 0025 0026 using namespace Epub; 0027 0028 Converter::Converter() 0029 : mTextDocument(nullptr) 0030 { 0031 } 0032 0033 Converter::~Converter() 0034 { 0035 } 0036 0037 // join the char * array into one QString 0038 QString _strPack(char **str, int size) 0039 { 0040 QString res; 0041 0042 res = QString::fromUtf8(str[0]); 0043 0044 for (int i = 1; i < size; i++) { 0045 res += QLatin1String(", "); 0046 res += QString::fromUtf8(str[i]); 0047 } 0048 0049 return res; 0050 } 0051 0052 // Q_EMIT data wrap function that map between epub metadata to okular's 0053 void Converter::_emitData(Okular::DocumentInfo::Key key, enum epub_metadata type) 0054 { 0055 int size; 0056 unsigned char **data; 0057 0058 data = epub_get_metadata(mTextDocument->getEpub(), type, &size); 0059 0060 if (data) { 0061 Q_EMIT addMetaData(key, _strPack((char **)data, size)); 0062 for (int i = 0; i < size; i++) { 0063 free(data[i]); 0064 } 0065 free(data); 0066 } 0067 } 0068 0069 // Got over the blocks from start and add them to hashes use name as the 0070 // prefix for local links 0071 void Converter::_handle_anchors(const QTextBlock &start, const QString &name) 0072 { 0073 const QString curDir = QFileInfo(name).path(); 0074 0075 for (QTextBlock bit = start; bit != mTextDocument->end(); bit = bit.next()) { 0076 for (QTextBlock::iterator fit = bit.begin(); !(fit.atEnd()); ++fit) { 0077 QTextFragment frag = fit.fragment(); 0078 0079 if (frag.isValid() && frag.charFormat().isAnchor()) { 0080 QString hrefString = frag.charFormat().anchorHref(); 0081 0082 // remove ./ or ../ 0083 // making it easier to compare, with links 0084 while (!hrefString.isNull() && (hrefString.at(0) == QLatin1Char('.') || hrefString.at(0) == QLatin1Char('/'))) { 0085 hrefString.remove(0, 1); 0086 } 0087 0088 QUrl href(hrefString); 0089 if (href.isValid() && !href.isEmpty()) { 0090 if (href.isRelative()) { // Inside document link 0091 if (!hrefString.indexOf(QLatin1Char('#'))) { 0092 hrefString = name + hrefString; 0093 } else if (QFileInfo(hrefString).path() == QLatin1String(".") && curDir != QLatin1String(".")) { 0094 hrefString = curDir + QLatin1Char('/') + hrefString; 0095 } 0096 0097 // QTextCharFormat sometimes splits a link in two 0098 // if there's no white space between words & the first one is an anchor 0099 // consider whole word to be an anchor 0100 ++fit; 0101 int fragLen = frag.length(); 0102 if (!fit.atEnd() && ((fit.fragment().position() - frag.position()) == 1)) { 0103 fragLen += fit.fragment().length(); 0104 } 0105 --fit; 0106 0107 _insert_local_links(hrefString, QPair<int, int>(frag.position(), frag.position() + fragLen)); 0108 } else { // Outside document link 0109 Okular::BrowseAction *action = new Okular::BrowseAction(QUrl(href.toString())); 0110 0111 Q_EMIT addAction(action, frag.position(), frag.position() + frag.length()); 0112 } 0113 } 0114 0115 const QStringList &names = frag.charFormat().anchorNames(); 0116 if (!names.empty()) { 0117 for (QStringList::const_iterator lit = names.constBegin(); lit != names.constEnd(); ++lit) { 0118 mSectionMap.insert(name + QLatin1Char('#') + *lit, bit); 0119 } 0120 } 0121 0122 } // end anchor case 0123 } 0124 } 0125 } 0126 0127 void Converter::_insert_local_links(const QString &key, const QPair<int, int> value) 0128 { 0129 if (mLocalLinks.contains(key)) { 0130 mLocalLinks[key].append(value); 0131 } else { 0132 QVector<QPair<int, int>> vec; 0133 vec.append(value); 0134 mLocalLinks.insert(key, vec); 0135 } 0136 } 0137 0138 static QPoint calculateXYPosition(QTextDocument *document, int startPosition) 0139 { 0140 const QTextBlock startBlock = document->findBlock(startPosition); 0141 const QRectF startBoundingRect = document->documentLayout()->blockBoundingRect(startBlock); 0142 0143 QTextLayout *startLayout = startBlock.layout(); 0144 if (!startLayout) { 0145 qWarning() << "Start layout not found" << startLayout; 0146 return QPoint(); 0147 } 0148 0149 int startPos = startPosition - startBlock.position(); 0150 const QTextLine startLine = startLayout->lineForTextPosition(startPos); 0151 0152 double x = startBoundingRect.x(); 0153 double y = startBoundingRect.y() + startLine.y(); 0154 0155 y = (int)y % 800; 0156 0157 return QPoint(x, y); 0158 } 0159 0160 QTextDocument *Converter::convert(const QString &fileName) 0161 { 0162 EpubDocument *newDocument = new EpubDocument(fileName, generator()->generalSettings()->font()); 0163 if (!newDocument->isValid()) { 0164 Q_EMIT error(i18n("Error while opening the EPub document."), -1); 0165 delete newDocument; 0166 return nullptr; 0167 } 0168 mTextDocument = newDocument; 0169 0170 QTextCursor *_cursor = new QTextCursor(mTextDocument); 0171 0172 mLocalLinks.clear(); 0173 mSectionMap.clear(); 0174 0175 // Emit the document meta data 0176 _emitData(Okular::DocumentInfo::Title, EPUB_TITLE); 0177 _emitData(Okular::DocumentInfo::Author, EPUB_CREATOR); 0178 _emitData(Okular::DocumentInfo::Subject, EPUB_SUBJECT); 0179 _emitData(Okular::DocumentInfo::Creator, EPUB_PUBLISHER); 0180 0181 _emitData(Okular::DocumentInfo::Description, EPUB_DESCRIPTION); 0182 0183 _emitData(Okular::DocumentInfo::CreationDate, EPUB_DATE); 0184 _emitData(Okular::DocumentInfo::Category, EPUB_TYPE); 0185 _emitData(Okular::DocumentInfo::Copyright, EPUB_RIGHTS); 0186 Q_EMIT addMetaData(Okular::DocumentInfo::MimeType, QStringLiteral("application/epub+zip")); 0187 0188 struct eiterator *it; 0189 0190 // iterate over the book 0191 it = epub_get_iterator(mTextDocument->getEpub(), EITERATOR_SPINE, 0); 0192 0193 // if the background color of the document is non-white it will be handled by QTextDocument::setHtml() 0194 bool firstPage = true; 0195 QVector<Okular::MovieAnnotation *> movieAnnots; 0196 QVector<Okular::SoundAction *> soundActions; 0197 0198 // HACK BEGIN Get the links without CSS to be blue 0199 // Remove if Qt ever gets fixed and the code in textdocumentgenerator.cpp works 0200 const QPalette orig = qApp->palette(); 0201 QPalette p = orig; 0202 p.setColor(QPalette::Link, Qt::blue); 0203 // HACK END 0204 0205 const QSize videoSize(320, 240); 0206 do { 0207 if (!epub_it_get_curr(it)) { 0208 continue; 0209 } 0210 0211 movieAnnots.clear(); 0212 soundActions.clear(); 0213 0214 const QString link = QString::fromUtf8(epub_it_get_curr_url(it)); 0215 mTextDocument->setCurrentSubDocument(link); 0216 QString htmlContent = QString::fromUtf8(epub_it_get_curr(it)); 0217 0218 // as QTextCharFormat::anchorNames() ignores sections, replace it with <p> 0219 static const QRegularExpression sectionStart {QStringLiteral("< *section")}; 0220 htmlContent.replace(sectionStart, QStringLiteral("<p")); 0221 static const QRegularExpression sectionEnd {QStringLiteral("< */ *section")}; 0222 htmlContent.replace(sectionEnd, QStringLiteral("</p")); 0223 0224 // convert svg tags to img 0225 const int maxHeight = mTextDocument->maxContentHeight(); 0226 const int maxWidth = mTextDocument->maxContentWidth(); 0227 QDomDocument dom; 0228 if (dom.setContent(htmlContent)) { 0229 QDomNodeList svgs = dom.elementsByTagName(QStringLiteral("svg")); 0230 if (!svgs.isEmpty()) { 0231 QList<QDomNode> imgNodes; 0232 for (int i = 0; i < svgs.length(); ++i) { 0233 QDomNodeList images = svgs.at(i).toElement().elementsByTagName(QStringLiteral("image")); 0234 for (int j = 0; j < images.length(); ++j) { 0235 QString lnk = images.at(i).toElement().attribute(QStringLiteral("xlink:href")); 0236 int ht = images.at(i).toElement().attribute(QStringLiteral("height")).toInt(); 0237 int wd = images.at(i).toElement().attribute(QStringLiteral("width")).toInt(); 0238 QImage img = mTextDocument->loadResource(QTextDocument::ImageResource, QUrl(lnk)).value<QImage>(); 0239 if (ht == 0) { 0240 ht = img.height(); 0241 } 0242 if (wd == 0) { 0243 wd = img.width(); 0244 } 0245 if (ht > maxHeight) { 0246 ht = maxHeight; 0247 } 0248 if (wd > maxWidth) { 0249 wd = maxWidth; 0250 } 0251 mTextDocument->addResource(QTextDocument::ImageResource, QUrl(lnk), img); 0252 QDomDocument newDoc; 0253 newDoc.setContent(QStringLiteral("<img src=\"%1\" height=\"%2\" width=\"%3\" />").arg(lnk).arg(ht).arg(wd)); 0254 imgNodes.append(newDoc.documentElement()); 0255 } 0256 for (const QDomNode &nd : std::as_const(imgNodes)) { 0257 svgs.at(i).parentNode().replaceChild(nd, svgs.at(i)); 0258 } 0259 } 0260 } 0261 0262 // handle embedded videos 0263 QDomNodeList videoTags = dom.elementsByTagName(QStringLiteral("video")); 0264 while (!videoTags.isEmpty()) { 0265 QDomNodeList sourceTags = videoTags.at(0).toElement().elementsByTagName(QStringLiteral("source")); 0266 if (!sourceTags.isEmpty()) { 0267 QString lnk = sourceTags.at(0).toElement().attribute(QStringLiteral("src")); 0268 0269 Okular::Movie *movie = new Okular::Movie(mTextDocument->loadResource(EpubDocument::MovieResource, QUrl(lnk)).toString()); 0270 movie->setSize(videoSize); 0271 movie->setShowControls(true); 0272 0273 Okular::MovieAnnotation *annot = new Okular::MovieAnnotation; 0274 annot->setMovie(movie); 0275 0276 movieAnnots.push_back(annot); 0277 QDomDocument tempDoc; 0278 tempDoc.setContent(QStringLiteral("<pre><video></video></pre>")); 0279 videoTags.at(0).parentNode().replaceChild(tempDoc.documentElement(), videoTags.at(0)); 0280 } 0281 } 0282 0283 // handle embedded audio 0284 QDomNodeList audioTags = dom.elementsByTagName(QStringLiteral("audio")); 0285 while (!audioTags.isEmpty()) { 0286 QDomElement element = audioTags.at(0).toElement(); 0287 bool repeat = element.hasAttribute(QStringLiteral("loop")); 0288 QString lnk = element.attribute(QStringLiteral("src")); 0289 0290 Okular::Sound *sound = new Okular::Sound(mTextDocument->loadResource(EpubDocument::AudioResource, QUrl(lnk)).toByteArray()); 0291 0292 Okular::SoundAction *soundAction = new Okular::SoundAction(1.0, true, repeat, false, sound); 0293 soundActions.push_back(soundAction); 0294 0295 QDomDocument tempDoc; 0296 tempDoc.setContent(QStringLiteral("<pre><audio></audio></pre>")); 0297 audioTags.at(0).parentNode().replaceChild(tempDoc.documentElement(), audioTags.at(0)); 0298 } 0299 htmlContent = dom.toString(); 0300 } 0301 0302 // HACK BEGIN 0303 qApp->setPalette(p); 0304 // HACK END 0305 0306 QTextBlock before; 0307 if (firstPage) { 0308 mTextDocument->setHtml(htmlContent); 0309 firstPage = false; 0310 before = mTextDocument->begin(); 0311 } else { 0312 before = _cursor->block(); 0313 _cursor->insertHtml(htmlContent); 0314 } 0315 // HACK BEGIN 0316 qApp->setPalette(orig); 0317 // HACK END 0318 0319 QTextCursor csr(before); // a temporary cursor pointing at the begin of the last inserted block 0320 int index = 0; 0321 0322 while (!movieAnnots.isEmpty() && !(csr = mTextDocument->find(QStringLiteral("<video></video>"), csr)).isNull()) { 0323 const int posStart = csr.position(); 0324 const QPoint startPoint = calculateXYPosition(mTextDocument, posStart); 0325 QImage img(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("okular/pics/okular-epub-movie.png"))); 0326 img = img.scaled(videoSize); 0327 csr.insertImage(img); 0328 const int posEnd = csr.position(); 0329 const QRect videoRect(startPoint, videoSize); 0330 movieAnnots[index]->setBoundingRectangle(Okular::NormalizedRect(videoRect, mTextDocument->pageSize().width(), mTextDocument->pageSize().height())); 0331 Q_EMIT addAnnotation(movieAnnots[index++], posStart, posEnd); 0332 csr.movePosition(QTextCursor::NextWord); 0333 } 0334 0335 csr = QTextCursor(before); 0336 index = 0; 0337 const QString keyToSearch(QStringLiteral("<audio></audio>")); 0338 while (!soundActions.isEmpty() && !(csr = mTextDocument->find(keyToSearch, csr)).isNull()) { 0339 const int posStart = csr.position() - keyToSearch.size(); 0340 const QImage img(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("okular/pics/okular-epub-sound-icon.png"))); 0341 csr.insertImage(img); 0342 const int posEnd = csr.position(); 0343 qDebug() << posStart << posEnd; 0344 ; 0345 Q_EMIT addAction(soundActions[index++], posStart, posEnd); 0346 csr.movePosition(QTextCursor::NextWord); 0347 } 0348 0349 mSectionMap.insert(link, before); 0350 0351 _handle_anchors(before, link); 0352 0353 const int page = mTextDocument->pageCount(); 0354 0355 // it will clear the previous format 0356 // useful when the last line had a bullet 0357 _cursor->insertBlock(QTextBlockFormat()); 0358 0359 while (mTextDocument->pageCount() == page) { 0360 _cursor->insertText(QStringLiteral("\n")); 0361 } 0362 0363 } while (epub_it_get_next(it)); 0364 0365 epub_free_iterator(it); 0366 0367 // handle toc 0368 struct titerator *tit; 0369 0370 // FIXME: support other method beside NAVMAP and GUIDE 0371 tit = epub_get_titerator(mTextDocument->getEpub(), TITERATOR_NAVMAP, 0); 0372 if (!tit) { 0373 tit = epub_get_titerator(mTextDocument->getEpub(), TITERATOR_GUIDE, 0); 0374 } 0375 0376 if (tit) { 0377 do { 0378 if (epub_tit_curr_valid(tit)) { 0379 char *clink = epub_tit_get_curr_link(tit); 0380 QString link = QString::fromUtf8(clink); 0381 char *label = epub_tit_get_curr_label(tit); 0382 QTextBlock block = mTextDocument->begin(); // must point somewhere 0383 0384 if (mSectionMap.contains(link)) { 0385 block = mSectionMap.value(link); 0386 } else { 0387 const QString percentDecodedLink = QUrl::fromPercentEncoding(link.toUtf8()); 0388 if (mSectionMap.contains(percentDecodedLink)) { 0389 block = mSectionMap.value(percentDecodedLink); 0390 } else { // load missing resource 0391 char *data = nullptr; 0392 // epub_get_data can't handle whitespace url encodings 0393 QByteArray ba = link.replace(QLatin1String("%20"), QLatin1String(" ")).toLatin1(); 0394 const char *clinkClean = ba.data(); 0395 int size = epub_get_data(mTextDocument->getEpub(), clinkClean, &data); 0396 0397 if (data) { 0398 _cursor->insertBlock(); 0399 0400 // try to load as image and if not load as html 0401 block = _cursor->block(); 0402 QImage image; 0403 mSectionMap.insert(link, block); 0404 if (image.loadFromData((unsigned char *)data, size)) { 0405 mTextDocument->addResource(QTextDocument::ImageResource, QUrl(link), image); 0406 _cursor->insertImage(link); 0407 } else { 0408 _cursor->insertHtml(QString::fromUtf8(data)); 0409 // Add anchors to hashes 0410 _handle_anchors(block, link); 0411 } 0412 0413 // Start new file in a new page 0414 int page = mTextDocument->pageCount(); 0415 while (mTextDocument->pageCount() == page) { 0416 _cursor->insertText(QStringLiteral("\n")); 0417 } 0418 } 0419 0420 free(data); 0421 } 0422 } 0423 0424 if (block.isValid()) { // be sure we actually got a block 0425 Q_EMIT addTitle(epub_tit_get_curr_depth(tit), QString::fromUtf8(label), block); 0426 } else { 0427 qDebug() << "Error: no block found for" << link; 0428 } 0429 0430 if (clink) { 0431 free(clink); 0432 } 0433 if (label) { 0434 free(label); 0435 } 0436 } 0437 } while (epub_tit_next(tit)); 0438 0439 epub_free_titerator(tit); 0440 } else { 0441 qDebug() << "no toc found"; 0442 } 0443 0444 // adding link actions 0445 QHashIterator<QString, QVector<QPair<int, int>>> hit(mLocalLinks); 0446 while (hit.hasNext()) { 0447 hit.next(); 0448 0449 const QTextBlock block = mSectionMap.value(hit.key()); 0450 0451 for (int i = 0; i < hit.value().size(); ++i) { 0452 if (block.isValid()) { // be sure we actually got a block 0453 Okular::DocumentViewport viewport = calculateViewport(mTextDocument, block); 0454 0455 Okular::GotoAction *action = new Okular::GotoAction(QString(), viewport); 0456 0457 Q_EMIT addAction(action, hit.value()[i].first, hit.value()[i].second); 0458 } else { 0459 qDebug() << "Error: no block found for " << hit.key(); 0460 } 0461 } 0462 } 0463 0464 delete _cursor; 0465 0466 return mTextDocument; 0467 }