File indexing completed on 2024-05-12 16:06:33

0001 /*
0002     SPDX-FileCopyrightText: 2005 Piotr Szymański <niedakh@gmail.com>
0003     SPDX-FileCopyrightText: 2008 Albert Astals Cid <aacid@kde.org>
0004 
0005     SPDX-License-Identifier: GPL-2.0-or-later
0006 */
0007 
0008 #include "generator_chm.h"
0009 
0010 #include <QDomElement>
0011 #include <QEventLoop>
0012 #include <QMutex>
0013 #include <QPainter>
0014 
0015 #include <KAboutData>
0016 #include <KHTMLView>
0017 #include <KLocalizedString>
0018 #include <QUrl>
0019 #include <dom/dom_html.h>
0020 #include <dom/dom_node.h>
0021 #include <dom/html_misc.h>
0022 #include <khtml_part.h>
0023 
0024 #include <core/action.h>
0025 #include <core/page.h>
0026 #include <core/textpage.h>
0027 #include <core/utils.h>
0028 
0029 OKULAR_EXPORT_PLUGIN(CHMGenerator, "libokularGenerator_chmlib.json")
0030 
0031 static QString absolutePath(const QString &baseUrl, const QString &path)
0032 {
0033     QString absPath;
0034     if (path.startsWith(QLatin1Char('/'))) {
0035         // already absolute
0036         absPath = path;
0037     } else {
0038         QUrl url = QUrl::fromLocalFile(baseUrl).adjusted(QUrl::RemoveFilename);
0039         url.setPath(url.path() + path);
0040         absPath = url.toLocalFile();
0041     }
0042     return absPath;
0043 }
0044 
0045 CHMGenerator::CHMGenerator(QObject *parent, const QVariantList &args)
0046     : Okular::Generator(parent, args)
0047 {
0048     setFeature(TextExtraction);
0049 
0050     m_syncGen = nullptr;
0051     m_file = nullptr;
0052     m_request = nullptr;
0053 }
0054 
0055 CHMGenerator::~CHMGenerator()
0056 {
0057     delete m_syncGen;
0058 }
0059 
0060 bool CHMGenerator::loadDocument(const QString &fileName, QVector<Okular::Page *> &pagesVector)
0061 {
0062     m_file = EBook::loadFile(fileName);
0063     if (!m_file) {
0064         return false;
0065     }
0066     m_fileName = fileName;
0067     QList<EBookTocEntry> topics;
0068     m_file->getTableOfContents(topics);
0069 
0070     // fill m_docSyn
0071     QMap<int, QDomElement> lastIndentElement;
0072     QMap<QString, int> tmpPageList;
0073     int pageNum = 0;
0074 
0075     for (const EBookTocEntry &e : std::as_const(topics)) {
0076         QDomElement item = m_docSyn.createElement(e.name);
0077         if (!e.url.isEmpty()) {
0078             QString url = e.url.toString();
0079             item.setAttribute(QStringLiteral("ViewportName"), url);
0080             if (!tmpPageList.contains(url)) { // add a page only once
0081                 tmpPageList.insert(url, pageNum);
0082                 pageNum++;
0083             }
0084         }
0085         item.setAttribute(QStringLiteral("Icon"), e.iconid);
0086         if (e.indent == 0) {
0087             m_docSyn.appendChild(item);
0088         } else {
0089             lastIndentElement[e.indent - 1].appendChild(item);
0090         }
0091         lastIndentElement[e.indent] = item;
0092     }
0093 
0094     // fill m_urlPage and m_pageUrl
0095     QList<QUrl> pageList;
0096     m_file->enumerateFiles(pageList);
0097     const QUrl home = m_file->homeUrl();
0098     if (home.path() != QLatin1String("/")) {
0099         pageList.prepend(home);
0100     }
0101     m_pageUrl.resize(pageNum);
0102 
0103     for (const QUrl &qurl : std::as_const(pageList)) {
0104         QString url = qurl.toString();
0105         const QString urlLower = url.toLower();
0106         if (!urlLower.endsWith(QLatin1String(".html")) && !urlLower.endsWith(QLatin1String(".htm"))) {
0107             continue;
0108         }
0109 
0110         int pos = url.indexOf(QLatin1Char(('#')));
0111         // insert the url into the maps, but insert always the variant without the #ref part
0112         QString tmpUrl = pos == -1 ? url : url.left(pos);
0113 
0114         // url already there, abort insertion
0115         if (m_urlPage.contains(tmpUrl)) {
0116             continue;
0117         }
0118 
0119         int foundPage = tmpPageList.value(tmpUrl, -1);
0120         if (foundPage != -1) {
0121             m_urlPage.insert(tmpUrl, foundPage);
0122             m_pageUrl[foundPage] = tmpUrl;
0123         } else {
0124             // add pages not present in toc
0125             m_urlPage.insert(tmpUrl, pageNum);
0126             m_pageUrl.append(tmpUrl);
0127             pageNum++;
0128         }
0129     }
0130 
0131     pagesVector.resize(m_pageUrl.count());
0132     m_textpageAddedList.fill(false, pagesVector.count());
0133     m_rectsGenerated.fill(false, pagesVector.count());
0134 
0135     if (!m_syncGen) {
0136         m_syncGen = new KHTMLPart();
0137     }
0138     disconnect(m_syncGen, nullptr, this, nullptr);
0139 
0140     for (int i = 0; i < m_pageUrl.count(); ++i) {
0141         preparePageForSyncOperation(m_pageUrl.at(i));
0142         pagesVector[i] = new Okular::Page(i, m_syncGen->view()->contentsWidth(), m_syncGen->view()->contentsHeight(), Okular::Rotation0);
0143     }
0144 
0145     connect(m_syncGen, QOverload<>::of(&KHTMLPart::completed), this, &CHMGenerator::slotCompleted);
0146     connect(m_syncGen, &KParts::ReadOnlyPart::canceled, this, &CHMGenerator::slotCompleted);
0147 
0148     return true;
0149 }
0150 
0151 bool CHMGenerator::doCloseDocument()
0152 {
0153     // delete the document information of the old document
0154     delete m_file;
0155     m_file = nullptr;
0156     m_textpageAddedList.clear();
0157     m_rectsGenerated.clear();
0158     m_urlPage.clear();
0159     m_pageUrl.clear();
0160     m_docSyn.clear();
0161     if (m_syncGen) {
0162         m_syncGen->closeUrl();
0163     }
0164 
0165     return true;
0166 }
0167 
0168 void CHMGenerator::preparePageForSyncOperation(const QString &url)
0169 {
0170     QString pAddress = QStringLiteral("ms-its:") + m_fileName + QStringLiteral("::") + m_file->urlToPath(QUrl(url));
0171     m_chmUrl = url;
0172 
0173     m_syncGen->openUrl(QUrl(pAddress));
0174     m_syncGen->view()->layout();
0175 
0176     QEventLoop loop;
0177     connect(m_syncGen, QOverload<>::of(&KHTMLPart::completed), &loop, &QEventLoop::quit);
0178     connect(m_syncGen, &KParts::ReadOnlyPart::canceled, &loop, &QEventLoop::quit);
0179     // discard any user input, otherwise it breaks the "synchronicity" of this
0180     // function
0181     loop.exec(QEventLoop::ExcludeUserInputEvents);
0182 }
0183 
0184 void CHMGenerator::slotCompleted()
0185 {
0186     if (!m_request) {
0187         return;
0188     }
0189 
0190     QImage image(m_request->width(), m_request->height(), QImage::Format_ARGB32);
0191     image.fill(Qt::white);
0192 
0193     QPainter p(&image);
0194     QRect r(0, 0, m_request->width(), m_request->height());
0195 
0196     bool moreToPaint;
0197     m_syncGen->paint(&p, r, 0, &moreToPaint);
0198 
0199     p.end();
0200 
0201     if (!m_textpageAddedList.at(m_request->pageNumber())) {
0202         additionalRequestData();
0203         m_textpageAddedList[m_request->pageNumber()] = true;
0204     }
0205 
0206     m_syncGen->closeUrl();
0207     m_chmUrl = QString();
0208 
0209     userMutex()->unlock();
0210 
0211     Okular::PixmapRequest *req = m_request;
0212     m_request = nullptr;
0213 
0214     if (!req->page()->isBoundingBoxKnown()) {
0215         updatePageBoundingBox(req->page()->number(), Okular::Utils::imageBoundingBox(&image));
0216     }
0217     req->page()->setPixmap(req->observer(), new QPixmap(QPixmap::fromImage(image)));
0218     signalPixmapRequestDone(req);
0219 }
0220 
0221 Okular::DocumentInfo CHMGenerator::generateDocumentInfo(const QSet<Okular::DocumentInfo::Key> &keys) const
0222 {
0223     Okular::DocumentInfo docInfo;
0224     if (keys.contains(Okular::DocumentInfo::MimeType)) {
0225         docInfo.set(Okular::DocumentInfo::MimeType, QStringLiteral("application/x-chm"));
0226     }
0227     if (keys.contains(Okular::DocumentInfo::Title)) {
0228         docInfo.set(Okular::DocumentInfo::Title, m_file->title());
0229     }
0230     return docInfo;
0231 }
0232 
0233 const Okular::DocumentSynopsis *CHMGenerator::generateDocumentSynopsis()
0234 {
0235     return &m_docSyn;
0236 }
0237 
0238 bool CHMGenerator::canGeneratePixmap() const
0239 {
0240     bool isLocked = true;
0241     if (userMutex()->tryLock()) {
0242         userMutex()->unlock();
0243         isLocked = false;
0244     }
0245 
0246     return !isLocked;
0247 }
0248 
0249 void CHMGenerator::generatePixmap(Okular::PixmapRequest *request)
0250 {
0251     int requestWidth = request->width();
0252     int requestHeight = request->height();
0253 
0254     userMutex()->lock();
0255     QString url = m_pageUrl[request->pageNumber()];
0256 
0257     QString pAddress = QStringLiteral("ms-its:") + m_fileName + QStringLiteral("::") + m_file->urlToPath(QUrl(url));
0258     m_chmUrl = url;
0259     m_syncGen->view()->resizeContents(requestWidth, requestHeight);
0260     m_request = request;
0261     // will Q_EMIT openURL without problems
0262     m_syncGen->openUrl(QUrl(pAddress));
0263 }
0264 
0265 void CHMGenerator::recursiveExploreNodes(DOM::Node node, Okular::TextPage *tp)
0266 {
0267     if (node.nodeType() == DOM::Node::TEXT_NODE && !node.getRect().isNull()) {
0268         QString nodeText = node.nodeValue().string();
0269         QRect r = node.getRect();
0270         int vWidth = m_syncGen->view()->width();
0271         int vHeight = m_syncGen->view()->height();
0272         Okular::NormalizedRect *nodeNormRect;
0273 #define NOEXP
0274 #ifndef NOEXP
0275         int x, y, height;
0276         int x_next, y_next, height_next;
0277         int nodeTextLength = nodeText.length();
0278         if (nodeTextLength == 1) {
0279             nodeNormRect = new Okular::NormalizedRect(r, vWidth, vHeight);
0280             tp->append(nodeText, nodeNormRect /*, nodeNormRect->bottom, 0, (nodeText == "\n")*/);
0281         } else {
0282             for (int i = 0; i < nodeTextLength; i++) {
0283                 node.getCursor(i, x, y, height);
0284                 if (i == 0)
0285                 // i is 0, use left rect boundary
0286                 {
0287                     //                     if (nodeType[i+1]
0288                     node.getCursor(i + 1, x_next, y_next, height_next);
0289                     nodeNormRect = new Okular::NormalizedRect(QRect(x, y, x_next - x - 1, height), vWidth, vHeight);
0290                 } else if (i < nodeTextLength - 1)
0291                 // i is between zero and the last element
0292                 {
0293                     node.getCursor(i + 1, x_next, y_next, height_next);
0294                     nodeNormRect = new Okular::NormalizedRect(QRect(x, y, x_next - x - 1, height), vWidth, vHeight);
0295                 } else
0296                 // the last element use right rect boundary
0297                 {
0298                     node.getCursor(i - 1, x_next, y_next, height_next);
0299                 }
0300             }
0301         }
0302 #else
0303         nodeNormRect = new Okular::NormalizedRect(r, vWidth, vHeight);
0304         tp->append(nodeText, nodeNormRect /*,0*/);
0305 #endif
0306     }
0307     DOM::Node child = node.firstChild();
0308     while (!child.isNull()) {
0309         recursiveExploreNodes(child, tp);
0310         child = child.nextSibling();
0311     }
0312 }
0313 
0314 void CHMGenerator::additionalRequestData()
0315 {
0316     Okular::Page *page = m_request->page();
0317     const bool genObjectRects = !m_rectsGenerated.at(m_request->page()->number());
0318     const bool genTextPage = !m_request->page()->hasTextPage() && genObjectRects;
0319 
0320     if (genObjectRects || genTextPage) {
0321         DOM::HTMLDocument domDoc = m_syncGen->htmlDocument();
0322         // only generate object info when generating a full page not a thumbnail
0323         if (genObjectRects) {
0324             QList<Okular::ObjectRect *> objRects;
0325             int xScale = m_syncGen->view()->width();
0326             int yScale = m_syncGen->view()->height();
0327             // getting links
0328             DOM::HTMLCollection coll = domDoc.links();
0329             DOM::Node n;
0330             QRect r;
0331             if (!coll.isNull()) {
0332                 int size = coll.length();
0333                 for (int i = 0; i < size; i++) {
0334                     n = coll.item(i);
0335                     if (!n.isNull()) {
0336                         QString url = n.attributes().getNamedItem("href").nodeValue().string();
0337                         r = n.getRect();
0338                         // there is no way for us to support javascript properly
0339                         if (url.startsWith(QLatin1String("JavaScript:")), Qt::CaseInsensitive) {
0340                             continue;
0341                         } else if (url.contains(QStringLiteral(":"))) {
0342                             objRects.push_back(new Okular::ObjectRect(Okular::NormalizedRect(r, xScale, yScale), false, Okular::ObjectRect::Action, new Okular::BrowseAction(QUrl(url))));
0343                         } else {
0344                             Okular::DocumentViewport viewport(metaData(QStringLiteral("NamedViewport"), absolutePath(m_chmUrl, url)).toString());
0345                             objRects.push_back(new Okular::ObjectRect(Okular::NormalizedRect(r, xScale, yScale), false, Okular::ObjectRect::Action, new Okular::GotoAction(QString(), viewport)));
0346                         }
0347                     }
0348                 }
0349             }
0350 
0351             // getting images
0352             coll = domDoc.images();
0353             if (!coll.isNull()) {
0354                 int size = coll.length();
0355                 for (int i = 0; i < size; i++) {
0356                     n = coll.item(i);
0357                     if (!n.isNull()) {
0358                         objRects.push_back(new Okular::ObjectRect(Okular::NormalizedRect(n.getRect(), xScale, yScale), false, Okular::ObjectRect::Image, nullptr));
0359                     }
0360                 }
0361             }
0362             m_request->page()->setObjectRects(objRects);
0363             m_rectsGenerated[m_request->page()->number()] = true;
0364         }
0365 
0366         if (genTextPage) {
0367             Okular::TextPage *tp = new Okular::TextPage();
0368             recursiveExploreNodes(domDoc, tp);
0369             page->setTextPage(tp);
0370         }
0371     }
0372 }
0373 
0374 Okular::TextPage *CHMGenerator::textPage(Okular::TextRequest *request)
0375 {
0376     userMutex()->lock();
0377 
0378     const Okular::Page *page = request->page();
0379     m_syncGen->view()->resize(page->width(), page->height());
0380 
0381     preparePageForSyncOperation(m_pageUrl[page->number()]);
0382     Okular::TextPage *tp = new Okular::TextPage();
0383     recursiveExploreNodes(m_syncGen->htmlDocument(), tp);
0384     userMutex()->unlock();
0385     return tp;
0386 }
0387 
0388 QVariant CHMGenerator::metaData(const QString &key, const QVariant &option) const
0389 {
0390     if (key == QLatin1String("NamedViewport") && !option.toString().isEmpty()) {
0391         const int pos = option.toString().indexOf(QLatin1Char('#'));
0392         QString tmpUrl = pos == -1 ? option.toString() : option.toString().left(pos);
0393         Okular::DocumentViewport viewport;
0394         QMap<QString, int>::const_iterator it = m_urlPage.find(tmpUrl);
0395         if (it != m_urlPage.end()) {
0396             viewport.pageNumber = it.value();
0397             return viewport.toString();
0398         }
0399 
0400     } else if (key == QLatin1String("DocumentTitle")) {
0401         return m_file->title();
0402     }
0403     return QVariant();
0404 }
0405 
0406 /* kate: replace-tabs on; tab-width 4; */
0407 
0408 #include "generator_chm.moc"