File indexing completed on 2024-05-19 04:35:09
0001 /* 0002 SPDX-FileCopyrightText: 2005 Piotr SzymaĆski <niedakh@gmail.com> 0003 SPDX-FileCopyrightText: 2008 Albert Astals Cid <aacid@kde.org> 0004 0005 SPDX-License-Identifier: GPL-2.0-or-later 0006 */ 0007 0008 #include "generator_chm.h" 0009 0010 #include <QDomElement> 0011 #include <QEventLoop> 0012 #include <QMutex> 0013 #include <QPainter> 0014 0015 #include <KAboutData> 0016 #include <KHTMLView> 0017 #include <KLocalizedString> 0018 #include <QUrl> 0019 #include <dom/dom_html.h> 0020 #include <dom/dom_node.h> 0021 #include <dom/html_misc.h> 0022 #include <khtml_part.h> 0023 0024 #include <core/action.h> 0025 #include <core/page.h> 0026 #include <core/textpage.h> 0027 #include <core/utils.h> 0028 0029 OKULAR_EXPORT_PLUGIN(CHMGenerator, "libokularGenerator_chmlib.json") 0030 0031 static QString absolutePath(const QString &baseUrl, const QString &path) 0032 { 0033 QString absPath; 0034 if (path.startsWith(QLatin1Char('/'))) { 0035 // already absolute 0036 absPath = path; 0037 } else { 0038 QUrl url = QUrl::fromLocalFile(baseUrl).adjusted(QUrl::RemoveFilename); 0039 url.setPath(url.path() + path); 0040 absPath = url.toLocalFile(); 0041 } 0042 return absPath; 0043 } 0044 0045 CHMGenerator::CHMGenerator(QObject *parent, const QVariantList &args) 0046 : Okular::Generator(parent, args) 0047 { 0048 setFeature(TextExtraction); 0049 0050 m_syncGen = nullptr; 0051 m_file = nullptr; 0052 m_request = nullptr; 0053 } 0054 0055 CHMGenerator::~CHMGenerator() 0056 { 0057 delete m_syncGen; 0058 } 0059 0060 bool CHMGenerator::loadDocument(const QString &fileName, QVector<Okular::Page *> &pagesVector) 0061 { 0062 m_file = EBook::loadFile(fileName); 0063 if (!m_file) { 0064 return false; 0065 } 0066 m_fileName = fileName; 0067 QList<EBookTocEntry> topics; 0068 m_file->getTableOfContents(topics); 0069 0070 // fill m_docSyn 0071 QMap<int, QDomElement> lastIndentElement; 0072 QMap<QString, int> tmpPageList; 0073 int pageNum = 0; 0074 0075 for (const EBookTocEntry &e : std::as_const(topics)) { 0076 QDomElement item = m_docSyn.createElement(e.name); 0077 if (!e.url.isEmpty()) { 0078 QString url = e.url.toString(); 0079 item.setAttribute(QStringLiteral("ViewportName"), url); 0080 if (!tmpPageList.contains(url)) { // add a page only once 0081 tmpPageList.insert(url, pageNum); 0082 pageNum++; 0083 } 0084 } 0085 item.setAttribute(QStringLiteral("Icon"), e.iconid); 0086 if (e.indent == 0) { 0087 m_docSyn.appendChild(item); 0088 } else { 0089 lastIndentElement[e.indent - 1].appendChild(item); 0090 } 0091 lastIndentElement[e.indent] = item; 0092 } 0093 0094 // fill m_urlPage and m_pageUrl 0095 QList<QUrl> pageList; 0096 m_file->enumerateFiles(pageList); 0097 const QUrl home = m_file->homeUrl(); 0098 if (home.path() != QLatin1String("/")) { 0099 pageList.prepend(home); 0100 } 0101 m_pageUrl.resize(pageNum); 0102 0103 for (const QUrl &qurl : std::as_const(pageList)) { 0104 QString url = qurl.toString(); 0105 const QString urlLower = url.toLower(); 0106 if (!urlLower.endsWith(QLatin1String(".html")) && !urlLower.endsWith(QLatin1String(".htm"))) { 0107 continue; 0108 } 0109 0110 int pos = url.indexOf(QLatin1Char(('#'))); 0111 // insert the url into the maps, but insert always the variant without the #ref part 0112 QString tmpUrl = pos == -1 ? url : url.left(pos); 0113 0114 // url already there, abort insertion 0115 if (m_urlPage.contains(tmpUrl)) { 0116 continue; 0117 } 0118 0119 int foundPage = tmpPageList.value(tmpUrl, -1); 0120 if (foundPage != -1) { 0121 m_urlPage.insert(tmpUrl, foundPage); 0122 m_pageUrl[foundPage] = tmpUrl; 0123 } else { 0124 // add pages not present in toc 0125 m_urlPage.insert(tmpUrl, pageNum); 0126 m_pageUrl.append(tmpUrl); 0127 pageNum++; 0128 } 0129 } 0130 0131 pagesVector.resize(m_pageUrl.count()); 0132 m_textpageAddedList.fill(false, pagesVector.count()); 0133 m_rectsGenerated.fill(false, pagesVector.count()); 0134 0135 if (!m_syncGen) { 0136 m_syncGen = new KHTMLPart(); 0137 } 0138 disconnect(m_syncGen, nullptr, this, nullptr); 0139 0140 for (int i = 0; i < m_pageUrl.count(); ++i) { 0141 preparePageForSyncOperation(m_pageUrl.at(i)); 0142 pagesVector[i] = new Okular::Page(i, m_syncGen->view()->contentsWidth(), m_syncGen->view()->contentsHeight(), Okular::Rotation0); 0143 } 0144 0145 connect(m_syncGen, QOverload<>::of(&KHTMLPart::completed), this, &CHMGenerator::slotCompleted); 0146 connect(m_syncGen, &KParts::ReadOnlyPart::canceled, this, &CHMGenerator::slotCompleted); 0147 0148 return true; 0149 } 0150 0151 bool CHMGenerator::doCloseDocument() 0152 { 0153 // delete the document information of the old document 0154 delete m_file; 0155 m_file = nullptr; 0156 m_textpageAddedList.clear(); 0157 m_rectsGenerated.clear(); 0158 m_urlPage.clear(); 0159 m_pageUrl.clear(); 0160 m_docSyn.clear(); 0161 if (m_syncGen) { 0162 m_syncGen->closeUrl(); 0163 } 0164 0165 return true; 0166 } 0167 0168 void CHMGenerator::preparePageForSyncOperation(const QString &url) 0169 { 0170 QString pAddress = QStringLiteral("ms-its:") + m_fileName + QStringLiteral("::") + m_file->urlToPath(QUrl(url)); 0171 m_chmUrl = url; 0172 0173 m_syncGen->openUrl(QUrl(pAddress)); 0174 m_syncGen->view()->layout(); 0175 0176 QEventLoop loop; 0177 connect(m_syncGen, QOverload<>::of(&KHTMLPart::completed), &loop, &QEventLoop::quit); 0178 connect(m_syncGen, &KParts::ReadOnlyPart::canceled, &loop, &QEventLoop::quit); 0179 // discard any user input, otherwise it breaks the "synchronicity" of this 0180 // function 0181 loop.exec(QEventLoop::ExcludeUserInputEvents); 0182 } 0183 0184 void CHMGenerator::slotCompleted() 0185 { 0186 if (!m_request) { 0187 return; 0188 } 0189 0190 QImage image(m_request->width(), m_request->height(), QImage::Format_ARGB32); 0191 image.fill(Qt::white); 0192 0193 QPainter p(&image); 0194 QRect r(0, 0, m_request->width(), m_request->height()); 0195 0196 bool moreToPaint; 0197 m_syncGen->paint(&p, r, 0, &moreToPaint); 0198 0199 p.end(); 0200 0201 if (!m_textpageAddedList.at(m_request->pageNumber())) { 0202 additionalRequestData(); 0203 m_textpageAddedList[m_request->pageNumber()] = true; 0204 } 0205 0206 m_syncGen->closeUrl(); 0207 m_chmUrl = QString(); 0208 0209 userMutex()->unlock(); 0210 0211 Okular::PixmapRequest *req = m_request; 0212 m_request = nullptr; 0213 0214 if (!req->page()->isBoundingBoxKnown()) { 0215 updatePageBoundingBox(req->page()->number(), Okular::Utils::imageBoundingBox(&image)); 0216 } 0217 req->page()->setPixmap(req->observer(), new QPixmap(QPixmap::fromImage(image))); 0218 signalPixmapRequestDone(req); 0219 } 0220 0221 Okular::DocumentInfo CHMGenerator::generateDocumentInfo(const QSet<Okular::DocumentInfo::Key> &keys) const 0222 { 0223 Okular::DocumentInfo docInfo; 0224 if (keys.contains(Okular::DocumentInfo::MimeType)) { 0225 docInfo.set(Okular::DocumentInfo::MimeType, QStringLiteral("application/x-chm")); 0226 } 0227 if (keys.contains(Okular::DocumentInfo::Title)) { 0228 docInfo.set(Okular::DocumentInfo::Title, m_file->title()); 0229 } 0230 return docInfo; 0231 } 0232 0233 const Okular::DocumentSynopsis *CHMGenerator::generateDocumentSynopsis() 0234 { 0235 return &m_docSyn; 0236 } 0237 0238 bool CHMGenerator::canGeneratePixmap() const 0239 { 0240 bool isLocked = true; 0241 if (userMutex()->tryLock()) { 0242 userMutex()->unlock(); 0243 isLocked = false; 0244 } 0245 0246 return !isLocked; 0247 } 0248 0249 void CHMGenerator::generatePixmap(Okular::PixmapRequest *request) 0250 { 0251 int requestWidth = request->width(); 0252 int requestHeight = request->height(); 0253 0254 userMutex()->lock(); 0255 QString url = m_pageUrl[request->pageNumber()]; 0256 0257 QString pAddress = QStringLiteral("ms-its:") + m_fileName + QStringLiteral("::") + m_file->urlToPath(QUrl(url)); 0258 m_chmUrl = url; 0259 m_syncGen->view()->resizeContents(requestWidth, requestHeight); 0260 m_request = request; 0261 // will Q_EMIT openURL without problems 0262 m_syncGen->openUrl(QUrl(pAddress)); 0263 } 0264 0265 void CHMGenerator::recursiveExploreNodes(DOM::Node node, Okular::TextPage *tp) 0266 { 0267 if (node.nodeType() == DOM::Node::TEXT_NODE && !node.getRect().isNull()) { 0268 QString nodeText = node.nodeValue().string(); 0269 QRect r = node.getRect(); 0270 int vWidth = m_syncGen->view()->width(); 0271 int vHeight = m_syncGen->view()->height(); 0272 Okular::NormalizedRect *nodeNormRect; 0273 #define NOEXP 0274 #ifndef NOEXP 0275 int x, y, height; 0276 int x_next, y_next, height_next; 0277 int nodeTextLength = nodeText.length(); 0278 if (nodeTextLength == 1) { 0279 nodeNormRect = new Okular::NormalizedRect(r, vWidth, vHeight); 0280 tp->append(nodeText, nodeNormRect /*, nodeNormRect->bottom, 0, (nodeText == "\n")*/); 0281 } else { 0282 for (int i = 0; i < nodeTextLength; i++) { 0283 node.getCursor(i, x, y, height); 0284 if (i == 0) 0285 // i is 0, use left rect boundary 0286 { 0287 // if (nodeType[i+1] 0288 node.getCursor(i + 1, x_next, y_next, height_next); 0289 nodeNormRect = new Okular::NormalizedRect(QRect(x, y, x_next - x - 1, height), vWidth, vHeight); 0290 } else if (i < nodeTextLength - 1) 0291 // i is between zero and the last element 0292 { 0293 node.getCursor(i + 1, x_next, y_next, height_next); 0294 nodeNormRect = new Okular::NormalizedRect(QRect(x, y, x_next - x - 1, height), vWidth, vHeight); 0295 } else 0296 // the last element use right rect boundary 0297 { 0298 node.getCursor(i - 1, x_next, y_next, height_next); 0299 } 0300 } 0301 } 0302 #else 0303 nodeNormRect = new Okular::NormalizedRect(r, vWidth, vHeight); 0304 tp->append(nodeText, nodeNormRect /*,0*/); 0305 #endif 0306 } 0307 DOM::Node child = node.firstChild(); 0308 while (!child.isNull()) { 0309 recursiveExploreNodes(child, tp); 0310 child = child.nextSibling(); 0311 } 0312 } 0313 0314 void CHMGenerator::additionalRequestData() 0315 { 0316 Okular::Page *page = m_request->page(); 0317 const bool genObjectRects = !m_rectsGenerated.at(m_request->page()->number()); 0318 const bool genTextPage = !m_request->page()->hasTextPage() && genObjectRects; 0319 0320 if (genObjectRects || genTextPage) { 0321 DOM::HTMLDocument domDoc = m_syncGen->htmlDocument(); 0322 // only generate object info when generating a full page not a thumbnail 0323 if (genObjectRects) { 0324 QList<Okular::ObjectRect *> objRects; 0325 int xScale = m_syncGen->view()->width(); 0326 int yScale = m_syncGen->view()->height(); 0327 // getting links 0328 DOM::HTMLCollection coll = domDoc.links(); 0329 DOM::Node n; 0330 QRect r; 0331 if (!coll.isNull()) { 0332 int size = coll.length(); 0333 for (int i = 0; i < size; i++) { 0334 n = coll.item(i); 0335 if (!n.isNull()) { 0336 QString url = n.attributes().getNamedItem("href").nodeValue().string(); 0337 r = n.getRect(); 0338 // there is no way for us to support javascript properly 0339 if (url.startsWith(QLatin1String("JavaScript:")), Qt::CaseInsensitive) { 0340 continue; 0341 } else if (url.contains(QStringLiteral(":"))) { 0342 objRects.push_back(new Okular::ObjectRect(Okular::NormalizedRect(r, xScale, yScale), false, Okular::ObjectRect::Action, new Okular::BrowseAction(QUrl(url)))); 0343 } else { 0344 Okular::DocumentViewport viewport(metaData(QStringLiteral("NamedViewport"), absolutePath(m_chmUrl, url)).toString()); 0345 objRects.push_back(new Okular::ObjectRect(Okular::NormalizedRect(r, xScale, yScale), false, Okular::ObjectRect::Action, new Okular::GotoAction(QString(), viewport))); 0346 } 0347 } 0348 } 0349 } 0350 0351 // getting images 0352 coll = domDoc.images(); 0353 if (!coll.isNull()) { 0354 int size = coll.length(); 0355 for (int i = 0; i < size; i++) { 0356 n = coll.item(i); 0357 if (!n.isNull()) { 0358 objRects.push_back(new Okular::ObjectRect(Okular::NormalizedRect(n.getRect(), xScale, yScale), false, Okular::ObjectRect::Image, nullptr)); 0359 } 0360 } 0361 } 0362 m_request->page()->setObjectRects(objRects); 0363 m_rectsGenerated[m_request->page()->number()] = true; 0364 } 0365 0366 if (genTextPage) { 0367 Okular::TextPage *tp = new Okular::TextPage(); 0368 recursiveExploreNodes(domDoc, tp); 0369 page->setTextPage(tp); 0370 } 0371 } 0372 } 0373 0374 Okular::TextPage *CHMGenerator::textPage(Okular::TextRequest *request) 0375 { 0376 userMutex()->lock(); 0377 0378 const Okular::Page *page = request->page(); 0379 m_syncGen->view()->resize(page->width(), page->height()); 0380 0381 preparePageForSyncOperation(m_pageUrl[page->number()]); 0382 Okular::TextPage *tp = new Okular::TextPage(); 0383 recursiveExploreNodes(m_syncGen->htmlDocument(), tp); 0384 userMutex()->unlock(); 0385 return tp; 0386 } 0387 0388 QVariant CHMGenerator::metaData(const QString &key, const QVariant &option) const 0389 { 0390 if (key == QLatin1String("NamedViewport") && !option.toString().isEmpty()) { 0391 const int pos = option.toString().indexOf(QLatin1Char('#')); 0392 QString tmpUrl = pos == -1 ? option.toString() : option.toString().left(pos); 0393 Okular::DocumentViewport viewport; 0394 QMap<QString, int>::const_iterator it = m_urlPage.find(tmpUrl); 0395 if (it != m_urlPage.end()) { 0396 viewport.pageNumber = it.value(); 0397 return viewport.toString(); 0398 } 0399 0400 } else if (key == QLatin1String("DocumentTitle")) { 0401 return m_file->title(); 0402 } 0403 return QVariant(); 0404 } 0405 0406 /* kate: replace-tabs on; tab-width 4; */ 0407 0408 #include "generator_chm.moc"