// File indexing completed on 2025-02-16 13:03:32
0001 #include "docbookxslt.h" 0002 #include "docbookxslt_p.h" 0003 0004 #ifdef Q_OS_WIN 0005 // one of the xslt/xml headers pulls in windows.h and breaks <limits> 0006 #define NOMINMAX 0007 #include <QHash> 0008 #endif 0009 0010 #include "../config-kdoctools.h" 0011 #include "loggingcategory.h" 0012 0013 #include <libxml/catalog.h> 0014 #include <libxml/parser.h> 0015 #include <libxml/parserInternals.h> 0016 #include <libxml/xmlIO.h> 0017 #include <libxml/xmlsave.h> 0018 #include <libxslt/transform.h> 0019 #include <libxslt/xsltInternals.h> 0020 #include <libxslt/xsltconfig.h> 0021 #include <libxslt/xsltutils.h> 0022 0023 #include <QByteArray> 0024 #include <QDir> 0025 #include <QFile> 0026 #include <QStandardPaths> 0027 #include <QString> 0028 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) 0029 #include <QTextCodec> 0030 #endif 0031 #include <QUrl> 0032 #include <QVector> 0033 0034 #if !defined(SIMPLE_XSLT) 0035 extern HelpProtocol *slave; 0036 #define INFO(x) \ 0037 if (slave) \ 0038 slave->infoMessage(x); 0039 #else 0040 #define INFO(x) 0041 #endif 0042 0043 int writeToQString(void *context, const char *buffer, int len) 0044 { 0045 QString *t = (QString *)context; 0046 *t += QString::fromUtf8(buffer, len); 0047 return len; 0048 } 0049 0050 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN) 0051 0052 #define MAX_PATHS 64 0053 xmlExternalEntityLoader defaultEntityLoader = NULL; 0054 static xmlChar *paths[MAX_PATHS + 1]; 0055 static int nbpaths = 0; 0056 static QHash<QString, QString> replaceURLList; 0057 0058 /* 0059 * Entity loading control and customization. 
0060 * taken from xsltproc.c 0061 */ 0062 static xmlParserInputPtr xsltprocExternalEntityLoader(const char *_URL, const char *ID, xmlParserCtxtPtr ctxt) 0063 { 0064 xmlParserInputPtr ret; 0065 warningSAXFunc warning = NULL; 0066 0067 // use local available dtd versions instead of fetching it every time from the internet 0068 QString url = QLatin1String(_URL); 0069 QHash<QString, QString>::const_iterator i; 0070 for (i = replaceURLList.constBegin(); i != replaceURLList.constEnd(); i++) { 0071 if (url.startsWith(i.key())) { 0072 url.replace(i.key(), i.value()); 0073 qCDebug(KDocToolsLog) << "converted" << _URL << "to" << url; 0074 } 0075 } 0076 char URL[1024]; 0077 strcpy(URL, url.toLatin1().constData()); 0078 0079 const char *lastsegment = URL; 0080 const char *iter = URL; 0081 0082 if (nbpaths > 0) { 0083 while (*iter != 0) { 0084 if (*iter == '/') { 0085 lastsegment = iter + 1; 0086 } 0087 iter++; 0088 } 0089 } 0090 0091 if ((ctxt != NULL) && (ctxt->sax != NULL)) { 0092 warning = ctxt->sax->warning; 0093 ctxt->sax->warning = NULL; 0094 } 0095 0096 if (defaultEntityLoader != NULL) { 0097 ret = defaultEntityLoader(URL, ID, ctxt); 0098 if (ret != NULL) { 0099 if (warning != NULL) { 0100 ctxt->sax->warning = warning; 0101 } 0102 qCDebug(KDocToolsLog) << "Loaded URL=\"" << URL << "\" ID=\"" << ID << "\""; 0103 return (ret); 0104 } 0105 } 0106 for (int i = 0; i < nbpaths; i++) { 0107 xmlChar *newURL; 0108 0109 newURL = xmlStrdup((const xmlChar *)paths[i]); 0110 newURL = xmlStrcat(newURL, (const xmlChar *)"/"); 0111 newURL = xmlStrcat(newURL, (const xmlChar *)lastsegment); 0112 if (newURL != NULL) { 0113 ret = defaultEntityLoader((const char *)newURL, ID, ctxt); 0114 if (ret != NULL) { 0115 if (warning != NULL) { 0116 ctxt->sax->warning = warning; 0117 } 0118 qCDebug(KDocToolsLog) << "Loaded URL=\"" << newURL << "\" ID=\"" << ID << "\""; 0119 xmlFree(newURL); 0120 return (ret); 0121 } 0122 xmlFree(newURL); 0123 } 0124 } 0125 if (warning != NULL) { 0126 ctxt->sax->warning 
= warning; 0127 if (URL != NULL) { 0128 warning(ctxt, "failed to load external entity \"%s\"\n", URL); 0129 } else if (ID != NULL) { 0130 warning(ctxt, "failed to load external entity \"%s\"\n", ID); 0131 } 0132 } 0133 return (NULL); 0134 } 0135 #endif 0136 0137 QString KDocTools::transform(const QString &pat, const QString &tss, const QVector<const char *> ¶ms) 0138 { 0139 QString parsed; 0140 0141 INFO(i18n("Parsing stylesheet")); 0142 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN) 0143 // prepare use of local available dtd versions instead of fetching every time from the internet 0144 // this approach is url based 0145 if (!defaultEntityLoader) { 0146 defaultEntityLoader = xmlGetExternalEntityLoader(); 0147 xmlSetExternalEntityLoader(xsltprocExternalEntityLoader); 0148 0149 replaceURLList[QLatin1String("http://www.oasis-open.org/docbook/xml/4.5")] = QString("file:///%1").arg(DOCBOOK_XML_CURRDTD); 0150 } 0151 #endif 0152 0153 xsltStylesheetPtr style_sheet = xsltParseStylesheetFile((const xmlChar *)QFile::encodeName(tss).constData()); 0154 0155 if (!style_sheet) { 0156 return parsed; 0157 } 0158 if (style_sheet->indent == 1) { 0159 xmlIndentTreeOutput = 1; 0160 } else { 0161 xmlIndentTreeOutput = 0; 0162 } 0163 0164 INFO(i18n("Parsing document")); 0165 0166 xmlParserCtxtPtr pctxt; 0167 0168 pctxt = xmlNewParserCtxt(); 0169 if (pctxt == nullptr) { 0170 return parsed; 0171 } 0172 0173 xmlDocPtr doc = xmlCtxtReadFile(pctxt, QFile::encodeName(pat).constData(), nullptr, XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_NONET); 0174 /* Clean the context pointer, now useless */ 0175 const bool context_valid = (pctxt->valid == 0); 0176 xmlFreeParserCtxt(pctxt); 0177 0178 /* Check both the returned doc (for parsing errors) and the context 0179 (for validation errors) */ 0180 if (doc == nullptr) { 0181 return parsed; 0182 } else { 0183 if (context_valid) { 0184 xmlFreeDoc(doc); 0185 return parsed; 0186 } 0187 } 0188 0189 INFO(i18n("Applying stylesheet")); 0190 QVector<const 
char *> p = params; 0191 p.append(nullptr); 0192 xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, const_cast<const char **>(&p[0])); 0193 xmlFreeDoc(doc); 0194 if (res != nullptr) { 0195 xmlOutputBufferPtr outp = xmlOutputBufferCreateIO(writeToQString, nullptr, &parsed, nullptr); 0196 outp->written = 0; 0197 INFO(i18n("Writing document")); 0198 xsltSaveResultTo(outp, res, style_sheet); 0199 xmlOutputBufferClose(outp); 0200 xmlFreeDoc(res); 0201 } 0202 xsltFreeStylesheet(style_sheet); 0203 0204 if (parsed.isEmpty()) { 0205 parsed = QLatin1Char(' '); // avoid error message 0206 } 0207 return parsed; 0208 } 0209 0210 /* 0211 xmlParserInputPtr meinExternalEntityLoader(const char *URL, const char *ID, 0212 xmlParserCtxtPtr ctxt) { 0213 xmlParserInputPtr ret = NULL; 0214 0215 // fprintf(stderr, "loading %s %s %s\n", URL, ID, ctxt->directory); 0216 0217 if (URL == NULL) { 0218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 0219 ctxt->sax->warning(ctxt, 0220 "failed to load external entity \"%s\"\n", ID); 0221 return(NULL); 0222 } 0223 if (!qstrcmp(ID, "-//OASIS//DTD DocBook XML V4.1.2//EN")) 0224 URL = "docbook/xml-dtd-4.1.2/docbookx.dtd"; 0225 if (!qstrcmp(ID, "-//OASIS//DTD XML DocBook V4.1.2//EN")) 0226 URL = "docbook/xml-dtd-4.1.2/docbookx.dtd"; 0227 0228 QString file; 0229 if (QFile::exists( QDir::currentPath() + "/" + URL ) ) 0230 file = QDir::currentPath() + "/" + URL; 0231 else 0232 file = locate("dtd", URL); 0233 0234 ret = xmlNewInputFromFile(ctxt, file.toLatin1().constData()); 0235 if (ret == NULL) { 0236 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 0237 ctxt->sax->warning(ctxt, 0238 0239 "failed to load external entity \"%s\"\n", URL); 0240 } 0241 return(ret); 0242 } 0243 */ 0244 0245 QString splitOut(const QString &parsed, int index) 0246 { 0247 int start_index = index + 1; 0248 while (parsed.at(start_index - 1) != QLatin1Char('>')) { 0249 start_index++; 0250 } 0251 0252 int inside = 0; 0253 0254 QString filedata; 0255 0256 while 
(true) { 0257 int endindex = parsed.indexOf(QStringLiteral("</FILENAME>"), index); 0258 int startindex = parsed.indexOf(QStringLiteral("<FILENAME "), index) + 1; 0259 0260 // qCDebug(KDocToolsLog) << "FILENAME " << startindex << " " << endindex << " " << inside << " " << parsed.mid(startindex + 18, 15)<< " " << 0261 // parsed.length(); 0262 0263 if (startindex > 0) { 0264 if (startindex < endindex) { 0265 // qCDebug(KDocToolsLog) << "finding another"; 0266 index = startindex + 8; 0267 inside++; 0268 } else { 0269 index = endindex + 8; 0270 inside--; 0271 } 0272 } else { 0273 inside--; 0274 index = endindex + 1; 0275 } 0276 0277 if (inside == 0) { 0278 filedata = parsed.mid(start_index, endindex - start_index); 0279 break; 0280 } 0281 } 0282 0283 index = filedata.indexOf(QStringLiteral("<FILENAME ")); 0284 0285 if (index > 0) { 0286 int endindex = filedata.lastIndexOf(QStringLiteral("</FILENAME>")); 0287 while (filedata.at(endindex) != QLatin1Char('>')) { 0288 endindex++; 0289 } 0290 endindex++; 0291 filedata = filedata.left(index) + filedata.mid(endindex); 0292 } 0293 0294 return filedata; 0295 } 0296 0297 QByteArray fromUnicode(const QString &data) 0298 { 0299 #if defined(Q_OS_WIN) || QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) 0300 return data.toUtf8(); 0301 #else 0302 QTextCodec *locale = QTextCodec::codecForLocale(); 0303 QByteArray result; 0304 constexpr int bufferSize = 30000; 0305 char buffer[bufferSize]; 0306 uint buffer_len = 0; 0307 uint len = 0; 0308 int offset = 0; 0309 const int part_len = 5000; 0310 0311 QString part; 0312 0313 while (offset < data.length()) { 0314 part = data.mid(offset, part_len); 0315 QByteArray test = locale->fromUnicode(part); 0316 if (locale->toUnicode(test) == part) { 0317 result += test; 0318 offset += part_len; 0319 continue; 0320 } 0321 len = part.length(); 0322 buffer_len = 0; 0323 for (uint i = 0; i < len; i++) { 0324 QByteArray test = locale->fromUnicode(part.mid(i, 1)); 0325 if (locale->toUnicode(test) == part.mid(i, 1)) { 
0326 if (buffer_len + test.length() + 1 > bufferSize) { 0327 break; 0328 } 0329 strcpy(buffer + buffer_len, test.data()); 0330 buffer_len += test.length(); 0331 } else { 0332 QString res = QStringLiteral("&#%1;").arg(part.at(i).unicode()); 0333 test = locale->fromUnicode(res); 0334 if (buffer_len + test.length() + 1 > bufferSize) { 0335 break; 0336 } 0337 strcpy(buffer + buffer_len, test.data()); 0338 buffer_len += test.length(); 0339 } 0340 } 0341 result += QByteArray(buffer, buffer_len + 1); 0342 offset += part_len; 0343 } 0344 return result; 0345 #endif 0346 } 0347 0348 void replaceCharsetHeader(QString &output) 0349 { 0350 QString name; 0351 #if defined(Q_OS_WIN) || QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) 0352 name = "utf-8"; 0353 // may be required for all xml output 0354 if (output.contains("<table-of-contents>")) 0355 output.replace(QLatin1String("<?xml version=\"1.0\"?>"), QLatin1String("<?xml version=\"1.0\" encoding=\"%1\"?>").arg(name)); 0356 #else 0357 name = QLatin1String(QTextCodec::codecForLocale()->name()); 0358 name.replace(QLatin1String("ISO "), QLatin1String("iso-")); 0359 output.replace(QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"), 0360 QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%1\">").arg(name)); 0361 #endif 0362 } 0363 0364 QByteArray KDocTools::extractFileToBuffer(const QString &content, const QString &filename) 0365 { 0366 const int index = content.indexOf(QLatin1String("<FILENAME filename=\"%1\"").arg(filename)); 0367 if (index == -1) { 0368 if (filename == QLatin1String("index.html")) { 0369 return fromUnicode(content); 0370 } else { 0371 return QByteArray(); // null value, not just empty 0372 } 0373 } 0374 QString data_file = splitOut(content, index); 0375 replaceCharsetHeader(data_file); 0376 return fromUnicode(data_file); 0377 } 0378 0379 class DtdStandardDirs 0380 { 0381 public: 0382 QString srcdir; 0383 }; 0384 0385 Q_GLOBAL_STATIC(DtdStandardDirs, 
s_dtdDirs) 0386 0387 void KDocTools::setupStandardDirs(const QString &srcdir) 0388 { 0389 QByteArray catalogs; 0390 0391 if (srcdir.isEmpty()) { 0392 catalogs += getKDocToolsCatalogs().join(" ").toLocal8Bit(); 0393 } else { 0394 catalogs += QUrl::fromLocalFile(srcdir + QStringLiteral("/customization/catalog.xml")).toEncoded(); 0395 s_dtdDirs()->srcdir = srcdir; 0396 } 0397 // qCDebug(KDocToolsLog) << "XML_CATALOG_FILES: " << catalogs; 0398 qputenv("XML_CATALOG_FILES", catalogs); 0399 xmlInitializeCatalog(); 0400 #if defined(_MSC_VER) 0401 /* Workaround: apparently setting XML_CATALOG_FILES set here 0402 has no effect on the libxml2 functions. 0403 This code path could be used in all cases instead of setting the 0404 variable, but this requires more investigation on the reason of 0405 the issue. */ 0406 xmlLoadCatalogs(catalogs.constData()); 0407 #endif 0408 } 0409 0410 QString KDocTools::locateFileInDtdResource(const QString &file, const QStandardPaths::LocateOptions option) 0411 { 0412 const QStringList lst = locateFilesInDtdResource(file, option); 0413 return lst.isEmpty() ? QString() : lst.first(); 0414 } 0415 0416 QStringList locateFilesInDtdResource(const QString &file, const QStandardPaths::LocateOptions option) 0417 { 0418 QFileInfo info(file); 0419 if (info.exists() && info.isAbsolute()) { 0420 return QStringList() << file; 0421 } 0422 0423 const QString srcdir = s_dtdDirs()->srcdir; 0424 if (!srcdir.isEmpty()) { 0425 const QString test = srcdir + QLatin1Char('/') + file; 0426 if (QFile::exists(test)) { 0427 return QStringList() << test; 0428 } 0429 qCDebug(KDocToolsLog) << "Could not locate file" << file << "in" << srcdir; 0430 return QStringList(); 0431 } 0432 // Using locateAll() is necessary to be able to find all catalogs when 0433 // running in environments where every repository is installed in its own 0434 // prefix. 0435 // This is the case on build.kde.org where kdelibs4support installs catalogs 0436 // in a different prefix than kdoctools. 
0437 const QString fileName = QStringLiteral("kf" QT_STRINGIFY(QT_VERSION_MAJOR) "/kdoctools/") + file; 0438 QStringList result = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, fileName, option); 0439 0440 // fallback to stuff installed with KDocTools 0441 const QFileInfo fileInInstallDataDir(QStringLiteral(KDOCTOOLS_INSTALL_DATADIR_KF) + QStringLiteral("/kdoctools/") + file); 0442 if (fileInInstallDataDir.exists()) { 0443 if ((option == QStandardPaths::LocateFile) && fileInInstallDataDir.isFile()) { 0444 result.append(fileInInstallDataDir.absoluteFilePath()); 0445 } 0446 if ((option == QStandardPaths::LocateDirectory) && fileInInstallDataDir.isDir()) { 0447 result.append(fileInInstallDataDir.absoluteFilePath()); 0448 } 0449 } 0450 0451 if (result.isEmpty()) { 0452 qCDebug(KDocToolsLog) << "Could not locate file" << fileName << "in" << QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation); 0453 } 0454 return result; 0455 } 0456 0457 QStringList getKDocToolsCatalogs() 0458 { 0459 // Find all catalogs as catalog*.xml, and add them to the list, starting 0460 // from catalog.xml (the main one). 
0461 const QStringList dirNames = locateFilesInDtdResource(QStringLiteral("customization"), QStandardPaths::LocateDirectory); 0462 if (dirNames.isEmpty()) { 0463 return QStringList(); 0464 } 0465 QStringList catalogFiles; 0466 for (const QString &customizationDirName : dirNames) { 0467 QDir customizationDir = QDir(customizationDirName); 0468 const QStringList catalogFileFilters(QStringLiteral("catalog*.xml")); 0469 const QFileInfoList catalogInfoFiles = customizationDir.entryInfoList(catalogFileFilters, QDir::Files, QDir::Name); 0470 for (const QFileInfo &fileInfo : catalogInfoFiles) { 0471 const QString fullFileName = QUrl::fromLocalFile(fileInfo.absoluteFilePath()).toEncoded(); 0472 if (fileInfo.fileName() == QStringLiteral("catalog.xml")) { 0473 catalogFiles.prepend(fullFileName); 0474 } else { 0475 catalogFiles.append(fullFileName); 0476 } 0477 } 0478 } 0479 0480 return catalogFiles; 0481 } 0482 0483 QStringList KDocTools::documentationDirs() 0484 { 0485 /* List of paths containing documentation */ 0486 return QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("doc/HTML"), QStandardPaths::LocateDirectory); 0487 }