File indexing completed on 2024-05-12 11:49:47

0001 #include "docbookxslt.h"
0002 #include "docbookxslt_p.h"
0003 
0004 #ifdef Q_OS_WIN
0005 // one of the xslt/xml headers pulls in windows.h and breaks <limits>
0006 #define NOMINMAX
0007 #include <QHash>
0008 #endif
0009 
0010 #include "../config-kdoctools.h"
0011 #include "loggingcategory.h"
0012 
0013 #include <libxml/catalog.h>
0014 #include <libxml/parser.h>
0015 #include <libxml/parserInternals.h>
0016 #include <libxml/xmlIO.h>
0017 #include <libxml/xmlsave.h>
0018 #include <libxslt/transform.h>
0019 #include <libxslt/xsltInternals.h>
0020 #include <libxslt/xsltconfig.h>
0021 #include <libxslt/xsltutils.h>
0022 
0023 #include <QByteArray>
0024 #include <QDir>
0025 #include <QFile>
0026 #include <QStandardPaths>
0027 #include <QString>
0028 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0029 #include <QTextCodec>
0030 #endif
0031 #include <QUrl>
0032 #include <QVector>
0033 
0034 #if !defined(SIMPLE_XSLT)
0035 extern HelpProtocol *slave;
0036 #define INFO(x)                                                                                                                                                \
0037     if (slave)                                                                                                                                                 \
0038         slave->infoMessage(x);
0039 #else
0040 #define INFO(x)
0041 #endif
0042 
0043 int writeToQString(void *context, const char *buffer, int len)
0044 {
0045     QString *t = (QString *)context;
0046     *t += QString::fromUtf8(buffer, len);
0047     return len;
0048 }
0049 
0050 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
0051 
0052 #define MAX_PATHS 64
0053 xmlExternalEntityLoader defaultEntityLoader = NULL;
0054 static xmlChar *paths[MAX_PATHS + 1];
0055 static int nbpaths = 0;
0056 static QHash<QString, QString> replaceURLList;
0057 
0058 /*
0059  * Entity loading control and customization.
0060  * taken from xsltproc.c
0061  */
0062 static xmlParserInputPtr xsltprocExternalEntityLoader(const char *_URL, const char *ID, xmlParserCtxtPtr ctxt)
0063 {
0064     xmlParserInputPtr ret;
0065     warningSAXFunc warning = NULL;
0066 
0067     // use local available dtd versions instead of fetching it every time from the internet
0068     QString url = QLatin1String(_URL);
0069     QHash<QString, QString>::const_iterator i;
0070     for (i = replaceURLList.constBegin(); i != replaceURLList.constEnd(); i++) {
0071         if (url.startsWith(i.key())) {
0072             url.replace(i.key(), i.value());
0073             qCDebug(KDocToolsLog) << "converted" << _URL << "to" << url;
0074         }
0075     }
0076     char URL[1024];
0077     strcpy(URL, url.toLatin1().constData());
0078 
0079     const char *lastsegment = URL;
0080     const char *iter = URL;
0081 
0082     if (nbpaths > 0) {
0083         while (*iter != 0) {
0084             if (*iter == '/') {
0085                 lastsegment = iter + 1;
0086             }
0087             iter++;
0088         }
0089     }
0090 
0091     if ((ctxt != NULL) && (ctxt->sax != NULL)) {
0092         warning = ctxt->sax->warning;
0093         ctxt->sax->warning = NULL;
0094     }
0095 
0096     if (defaultEntityLoader != NULL) {
0097         ret = defaultEntityLoader(URL, ID, ctxt);
0098         if (ret != NULL) {
0099             if (warning != NULL) {
0100                 ctxt->sax->warning = warning;
0101             }
0102             qCDebug(KDocToolsLog) << "Loaded URL=\"" << URL << "\" ID=\"" << ID << "\"";
0103             return (ret);
0104         }
0105     }
0106     for (int i = 0; i < nbpaths; i++) {
0107         xmlChar *newURL;
0108 
0109         newURL = xmlStrdup((const xmlChar *)paths[i]);
0110         newURL = xmlStrcat(newURL, (const xmlChar *)"/");
0111         newURL = xmlStrcat(newURL, (const xmlChar *)lastsegment);
0112         if (newURL != NULL) {
0113             ret = defaultEntityLoader((const char *)newURL, ID, ctxt);
0114             if (ret != NULL) {
0115                 if (warning != NULL) {
0116                     ctxt->sax->warning = warning;
0117                 }
0118                 qCDebug(KDocToolsLog) << "Loaded URL=\"" << newURL << "\" ID=\"" << ID << "\"";
0119                 xmlFree(newURL);
0120                 return (ret);
0121             }
0122             xmlFree(newURL);
0123         }
0124     }
0125     if (warning != NULL) {
0126         ctxt->sax->warning = warning;
0127         if (URL != NULL) {
0128             warning(ctxt, "failed to load external entity \"%s\"\n", URL);
0129         } else if (ID != NULL) {
0130             warning(ctxt, "failed to load external entity \"%s\"\n", ID);
0131         }
0132     }
0133     return (NULL);
0134 }
0135 #endif
0136 
0137 QString KDocTools::transform(const QString &pat, const QString &tss, const QVector<const char *> &params)
0138 {
0139     QString parsed;
0140 
0141     INFO(i18n("Parsing stylesheet"));
0142 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
0143     // prepare use of local available dtd versions instead of fetching every time from the internet
0144     // this approach is url based
0145     if (!defaultEntityLoader) {
0146         defaultEntityLoader = xmlGetExternalEntityLoader();
0147         xmlSetExternalEntityLoader(xsltprocExternalEntityLoader);
0148 
0149         replaceURLList[QLatin1String("http://www.oasis-open.org/docbook/xml/4.5")] = QString("file:///%1").arg(DOCBOOK_XML_CURRDTD);
0150     }
0151 #endif
0152 
0153     xsltStylesheetPtr style_sheet = xsltParseStylesheetFile((const xmlChar *)QFile::encodeName(tss).constData());
0154 
0155     if (!style_sheet) {
0156         return parsed;
0157     }
0158     if (style_sheet->indent == 1) {
0159         xmlIndentTreeOutput = 1;
0160     } else {
0161         xmlIndentTreeOutput = 0;
0162     }
0163 
0164     INFO(i18n("Parsing document"));
0165 
0166     xmlParserCtxtPtr pctxt;
0167 
0168     pctxt = xmlNewParserCtxt();
0169     if (pctxt == nullptr) {
0170         return parsed;
0171     }
0172 
0173     xmlDocPtr doc = xmlCtxtReadFile(pctxt, QFile::encodeName(pat).constData(), nullptr, XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_NONET);
0174     /* Clean the context pointer, now useless */
0175     const bool context_valid = (pctxt->valid == 0);
0176     xmlFreeParserCtxt(pctxt);
0177 
0178     /* Check both the returned doc (for parsing errors) and the context
0179        (for validation errors) */
0180     if (doc == nullptr) {
0181         return parsed;
0182     } else {
0183         if (context_valid) {
0184             xmlFreeDoc(doc);
0185             return parsed;
0186         }
0187     }
0188 
0189     INFO(i18n("Applying stylesheet"));
0190     QVector<const char *> p = params;
0191     p.append(nullptr);
0192     xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, const_cast<const char **>(&p[0]));
0193     xmlFreeDoc(doc);
0194     if (res != nullptr) {
0195         xmlOutputBufferPtr outp = xmlOutputBufferCreateIO(writeToQString, nullptr, &parsed, nullptr);
0196         outp->written = 0;
0197         INFO(i18n("Writing document"));
0198         xsltSaveResultTo(outp, res, style_sheet);
0199         xmlOutputBufferClose(outp);
0200         xmlFreeDoc(res);
0201     }
0202     xsltFreeStylesheet(style_sheet);
0203 
0204     if (parsed.isEmpty()) {
0205         parsed = QLatin1Char(' '); // avoid error message
0206     }
0207     return parsed;
0208 }
0209 
0210 /*
0211 xmlParserInputPtr meinExternalEntityLoader(const char *URL, const char *ID,
0212                        xmlParserCtxtPtr ctxt) {
0213     xmlParserInputPtr ret = NULL;
0214 
0215     // fprintf(stderr, "loading %s %s %s\n", URL, ID, ctxt->directory);
0216 
0217     if (URL == NULL) {
0218         if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
0219             ctxt->sax->warning(ctxt,
0220                     "failed to load external entity \"%s\"\n", ID);
0221         return(NULL);
0222     }
0223     if (!qstrcmp(ID, "-//OASIS//DTD DocBook XML V4.1.2//EN"))
0224         URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
0225     if (!qstrcmp(ID, "-//OASIS//DTD XML DocBook V4.1.2//EN"))
0226     URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
0227 
0228     QString file;
0229     if (QFile::exists( QDir::currentPath() + "/" + URL ) )
0230         file = QDir::currentPath() + "/" + URL;
0231     else
0232         file = locate("dtd", URL);
0233 
0234     ret = xmlNewInputFromFile(ctxt, file.toLatin1().constData());
0235     if (ret == NULL) {
0236         if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
0237             ctxt->sax->warning(ctxt,
0238 
0239                 "failed to load external entity \"%s\"\n", URL);
0240     }
0241     return(ret);
0242 }
0243 */
0244 
0245 QString splitOut(const QString &parsed, int index)
0246 {
0247     int start_index = index + 1;
0248     while (parsed.at(start_index - 1) != QLatin1Char('>')) {
0249         start_index++;
0250     }
0251 
0252     int inside = 0;
0253 
0254     QString filedata;
0255 
0256     while (true) {
0257         int endindex = parsed.indexOf(QStringLiteral("</FILENAME>"), index);
0258         int startindex = parsed.indexOf(QStringLiteral("<FILENAME "), index) + 1;
0259 
0260         // qCDebug(KDocToolsLog) << "FILENAME " << startindex << " " << endindex << " " << inside << " " << parsed.mid(startindex + 18, 15)<< " " <<
0261         // parsed.length();
0262 
0263         if (startindex > 0) {
0264             if (startindex < endindex) {
0265                 // qCDebug(KDocToolsLog) << "finding another";
0266                 index = startindex + 8;
0267                 inside++;
0268             } else {
0269                 index = endindex + 8;
0270                 inside--;
0271             }
0272         } else {
0273             inside--;
0274             index = endindex + 1;
0275         }
0276 
0277         if (inside == 0) {
0278             filedata = parsed.mid(start_index, endindex - start_index);
0279             break;
0280         }
0281     }
0282 
0283     index = filedata.indexOf(QStringLiteral("<FILENAME "));
0284 
0285     if (index > 0) {
0286         int endindex = filedata.lastIndexOf(QStringLiteral("</FILENAME>"));
0287         while (filedata.at(endindex) != QLatin1Char('>')) {
0288             endindex++;
0289         }
0290         endindex++;
0291         filedata = filedata.left(index) + filedata.mid(endindex);
0292     }
0293 
0294     return filedata;
0295 }
0296 
0297 QByteArray fromUnicode(const QString &data)
0298 {
0299 #if defined(Q_OS_WIN) || QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
0300     return data.toUtf8();
0301 #else
0302     QTextCodec *locale = QTextCodec::codecForLocale();
0303     QByteArray result;
0304     constexpr int bufferSize = 30000;
0305     char buffer[bufferSize];
0306     uint buffer_len = 0;
0307     uint len = 0;
0308     int offset = 0;
0309     const int part_len = 5000;
0310 
0311     QString part;
0312 
0313     while (offset < data.length()) {
0314         part = data.mid(offset, part_len);
0315         QByteArray test = locale->fromUnicode(part);
0316         if (locale->toUnicode(test) == part) {
0317             result += test;
0318             offset += part_len;
0319             continue;
0320         }
0321         len = part.length();
0322         buffer_len = 0;
0323         for (uint i = 0; i < len; i++) {
0324             QByteArray test = locale->fromUnicode(part.mid(i, 1));
0325             if (locale->toUnicode(test) == part.mid(i, 1)) {
0326                 if (buffer_len + test.length() + 1 > bufferSize) {
0327                     break;
0328                 }
0329                 strcpy(buffer + buffer_len, test.data());
0330                 buffer_len += test.length();
0331             } else {
0332                 QString res = QStringLiteral("&#%1;").arg(part.at(i).unicode());
0333                 test = locale->fromUnicode(res);
0334                 if (buffer_len + test.length() + 1 > bufferSize) {
0335                     break;
0336                 }
0337                 strcpy(buffer + buffer_len, test.data());
0338                 buffer_len += test.length();
0339             }
0340         }
0341         result += QByteArray(buffer, buffer_len + 1);
0342         offset += part_len;
0343     }
0344     return result;
0345 #endif
0346 }
0347 
0348 void replaceCharsetHeader(QString &output)
0349 {
0350     QString name;
0351 #if defined(Q_OS_WIN) || QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
0352     name = "utf-8";
0353     // may be required for all xml output
0354     if (output.contains("<table-of-contents>"))
0355         output.replace(QLatin1String("<?xml version=\"1.0\"?>"), QLatin1String("<?xml version=\"1.0\" encoding=\"%1\"?>").arg(name));
0356 #else
0357     name = QLatin1String(QTextCodec::codecForLocale()->name());
0358     name.replace(QLatin1String("ISO "), QLatin1String("iso-"));
0359     output.replace(QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"),
0360                    QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%1\">").arg(name));
0361 #endif
0362 }
0363 
0364 QByteArray KDocTools::extractFileToBuffer(const QString &content, const QString &filename)
0365 {
0366     const int index = content.indexOf(QLatin1String("<FILENAME filename=\"%1\"").arg(filename));
0367     if (index == -1) {
0368         if (filename == QLatin1String("index.html")) {
0369             return fromUnicode(content);
0370         } else {
0371             return QByteArray(); // null value, not just empty
0372         }
0373     }
0374     QString data_file = splitOut(content, index);
0375     replaceCharsetHeader(data_file);
0376     return fromUnicode(data_file);
0377 }
0378 
0379 class DtdStandardDirs
0380 {
0381 public:
0382     QString srcdir;
0383 };
0384 
0385 Q_GLOBAL_STATIC(DtdStandardDirs, s_dtdDirs)
0386 
0387 void KDocTools::setupStandardDirs(const QString &srcdir)
0388 {
0389     QByteArray catalogs;
0390 
0391     if (srcdir.isEmpty()) {
0392         catalogs += getKDocToolsCatalogs().join(" ").toLocal8Bit();
0393     } else {
0394         catalogs += QUrl::fromLocalFile(srcdir + QStringLiteral("/customization/catalog.xml")).toEncoded();
0395         s_dtdDirs()->srcdir = srcdir;
0396     }
0397     // qCDebug(KDocToolsLog) << "XML_CATALOG_FILES: " << catalogs;
0398     qputenv("XML_CATALOG_FILES", catalogs);
0399     xmlInitializeCatalog();
0400 #if defined(_MSC_VER)
0401     /* Workaround: apparently setting XML_CATALOG_FILES set here
0402        has no effect on the libxml2 functions.
0403        This code path could be used in all cases instead of setting the
0404        variable, but this requires more investigation on the reason of
0405        the issue. */
0406     xmlLoadCatalogs(catalogs.constData());
0407 #endif
0408 }
0409 
0410 QString KDocTools::locateFileInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
0411 {
0412     const QStringList lst = locateFilesInDtdResource(file, option);
0413     return lst.isEmpty() ? QString() : lst.first();
0414 }
0415 
0416 QStringList locateFilesInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
0417 {
0418     QFileInfo info(file);
0419     if (info.exists() && info.isAbsolute()) {
0420         return QStringList() << file;
0421     }
0422 
0423     const QString srcdir = s_dtdDirs()->srcdir;
0424     if (!srcdir.isEmpty()) {
0425         const QString test = srcdir + QLatin1Char('/') + file;
0426         if (QFile::exists(test)) {
0427             return QStringList() << test;
0428         }
0429         qCDebug(KDocToolsLog) << "Could not locate file" << file << "in" << srcdir;
0430         return QStringList();
0431     }
0432     // Using locateAll() is necessary to be able to find all catalogs when
0433     // running in environments where every repository is installed in its own
0434     // prefix.
0435     // This is the case on build.kde.org where kdelibs4support installs catalogs
0436     // in a different prefix than kdoctools.
0437     const QString fileName = QStringLiteral("kf" QT_STRINGIFY(QT_VERSION_MAJOR) "/kdoctools/") + file;
0438     QStringList result = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, fileName, option);
0439 
0440     // fallback to stuff installed with KDocTools
0441     const QFileInfo fileInInstallDataDir(QStringLiteral(KDOCTOOLS_INSTALL_DATADIR_KF) + QStringLiteral("/kdoctools/") + file);
0442     if (fileInInstallDataDir.exists()) {
0443         if ((option == QStandardPaths::LocateFile) && fileInInstallDataDir.isFile()) {
0444             result.append(fileInInstallDataDir.absoluteFilePath());
0445         }
0446         if ((option == QStandardPaths::LocateDirectory) && fileInInstallDataDir.isDir()) {
0447             result.append(fileInInstallDataDir.absoluteFilePath());
0448         }
0449     }
0450 
0451     if (result.isEmpty()) {
0452         qCDebug(KDocToolsLog) << "Could not locate file" << fileName << "in" << QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation);
0453     }
0454     return result;
0455 }
0456 
0457 QStringList getKDocToolsCatalogs()
0458 {
0459     // Find all catalogs as catalog*.xml, and add them to the list, starting
0460     // from catalog.xml (the main one).
0461     const QStringList dirNames = locateFilesInDtdResource(QStringLiteral("customization"), QStandardPaths::LocateDirectory);
0462     if (dirNames.isEmpty()) {
0463         return QStringList();
0464     }
0465     QStringList catalogFiles;
0466     for (const QString &customizationDirName : dirNames) {
0467         QDir customizationDir = QDir(customizationDirName);
0468         const QStringList catalogFileFilters(QStringLiteral("catalog*.xml"));
0469         const QFileInfoList catalogInfoFiles = customizationDir.entryInfoList(catalogFileFilters, QDir::Files, QDir::Name);
0470         for (const QFileInfo &fileInfo : catalogInfoFiles) {
0471             const QString fullFileName = QUrl::fromLocalFile(fileInfo.absoluteFilePath()).toEncoded();
0472             if (fileInfo.fileName() == QStringLiteral("catalog.xml")) {
0473                 catalogFiles.prepend(fullFileName);
0474             } else {
0475                 catalogFiles.append(fullFileName);
0476             }
0477         }
0478     }
0479 
0480     return catalogFiles;
0481 }
0482 
0483 QStringList KDocTools::documentationDirs()
0484 {
0485     /* List of paths containing documentation */
0486     return QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("doc/HTML"), QStandardPaths::LocateDirectory);
0487 }