File indexing completed on 2024-05-05 03:54:34

0001 #include "docbookxslt.h"
0002 #include "docbookxslt_p.h"
0003 
0004 #ifdef Q_OS_WIN
0005 // one of the xslt/xml headers pulls in windows.h and breaks <limits>
0006 #define NOMINMAX
0007 #include <QHash>
0008 #endif
0009 
0010 #include "../config-kdoctools.h"
0011 #include "loggingcategory.h"
0012 
0013 #include <libxml/catalog.h>
0014 #include <libxml/parser.h>
0015 #include <libxml/parserInternals.h>
0016 #include <libxml/xmlIO.h>
0017 #include <libxml/xmlsave.h>
0018 #include <libxslt/transform.h>
0019 #include <libxslt/xsltInternals.h>
0020 #include <libxslt/xsltconfig.h>
0021 #include <libxslt/xsltutils.h>
0022 
0023 #include <QByteArray>
0024 #include <QDir>
0025 #include <QFile>
0026 #include <QList>
0027 #include <QStandardPaths>
0028 #include <QString>
0029 #include <QUrl>
0030 
0031 #if !defined(SIMPLE_XSLT)
0032 extern HelpProtocol *slave;
0033 #define INFO(x)                                                                                                                                                \
0034     if (slave)                                                                                                                                                 \
0035         slave->infoMessage(x);
0036 #else
0037 #define INFO(x)
0038 #endif
0039 
0040 int writeToQString(void *context, const char *buffer, int len)
0041 {
0042     QString *t = (QString *)context;
0043     *t += QString::fromUtf8(buffer, len);
0044     return len;
0045 }
0046 
0047 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
0048 
0049 #define MAX_PATHS 64
0050 xmlExternalEntityLoader defaultEntityLoader = NULL;
0051 static xmlChar *paths[MAX_PATHS + 1];
0052 static int nbpaths = 0;
0053 static QHash<QString, QString> replaceURLList;
0054 
0055 /*
0056  * Entity loading control and customization.
0057  * taken from xsltproc.c
0058  */
0059 static xmlParserInputPtr xsltprocExternalEntityLoader(const char *_URL, const char *ID, xmlParserCtxtPtr ctxt)
0060 {
0061     xmlParserInputPtr ret;
0062     warningSAXFunc warning = NULL;
0063 
0064     // use local available dtd versions instead of fetching it every time from the internet
0065     QString url = QLatin1String(_URL);
0066     QHash<QString, QString>::const_iterator i;
0067     for (i = replaceURLList.constBegin(); i != replaceURLList.constEnd(); i++) {
0068         if (url.startsWith(i.key())) {
0069             url.replace(i.key(), i.value());
0070             qCDebug(KDocToolsLog) << "converted" << _URL << "to" << url;
0071         }
0072     }
0073     char URL[1024];
0074     strcpy(URL, url.toLatin1().constData());
0075 
0076     const char *lastsegment = URL;
0077     const char *iter = URL;
0078 
0079     if (nbpaths > 0) {
0080         while (*iter != 0) {
0081             if (*iter == '/') {
0082                 lastsegment = iter + 1;
0083             }
0084             iter++;
0085         }
0086     }
0087 
0088     if ((ctxt != NULL) && (ctxt->sax != NULL)) {
0089         warning = ctxt->sax->warning;
0090         ctxt->sax->warning = NULL;
0091     }
0092 
0093     if (defaultEntityLoader != NULL) {
0094         ret = defaultEntityLoader(URL, ID, ctxt);
0095         if (ret != NULL) {
0096             if (warning != NULL) {
0097                 ctxt->sax->warning = warning;
0098             }
0099             qCDebug(KDocToolsLog) << "Loaded URL=\"" << URL << "\" ID=\"" << ID << "\"";
0100             return (ret);
0101         }
0102     }
0103     for (int i = 0; i < nbpaths; i++) {
0104         xmlChar *newURL;
0105 
0106         newURL = xmlStrdup((const xmlChar *)paths[i]);
0107         newURL = xmlStrcat(newURL, (const xmlChar *)"/");
0108         newURL = xmlStrcat(newURL, (const xmlChar *)lastsegment);
0109         if (newURL != NULL) {
0110             ret = defaultEntityLoader((const char *)newURL, ID, ctxt);
0111             if (ret != NULL) {
0112                 if (warning != NULL) {
0113                     ctxt->sax->warning = warning;
0114                 }
0115                 qCDebug(KDocToolsLog) << "Loaded URL=\"" << newURL << "\" ID=\"" << ID << "\"";
0116                 xmlFree(newURL);
0117                 return (ret);
0118             }
0119             xmlFree(newURL);
0120         }
0121     }
0122     if (warning != NULL) {
0123         ctxt->sax->warning = warning;
0124         if (URL != NULL) {
0125             warning(ctxt, "failed to load external entity \"%s\"\n", URL);
0126         } else if (ID != NULL) {
0127             warning(ctxt, "failed to load external entity \"%s\"\n", ID);
0128         }
0129     }
0130     return (NULL);
0131 }
0132 #endif
0133 
0134 QString KDocTools::transform(const QString &pat, const QString &tss, const QList<const char *> &params)
0135 {
0136     QString parsed;
0137 
0138     INFO(i18n("Parsing stylesheet"));
0139 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
0140     // prepare use of local available dtd versions instead of fetching every time from the internet
0141     // this approach is url based
0142     if (!defaultEntityLoader) {
0143         defaultEntityLoader = xmlGetExternalEntityLoader();
0144         xmlSetExternalEntityLoader(xsltprocExternalEntityLoader);
0145 
0146         replaceURLList[QLatin1String("http://www.oasis-open.org/docbook/xml/4.5")] = QString("file:///%1").arg(DOCBOOK_XML_CURRDTD);
0147     }
0148 #endif
0149 
0150     xsltStylesheetPtr style_sheet = xsltParseStylesheetFile((const xmlChar *)QFile::encodeName(tss).constData());
0151 
0152     if (!style_sheet) {
0153         return parsed;
0154     }
0155     if (style_sheet->indent == 1) {
0156         xmlIndentTreeOutput = 1;
0157     } else {
0158         xmlIndentTreeOutput = 0;
0159     }
0160 
0161     INFO(i18n("Parsing document"));
0162 
0163     xmlParserCtxtPtr pctxt;
0164 
0165     pctxt = xmlNewParserCtxt();
0166     if (pctxt == nullptr) {
0167         return parsed;
0168     }
0169 
0170     xmlDocPtr doc = xmlCtxtReadFile(pctxt, QFile::encodeName(pat).constData(), nullptr, XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_NONET);
0171     /* Clean the context pointer, now useless */
0172     const bool context_valid = (pctxt->valid == 0);
0173     xmlFreeParserCtxt(pctxt);
0174 
0175     /* Check both the returned doc (for parsing errors) and the context
0176        (for validation errors) */
0177     if (doc == nullptr) {
0178         return parsed;
0179     } else {
0180         if (context_valid) {
0181             xmlFreeDoc(doc);
0182             return parsed;
0183         }
0184     }
0185 
0186     INFO(i18n("Applying stylesheet"));
0187     QList<const char *> p = params;
0188     p.append(nullptr);
0189     xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, const_cast<const char **>(&p[0]));
0190     xmlFreeDoc(doc);
0191     if (res != nullptr) {
0192         xmlOutputBufferPtr outp = xmlOutputBufferCreateIO(writeToQString, nullptr, &parsed, nullptr);
0193         outp->written = 0;
0194         INFO(i18n("Writing document"));
0195         xsltSaveResultTo(outp, res, style_sheet);
0196         xmlOutputBufferClose(outp);
0197         xmlFreeDoc(res);
0198     }
0199     xsltFreeStylesheet(style_sheet);
0200 
0201     if (parsed.isEmpty()) {
0202         parsed = QLatin1Char(' '); // avoid error message
0203     }
0204     return parsed;
0205 }
0206 
0207 /*
0208 xmlParserInputPtr meinExternalEntityLoader(const char *URL, const char *ID,
0209                        xmlParserCtxtPtr ctxt) {
0210     xmlParserInputPtr ret = NULL;
0211 
0212     // fprintf(stderr, "loading %s %s %s\n", URL, ID, ctxt->directory);
0213 
0214     if (URL == NULL) {
0215         if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
0216             ctxt->sax->warning(ctxt,
0217                     "failed to load external entity \"%s\"\n", ID);
0218         return(NULL);
0219     }
0220     if (!qstrcmp(ID, "-//OASIS//DTD DocBook XML V4.1.2//EN"))
0221         URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
0222     if (!qstrcmp(ID, "-//OASIS//DTD XML DocBook V4.1.2//EN"))
0223     URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
0224 
0225     QString file;
0226     if (QFile::exists( QDir::currentPath() + "/" + URL ) )
0227         file = QDir::currentPath() + "/" + URL;
0228     else
0229         file = locate("dtd", URL);
0230 
0231     ret = xmlNewInputFromFile(ctxt, file.toLatin1().constData());
0232     if (ret == NULL) {
0233         if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
0234             ctxt->sax->warning(ctxt,
0235 
0236                 "failed to load external entity \"%s\"\n", URL);
0237     }
0238     return(ret);
0239 }
0240 */
0241 
0242 QString splitOut(const QString &parsed, int index)
0243 {
0244     int start_index = index + 1;
0245     while (parsed.at(start_index - 1) != QLatin1Char('>')) {
0246         start_index++;
0247     }
0248 
0249     int inside = 0;
0250 
0251     QString filedata;
0252 
0253     while (true) {
0254         int endindex = parsed.indexOf(QStringLiteral("</FILENAME>"), index);
0255         int startindex = parsed.indexOf(QStringLiteral("<FILENAME "), index) + 1;
0256 
0257         // qCDebug(KDocToolsLog) << "FILENAME " << startindex << " " << endindex << " " << inside << " " << parsed.mid(startindex + 18, 15)<< " " <<
0258         // parsed.length();
0259 
0260         if (startindex > 0) {
0261             if (startindex < endindex) {
0262                 // qCDebug(KDocToolsLog) << "finding another";
0263                 index = startindex + 8;
0264                 inside++;
0265             } else {
0266                 index = endindex + 8;
0267                 inside--;
0268             }
0269         } else {
0270             inside--;
0271             index = endindex + 1;
0272         }
0273 
0274         if (inside == 0) {
0275             filedata = parsed.mid(start_index, endindex - start_index);
0276             break;
0277         }
0278     }
0279 
0280     index = filedata.indexOf(QStringLiteral("<FILENAME "));
0281 
0282     if (index > 0) {
0283         int endindex = filedata.lastIndexOf(QStringLiteral("</FILENAME>"));
0284         while (filedata.at(endindex) != QLatin1Char('>')) {
0285             endindex++;
0286         }
0287         endindex++;
0288         filedata = filedata.left(index) + filedata.mid(endindex);
0289     }
0290 
0291     return filedata;
0292 }
0293 
0294 QByteArray fromUnicode(const QString &data)
0295 {
0296     return data.toUtf8();
0297 }
0298 
0299 void replaceCharsetHeader(QString &output)
0300 {
0301     // may be required for all xml output
0302     if (output.contains("<table-of-contents>"))
0303         output.replace(QLatin1String("<?xml version=\"1.0\"?>"), QLatin1String("<?xml version=\"1.0\" encoding=\"utf-8\"?>"));
0304 }
0305 
0306 QByteArray KDocTools::extractFileToBuffer(const QString &content, const QString &filename)
0307 {
0308     const int index = content.indexOf(QLatin1String("<FILENAME filename=\"%1\"").arg(filename));
0309     if (index == -1) {
0310         if (filename == QLatin1String("index.html")) {
0311             return fromUnicode(content);
0312         } else {
0313             return QByteArray(); // null value, not just empty
0314         }
0315     }
0316     QString data_file = splitOut(content, index);
0317     replaceCharsetHeader(data_file);
0318     return fromUnicode(data_file);
0319 }
0320 
0321 class DtdStandardDirs
0322 {
0323 public:
0324     QString srcdir;
0325 };
0326 
0327 Q_GLOBAL_STATIC(DtdStandardDirs, s_dtdDirs)
0328 
0329 void KDocTools::setupStandardDirs(const QString &srcdir)
0330 {
0331     QByteArray catalogs;
0332 
0333     if (srcdir.isEmpty()) {
0334         catalogs += getKDocToolsCatalogs().join(" ").toLocal8Bit();
0335     } else {
0336         catalogs += QUrl::fromLocalFile(srcdir + QStringLiteral("/customization/catalog.xml")).toEncoded();
0337         s_dtdDirs()->srcdir = srcdir;
0338     }
0339     // qCDebug(KDocToolsLog) << "XML_CATALOG_FILES: " << catalogs;
0340     qputenv("XML_CATALOG_FILES", catalogs);
0341     xmlInitializeCatalog();
0342 #if defined(_MSC_VER)
0343     /* Workaround: apparently setting XML_CATALOG_FILES set here
0344        has no effect on the libxml2 functions.
0345        This code path could be used in all cases instead of setting the
0346        variable, but this requires more investigation on the reason of
0347        the issue. */
0348     xmlLoadCatalogs(catalogs.constData());
0349 #endif
0350 }
0351 
0352 QString KDocTools::locateFileInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
0353 {
0354     const QStringList lst = locateFilesInDtdResource(file, option);
0355     return lst.isEmpty() ? QString() : lst.first();
0356 }
0357 
0358 QStringList locateFilesInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
0359 {
0360     QFileInfo info(file);
0361     if (info.exists() && info.isAbsolute()) {
0362         return QStringList() << file;
0363     }
0364 
0365     const QString srcdir = s_dtdDirs()->srcdir;
0366     if (!srcdir.isEmpty()) {
0367         const QString test = srcdir + QLatin1Char('/') + file;
0368         if (QFile::exists(test)) {
0369             return QStringList() << test;
0370         }
0371         qCDebug(KDocToolsLog) << "Could not locate file" << file << "in" << srcdir;
0372         return QStringList();
0373     }
0374     // Using locateAll() is necessary to be able to find all catalogs when
0375     // running in environments where every repository is installed in its own
0376     // prefix.
0377     // This is the case on build.kde.org where kdelibs4support installs catalogs
0378     // in a different prefix than kdoctools.
0379     const QString fileName = QStringLiteral("kf6/kdoctools/") + file;
0380     QStringList result = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, fileName, option);
0381 
0382     // fallback to stuff installed with KDocTools
0383     const QFileInfo fileInInstallDataDir(QStringLiteral(KDOCTOOLS_INSTALL_DATADIR_KF) + QStringLiteral("/kdoctools/") + file);
0384     if (fileInInstallDataDir.exists()) {
0385         if ((option == QStandardPaths::LocateFile) && fileInInstallDataDir.isFile()) {
0386             result.append(fileInInstallDataDir.absoluteFilePath());
0387         }
0388         if ((option == QStandardPaths::LocateDirectory) && fileInInstallDataDir.isDir()) {
0389             result.append(fileInInstallDataDir.absoluteFilePath());
0390         }
0391     }
0392 
0393     if (result.isEmpty()) {
0394         qCDebug(KDocToolsLog) << "Could not locate file" << fileName << "in" << QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation);
0395     }
0396     return result;
0397 }
0398 
0399 QStringList getKDocToolsCatalogs()
0400 {
0401     // Find all catalogs as catalog*.xml, and add them to the list, starting
0402     // from catalog.xml (the main one).
0403     const QStringList dirNames = locateFilesInDtdResource(QStringLiteral("customization"), QStandardPaths::LocateDirectory);
0404     if (dirNames.isEmpty()) {
0405         return QStringList();
0406     }
0407     QStringList catalogFiles;
0408     for (const QString &customizationDirName : dirNames) {
0409         QDir customizationDir = QDir(customizationDirName);
0410         const QStringList catalogFileFilters(QStringLiteral("catalog*.xml"));
0411         const QFileInfoList catalogInfoFiles = customizationDir.entryInfoList(catalogFileFilters, QDir::Files, QDir::Name);
0412         for (const QFileInfo &fileInfo : catalogInfoFiles) {
0413             const QString fullFileName = QUrl::fromLocalFile(fileInfo.absoluteFilePath()).toEncoded();
0414             if (fileInfo.fileName() == QStringLiteral("catalog.xml")) {
0415                 catalogFiles.prepend(fullFileName);
0416             } else {
0417                 catalogFiles.append(fullFileName);
0418             }
0419         }
0420     }
0421 
0422     return catalogFiles;
0423 }
0424 
0425 QStringList KDocTools::documentationDirs()
0426 {
0427     /* List of paths containing documentation */
0428     return QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("doc/HTML"), QStandardPaths::LocateDirectory);
0429 }