kio-extras/man/man2html.cpp

0001 /*
0002     This file is part of the KDE libraries
0003
0004     SPDX-FileCopyrightText: 2000 Stephan Kulow <coolo@kde.org>
0005     SPDX-FileCopyrightText: 2005 Nicolas GOUTTE <goutte@kde.org>
0006     SPDX-FileCopyrightText: 2011 Martin Koller <kollix@aon.at>
0007
0008     ... and others (see SVN history)
0009 */
0010
0011 // Start of verbatim comment
0012
0013 /*
0014 ** This program was written by Richard Verhoeven (NL:5482ZX35)
0015 ** at the Eindhoven University of Technology. Email: rcb5@win.tue.nl
0016 **
0017 ** Permission is granted to distribute, modify and use this program as long
0018 ** as this comment is not removed or changed.
0019 */
0020
0021 // End of verbatim comment
0022
0023 /*
0024  * man2html-linux-1.0/1.1
0025  * This version modified for Redhat/Caldera linux - March 1996.
0026  * Michael Hamilton <michael@actrix.gen.nz>.
0027  *
0028  * man2html-linux-1.2
0029  * Added support for BSD mandoc pages - I didn't have any documentation
0030  * on the mandoc macros, so I may have missed some.
0031  * Michael Hamilton <michael@actrix.gen.nz>.
0032  *
0033  * vh-man2html-1.3
0034  * Renamed to avoid confusion (V for Verhoeven, H for Hamilton).
0035  *
0036  * vh-man2html-1.4
0037  * Now uses /etc/man.config
0038  * Added support for compressed pages.
0039  * Added "length-safe" string operations for client input parameters.
0040  * More secure, -M secured, and client input string lengths checked.
0041  *
0042  */
0043
0044 /*
0045 ** If you want to use this program for your WWW server, adjust the line
0046 ** which defines the CGIBASE or compile it with the -DCGIBASE='"..."' option.
0047 **
0048 ** You have to adjust the built-in manpath to your local system. Note that
0049 ** every directory should start and end with the '/' and that the first
0050 ** directory should be "/" to allow a full path as an argument.
0051 **
0052 ** The program first checks if PATH_INFO contains some information.
0053 ** If it does (i.e. man2html/some/thing is used), the program will look
0054 ** for a manpage called PATH_INFO in the manpath.
0055 **
0056 ** Otherwise the manpath is searched for the specified command line argument,
0057 ** where the following options can be used:
0058 **
0059 ** name      name of manpage (csh, printf, xv, troff)
0060 ** section   the section (1 2 3 4 5 6 7 8 9 n l 1v ...)
0061 ** -M path   an extra directory to look for manpages (replaces "/")
0062 **
0063 ** If man2html finds multiple manpages that satisfy the options, an index
0064 ** is displayed and the user can make a choice. If only one page is
0065 ** found, that page will be displayed.
0066 **
0067 ** man2html will add links to the converted manpages. The function add_links
0068 ** is used for that. At the moment it will add links as follows, where
0069 **     indicates what should match to start with:
0070 ** ^^^
0071 ** Recognition           Item            Link
0072 ** ----------------------------------------------------------
0073 ** name(*)               Manpage         ../man?/name.*
0074 **     ^
0075 ** name@hostname         Email address   mailto:name@hostname
0076 **     ^
0077 ** method://string       URL             method://string
0078 **       ^^^
0079 ** www.host.name         WWW server      http://www.host.name
0080 ** ^^^^
0081 ** ftp.host.name         FTP server      ftp://ftp.host.name
0082 ** ^^^^
0083 ** <file.h>              Include file    file:/usr/include/file.h
0084 **      ^^^
0085 **
0086 ** Since man2html does not check if manpages, hosts or email addresses exist,
0087 ** some links might not work. For manpages, some extra checks are performed
0088 ** to make sure not every () pair creates a link. Also out of date pages
0089 ** might point to incorrect places.
0090 **
0091 ** The program will not allow users to get system specific files, such as
0092 ** /etc/passwd. It will check that "man" is part of the specified file and
0093 ** that  "/../" isn't. Even if someone manages to get such file, man2html will
0094 ** handle it like a manpage and will usually not produce any output (or crash).
0095 **
0096 ** If you find any bugs when normal manpages are converted, please report
0097 ** them to me (rcb5@win.tue.nl) after you have checked that man(1) can handle
0098 ** the manpage correctly.
0099 **
0100 ** Known bugs and missing features:
0101 **
0102 **  * Equations are not converted at all.
0103 **  * Tables are converted but some features are not possible in html.
0104 **  * The tabbing environment is converted by counting characters and adding
0105 **    spaces. This might go wrong (outside <PRE>)
0106 **  * Some manpages rely on the fact that troff/nroff is used to convert
0107 **    them and use features which are not described in the man manpages.
0108 **    (definitions, calculations, conditionals, requests). I can't guarantee
0109 **    that all these features work on all manpages. (I didn't have the
0110 **    time to look through all the available manpages.)
0111 */
0112
0113 #include "man2html.h"
0114 #include "kio_man_debug.h"
0115 #include "request_hash.h"
0116
0117 #include <config-runtime.h>
0118
0119 #include <ctype.h>
0120
0121 #include <string.h>
0122 #include <unistd.h>
0123
0124 #include <stdio.h>
0125
0126 #include <QByteArray>
0127 #include <QDateTime>
0128 #include <QDebug>
0129 #include <QMap>
0130 #include <QRegularExpression>
0131 #include <QStack>
0132 #include <QString>
0133 #include <QTextCodec>
0134
0135 #ifdef SIMPLE_MAN2HTML
0136 #include <KCompressionDevice>
0137 #include <QDir>
0138 #include <QFile>
0139 #include <QFileInfo>
0140 #include <dirent.h>
0141 #include <iostream>
0142 #include <stdlib.h>
0143 #include <sys/stat.h>
0144 #define BYTEARRAY(x) x.constData()
0145 #else
0146 #include <KLocalizedString>
0147 #define BYTEARRAY(x) x
0148 #endif
0149
// Yields n + 1: the element count for a buffer that holds n characters plus
// one extra element (outbuffer is sized with it; presumably the extra element
// is the terminating NUL, as the name says).
#define NULL_TERMINATED(n) ((n) + 1)

// Maximum string lengths, from largest to smallest.
// NOTE(review): only HUGE_STR_MAX's use (sizing outbuffer) is visible near
// these definitions; confirm where the other two limits apply.
#define HUGE_STR_MAX 10000
#define LARGE_STR_MAX 2000
#define MED_STR_MAX 500

// HTML 4.01 Transitional document type declaration, including the trailing newline.
#define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"

/* mdoc(7) Bl/El lists to HTML list types */
// The three values are distinct powers of two (1, 2, 4).
#define BL_DESC_LIST 1
#define BL_BULLET_LIST 2
#define BL_ENUM_LIST 4

/* mdoc(7) Bd/Ed example(?) blocks */
#define BD_LITERAL 1
#define BD_INDENT 2
0166
// Non-zero selects NROFF mode; it starts out enabled.
static int s_nroff = 1; // NROFF mode by default

// Name remembered for the mdoc Nm macro; empty until one has been stored.
static QByteArray mandoc_name; // Nm can store the first used name

// Counter associated with Nm; starts at zero.
static int mandoc_name_count = 0; /* Don't break on the first Nm */
0172
0173 /* below this you should not change anything unless you know a lot
0174 ** about this program or about troff.
0175 */
0176
/**
 * One entry of the table of predefined special characters.
 */
struct CSTRDEF {
    int nr; ///< Character name packed into an integer (see the V() macro)
    int slen; ///< Length recorded for the replacement text
    const char *st; ///< Replacement text (HTML)
};
0182
/// A NUL-terminated string consisting of a single line feed.
const char NEWLINE[2] = {'\n', '\0'};
0184
0185 /**
0186  * Class for defining strings and macros
0187  */
0188 class StringDefinition
0189 {
0190 public:
0191     StringDefinition(void)
0192         : m_length(0)
0193     {
0194     }
0195     StringDefinition(int len, const char *cstr)
0196         : m_length(len)
0197         , m_output(cstr)
0198     {
0199     }
0200
0201 public:
0202     int m_length; ///< Length of output text
0203     QByteArray m_output; ///< Defined string
0204 };
0205
/**
 * Value and increment of a number register
 * \note Not for internal read-only registers
 */
class NumberDefinition
{
public:
    /// Register with value 0 and increment 0.
    NumberDefinition()
        : NumberDefinition(0, 0)
    {
    }

    /// Register with the given @p value and increment 0 (also allows implicit conversion from int).
    NumberDefinition(int value)
        : NumberDefinition(value, 0)
    {
    }

    /// Register with the given @p value and increment @p incr.
    NumberDefinition(int value, int incr)
        : m_value(value)
        , m_increment(incr)
    {
    }

    int m_value; ///< value of number register
    int m_increment; ///< Increment of number register
    // ### TODO: display form (.af)
};
0234
/**
 * Map of character definitions
 *
 * Filled by InitCharacterDefinitions(): the entries of the standardchars
 * table keyed by their two-character names, plus a few HTML-escaped names.
 */
static QMap<QByteArray, StringDefinition> s_characterDefinitionMap;

/**
 * Map of string variable and macro definitions
 * \note String variables and macros are the same thing!
 */
static QMap<QByteArray, StringDefinition> s_stringDefinitionMap;

/**
 * Map of number registers
 * \note Internal number registers (starting with a dot) are not handled here
 */
static QMap<QByteArray, NumberDefinition> s_numberDefinitionMap;

/// Copies every entry of the standardchars table into s_characterDefinitionMap (defined further down).
static void fill_old_character_definitions(void);
0253
0254 /**
0255  * Initialize character variables
0256  */
0257 static void InitCharacterDefinitions(void)
0258 {
0259     fill_old_character_definitions();
0260     // ### HACK: as we are converting to HTML too early, define characters with HTML references
0261     s_characterDefinitionMap.insert("&lt;-", StringDefinition(1, "&larr;")); // <-
0262     s_characterDefinitionMap.insert("-&gt;", StringDefinition(1, "&rarr;")); // ->
0263     s_characterDefinitionMap.insert("&lt;&gt;", StringDefinition(1, "&harr;")); // <>
0264     s_characterDefinitionMap.insert("&lt;=", StringDefinition(1, "&le;")); // <=
0265     s_characterDefinitionMap.insert("&gt;=", StringDefinition(1, "&ge;")); // >=
0266     // End HACK
0267 }
0268
0269 /**
0270  * Initialize string variables
0271  */
0272 static void InitStringDefinitions(void)
0273 {
0274     // mdoc-only, see mdoc.samples(7)
0275     s_stringDefinitionMap.insert("<=", StringDefinition(1, "&le;"));
0276     s_stringDefinitionMap.insert(">=", StringDefinition(1, "&ge;"));
0277     s_stringDefinitionMap.insert("Rq", StringDefinition(1, "&rdquo;"));
0278     s_stringDefinitionMap.insert("Lq", StringDefinition(1, "&ldquo;"));
0279     s_stringDefinitionMap.insert("ua", StringDefinition(1, "&circ")); // Note this is different from \(ua
0280     s_stringDefinitionMap.insert("aa", StringDefinition(1, "&acute;"));
0281     s_stringDefinitionMap.insert("ga", StringDefinition(1, "`"));
0282     s_stringDefinitionMap.insert("q", StringDefinition(1, "&quot;"));
0283     s_stringDefinitionMap.insert("Pi", StringDefinition(1, "&pi;"));
0284     s_stringDefinitionMap.insert("Ne", StringDefinition(1, "&ne;"));
0285     s_stringDefinitionMap.insert("Le", StringDefinition(1, "&le;"));
0286     s_stringDefinitionMap.insert("Ge", StringDefinition(1, "&ge;"));
0287     s_stringDefinitionMap.insert("Lt", StringDefinition(1, "&lt;"));
0288     s_stringDefinitionMap.insert("Gt", StringDefinition(1, "&gt;"));
0289     s_stringDefinitionMap.insert("Pm", StringDefinition(1, "&plusmn;"));
0290     s_stringDefinitionMap.insert("If", StringDefinition(1, "&infin;"));
0291     s_stringDefinitionMap.insert("Na", StringDefinition(3, "NaN"));
0292     s_stringDefinitionMap.insert("Ba", StringDefinition(1, "|"));
0293     // end mdoc-only
0294     // man(7)
0295     s_stringDefinitionMap.insert("Tm", StringDefinition(1, "&trade;")); // \*(TM
0296     s_stringDefinitionMap.insert("R", StringDefinition(1, "&reg;")); // \*R
0297     s_stringDefinitionMap.insert("lq", StringDefinition(1, "&ldquo;")); // Left angled double quote
0298     s_stringDefinitionMap.insert("rq", StringDefinition(1, "&rdquo;")); // Right angled double quote
0299     // end man(7)
0300     // Missing characters from man(7):
0301     // \*S "Change to default font size"
0302 #ifndef SIMPLE_MAN2HTML
0303     // Special KDE KIO man:
0304     const QByteArray kdeversion(KDE_VERSION_STRING);
0305     s_stringDefinitionMap.insert(".KDE_VERSION_STRING", StringDefinition(kdeversion.length(), kdeversion));
0306 #endif
0307 }
0308
0309 /**
0310  * Initialize number registers
0311  * \note Internal read-only registers are not handled here
0312  */
0313 static void InitNumberDefinitions(void)
0314 {
0315     // As the date number registers are more for end-users, better choose local time.
0316     // Groff seems to support Gregorian dates only
0317     QDate today(QDate::currentDate());
0318     s_numberDefinitionMap.insert("year", today.year()); // Y2K-correct year
0319     s_numberDefinitionMap.insert("yr", today.year() - 1900); // Y2K-incorrect year
0320     s_numberDefinitionMap.insert("mo", today.month());
0321     s_numberDefinitionMap.insert("dy", today.day());
0322     s_numberDefinitionMap.insert("dw", today.dayOfWeek());
0323 }
0324
0325 #define V(A, B) ((A)*256 + (B))
0326
0327 // used in expand_char, e.g. for "\(bu"
0328 //  see groff_char(7) for list
0329 static const CSTRDEF standardchars[] = {
0330     {V('*', '*'), 1, "*"},
0331     {V('*', 'A'), 1, "&Alpha;"},
0332     {V('*', 'B'), 1, "&Beta;"},
0333     {V('*', 'C'), 1, "&Xi;"},
0334     {V('*', 'D'), 1, "&Delta;"},
0335     {V('*', 'E'), 1, "&Epsilon;"},
0336     {V('*', 'F'), 1, "&Phi;"},
0337     {V('*', 'G'), 1, "&Gamma;"},
0338     {V('*', 'H'), 1, "&Theta;"},
0339     {V('*', 'I'), 1, "&Iota;"},
0340     {V('*', 'K'), 1, "&Kappa;"},
0341     {V('*', 'L'), 1, "&Lambda;"},
0342     {V('*', 'M'), 1, "&Mu:"},
0343     {V('*', 'N'), 1, "&Nu;"},
0344     {V('*', 'O'), 1, "&Omicron;"},
0345     {V('*', 'P'), 1, "&Pi;"},
0346     {V('*', 'Q'), 1, "&Psi;"},
0347     {V('*', 'R'), 1, "&Rho;"},
0348     {V('*', 'S'), 1, "&Sigma;"},
0349     {V('*', 'T'), 1, "&Tau;"},
0350     {V('*', 'U'), 1, "&Upsilon;"},
0351     {V('*', 'W'), 1, "&Omega;"},
0352     {V('*', 'X'), 1, "&Chi;"},
0353     {V('*', 'Y'), 1, "&Eta;"},
0354     {V('*', 'Z'), 1, "&Zeta;"},
0355     {V('*', 'a'), 1, "&alpha;"},
0356     {V('*', 'b'), 1, "&beta;"},
0357     {V('*', 'c'), 1, "&xi;"},
0358     {V('*', 'd'), 1, "&delta;"},
0359     {V('*', 'e'), 1, "&epsilon;"},
0360     {V('*', 'f'), 1, "&phi;"},
0361     {V('*', 'g'), 1, "&gamma;"},
0362     {V('*', 'h'), 1, "&theta;"},
0363     {V('*', 'i'), 1, "&iota;"},
0364     {V('*', 'k'), 1, "&kappa;"},
0365     {V('*', 'l'), 1, "&lambda;"},
0366     {V('*', 'm'), 1, "&mu;"},
0367     {V('*', 'n'), 1, "&nu;"},
0368     {V('*', 'o'), 1, "&omicron;"},
0369     {V('*', 'p'), 1, "&pi;"},
0370     {V('*', 'q'), 1, "&psi;"},
0371     {V('*', 'r'), 1, "&rho;"},
0372     {V('*', 's'), 1, "&sigma;"},
0373     {V('*', 't'), 1, "&tau;"},
0374     {V('*', 'u'), 1, "&upsilon;"},
0375     {V('*', 'w'), 1, "&omega;"},
0376     {V('*', 'x'), 1, "&chi;"},
0377     {V('*', 'y'), 1, "&eta;"},
0378     {V('*', 'z'), 1, "&zeta;"},
0379     {V('+', '-'), 1, "&plusmn;"}, // not in groff_char(7)
0380     {V('+', 'f'), 1, "&phi;"}, // phi1, we use the standard phi
0381     {V('+', 'h'), 1, "&theta;"}, // theta1, we use the standard theta
0382     {V('+', 'p'), 1, "&omega;"}, // omega1, we use the standard omega
0383     {V('1', '2'), 1, "&frac12;"},
0384     {V('1', '4'), 1, "&frac14;"},
0385     {V('3', '4'), 1, "&frac34;"},
0386     {V('F', 'i'), 1, "&#xFB03;"}, // ffi ligature
0387     {V('F', 'l'), 1, "&#xFB04;"}, // ffl ligature
0388     {V('a', 'p'), 1, "~"},
0389     {V('b', 'r'), 1, "|"},
0390     {V('b', 'u'), 1, "&bull;"},
0391     {V('b', 'v'), 1, "|"},
0392     {V('c', 'i'), 1, "&#x25CB;"}, // circle
0393     {V('c', 'o'), 1, "&copy;"},
0394     {V('c', 't'), 1, "&cent;"},
0395     {V('d', 'e'), 1, "&deg;"},
0396     {V('d', 'g'), 1, "&dagger;"},
0397     {V('d', 'i'), 1, "&divide;"},
0398     {V('e', 'm'), 1, "&mdash;"},
0399     {V('e', 'n'), 1, "&ndash;"},
0400     {V('e', 'q'), 1, "="},
0401     {V('e', 's'), 1, "&empty;"},
0402     {V('f', 'f'), 1, "&#0xFB00;"}, // ff ligature
0403     {V('f', 'i'), 1, "&#0xFB01;"}, // fi ligature
0404     {V('f', 'l'), 1, "&#0xFB02;"}, // fl ligature
0405     {V('f', 'm'), 1, "&prime;"},
0406     {V('g', 'a'), 1, "`"},
0407     {V('h', 'y'), 1, "-"},
0408     {V('l', 'c'), 2, "|&#175;"}, // ### TODO: not in groff_char(7)
0409     {V('l', 'f'), 2, "|_"}, // ### TODO: not in groff_char(7)
0410     {V('l', 'k'), 1, "<FONT SIZE=+2>{</FONT>"}, // ### TODO: not in groff_char(7)
0411     {V('m', 'i'), 1, "-"}, // ### TODO: not in groff_char(7)
0412     {V('m', 'u'), 1, "&times;"},
0413     {V('n', 'o'), 1, "&not;"},
0414     {V('o', 'r'), 1, "|"},
0415     {V('p', 'l'), 1, "+"},
0416     {V('r', 'c'), 2, "&#175;|"}, // ### TODO: not in groff_char(7)
0417     {V('r', 'f'), 2, "_|"}, // ### TODO: not in groff_char(7)
0418     {V('r', 'g'), 1, "&reg;"},
0419     {V('r', 'k'), 1, "<FONT SIZE=+2>}</FONT>"}, // ### TODO: not in groff_char(7)
0420     {V('r', 'n'), 1, "&oline;"},
0421     {V('r', 'u'), 1, "_"},
0422     {V('s', 'c'), 1, "&sect;"},
0423     {V('s', 'l'), 1, "/"},
0424     {V('s', 'q'), 2, "&#x25A1"}, // WHITE SQUARE
0425     {V('t', 's'), 1, "&#x03C2;"}, // FINAL SIGMA
0426     {V('u', 'l'), 1, "_"},
0427     {V('-', 'D'), 1, "&ETH;"},
0428     {V('S', 'd'), 1, "&eth;"},
0429     {V('T', 'P'), 1, "&THORN;"},
0430     {V('T', 'p'), 1, "&thorn;"},
0431     {V('A', 'E'), 1, "&AElig;"},
0432     {V('a', 'e'), 1, "&aelig;"},
0433     {V('O', 'E'), 1, "&OElig;"},
0434     {V('o', 'e'), 1, "&oelig;"},
0435     {V('s', 's'), 1, "&szlig;"},
0436     {V('\'', 'A'), 1, "&Aacute;"},
0437     {V('\'', 'E'), 1, "&Eacute;"},
0438     {V('\'', 'I'), 1, "&Iacute;"},
0439     {V('\'', 'O'), 1, "&Oacute;"},
0440     {V('\'', 'U'), 1, "&Uacute;"},
0441     {V('\'', 'Y'), 1, "&Yacute;"},
0442     {V('\'', 'a'), 1, "&aacute;"},
0443     {V('\'', 'e'), 1, "&eacute;"},
0444     {V('\'', 'i'), 1, "&iacute;"},
0445     {V('\'', 'o'), 1, "&oacute;"},
0446     {V('\'', 'u'), 1, "&uacute;"},
0447     {V('\'', 'y'), 1, "&yacute;"},
0448     {V(':', 'A'), 1, "&Auml;"},
0449     {V(':', 'E'), 1, "&Euml;"},
0450     {V(':', 'I'), 1, "&Iuml;"},
0451     {V(':', 'O'), 1, "&Ouml;"},
0452     {V(':', 'U'), 1, "&Uuml;"},
0453     {V(':', 'a'), 1, "&auml;"},
0454     {V(':', 'e'), 1, "&euml;"},
0455     {V(':', 'i'), 1, "&iuml;"},
0456     {V(':', 'o'), 1, "&ouml;"},
0457     {V(':', 'u'), 1, "&uuml;"},
0458     {V(':', 'y'), 1, "&yuml;"},
0459     {V('^', 'A'), 1, "&Acirc;"},
0460     {V('^', 'E'), 1, "&Ecirc;"},
0461     {V('^', 'I'), 1, "&Icirc;"},
0462     {V('^', 'O'), 1, "&Ocirc;"},
0463     {V('^', 'U'), 1, "&Ucirc;"},
0464     {V('^', 'a'), 1, "&acirc;"},
0465     {V('^', 'e'), 1, "&ecirc;"},
0466     {V('^', 'i'), 1, "&icirc;"},
0467     {V('^', 'o'), 1, "&ocirc;"},
0468     {V('^', 'u'), 1, "&ucirc;"},
0469     {V('`', 'A'), 1, "&Agrave;"},
0470     {V('`', 'E'), 1, "&Egrave;"},
0471     {V('`', 'I'), 1, "&Igrave;"},
0472     {V('`', 'O'), 1, "&Ograve;"},
0473     {V('`', 'U'), 1, "&Ugrave;"},
0474     {V('`', 'a'), 1, "&agrave;"},
0475     {V('`', 'e'), 1, "&egrave;"},
0476     {V('`', 'i'), 1, "&igrave;"},
0477     {V('`', 'o'), 1, "&ograve;"},
0478     {V('`', 'u'), 1, "&ugrave;"},
0479     {V('~', 'A'), 1, "&Atilde;"},
0480     {V('~', 'N'), 1, "&Ntilde;"},
0481     {V('~', 'O'), 1, "&Otilde;"},
0482     {V('~', 'a'), 1, "&atilde"},
0483     {V('~', 'n'), 1, "&ntilde;"},
0484     {V('~', 'o'), 1, "&otilde;"},
0485     {V(',', 'C'), 1, "&Ccedil;"},
0486     {V(',', 'c'), 1, "&ccedil;"},
0487     {V('/', 'L'), 1, "&#x0141;"},
0488     {V('/', 'l'), 1, "&#x0142;"},
0489     {V('/', 'O'), 1, "&Oslash;"},
0490     {V('/', 'o'), 1, "&oslash;"},
0491     {V('o', 'A'), 1, "&Aring;"},
0492     {V('o', 'a'), 1, "&aring;"},
0493     {V('a', '"'), 1, "\""},
0494     {V('a', '-'), 1, "&macr;"},
0495     {V('a', '.'), 1, "."},
0496     {V('a', '^'), 1, "&circ;"},
0497     {V('a', 'a'), 1, "&acute;"},
0498     {V('a', 'b'), 1, "`"},
0499     {V('a', 'c'), 1, "&cedil;"},
0500     {V('a', 'd'), 1, "&uml;"},
0501     {V('a', 'h'), 1, "&#x02C2;"}, // caron
0502     {V('a', 'o'), 1, "&#x02DA;"}, // ring
0503     {V('a', '~'), 1, "&tilde;"},
0504     {V('h', 'o'), 1, "&#x02DB;"}, // ogonek
0505     {V('.', 'i'), 1, "&#x0131;"}, // dot less i
0506     {V('C', 's'), 1, "&curren;"}, // krazy:exclude=spelling
0507     {V('D', 'o'), 1, "$"},
0508     {V('P', 'o'), 1, "&pound;"},
0509     {V('Y', 'e'), 1, "&yen;"},
0510     {V('F', 'n'), 1, "&fnof;"},
0511     {V('F', 'o'), 1, "&laquo;"},
0512     {V('F', 'c'), 1, "&raquo;"},
0513     {V('f', 'o'), 1, "&#x2039;"}, // single left guillemet
0514     {V('f', 'c'), 1, "&#x203A;"}, // single right guillemet
0515     {V('r', '!'), 1, "&iecl;"},
0516     {V('r', '?'), 1, "&iquest;"},
0517     {V('O', 'f'), 1, "&ordf"},
0518     {V('O', 'm'), 1, "&ordm;"},
0519     {V('p', 'c'), 1, "&middot;"},
0520     {V('S', '1'), 1, "&sup1;"},
0521     {V('S', '2'), 1, "&sup2;"},
0522     {V('S', '3'), 1, "&sup3;"},
0523     {V('<', '-'), 1, "&larr;"},
0524     {V('-', '>'), 1, "&rarr;"},
0525     {V('<', '>'), 1, "&harr;"},
0526     {V('d', 'a'), 1, "&darr;"},
0527     {V('u', 'a'), 1, "&uarr;"},
0528     {V('l', 'A'), 1, "&lArr;"},
0529     {V('r', 'A'), 1, "&rArr;"},
0530     {V('h', 'A'), 1, "&hArr;"},
0531     {V('d', 'A'), 1, "&dArr;"},
0532     {V('u', 'A'), 1, "&uArr;"},
0533     {V('b', 'a'), 1, "|"},
0534     {V('b', 'b'), 1, "&brvbar;"},
0535     {V('t', 'm'), 1, "&trade;"},
0536     {V('d', 'd'), 1, "&Dagger;"},
0537     {V('p', 's'), 1, "&para;"},
0538     {V('%', '0'), 1, "&permil;"},
0539     {V('f', '/'), 1, "&frasl;"}, // Fraction slash
0540     {V('s', 'd'), 1, "&Prime;"},
0541     {V('h', 'a'), 1, "^"},
0542     {V('t', 'i'), 1, "&tilde;"},
0543     {V('l', 'B'), 1, "["},
0544     {V('r', 'B'), 1, "]"},
0545     {V('l', 'C'), 1, "{"},
0546     {V('r', 'C'), 1, "}"},
0547     {V('l', 'a'), 1, "&lt;"},
0548     {V('r', 'a'), 1, "&gt;"},
0549     {V('l', 'h'), 1, "&le;"},
0550     {V('r', 'h'), 1, "&ge;"},
0551     {V('B', 'q'), 1, "&bdquo;"},
0552     {V('b', 'q'), 1, "&sbquo;"},
0553     {V('l', 'q'), 1, "&ldquo;"},
0554     {V('r', 'q'), 1, "&rdquo;"},
0555     {V('o', 'q'), 1, "&lsquo;"},
0556     {V('c', 'q'), 1, "&rsquo;"},
0557     {V('a', 'q'), 1, "'"},
0558     {V('d', 'q'), 1, "\""},
0559     {V('a', 't'), 1, "@"},
0560     {V('s', 'h'), 1, "#"},
0561     {V('r', 's'), 1, "\\"},
0562     {V('t', 'f'), 1, "&there4;"},
0563     {V('~', '~'), 1, "&cong;"},
0564     {V('~', '='), 1, "&asymp;"},
0565     {V('!', '='), 1, "&ne;"},
0566     {V('<', '='), 1, "&le;"},
0567     {V('=', '='), 1, "&equiv;"},
0568     {V('=', '~'), 1, "&cong;"}, // ### TODO: verify
0569     {V('>', '='), 1, "&ge;"},
0570     {V('A', 'N'), 1, "&and;"},
0571     {V('O', 'R'), 1, "&or;"},
0572     {V('t', 'e'), 1, "&exist;"},
0573     {V('f', 'a'), 1, "&forall;"},
0574     {V('A', 'h'), 1, "&alefsym;"},
0575     {V('I', 'm'), 1, "&image;"},
0576     {V('R', 'e'), 1, "&real;"},
0577     {V('i', 'f'), 1, "&infin;"},
0578     {V('m', 'd'), 1, "&sdot;"},
0579     {V('m', 'o'), 1, "&#x2206;"}, // element ### TODO verify
0580     {V('n', 'm'), 1, "&notin;"},
0581     {V('p', 't'), 1, "&prop;"},
0582     {V('p', 'p'), 1, "&perp;"},
0583     {V('s', 'b'), 1, "&sub;"},
0584     {V('s', 'p'), 1, "&sup;"},
0585     {V('i', 'b'), 1, "&sube;"},
0586     {V('i', 'p'), 1, "&supe;"},
0587     {V('i', 's'), 1, "&int;"},
0588     {V('s', 'r'), 1, "&radic;"},
0589     {V('p', 'd'), 1, "&part;"},
0590     {V('c', '*'), 1, "&otimes;"},
0591     {V('c', '+'), 1, "&oplus;"},
0592     {V('c', 'a'), 1, "&cap;"},
0593     {V('c', 'u'), 1, "&cup;"},
0594     {V('g', 'r'), 1, "V"}, // gradient ### TODO Where in Unicode?
0595     {V('C', 'R'), 1, "&crarr;"},
0596     {V('s', 't'), 2, "-)"}, // "such that" ### TODO Where in Unicode?
0597     {V('/', '_'), 1, "&ang;"},
0598     {V('w', 'p'), 1, "&weierp;"},
0599     {V('l', 'z'), 1, "&loz;"},
0600     {V('a', 'n'), 1, "-"}, // "horizontal arrow extension"  ### TODO Where in Unicode?
0601 };
0602
/// Pairs an abbreviated standard name (argument of the .St macro) with its long form.
struct StandardName {
    const char *abbrev; ///< Abbreviation, including the leading '-'
    const char *formalName; ///< Long form of the standard's name
};

/// Table of the known standard abbreviations and their long forms.
static const StandardName STANDARD_NAMES[] = {
    // C language standards
    {"-ansiC", "ANSI X3.159-1989 ('ANSI C89')"},
    {"-ansiC-89", "ANSI X3.159-1989 ('ANSI C89')"},
    {"-isoC", "ISO/IEC 9899:1990 ('ISO C90')"},
    {"-isoC-90", "ISO/IEC 9899:1990 ('ISO C90')"},
    {"-isoC-99", "ISO/IEC 9899:1999 ('ISO C99')"},
    {"-isoC-2011", "ISO/IEC 9899:2011 ('ISO C11')"},
    // POSIX.1
    {"-iso9945-1-90", "ISO/IEC 9945-1:1990 ('POSIX.1')"},
    {"-iso9945-1-96", "ISO/IEC 9945-1:1996 ('POSIX.1')"},
    {"-p1003.1", "IEEE Std 1003.1 ('POSIX.1')"},
    {"-p1003.1-88", "IEEE Std 1003.1-1988 ('POSIX.1')"},
    {"-p1003.1-90", "ISO/IEC 9945-1:1990 ('POSIX.1')"},
    {"-p1003.1-96", "ISO/IEC 9945-1:1996 ('POSIX.1')"},
    {"-p1003.1b-93", "IEEE Std 1003.1b-1993 ('POSIX.1')"},
    {"-p1003.1c-95", "IEEE Std 1003.1c-1995 ('POSIX.1')"},
    {"-p1003.1g-2000", "IEEE Std 1003.1g-2000 ('POSIX.1')"},
    {"-p1003.1i-95", "IEEE Std 1003.1i-1995 ('POSIX.1')"},
    {"-p1003.1-2001", "IEEE Std 1003.1-2001 ('POSIX.1')"},
    {"-p1003.1-2004", "IEEE Std 1003.1-2004 ('POSIX.1')"},
    {"-p1003.1-2008", "IEEE Std 1003.1-2008 ('POSIX.1')"},
    // POSIX.2
    {"-iso9945-2-93", "ISO/IEC 9945-2:1993 ('POSIX.2')"},
    {"-p1003.2", "IEEE Std 1003.2 ('POSIX.2')"},
    {"-p1003.2-92", "IEEE Std 1003.2-1992 ('POSIX.2')"},
    {"-p1003.2a-92", "IEEE Std 1003.2a-1992 ('POSIX.2')"},
    // Single UNIX Specification, SVID and X/Open
    {"-susv2", "Version 2 of the Single UNIX Specification ('SUSv2')"},
    {"-susv3", "Version 3 of the Single UNIX Specification ('SUSv3')"},
    {"-svid4", "System V Interface Definition, Fourth Edition ('SVID4')"},
    {"-xbd5", "X/Open Base Definitions Issue 5 ('XBD5')"},
    {"-xcu5", "X/Open Commands and Utilities Issue 5 ('XCU5')"},
    {"-xcurses4.2", "X/Open Curses Issue 4, Version 2 ('XCURSES4.2')"},
    {"-xns5", "X/Open Networking Services Issue 5 ('XNS5')"},
    {"-xns5.2", "X/Open Networking Services Issue 5.2 ('XNS5.2')"},
    {"-xpg3", "X/Open Portability Guide Issue 3 ('XPG3')"},
    {"-xpg4", "X/Open Portability Guide Issue 4 ('XPG4')"},
    {"-xpg4.2", "X/Open Portability Guide Issue 4, Version 2 ('XPG4.2')"},
    {"-xsh5", "X/Open System Interfaces and Headers Issue 5 ('XSH5')"},
    // Miscellaneous
    {"-ieee754", "IEEE Std 754-1985"},
    {"-iso8802-3", "ISO/IEC 8802-3:1989"},
};
0646
0647 /* default: print code */
0648
0649 /* static char eqndelimopen=0, eqndelimclose=0; */
0650 static char escapesym = '\\', nobreaksym = '\'', controlsym = '.', fieldsym = 0, padsym = 0;
0651
0652 static char *buffer = nullptr;
0653 static int buffpos = 0, buffmax = 0;
0654 static bool scaninbuff = false;
0655 static int itemdepth = 0;
0656 static int in_div = 0;
0657 static int dl_set[20] = {0};
0658 static QStack<QByteArray> listItemStack;
0659 static bool still_dd = 0;
0660 static int tabstops[20] = {8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96};
0661 static int maxtstop = 12;
0662 static int curpos = 0;
0663 static bool break_the_while_loop = false;
0664
// Forward declarations of functions defined elsewhere in this file.
// NOTE(review): parameter meanings are not documented at the point of declaration.
static char *scan_troff(char *c, bool san, char **result);
static char *scan_troff_mandoc(char *c, bool san, char **result);
// The sign argument is optional and defaults to 0.
static int getNumberRegisterValue(const QByteArray &name, int sign = 0);

// List of arguments; empty at start-up.
static QList<QByteArray> s_argumentList;

// CSS file reference, assigned through setCssFile().
static QByteArray cssFile;

static QByteArray s_dollarZero; // Value of $0
0674
/**
 * Remember the CSS file to use.
 *
 * Stores a copy of @p _cssFile in the file-level variable cssFile,
 * replacing any previously stored value.
 */
void setCssFile(const QByteArray &_cssFile)
{
    cssFile = _cssFile;
}
0679
0680 static void fill_old_character_definitions(void)
0681 {
0682     for (const CSTRDEF &standardchar : standardchars) {
0683         const int nr = standardchar.nr;
0684         const char temp[3] = {char(nr / 256), char(nr % 256), 0};
0685         QByteArray name(temp);
0686         s_characterDefinitionMap.insert(name, StringDefinition(standardchar.slen, standardchar.st));
0687     }
0688 }
0689
// Output buffer with room for HUGE_STR_MAX characters plus one extra element.
static char outbuffer[NULL_TERMINATED(HUGE_STR_MAX)];
// NOTE(review): the exact meaning of the following flags is not evident from
// their declarations; see their uses in the output routines.
static int no_newline_output = 0;
static int newline_for_fun = 0;
static bool output_possible = false;
0694
/// Directories probed, in this order, when turning "<name.h>" into a link; the list ends with nullptr.
static const char *const includedirs[] = {
    "/usr/include",
    "/usr/include/sys",
    "/usr/local/include",
    "/opt/local/include",
    "/usr/ccs",
    "/usr/X11R6/include",
    "/usr/openwin/include",
    "/usr/include/g++",
    nullptr, // sentinel
};

/// When true, add_links() passes its text through without creating any links.
static bool ignore_links = false;
0706
0707 static void add_links(char *c)
0708 {
0709     /*
0710     ** Add the links to the output.
0711     ** At the moment the following are recognized:
0712     **
0713     ** name(*)                 -> ../man?/name.*
0714     ** method://string         -> method://string
0715     ** www.host.name           -> http://www.host.name
0716     ** ftp.host.name           -> ftp://ftp.host.name
0717     ** name@host               -> mailto:name@host
0718     ** <name.h>                -> file:/usr/include/name.h   (guess)
0719     **
0720     ** Other possible links to add in the future:
0721     **
0722     ** /dir/dir/file  -> file:/dir/dir/file
0723     */
0724
0725     if (ignore_links) {
0726         output_real(c);
0727         return;
0728     }
0729
0730     int i, j, nr;
0731     char *f, *g, *h;
0732     const int numtests = 6; // Nmber of tests
0733     char *idtest[numtests]; // url, mailto, www, ftp, manpage, C header file
0734     bool ok;
0735     /* search for (section) */
0736     nr = 0;
0737     idtest[0] = strstr(c + 1, "://");
0738     idtest[1] = strchr(c + 1, '@');
0739     idtest[2] = strstr(c, "www.");
0740     idtest[3] = strstr(c, "ftp.");
0741     idtest[4] = strchr(c + 1, '(');
0742     idtest[5] = strstr(c + 1, ".h&gt;");
0743     for (i = 0; i < numtests; ++i)
0744         nr += (idtest[i] != nullptr);
0745     while (nr) {
0746         j = -1;
0747         for (i = 0; i < numtests; i++)
0748             if (idtest[i] && (j < 0 || idtest[i] < idtest[j]))
0749                 j = i;
0750         switch (j) {
0751         case 5: /* <name.h> */
0752         {
0753             f = idtest[5];
0754             h = f + 2;
0755             g = f;
0756             while (g > c && g[-1] != ';')
0757                 g--;
0758             bool wrote_include = false;
0759
0760             if (g != c) {
0761                 QByteArray dir;
0762                 QByteArray file(g, h - g);
0763                 file = file.trimmed();
0764                 for (int index = 0; includedirs[index]; index++) {
0765                     QByteArray str(includedirs[index]);
0766                     str.append('/');
0767                     str.append(file);
0768                     if (!access(str.data(), R_OK)) {
0769                         dir = includedirs[index];
0770                         break;
0771                     }
0772                 }
0773                 if (!dir.isEmpty()) {
0774                     char t;
0775                     t = *g;
0776                     *g = 0;
0777                     output_real(c);
0778                     *g = t;
0779                     *h = 0;
0780
0781                     QByteArray str;
0782                     str.append("<A HREF=\"file:");
0783                     str.append(dir.data());
0784                     str.append("/");
0785                     str.append(file.data());
0786                     str.append("\">");
0787                     str.append(file.data());
0788                     str.append("</A>&gt;");
0789
0790                     output_real(str.data());
0791                     c = f + 6;
0792                     wrote_include = true;
0793                 }
0794             }
0795
0796             if (!wrote_include) {
0797                 f[5] = 0;
0798                 output_real(c);
0799                 f[5] = ';';
0800                 c = f + 5;
0801             }
0802         } break;
0803         case 4: /* manpage */
0804             f = idtest[j];
0805             /* check section */
0806             g = strchr(f, ')');
0807             // The character before f must be alphanumeric, the end of a HTML tag or the end of a &nbsp;
0808             if (g != nullptr && f > c && (g - f) < 12 && (isalnum(f[-1]) || f[-1] == '>' || (f[-1] == ';')) && (isdigit(f[1]) || (f[1] == 'n')) && f[1] != '0'
0809                 && ((g - f) <= 2 || isalpha(f[2]))) {
0810                 ok = true;
0811                 h = f + 2;
0812                 while (h < g) {
0813                     if (!isalnum(*h++)) {
0814                         ok = false;
0815                         break;
0816                     }
0817                 }
0818             } else
0819                 ok = false;
0820
0821             h = f - 1;
0822             if (ok) {
0823                 // Skip &nbsp;
0824                 qCDebug(KIO_MAN_LOG) << "BEFORE SECTION:" << *h;
0825                 if ((h > c + 5) && (!memcmp(h - 5, "&nbsp;", 6))) {
0826                     h -= 6;
0827                     qCDebug(KIO_MAN_LOG) << "Skip &nbsp;";
0828                 } else if ((h > (c + 6)) && (!memcmp(h - 6, "&#8239;", 7))) // &#8239;  narrow space
0829                 {
0830                     h -= 7;
0831                 } else if (*h == ';') {
0832                     // Not a non-breaking space, so probably not ok
0833                     ok = false;
0834                 }
0835             }
0836
0837             if (ok) {
0838                 /* this might be a link */
0839                 /* skip html makeup */
0840                 while (h > c && *h == '>') {
0841                     while (h != c && *h != '<')
0842                         h--;
0843                     if (h != c)
0844                         h--;
0845                 }
0846                 if (isalnum(*h)) {
0847                     char t, sec, *e;
0848                     QByteArray fstr(f);
0849                     e = h + 1;
0850                     sec = f[1];
0851                     const int index = fstr.indexOf(')', 2);
0852                     QByteArray subsec;
0853                     if (index != -1)
0854                         subsec = fstr.mid(2, index - 2);
0855                     else // No closing ')' found, take first character as subsection.
0856                         subsec = fstr.mid(2, 1);
0857                     while (h > c && (isalnum(h[-1]) || h[-1] == '_' || h[-1] == ':' || h[-1] == '-' || h[-1] == '.'))
0858                         h--;
0859                     t = *h;
0860                     *h = '\0';
0861                     output_real(c);
0862                     *h = t;
0863                     t = *e;
0864                     *e = '\0';
0865                     QByteArray str("<a href=\"man:/");
0866                     str += h;
0867                     str += '(';
0868                     str += char(sec);
0869                     if (!subsec.isEmpty())
0870                         str += subsec.toLower();
0871                     str += ")\">";
0872                     str += h;
0873                     str += "</a>";
0874                     output_real(str.data());
0875                     *e = t;
0876                     c = e;
0877                 }
0878             }
0879             *f = '\0';
0880             output_real(c);
0881             *f = '(';
0882             idtest[4] = f - 1;
0883             c = f;
0884             break; /* manpage */
0885         case 3: /* ftp */
0886         case 2: /* www */
0887             g = f = idtest[j];
0888             while (*g && (isalnum(*g) || *g == '_' || *g == '-' || *g == '+' || *g == '.' || *g == '/'))
0889                 g++;
0890             if (g[-1] == '.')
0891                 g--;
0892             if (g - f > 4) {
0893                 char t;
0894                 t = *f;
0895                 *f = '\0';
0896                 output_real(c);
0897                 *f = t;
0898                 t = *g;
0899                 *g = '\0';
0900                 QByteArray str;
0901                 str.append("<A HREF=\"");
0902                 str.append(j == 3 ? "ftp" : "http");
0903                 str.append("://");
0904                 str.append(f);
0905                 str.append("\">");
0906                 str.append(f);
0907                 str.append("</A>");
0908                 output_real(str.data());
0909                 *g = t;
0910                 c = g;
0911             } else {
0912                 f[3] = '\0';
0913                 output_real(c);
0914                 c = f + 3;
0915                 f[3] = '.';
0916             }
0917             break;
0918         case 1: /* mailto */
0919             g = f = idtest[1];
0920             while (g > c && (isalnum(g[-1]) || g[-1] == '_' || g[-1] == '-' || g[-1] == '+' || g[-1] == '.' || g[-1] == '%'))
0921                 g--;
0922             if (g - 7 >= c && g[-1] == ':') {
0923                 // We have perhaps an email address starting with mailto:
0924                 if (!qstrncmp("mailto:", g - 7, 7))
0925                     g -= 7;
0926             }
0927             h = f + 1;
0928             while (*h && (isalnum(*h) || *h == '_' || *h == '-' || *h == '+' || *h == '.'))
0929                 h++;
0930             if (*h == '.')
0931                 h--;
0932             if (h - f > 4 && f - g > 1) {
0933                 char t;
0934                 t = *g;
0935                 *g = '\0';
0936                 output_real(c);
0937                 *g = t;
0938                 t = *h;
0939                 *h = '\0';
0940                 QByteArray str;
0941                 str.append("<A HREF=\"mailto:");
0942                 str.append(g);
0943                 str.append("\">");
0944                 str.append(g);
0945                 str.append("</A>");
0946                 output_real(str.data());
0947                 *h = t;
0948                 c = h;
0949             } else {
0950                 *f = '\0';
0951                 output_real(c);
0952                 *f = '@';
0953                 idtest[1] = c;
0954                 c = f;
0955             }
0956             break;
0957         case 0: /* url */
0958             g = f = idtest[0]; // ://foo...
0959
0960             // backup before :// to get protocol
0961             while (g > c && isalpha(g[-1]) && islower(g[-1]))
0962                 g--;
0963             h = f + 3; // start past ://
0964             // determine length of path and part of query it looks like...
0965             while (*h && !isspace(*h) && *h != '<' && *h != '>' && *h != '"' && *h != '&')
0966                 h++;
0967             // if protocol length 3-6 characters and path has any length at all...
0968             // more tests added because this code breaks stylesheet links that use
0969             // the correct file:/// stuff.
0970             if (f - g > 2 && f - g < 7 && h - f > 3 && (strstr(c, "http://") != nullptr || strstr(c, "ftp://") != nullptr)) {
0971                 char t;
0972                 t = *g;
0973                 *g = '\0';
0974                 output_real(c);
0975                 *g = t;
0976                 t = *h;
0977                 *h = '\0';
0978                 QByteArray str;
0979                 str.append("<A HREF=\"");
0980                 str.append(g);
0981                 str.append("\">");
0982                 str.append(g);
0983                 str.append("</A>");
0984                 output_real(str.data());
0985                 *h = t;
0986                 c = h;
0987             } else {
0988                 f[1] = '\0';
0989                 output_real(c);
0990                 f[1] = '/';
0991                 c = f + 1;
0992             }
0993             break;
0994         default:
0995             break;
0996         }
0997         nr = 0;
0998         if (idtest[0] && idtest[0] <= c)
0999             idtest[0] = strstr(c + 1, "://");
1000         if (idtest[1] && idtest[1] <= c)
1001             idtest[1] = strchr(c + 1, '@');
1002         if (idtest[2] && idtest[2] < c)
1003             idtest[2] = strstr(c, "www.");
1004         if (idtest[3] && idtest[3] < c)
1005             idtest[3] = strstr(c, "ftp.");
1006         if (idtest[4] && idtest[4] <= c)
1007             idtest[4] = strchr(c + 1, '(');
1008         if (idtest[5] && idtest[5] <= c)
1009             idtest[5] = strstr(c + 1, ".h&gt;");
1010         for (i = 0; i < numtests; i++)
1011             nr += (idtest[i] != nullptr);
1012     }
1013     output_real(c);
1014 }
1015
1016 //---------------------------------------------------------------------
1017
1018 static QByteArray current_font; // name of the font currently in effect; updated by set_font()
1019 static int current_size = 0; // current relative font size (0 = default size); updated by change_to_size()
1020
1021 /*
1022  "fillout" is the mode of text output:
1023  1 = fill mode: line breaks happen when the browser wants them (normal HTML text).
1024  0 = no-fill mode: preformatted text (<pre>..</pre>).
1025      Input lines are output as-is, retaining line breaks and ignoring the current line length.
1026 */
1027 static int fillout = 1;
1028
1029 //---------------------------------------------------------------------
1030
1031 static void out_html(const char *c)
1032 {
1033     if (!c || !*c)
1034         return;
1035
1036     // Added, probably due to the const?
1037     char *c2 = qstrdup(c);
1038     char *c3 = c2;
1039
1040     static int obp = 0;
1041
1042     if (no_newline_output) {
1043         int i = 0;
1044         no_newline_output = 1;
1045         while (c2[i]) {
1046             if (!no_newline_output)
1047                 c2[i - 1] = c2[i];
1048             if (c2[i] == '\n')
1049                 no_newline_output = 0;
1050             i++;
1051         }
1052         if (!no_newline_output)
1053             c2[i - 1] = 0;
1054     }
1055     if (scaninbuff) {
1056         while (*c2) {
1057             if (buffpos >= buffmax) {
1058                 char *h = new char[buffmax * 2];
1059
1060                 memcpy(h, buffer, buffmax);
1061                 delete[] buffer;
1062                 buffer = h;
1063                 buffmax = buffmax * 2;
1064             }
1065             buffer[buffpos++] = *c2++;
1066         }
1067     } else if (output_possible) {
1068         while (*c2) {
1069             outbuffer[obp++] = *c2;
1070             if (*c2 == '\n' || obp >= HUGE_STR_MAX) {
1071                 outbuffer[obp] = '\0';
1072                 add_links(outbuffer);
1073                 obp = 0;
1074             }
1075             c2++;
1076         }
1077     }
1078     delete[] c3;
1079 }
1080
1081 //---------------------------------------------------------------------
1082
1083 void checkListStack() // see if we need to end a previously begun list item
1084 {
1085     if (!listItemStack.isEmpty() && (listItemStack.size() == itemdepth)) {
1086         out_html("</");
1087         out_html(listItemStack.pop());
1088         out_html(">");
1089     }
1090 }
1091
1092 //---------------------------------------------------------------------
1093
1094 static QByteArray set_font(const QByteArray &name)
1095 {
1096     // Every font but R (Regular) creates <span> elements
1097     QByteArray markup;
1098     if ((current_font != "R") && (current_font != "P") && !current_font.isEmpty())
1099         markup += "</span>";
1100     const uint len = name.length();
1101     bool fontok = true;
1102     if (len == 1) {
1103         const char lead = name[0];
1104         switch (lead) {
1105         case 'P': // ### TODO: this seems to mean "precedent font"
1106         case 'R':
1107             break; // regular, do nothing
1108         case 'I':
1109             markup += "<span style=\"font-style:italic\">";
1110             break;
1111         case 'B':
1112             markup += "<span style=\"font-weight:bold\">";
1113             break;
1114         case 'L':
1115             markup += "<span style=\"font-family:monospace\">";
1116             break; // ### What's L?
1117         default:
1118             fontok = false;
1119         }
1120     } else if (len == 2) {
1121         if (name == "BI")
1122             markup += "<span style=\"font-style:italic;font-weight:bold\">";
1123         // Courier
1124         else if (name == "CR")
1125             markup += "<span style=\"font-family:monospace\">";
1126         else if (name == "CW") // CW is used by pod2man(1) (part of perldoc(1))
1127             markup += "<span style=\"font-family:monospace\">";
1128         else if (name == "CI")
1129             markup += "<span style=\"font-family:monospace;font-style:italic\">";
1130         else if (name == "CB")
1131             markup += "<span style=\"font-family:monospace;font-weight:bold\">";
1132         // Times
1133         else if (name == "TR")
1134             markup += "<span style=\"font-family:serif\">";
1135         else if (name == "TI")
1136             markup += "<span style=\"font-family:serif;font-style:italic\">";
1137         else if (name == "TB")
1138             markup += "<span style=\"font-family:serif;font-weight:bold\">";
1139         // Helvetica
1140         else if (name == "HR")
1141             markup += "<span style=\"font-family:sans-serif\">";
1142         else if (name == "HI")
1143             markup += "<span style=\"font-family:sans-serif;font-style:italic\">";
1144         else if (name == "HB")
1145             markup += "<span style=\"font-family:sans-serif;font-weight:bold\">";
1146         else
1147             fontok = false;
1148     } else if (len == 3) {
1149         if (name == "CBI")
1150             markup += "<span style=\"font-family:monospace;font-style:italic;font-weight:bold\">";
1151         else if (name == "TBI")
1152             markup += "<span style=\"font-family:serif;font-style:italic;font-weight:bold\">";
1153         else if (name == "HBI")
1154             markup += "<span style=\"font-family:sans-serif;font-style:italic;font-weight:bold\">";
1155         else
1156             fontok = false;
1157     } else
1158         fontok = false;
1159
1160     if (fontok)
1161         current_font = name;
1162     else
1163         current_font = "R"; // Still nothing, then it is 'R' (Regular) // krazy:exclude=doublequote_chars
1164     return markup;
1165 }
1166
1167 //---------------------------------------------------------------------
1168
1169 static QByteArray change_to_size(int nr)
1170 {
1171     switch (nr) {
1172     case '0':
1173     case '1':
1174     case '2':
1175     case '3':
1176     case '4':
1177     case '5':
1178     case '6':
1179     case '7':
1180     case '8':
1181     case '9':
1182         nr = nr - '0';
1183         break;
1184     case '\0':
1185         break;
1186     default:
1187         nr = current_size + nr;
1188         if (nr > 9)
1189             nr = 9;
1190         if (nr < -9)
1191             nr = -9;
1192         break;
1193     }
1194     if (nr == current_size)
1195         return "";
1196     const QByteArray font(current_font);
1197     QByteArray markup;
1198     markup = set_font("R");
1199     if (current_size)
1200         markup += "</span>";
1201     current_size = nr;
1202     if (nr) {
1203         int percent = 100 + nr * 1;
1204         markup += "<span style=\"font-size:";
1205         markup += QByteArray::number(percent);
1206         markup += "%\">";
1207     }
1208     markup += set_font(font);
1209     return markup;
1210 }
1211
1212 //---------------------------------------------------------------------
1213
1214 /* static int asint=0; */
1215 static int intresult = 0; // NOTE(review): not referenced in this part of the file; purpose to be confirmed at its users
1216
1217 static bool skip_escape = false; // when true, scan_named_font() still parses the font name but returns no markup and leaves the font unchanged
1218 static bool single_escape = false; // NOTE(review): not referenced in this part of the file; purpose to be confirmed at its users
1219 // Forward declaration. The scan_named_*() helpers call it to handle an escape found inside a name: it returns the new scan position and stores text in cstr, which they use as (part of) the name.
1220 static char *scan_escape_direct(char *c, QByteArray &cstr);
1221
1222 /**
1223  * scan a named character
1224  * param c position
1225  */
1226 static QByteArray scan_named_character(char *&c)
1227 {
1228     QByteArray name;
1229     if (*c == '(') {
1230         // \*(ab  Name of two characters
1231         if (c[1] == escapesym) {
1232             QByteArray cstr;
1233             c = scan_escape_direct(c + 2, cstr);
1234             // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the
1235             // variable are to be used.
1236             name = cstr;
1237         } else {
1238             name += c[1];
1239             name += c[2];
1240             c += 3;
1241         }
1242     } else if (*c == '[') {
1243         // \*[long_name]  Long name
1244         // Named character groff(7)
1245         // We must find the ] to get a name
1246         c++;
1247         while (*c && *c != ']' && *c != '\n') {
1248             if (*c == escapesym) {
1249                 QByteArray cstr;
1250                 c = scan_escape_direct(c + 1, cstr);
1251                 const int result = cstr.indexOf(']');
1252                 if (result == -1)
1253                     name += cstr;
1254                 else {
1255                     // Note: we drop the characters after the ]
1256                     name += cstr.left(result);
1257                 }
1258             } else {
1259                 name += *c;
1260                 c++;
1261             }
1262         }
1263         if (!*c || *c == '\n') {
1264             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse character name: " << BYTEARRAY(name);
1265             return "";
1266         }
1267         c++;
1268     } else if (*c == 'C' || c[1] == '\'') {
1269         // \C'name'
1270         c += 2;
1271         while (*c && *c != '\'' && *c != '\n') {
1272             if (*c == escapesym) {
1273                 QByteArray cstr;
1274                 c = scan_escape_direct(c + 1, cstr);
1275                 const int result = cstr.indexOf('\'');
1276                 if (result == -1)
1277                     name += cstr;
1278                 else {
1279                     // Note: we drop the characters after the ]
1280                     name += cstr.left(result);
1281                 }
1282             } else {
1283                 name += *c;
1284                 c++;
1285             }
1286         }
1287         if (!*c || *c == '\n') {
1288             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse (\\C mode) character name: " << BYTEARRAY(name);
1289             return "";
1290         }
1291         c++;
1292     }
1293     // Note: characters with a one character length name do not exist, as they would collide with other escapes
1294
1295     // Now we have the name, let us find it between the string names
1296     QMap<QByteArray, StringDefinition>::const_iterator it = s_characterDefinitionMap.constFind(name);
1297     if (it == s_characterDefinitionMap.constEnd()) {
1298         qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find character with name: " << BYTEARRAY(name);
1299         // No output, as an undefined string is empty by default
1300         return "";
1301     } else {
1302         qCDebug(KIO_MAN_LOG) << "Character with name: \"" << BYTEARRAY(name) << "\" => " << BYTEARRAY((*it).m_output);
1303         return (*it).m_output;
1304     }
1305 }
1306
1307 //---------------------------------------------------------------------
1308
1309 static QByteArray scan_named_string(char *&c)
1310 {
1311     QByteArray name;
1312     if (*c == '(') {
1313         // \*(ab  Name of two characters
1314         if (c[1] == escapesym) {
1315             QByteArray cstr;
1316             c = scan_escape_direct(c + 2, cstr);
1317             qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr);
1318             // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the
1319             // variable are to be used.
1320             name = cstr;
1321         } else {
1322             name += c[1];
1323             name += c[2];
1324             c += 3;
1325         }
1326     } else if (*c == '[') {
1327         // \*[long_name]  Long name
1328         // Named character groff(7)
1329         // We must find the ] to get a name
1330         c++;
1331         while (*c && *c != ']' && *c != '\n') {
1332             if (*c == escapesym) {
1333                 QByteArray cstr;
1334                 c = scan_escape_direct(c + 1, cstr);
1335                 const int result = cstr.indexOf(']');
1336                 if (result == -1)
1337                     name += cstr;
1338                 else {
1339                     // Note: we drop the characters after the ]
1340                     name += cstr.left(result);
1341                 }
1342             } else {
1343                 name += *c;
1344                 c++;
1345             }
1346         }
1347         if (!*c || *c == '\n') {
1348             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse string name: " << BYTEARRAY(name);
1349             return "";
1350         }
1351         c++;
1352     } else {
1353         // \*a Name of one character
1354         name += *c;
1355         c++;
1356     }
1357     // Now we have the name, let us find it between the string names
1358     QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(name);
1359     if (it == s_stringDefinitionMap.constEnd()) {
1360         // try a number register:
1361         return QByteArray::number(getNumberRegisterValue(name));
1362
1363         // qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string with name: " << BYTEARRAY(name);
1364         //  No output, as an undefined string is empty by default
1365         // return "";
1366     } else {
1367         qCDebug(KIO_MAN_LOG) << "String with name: '" << BYTEARRAY(name) << "' => >>>" << BYTEARRAY((*it).m_output) << "<<<";
1368         return (*it).m_output;
1369     }
1370 }
1371
1372 //---------------------------------------------------------------------
1373
1374 static QByteArray scan_dollar_parameter(char *&c)
1375 {
1376     int argno = 0; // No dollar argument number yet!
1377     if (*c == '0') {
1378         // qCDebug(KIO_MAN_LOG) << "$0";
1379         c++;
1380         return s_dollarZero;
1381     } else if (*c >= '1' && *c <= '9') {
1382         // qCDebug(KIO_MAN_LOG) << "$ direct";
1383         argno = (*c - '0');
1384         c++;
1385     } else if (*c == '(') {
1386         // qCDebug(KIO_MAN_LOG) << "$(";
1387         if (c[1] && c[2] && c[1] >= '0' && c[1] <= '9' && c[2] >= '0' && c[2] <= '9') {
1388             argno = (c[1] - '0') * 10 + (c[2] - '0');
1389             c += 3;
1390         } else {
1391             if (!c[1])
1392                 c++;
1393             else if (!c[2])
1394                 c += 2;
1395             else
1396                 c += 3;
1397             return "";
1398         }
1399     } else if (*c == '[') {
1400         // qCDebug(KIO_MAN_LOG) << "$[";
1401         argno = 0;
1402         c++;
1403         while (*c && *c >= '0' && *c <= '9' && *c != ']') {
1404             argno *= 10;
1405             argno += (*c - '0');
1406             c++;
1407         }
1408         if (*c != ']') {
1409             return "";
1410         }
1411         c++;
1412     } else if ((*c == '*') || (*c == '@')) {
1413         const bool quote = (*c == '@');
1414         QList<QByteArray>::const_iterator it = s_argumentList.constBegin();
1415         QByteArray param;
1416         bool space = false;
1417         for (; it != s_argumentList.constEnd(); ++it) {
1418             if (space)
1419                 param += ' ';
1420             if (quote)
1421                 param += '\"'; // Not as HTML, as it could be used by macros !
1422             param += (*it);
1423             if (quote)
1424                 param += '\"'; // Not as HTML, as it could be used by macros!
1425             space = true;
1426         }
1427         c++;
1428         return param;
1429     } else {
1430         qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown parameter $" << *c;
1431         return "";
1432     }
1433     // qCDebug(KIO_MAN_LOG) << "ARG $" << argno;
1434     if (!s_argumentList.isEmpty() && argno > 0) {
1435         // qCDebug(KIO_MAN_LOG) << "ARG $" << argno << " OK!";
1436         argno--;
1437         if (argno >= s_argumentList.size()) {
1438             qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find parameter $" << (argno + 1);
1439             return "";
1440         }
1441
1442         return s_argumentList[argno];
1443     }
1444     return "";
1445 }
1446
1447 //---------------------------------------------------------------------
1448 /// return the value of read-only number registers
1449
1450 static int read_only_number_register(const QByteArray &name)
1451 {
1452     // Internal read-only variables
1453     if (name == ".$") {
1454         qCDebug(KIO_MAN_LOG) << "\\n[.$] == " << s_argumentList.size();
1455         return s_argumentList.size();
1456     } else if (name == ".g")
1457         return 0; // We are not groff(1)
1458     else if (name == ".s")
1459         return current_size;
1460 #if 0
1461     // ### TODO: map the fonts to a number
1462     else if (name == ".f")
1463         return current_font;
1464 #endif
1465     else if (name == ".P")
1466         return 0; // We are not printing
1467     else if (name == ".A")
1468         return s_nroff;
1469 #ifndef SIMPLE_MAN2HTML
1470     // Special KDE KIO man:
1471     const QString version_string(KDE_VERSION_STRING);
1472     const int version_major = version_string.section('.', 0, 0).toInt();
1473     const int version_minor = version_string.section('.', 1, 1).toInt();
1474     const int version_patch = version_string.section('.', 2, 2).toInt();
1475     if (name == ".KDE_VERSION_MAJOR")
1476         return version_major;
1477     else if (name == ".KDE_VERSION_MINOR")
1478         return version_minor;
1479     else if (name == ".KDE_VERSION_RELEASE")
1480         return version_patch;
1481     else if (name == ".KDE_VERSION")
1482         return (version_major << 16) | (version_minor << 8) | version_patch;
1483 #endif
1484     else if (name == ".T")
1485         return 0; // Set to 1 in nroff, if -T option used; always 0 in troff.
1486
1487     // ### TODO: groff defines many more read-only number registers
1488     qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown read-only number register: " << BYTEARRAY(name);
1489
1490     return 0; // Undefined variable
1491 }
1492
1493 //---------------------------------------------------------------------
1494
1495 static int getNumberRegisterValue(const QByteArray &name, int sign)
1496 {
1497     if (name[0] == '.') {
1498         return read_only_number_register(name);
1499     } else {
1500         QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
1501         if (it == s_numberDefinitionMap.end()) {
1502             return 0; // Undefined variable
1503         } else {
1504             (*it).m_value += sign * (*it).m_increment;
1505             return (*it).m_value;
1506         }
1507     }
1508 }
1509
1510 //---------------------------------------------------------------------
1511 /// get the value of a number register and auto-increment if asked
1512
1513 static int scan_number_register(char *&c)
1514 {
1515     int sign = 0; // Sign for auto-increment (if any)
1516     switch (*c) {
1517     case '+':
1518         sign = 1;
1519         c++;
1520         break;
1521     case '-':
1522         sign = -1;
1523         c++;
1524         break;
1525     default:
1526         break;
1527     }
1528     QByteArray name;
1529     if (*c == '[') {
1530         c++;
1531         if (*c == '+') {
1532             sign = 1;
1533             c++;
1534         } else if (*c == '-') {
1535             sign = -1;
1536             c++;
1537         }
1538         while (*c && *c != ']' && *c != '\n') {
1539             // ### TODO: a \*[string] could be inside and should be processed
1540             name += *c;
1541             c++;
1542         }
1543         if (!*c || *c == '\n') {
1544             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse number register name: " << BYTEARRAY(name);
1545             return 0;
1546         }
1547         c++;
1548     } else if (*c == '(') {
1549         c++;
1550         if (*c == '+') {
1551             sign = 1;
1552             c++;
1553         } else if (*c == '-') {
1554             sign = -1;
1555             c++;
1556         }
1557         name += c[0];
1558         name += c[1];
1559         c += 2;
1560     } else {
1561         name += *c;
1562         c++;
1563     }
1564
1565     return getNumberRegisterValue(name, sign);
1566 }
1567
1568 //---------------------------------------------------------------------
1569 // scan a name from the following
1570 // x     ... return x    (one char)
1571 // (xx   ... return xx   (two chars)
1572 // [xxx] ... return xxx  (any chars)
1573 // after scanning, c points to the terminating char (0, \n or ])
1574
1575 static QByteArray scan_name(char *&c)
1576 {
1577     QByteArray name;
1578     if (*c == '(') {
1579         int i = 0;
1580         for (c++; *c && (*c != '\n') && (i < 2); c++, i++)
1581             name += *c;
1582     } else if (*c == '[') {
1583         for (c++; *c && (*c != ']') && (*c != '\n'); c++)
1584             name += *c;
1585     } else
1586         name += *c;
1587
1588     return name;
1589 }
1590
1591 //---------------------------------------------------------------------
1592 /// get and set font
1593
1594 static QByteArray scan_named_font(char *&c)
1595 {
1596     QByteArray name;
1597     if (*c == '(') {
1598         // \f(ab  Name of two characters
1599         if (c[1] == escapesym) {
1600             QByteArray cstr;
1601             c = scan_escape_direct(c + 2, cstr);
1602             qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr);
1603             // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the
1604             // variable are to be used.
1605             name = cstr;
1606         } else {
1607             name += c[1];
1608             name += c[2];
1609             c += 3;
1610         }
1611     } else if (*c == '[') {
1612         // \f[long_name]  Long name
1613         // We must find the ] to get a name
1614         c++;
1615         while (*c && *c != ']' && *c != '\n') {
1616             if (*c == escapesym) {
1617                 QByteArray cstr;
1618                 c = scan_escape_direct(c + 1, cstr);
1619                 const int result = cstr.indexOf(']');
1620                 if (result == -1)
1621                     name += cstr;
1622                 else {
1623                     // Note: we drop the characters after the ]
1624                     name += cstr.left(result);
1625                 }
1626             } else {
1627                 name += *c;
1628                 c++;
1629             }
1630         }
1631         if (!*c || *c == '\n') {
1632             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse font name: " << BYTEARRAY(name);
1633             return "";
1634         }
1635         c++;
1636     } else if (*c) // \f alone makes c point at 0-byte
1637     {
1638         // \fa Font name with one character or one digit
1639         // ### HACK do *not* use:  name = *c;  or name would be empty
1640         name += *c;
1641         c++;
1642     }
1643     // qCDebug(KIO_MAN_LOG) << "FONT NAME: " << BYTEARRAY( name );
1644     //  Now we have the name, let us find the font
1645     bool ok = false;
1646     const unsigned int number = name.toUInt(&ok);
1647     if (ok) {
1648         if (number < 5) {
1649             const char *const fonts[] = {"R", "I", "B", "BI", "CR"}; // Regular, Italic, Bold, Bold Italic, Courier regular
1650             name = fonts[number];
1651         } else {
1652             qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has too big number: " << BYTEARRAY(name) << " => " << number;
1653             name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars
1654         }
1655     } else if (name.isEmpty()) {
1656         qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has no name => using R";
1657         name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars
1658     }
1659     if (!skip_escape)
1660         return set_font(name);
1661     else
1662         return "";
1663 }
1664
1665 //---------------------------------------------------------------------
1666
1667 static QByteArray scan_number_code(char *&c)
1668 {
1669     QByteArray number;
1670     if (*c != '\'')
1671         return "";
1672     c++; // Go past the opening single quote
1673     while (*c && (*c != '\n') && (*c != '\'')) {
1674         number += *c;
1675         c++;
1676     }
1677     bool ok = false;
1678     unsigned int result = number.toUInt(&ok);
1679     if ((result < ' ') || (result > 65535))
1680         return "";
1681     else if (result == '\t') {
1682         curpos += 8;
1683         curpos &= 0xfff8;
1684         return "\t";
1685     }
1686     number.setNum(result);
1687     number.prepend("&#");
1688     number.append(";");
1689     curpos++;
1690     c++; // Go past the closing single quote
1691     return number;
1692 }
1693
1694 //---------------------------------------------------------------------
1695 // ### TODO known missing escapes from groff(7):
1696 // ### TODO \R
1697
1698 static char *scan_escape_direct(char *c, QByteArray &cstr)
1699 {
1700     bool exoutputp;
1701     bool exskipescape;
1702     int i, j;
1703     bool cplusplus = true; // Should the c++ call be executed at the end of the function
1704
1705     cstr.clear();
1706     intresult = 0;
1707     switch (*c) {
1708     case 'e':
1709         cstr += escapesym;
1710         curpos++;
1711         break;
1712     case '0': // space of digit width
1713         cstr = "&#8199;"; // Unicode FIGURE SPACE
1714         curpos++;
1715         break;
1716     case '~': // non-breakable-space (resizeable!)
1717     case ' ':
1718         cstr = "&nbsp;";
1719         curpos++;
1720         break;
1721     case '|': // half-non-breakable-space
1722     case '^': // quarter-non-breakable-space
1723         cstr = "&#8239;"; // Unicode NARROW NO-BREAK SPACE
1724         curpos++;
1725         break;
1726     case ':':
1727         break; // ignore optional line break
1728     case ',':
1729         break; //  left italic correction, always a zero motion
1730     case '/':
1731         cstr = "&#8201;"; // Unicode THIN SPACE
1732         curpos++;
1733         break; // italic correction, i.e. a small piece of horizontal motion
1734     case '"': // comment. skip rest of line
1735         for (c++; *c && (*c != '\n'); c++)
1736             ;
1737         cplusplus = false;
1738         break;
1739     // ### TODO \# like \" but does not ignore the end of line (groff(7))
1740     case '$': {
1741         c++;
1742         cstr = scan_dollar_parameter(c);
1743         cplusplus = false;
1744         break;
1745     }
1746     case 'z': {
1747         c++;
1748         if (*c == '\\') {
1749             c = scan_escape_direct(c + 1, cstr);
1750             c--;
1751         } else
1752             cstr = QByteArray(c, 1);
1753         break;
1754     }
1755     case 'k': {
1756         // Store the current horizontal position in the _input_ line in
1757         // number register with name POSITION
1758         c++;
1759         cstr = scan_name(c);
1760         cstr.clear(); // TODO not implemented; discard it
1761         break;
1762     }
1763     case '!':
1764     case '%':
1765     case 'a':
1766     case 'd':
1767     case 'r':
1768     case 'u':
1769     case '\n':
1770     case '&': // Non-printing, zero width character
1771     case ')': // Transparent non-printing zero width character
1772         break;
1773     case '(':
1774     case '[':
1775     case 'C': {
1776         // Do not go forward as scan_named_character needs the leading symbol
1777         cstr = scan_named_character(c);
1778         cplusplus = false;
1779         break;
1780     }
1781     case '*': {
1782         c++;
1783         cstr = scan_named_string(c);
1784         cplusplus = false;
1785         break;
1786     }
1787     case 'f': {
1788         c++;
1789         cstr = scan_named_font(c);
1790         cplusplus = false;
1791         break;
1792     }
1793     case 'F': // font family
1794     {
1795         c++;
1796         cstr = scan_name(c);
1797
1798         if (cstr == "C")
1799             cstr = set_font("CR");
1800         else if (cstr == "T")
1801             cstr = set_font("TR");
1802         else if (cstr == "H")
1803             cstr = set_font("HR");
1804         else
1805             cstr = set_font(cstr);
1806
1807         break;
1808     }
1809     case 'm': // color
1810     {
1811         c++;
1812         cstr = scan_name(c);
1813
1814         if (cstr.isEmpty())
1815             cstr = "</span>";
1816         else
1817             cstr = "<span style='color:" + cstr + "'>";
1818
1819         break;
1820     }
1821     case 's': // ### FIXME: many forms are missing
1822         c++;
1823         j = 0;
1824         i = 0;
1825         if (*c == '-') {
1826             j = -1;
1827             c++;
1828         } else if (*c == '+') {
1829             j = 1;
1830             c++;
1831         }
1832         if (*c == '0')
1833             c++;
1834         else if (*c == '\\') {
1835             c++;
1836             c = scan_escape_direct(c, cstr);
1837             i = intresult;
1838             if (!j)
1839                 j = 1;
1840         } else
1841             while (isdigit(*c) && (!i || (!j && i < 4)))
1842                 i = i * 10 + (*c++) - '0';
1843         if (!j) {
1844             j = 1;
1845             if (i)
1846                 i = i - 10;
1847         }
1848         if (!skip_escape)
1849             cstr = change_to_size(i * j);
1850         c--;
1851         break;
1852     case 'n': {
1853         c++;
1854         intresult = scan_number_register(c);
1855         cplusplus = false;
1856         break;
1857     }
1858     case 'w':
1859         c++;
1860         i = *c;
1861         c++;
1862         exoutputp = output_possible;
1863         exskipescape = skip_escape;
1864         output_possible = false;
1865         skip_escape = true;
1866         j = 0;
1867         while (*c != i) {
1868             j++;
1869             if (*c == escapesym)
1870                 c = scan_escape_direct(c + 1, cstr);
1871             else
1872                 c++;
1873         }
1874         output_possible = exoutputp;
1875         skip_escape = exskipescape;
1876         intresult = j;
1877         break;
1878     case 'l':
1879         cstr = "<HR>";
1880         curpos = 0;
1881     case 'b':
1882     case 'v':
1883     case 'x':
1884     case 'o':
1885     case 'L':
1886     case 'h':
1887         c++;
1888         i = *c;
1889         c++;
1890         exoutputp = output_possible;
1891         exskipescape = skip_escape;
1892         output_possible = 0;
1893         skip_escape = true;
1894         while (*c != i)
1895             if (*c == escapesym)
1896                 c = scan_escape_direct(c + 1, cstr);
1897             else
1898                 c++;
1899         output_possible = exoutputp;
1900         skip_escape = exskipescape;
1901         break;
1902     case 'c':
1903         no_newline_output = 1;
1904         break;
1905     case '{':
1906         newline_for_fun++;
1907         break; // Start conditional block
1908     case '}':
1909         if (newline_for_fun)
1910             newline_for_fun--;
1911         break; // End conditional block
1912     case 'p':
1913         cstr = "<BR>\n";
1914         curpos = 0;
1915         break;
1916     case 't':
1917         cstr = "\t";
1918         curpos = (curpos + 8) & 0xfff8;
1919         break;
1920     case '<':
1921         cstr = "&lt;";
1922         curpos++;
1923         break;
1924     case '>':
1925         cstr = "&gt;";
1926         curpos++;
1927         break;
1928     case '\\': {
1929         if (single_escape)
1930             c--;
1931         else
1932             cstr = "\\";
1933         break;
1934     }
1935     case 'N': {
1936         c++;
1937         cstr = scan_number_code(c);
1938         cplusplus = false;
1939         break;
1940     }
1941     case '\'':
1942         cstr = "&acute;";
1943         curpos++;
1944         break; // groff(7) ### TODO verify
1945     case '`':
1946         cstr = "`"; // krazy:exclude=doublequote_chars
1947         curpos++;
1948         break; // groff(7)
1949     case '-':
1950         cstr = "-"; // krazy:exclude=doublequote_chars
1951         curpos++;
1952         break; // groff(7)
1953     case '.':
1954         cstr = "."; // krazy:exclude=doublequote_chars
1955         curpos++;
1956         break; // groff(7)
1957     default:
1958         cstr = QByteArray(c, 1);
1959         curpos++;
1960         break;
1961     }
1962     if (cplusplus && *c)
1963         c++;
1964     return c;
1965 }
1966
1967 //---------------------------------------------------------------------
1968
1969 static char *scan_escape(char *c)
1970 {
1971     QByteArray cstr;
1972     char *result = scan_escape_direct(c, cstr);
1973     if (!skip_escape)
1974         out_html(cstr);
1975     return result;
1976 }
1977
1978 //---------------------------------------------------------------------
1979
1980 class TABLEROW;
1981
1982 class TABLEITEM
1983 {
1984 public:
1985     TABLEITEM(TABLEROW *row);
1986     ~TABLEITEM()
1987     {
1988         delete[] contents;
1989     }
1990     void setContents(const char *_contents)
1991     {
1992         delete[] contents;
1993         contents = qstrdup(_contents);
1994     }
1995     const char *getContents() const
1996     {
1997         return contents;
1998     }
1999
2000     void init()
2001     {
2002         delete[] contents;
2003         contents = nullptr;
2004         size = 0;
2005         align = 0;
2006         valign = 0;
2007         colspan = 1;
2008         rowspan = 1;
2009         font = 0;
2010         vleft = 0;
2011         vright = 0;
2012         space = 0;
2013         width = 0;
2014     }
2015
2016     void copyLayout(const TABLEITEM *orig)
2017     {
2018         size = orig->size;
2019         align = orig->align;
2020         valign = orig->valign;
2021         colspan = orig->colspan;
2022         rowspan = orig->rowspan;
2023         font = orig->font;
2024         vleft = orig->vleft;
2025         vright = orig->vright;
2026         space = orig->space;
2027         width = orig->width;
2028     }
2029
2030 public:
2031     int size, align, valign, colspan, rowspan, font, vleft, vright, space, width;
2032
2033 private:
2034     char *contents;
2035     TABLEROW *_parent;
2036 };
2037
2038 class TABLEROW
2039 {
2040     char *test;
2041
2042 public:
2043     TABLEROW()
2044     {
2045         test = new char;
2046         prev = nullptr;
2047         next = nullptr;
2048     }
2049     ~TABLEROW()
2050     {
2051         qDeleteAll(items);
2052         items.clear();
2053         delete test;
2054     }
2055     int length() const
2056     {
2057         return items.count();
2058     }
2059     bool has(int index)
2060     {
2061         return (index >= 0) && (index < (int)items.count());
2062     }
2063     TABLEITEM &at(int index)
2064     {
2065         return *items.at(index);
2066     }
2067
2068     TABLEROW *copyLayout() const;
2069
2070     void addItem(TABLEITEM *item)
2071     {
2072         items.append(item);
2073     }
2074     TABLEROW *prev, *next;
2075
2076 private:
2077     QList<TABLEITEM *> items;
2078 };
2079
2080 TABLEITEM::TABLEITEM(TABLEROW *row)
2081     : contents(nullptr)
2082     , _parent(row)
2083 {
2084     init();
2085     _parent->addItem(this);
2086 }
2087
2088 TABLEROW *TABLEROW::copyLayout() const
2089 {
2090     TABLEROW *newrow = new TABLEROW();
2091
2092     QListIterator<TABLEITEM *> it(items);
2093     while (it.hasNext()) {
2094         TABLEITEM *newitem = new TABLEITEM(newrow);
2095         newitem->copyLayout(it.next());
2096     }
2097     return newrow;
2098 }
2099
// Names of the global table options recognised on the option line of a
// table. The list ends with a null pointer.
static const char *const tableopt[] = {
    "center",
    "expand",
    "box",
    "allbox",
    "doublebox",
    "tab",
    "linesize",
    "delim",
    nullptr,
};
// Length of the option name at the same index in tableopt (0 for the end marker)
static const int tableoptl[] = {
    6, // center
    6, // expand
    3, // box
    6, // allbox
    9, // doublebox
    3, // tab
    8, // linesize
    5, // delim
    0,
};
2102
2103 static void clear_table(TABLEROW *table)
2104 {
2105     TABLEROW *tr1, *tr2;
2106
2107     tr1 = table;
2108     while (tr1->prev)
2109         tr1 = tr1->prev;
2110     while (tr1) {
2111         tr2 = tr1;
2112         tr1 = tr1->next;
2113         delete tr2;
2114     }
2115 }
2116
2117 //---------------------------------------------------------------------
2118
2119 static char *scan_expression(char *c, int *result);
2120
2121 //---------------------------------------------------------------------
2122
2123 static char *scan_format(char *c, TABLEROW **result, int *maxcol)
2124 {
2125     TABLEROW *layout, *currow;
2126     TABLEITEM *curfield;
2127     int i, j;
2128     if (*result) {
2129         clear_table(*result);
2130     }
2131     layout = currow = new TABLEROW();
2132     curfield = new TABLEITEM(currow);
2133     while (*c && *c != '.') {
2134         switch (*c) {
2135         case 'C':
2136         case 'c':
2137         case 'N':
2138         case 'n':
2139         case 'R':
2140         case 'r':
2141         case 'A':
2142         case 'a':
2143         case 'L':
2144         case 'l':
2145         case 'S':
2146         case 's':
2147         case '^':
2148         case '_':
2149             if (curfield->align)
2150                 curfield = new TABLEITEM(currow);
2151             curfield->align = toupper(*c);
2152             c++;
2153             break;
2154         case 'i':
2155         case 'I':
2156         case 'B':
2157         case 'b':
2158             curfield->font = toupper(*c);
2159             c++;
2160             break;
2161         case 'f':
2162         case 'F':
2163             c++;
2164             curfield->font = toupper(*c);
2165             c++;
2166             if (!isspace(*c) && *c != '.')
2167                 c++;
2168             break;
2169         case 't':
2170         case 'T':
2171             curfield->valign = 't';
2172             c++;
2173             break;
2174         case 'p':
2175         case 'P':
2176             c++;
2177             i = j = 0;
2178             if (*c == '+') {
2179                 j = 1;
2180                 c++;
2181             }
2182             if (*c == '-') {
2183                 j = -1;
2184                 c++;
2185             }
2186             while (isdigit(*c))
2187                 i = i * 10 + (*c++) - '0';
2188             if (j)
2189                 curfield->size = i * j;
2190             else
2191                 curfield->size = j - 10;
2192             break;
2193         case 'v':
2194         case 'V':
2195         case 'w':
2196         case 'W':
2197             c = scan_expression(c + 2, &curfield->width);
2198             break;
2199         case '|':
2200             if (curfield->align)
2201                 curfield->vleft++;
2202             else
2203                 curfield->vright++;
2204             c++;
2205             break;
2206         case 'e':
2207         case 'E':
2208             c++;
2209             break;
2210         case '0':
2211         case '1':
2212         case '2':
2213         case '3':
2214         case '4':
2215         case '5':
2216         case '6':
2217         case '7':
2218         case '8':
2219         case '9':
2220             i = 0;
2221             while (isdigit(*c))
2222                 i = i * 10 + (*c++) - '0';
2223             curfield->space = i;
2224             break;
2225         case ',':
2226         case '\n':
2227             currow->next = new TABLEROW();
2228             currow->next->prev = currow;
2229             currow = currow->next;
2230             currow->next = nullptr;
2231             curfield = new TABLEITEM(currow);
2232             c++;
2233             break;
2234         default:
2235             c++;
2236             break;
2237         }
2238     }
2239     if (*c == '.')
2240         while (*c++ != '\n')
2241             ;
2242     *maxcol = 0;
2243     currow = layout;
2244     while (currow) {
2245         i = currow->length();
2246         if (i > *maxcol)
2247             *maxcol = i;
2248         currow = currow->next;
2249     }
2250     *result = layout;
2251     return c;
2252 }
2253
2254 //---------------------------------------------------------------------
2255
2256 static TABLEROW *next_row(TABLEROW *tr)
2257 {
2258     if (tr->next) {
2259         tr = tr->next;
2260         if (!tr->next)
2261             return next_row(tr);
2262         return tr;
2263     } else {
2264         tr->next = tr->copyLayout();
2265         tr->next->prev = tr;
2266         return tr->next;
2267     }
2268 }
2269
2270 //---------------------------------------------------------------------
2271
// Troff input (\fR\s0) that is scanned after the contents of each table cell
static char itemreset[20] = "\\fR\\s0";

// Step curfield to the next column of currow, skipping the columns whose
// align is 'S'. Relies on the variables currow and curfield of the calling
// function. The expansion already ends with a semicolon.
#define FORWARDCUR \
    do { \
        ++curfield; \
    } while (currow->has(curfield) && currow->at(curfield).align == 'S');
2278
2279 static char *scan_table(char *c)
2280 {
2281     char *h;
2282     char *g;
2283     int center = 0, expand = 0, box = 0, border = 0, linesize = 1;
2284     int i, j, maxcol = 0, finished = 0;
2285     QByteArray oldfont;
2286     int oldsize, oldfillout;
2287     char itemsep = '\t';
2288     TABLEROW *layout = nullptr, *currow;
2289     int curfield = -1;
2290     while (*c++ != '\n')
2291         ;
2292     h = c;
2293     if (*h == '.')
2294         return c - 1;
2295     oldfont = current_font;
2296     oldsize = current_size;
2297     oldfillout = fillout;
2298     out_html(set_font("R"));
2299     out_html(change_to_size(0));
2300     if (!fillout) {
2301         fillout = 1;
2302         out_html("</PRE>");
2303     }
2304     while (*h && *h != '\n')
2305         h++;
2306     if (h[-1] == ';') {
2307         /* scan table options */
2308         while (c < h) {
2309             while (isspace(*c))
2310                 c++;
2311             for (i = 0; tableopt[i] && qstrncmp(tableopt[i], c, tableoptl[i]); i++)
2312                 ;
2313             c = c + tableoptl[i];
2314             switch (i) {
2315             case 0:
2316                 center = 1;
2317                 break;
2318             case 1:
2319                 expand = 1;
2320                 break;
2321             case 2:
2322                 box = 1;
2323                 break;
2324             case 3:
2325                 border = 1;
2326                 break;
2327             case 4:
2328                 box = 2;
2329                 break;
2330             case 5:
2331                 while (*c++ != '(')
2332                     ;
2333                 itemsep = *c++;
2334                 break;
2335             case 6:
2336                 while (*c++ != '(')
2337                     ;
2338                 linesize = 0;
2339                 while (isdigit(*c))
2340                     linesize = linesize * 10 + (*c++) - '0';
2341                 break;
2342             case 7:
2343                 while (*c != ')')
2344                     c++;
2345             default:
2346                 break;
2347             }
2348             c++;
2349         }
2350         c = h + 1;
2351     }
2352     /* scan layout */
2353     c = scan_format(c, &layout, &maxcol);
2354     //    currow=layout;
2355     currow = next_row(layout);
2356     curfield = 0;
2357     i = 0;
2358     while (!finished && *c) {
2359         /* search item */
2360         h = c;
2361         if ((*c == '_' || *c == '=') && (c[1] == itemsep || c[1] == '\n')) {
2362             if (c[-1] == '\n' && c[1] == '\n') {
2363                 if (currow->prev) {
2364                     currow->prev->next = new TABLEROW();
2365                     currow->prev->next->next = currow;
2366                     currow->prev->next->prev = currow->prev;
2367                     currow->prev = currow->prev->next;
2368                 } else {
2369                     currow->prev = layout = new TABLEROW();
2370                     currow->prev->prev = nullptr;
2371                     currow->prev->next = currow;
2372                 }
2373                 TABLEITEM *newitem = new TABLEITEM(currow->prev);
2374                 newitem->align = *c;
2375                 newitem->colspan = maxcol;
2376                 curfield = 0;
2377                 c = c + 2;
2378             } else {
2379                 if (currow->has(curfield)) {
2380                     currow->at(curfield).align = *c;
2381                     FORWARDCUR;
2382                 }
2383                 if (c[1] == '\n') {
2384                     currow = next_row(currow);
2385                     curfield = 0;
2386                 }
2387                 c = c + 2;
2388             }
2389         } else if (*c == 'T' && c[1] == '{') {
2390             h = c + 2;
2391             c = strstr(h, "\nT}");
2392             c++;
2393             *c = '\0';
2394             g = nullptr;
2395             scan_troff(h, 0, &g);
2396             scan_troff(itemreset, 0, &g);
2397             *c = 'T';
2398             c += 3;
2399             if (currow->has(curfield)) {
2400                 currow->at(curfield).setContents(g);
2401                 FORWARDCUR;
2402             }
2403             delete[] g;
2404
2405             if (c[-1] == '\n') {
2406                 currow = next_row(currow);
2407                 curfield = 0;
2408             }
2409         } else if (*c == '.' && c[1] == 'T' && c[2] == '&' && c[-1] == '\n') {
2410             TABLEROW *hr;
2411             while (*c++ != '\n')
2412                 ;
2413             hr = currow;
2414             currow = currow->prev;
2415             hr->prev = nullptr;
2416             c = scan_format(c, &hr, &i);
2417             hr->prev = currow;
2418             currow->next = hr;
2419             currow = hr;
2420             next_row(currow);
2421             curfield = 0;
2422         } else if (*c == '.' && c[1] == 'T' && c[2] == 'E' && c[-1] == '\n') {
2423             finished = 1;
2424             while (*c++ != '\n')
2425                 ;
2426             if (currow->prev)
2427                 currow->prev->next = nullptr;
2428             currow->prev = nullptr;
2429             clear_table(currow);
2430             currow = nullptr;
2431         } else if (*c == '.' && c[-1] == '\n' && !isdigit(c[1])) {
2432             /* skip troff request inside table (usually only .sp ) */
2433             while (*c++ != '\n')
2434                 ;
2435         } else {
2436             h = c;
2437             while (*c && (*c != itemsep || c[-1] == '\\') && (*c != '\n' || c[-1] == '\\'))
2438                 c++;
2439             i = 0;
2440             if (*c == itemsep) {
2441                 i = 1;
2442                 *c = '\n';
2443             }
2444             if (h[0] == '\\' && h[2] == '\n' && (h[1] == '_' || h[1] == '^')) {
2445                 if (currow->has(curfield)) {
2446                     currow->at(curfield).align = h[1];
2447                     FORWARDCUR;
2448                 }
2449                 h = h + 3;
2450             } else {
2451                 g = nullptr;
2452                 h = scan_troff(h, 1, &g);
2453                 scan_troff(itemreset, 0, &g);
2454                 if (currow->has(curfield)) {
2455                     currow->at(curfield).setContents(g);
2456                     FORWARDCUR;
2457                 }
2458                 delete[] g;
2459             }
2460             if (i)
2461                 *c = itemsep;
2462             c = h;
2463             if (c[-1] == '\n') {
2464                 currow = next_row(currow);
2465                 curfield = 0;
2466             }
2467         }
2468     }
2469     /* calculate colspan and rowspan */
2470     currow = layout;
2471     while (currow->next)
2472         currow = currow->next;
2473     while (currow) {
2474         int ti = 0, ti1 = 0, ti2 = -1;
2475         TABLEROW *prev = currow->prev;
2476         if (!prev)
2477             break;
2478
2479         while (prev->has(ti1)) {
2480             if (currow->has(ti))
2481                 switch (currow->at(ti).align) {
2482                 case 'S':
2483                     if (currow->has(ti2)) {
2484                         currow->at(ti2).colspan++;
2485                         if (currow->at(ti2).rowspan < prev->at(ti1).rowspan)
2486                             currow->at(ti2).rowspan = prev->at(ti1).rowspan;
2487                     }
2488                     break;
2489                 case '^':
2490                     if (prev->has(ti1))
2491                         prev->at(ti1).rowspan++;
2492                 default:
2493                     if (ti2 < 0)
2494                         ti2 = ti;
2495                     else {
2496                         do {
2497                             ti2++;
2498                         } while (currow->has(ti2) && currow->at(ti2).align == 'S');
2499                     }
2500                     break;
2501                 }
2502             ti++;
2503             if (ti1 >= 0)
2504                 ti1++;
2505         }
2506         currow = currow->prev;
2507     }
2508     /* produce html output */
2509     if (center)
2510         out_html("<CENTER>");
2511     if (box == 2)
2512         out_html("<TABLE BORDER><TR><TD>");
2513     out_html("<TABLE");
2514     if (box || border) {
2515         out_html(" BORDER");
2516         if (!border)
2517             out_html("><TR><TD><TABLE");
2518         if (expand)
2519             out_html(" WIDTH=\"100%\"");
2520     }
2521     out_html(">\n");
2522     currow = layout;
2523     while (currow) {
2524         j = 0;
2525         out_html("<TR VALIGN=top>");
2526         curfield = 0;
2527         while (currow->has(curfield)) {
2528             if (currow->at(curfield).align != 'S' && currow->at(curfield).align != '^') {
2529                 out_html("<TD style='padding-right:10px; padding-left:10px;'");
2530                 switch (currow->at(curfield).align) {
2531                 case 'N':
2532                     currow->at(curfield).space += 4;
2533                 case 'R':
2534                     out_html(" ALIGN=right");
2535                     break;
2536                 case 'C':
2537                     out_html(" ALIGN=center");
2538                 default:
2539                     break;
2540                 }
2541                 if (!currow->at(curfield).valign && currow->at(curfield).rowspan > 1)
2542                     out_html(" VALIGN=center");
2543                 if (currow->at(curfield).colspan > 1) {
2544                     out_html(" COLSPAN=");
2545                     out_html(QByteArray::number(currow->at(curfield).colspan));
2546                 }
2547                 if (currow->at(curfield).rowspan > 1) {
2548                     out_html(" ROWSPAN=");
2549                     out_html(QByteArray::number(currow->at(curfield).rowspan));
2550                 }
2551                 j = j + currow->at(curfield).colspan;
2552                 out_html(">");
2553                 if (currow->at(curfield).size)
2554                     out_html(change_to_size(currow->at(curfield).size));
2555                 if (currow->at(curfield).font)
2556                     out_html(set_font(QByteArray::number(currow->at(curfield).font)));
2557                 switch (currow->at(curfield).align) {
2558                 case '=':
2559                     out_html("<HR><HR>");
2560                     break;
2561                 case '_':
2562                     out_html("<HR>");
2563                     break;
2564                 default:
2565                     out_html(currow->at(curfield).getContents());
2566                     break;
2567                 }
2568                 if (currow->at(curfield).space)
2569                     for (i = 0; i < currow->at(curfield).space; i++)
2570                         out_html("&nbsp;");
2571                 if (currow->at(curfield).font)
2572                     out_html(set_font("R"));
2573                 if (currow->at(curfield).size)
2574                     out_html(change_to_size(0));
2575                 if (j >= maxcol && currow->at(curfield).align > '@' && currow->at(curfield).align != '_')
2576                     out_html("<BR>");
2577                 out_html("</TD>");
2578             }
2579             curfield++;
2580         }
2581         out_html("</TR>\n");
2582         currow = currow->next;
2583     }
2584
2585     clear_table(layout);
2586
2587     if (box && !border)
2588         out_html("</TABLE>");
2589     out_html("</TABLE>");
2590     if (box == 2)
2591         out_html("</TABLE>");
2592     if (center)
2593         out_html("</CENTER>\n");
2594     else
2595         out_html("\n");
2596     if (!oldfillout)
2597         out_html("<PRE>");
2598     fillout = oldfillout;
2599     out_html(change_to_size(oldsize));
2600     out_html(set_font(oldfont));
2601     return c;
2602 }
2603
2604 //---------------------------------------------------------------------
2605
2606 static char *scan_expression(char *c, int *result, const unsigned int numLoop)
2607 {
2608     int value = 0, value2, sign = 1, opex = 0;
2609     char oper = 'c';
2610     bool oldSkipEscape = skip_escape;
2611     skip_escape = true; // evaluating an expression shall not print it
2612
2613     if (*c == '!') {
2614         c = scan_expression(c + 1, &value);
2615         value = (!value);
2616     } else if (*c == 'n') {
2617         c++;
2618         value = s_nroff;
2619     } else if (*c == 't') {
2620         c++;
2621         value = 1 - s_nroff;
2622     } else if (*c == '\'' || *c == '"' || *c < ' ' || (*c == '\\' && c[1] == '(')) {
2623         /* ?string1?string2?
2624         ** test if string1 equals string2.
2625         */
2626         char *st1 = nullptr, *st2 = nullptr, *h;
2627         char *tcmp = nullptr;
2628         char sep;
2629         sep = *c;
2630         if (sep == '\\') {
2631             tcmp = c;
2632             c = c + 3;
2633         }
2634         c++;
2635         h = c;
2636         while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4)))
2637             c++;
2638         *c = '\n';
2639         scan_troff(h, 1, &st1);
2640         *c = sep;
2641         if (tcmp)
2642             c = c + 3;
2643         c++;
2644         h = c;
2645         while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4)))
2646             c++;
2647         *c = '\n';
2648         scan_troff(h, 1, &st2);
2649         *c = sep;
2650         if (!st1 && !st2)
2651             value = 1;
2652         else if (!st1 || !st2)
2653             value = 0;
2654         else
2655             value = (!qstrcmp(st1, st2));
2656         delete[] st1;
2657         delete[] st2;
2658         if (tcmp)
2659             c = c + 3;
2660         c++;
2661     } else {
2662         while (*c && (!isspace(*c) || (numLoop > 0)) && *c != ')' && opex >= 0) {
2663             opex = 0;
2664             switch (*c) {
2665             case '(':
2666                 c = scan_expression(c + 1, &value2, numLoop + 1);
2667                 value2 = sign * value2;
2668                 opex = 1;
2669                 break;
2670             case '.':
2671             case '0':
2672             case '1':
2673             case '2':
2674             case '3':
2675             case '4':
2676             case '5':
2677             case '6':
2678             case '7':
2679             case '8':
2680             case '9': {
2681                 int num = 0, denum = 1;
2682                 value2 = 0;
2683                 while (isdigit(*c))
2684                     value2 = value2 * 10 + ((*c++) - '0');
2685                 if (*c == '.' && isdigit(c[1])) {
2686                     c++;
2687                     while (isdigit(*c)) {
2688                         num = num * 10 + ((*c++) - '0');
2689                         denum = denum * 10;
2690                     }
2691                 }
2692                 if (isalpha(*c)) {
2693                     /* scale indicator */
2694                     switch (*c) {
2695                     case 'i': /* inch -> 10pt */
2696                         value2 = value2 * 10 + (num * 10 + denum / 2) / denum;
2697                         num = 0;
2698                         break;
2699                     default:
2700                         break;
2701                     }
2702                     c++;
2703                 }
2704                 value2 = value2 + (num + denum / 2) / denum;
2705                 value2 = sign * value2;
2706                 opex = 1;
2707                 if (*c == '.')
2708                     opex = -1;
2709
2710             } break;
2711             case '\\':
2712                 c = scan_escape(c + 1);
2713                 value2 = intresult * sign;
2714                 if (isalpha(*c))
2715                     c++; /* scale indicator */
2716                 opex = 1;
2717                 break;
2718             case '-':
2719                 if (oper) {
2720                     sign = -1;
2721                     c++;
2722                     break;
2723                 }
2724                 Q_FALLTHROUGH();
2725             case '>':
2726             case '<':
2727             case '+':
2728             case '/':
2729             case '*':
2730             case '%':
2731             case '&':
2732             case '=':
2733             case ':':
2734                 if (c[1] == '=')
2735                     oper = (*c++) + 16;
2736                 else
2737                     oper = *c;
2738                 c++;
2739                 break;
2740             default:
2741                 c++;
2742                 break;
2743             }
2744             if (opex > 0) {
2745                 sign = 1;
2746                 switch (oper) {
2747                 case 'c':
2748                     value = value2;
2749                     break;
2750                 case '-':
2751                     value = value - value2;
2752                     break;
2753                 case '+':
2754                     value = value + value2;
2755                     break;
2756                 case '*':
2757                     value = value * value2;
2758                     break;
2759                 case '/':
2760                     if (value2)
2761                         value = value / value2;
2762                     break;
2763                 case '%':
2764                     if (value2)
2765                         value = value % value2;
2766                     break;
2767                 case '<':
2768                     value = (value < value2);
2769                     break;
2770                 case '>':
2771                     value = (value > value2);
2772                     break;
2773                 case '>' + 16:
2774                     value = (value >= value2);
2775                     break;
2776                 case '<' + 16:
2777                     value = (value <= value2);
2778                     break;
2779                 case '=':
2780                 case '=' + 16:
2781                     value = (value == value2);
2782                     break;
2783                 case '&':
2784                     value = (value && value2);
2785                     break;
2786                 case ':':
2787                     value = (value || value2);
2788                     break;
2789                 default: {
2790                     qCDebug(KIO_MAN_LOG) << "Unknown operator " << char(oper);
2791                 }
2792                 }
2793                 oper = 0;
2794             }
2795         }
2796         if (*c == ')')
2797             c++;
2798     }
2799     *result = value;
2800
2801     skip_escape = oldSkipEscape;
2802
2803     return c;
2804 }
2805
2806 //---------------------------------------------------------------------
2807
2808 static char *scan_expression(char *c, int *result)
2809 {
2810     return scan_expression(c, result, 0);
2811 }
2812
2813 //---------------------------------------------------------------------
2814
2815 static void trans_char(char *c, char s, char t)
2816 {
2817     char *sl = c;
2818     int slash = 0;
2819     while (*sl != '\n' || slash) {
2820         if (!slash) {
2821             if (*sl == escapesym)
2822                 slash = 1;
2823             else if (*sl == s)
2824                 *sl = t;
2825         } else
2826             slash = 0;
2827         sl++;
2828     }
2829 }
2830
2831 //---------------------------------------------------------------------
2832 // parse 1 line (or a line which stretches multiple lines by \(enter) )
2833 // return all arguments starting at \p c in \p args
2834 // returns the pointer to the next char where scanning should continue
2835 // (which is the char after the ending \n)
2836 // argPointers .. a list of pointers to the startchars of each arg pointing into the string given with c
2837
2838 void getArguments(/* const */ char *&c, QList<QByteArray> &args, QList<char *> *argPointers = nullptr)
2839 {
2840     args.clear();
2841     if (argPointers)
2842         argPointers->clear();
2843
2844     QByteArray arg;
2845     arg.reserve(30); // reduce num of reallocs
2846     bool inString = false;
2847     bool inArgument = false;
2848
2849     for (; *c && (*c != '\n'); c++) {
2850         if (*c == '"') {
2851             if (!inString) {
2852                 inString = true; // start of quoted argument
2853             } else {
2854                 // according to http://heirloom.sourceforge.net/doctools/troff.pdf chapter 7.3
2855                 // two consecutive quotes inside a string is one quote char
2856                 if (*(c + 1) == '"') {
2857                     arg += '"';
2858                     c++;
2859                 } else // end of quoted argument
2860                 {
2861                     args.append(arg);
2862                     arg.clear();
2863                     inString = false;
2864                     inArgument = false;
2865                 }
2866             }
2867         } else if (*c == ' ') {
2868             if (inString) {
2869                 arg += *c;
2870                 if (!inArgument) // argument not yet found (leading spaces)
2871                 {
2872                     inArgument = true;
2873
2874                     if (argPointers)
2875                         argPointers->append(c);
2876                 }
2877             } else if (inArgument) {
2878                 // end of previous argument
2879                 args.append(arg);
2880                 arg.clear();
2881                 inArgument = false;
2882             }
2883         } else if ((*c == escapesym) && (*(c + 1) == ' ')) {
2884             // special handling \<SP> shall be kept as is
2885             arg += *c++;
2886             arg += *c;
2887
2888             if (!inArgument) // argument not yet found (leading spaces)
2889             {
2890                 inArgument = true;
2891
2892                 if (argPointers)
2893                     argPointers->append(c);
2894             }
2895         } else if ((*c == escapesym) && (*(c + 1) == '\n')) {
2896             c++;
2897         } else if ((*c == escapesym) && (*(c + 1) == '"')) // start of comment; skip rest of line
2898         {
2899             if (inArgument) {
2900                 // end of previous argument
2901                 args.append(arg);
2902                 arg.clear();
2903                 inArgument = false;
2904             }
2905
2906             // skip rest of line
2907             while (*c && (*c != '\n'))
2908                 c++;
2909             break;
2910         } else if (*c != ' ') {
2911             arg += *c;
2912             if (!inArgument) // argument not yet found (leading spaces)
2913             {
2914                 inArgument = true;
2915
2916                 if (argPointers)
2917                     argPointers->append(c);
2918             }
2919         }
2920     }
2921
2922     if (inArgument) {
2923         // end of previous argument
2924         args.append(arg);
2925     }
2926
2927     if (*c)
2928         c++;
2929 }
2930
2931 //---------------------------------------------------------------------
2932
// Abbreviation table: consecutive (abbreviation, full title) pairs,
// terminated by a pair of null pointers.
// clang-format off
static const char *const abbrev_list[] = {
    "GSBG", "Getting Started ",
    "SUBG", "Customizing SunOS",
    "SHBG", "Basic Troubleshooting",
    "SVBG", "SunView User's Guide",
    "MMBG", "Mail and Messages",
    "DMBG", "Doing More with SunOS",
    "UNBG", "Using the Network",
    "GDBG", "Games, Demos &amp; Other Pursuits",
    "CHANGE", "SunOS 4.1 Release Manual",
    "INSTALL", "Installing SunOS 4.1",
    "ADMIN", "System and Network Administration",
    "SECUR", "Security Features Guide",
    "PROM", "PROM User's Manual",
    "DIAG", "Sun System Diagnostics",
    "SUNDIAG", "Sundiag User's Guide",
    "MANPAGES", "SunOS Reference Manual",
    "REFMAN", "SunOS Reference Manual",
    "SSI", "Sun System Introduction",
    "SSO", "System Services Overview",
    "TEXT", "Editing Text Files",
    "DOCS", "Formatting Documents",
    "TROFF", "Using <B>nroff</B> and <B>troff</B>",
    "INDEX", "Global Index",
    "CPG", "C Programmer's Guide",
    "CREF", "C Reference Manual",
    "ASSY", "Assembly Language Reference",
    "PUL", "Programming Utilities and Libraries",
    "DEBUG", "Debugging Tools",
    "NETP", "Network Programming",
    "DRIVER", "Writing Device Drivers",
    "STREAMS", "STREAMS Programming",
    "SBDK", "SBus Developer's Kit",
    "WDDS", "Writing Device Drivers for the SBus",
    "FPOINT", "Floating-Point Programmer's Guide",
    "SVPG", "SunView 1 Programmer's Guide",
    "SVSPG", "SunView 1 System Programmer's Guide",
    "PIXRCT", "Pixrect Reference Manual",
    "CGI", "SunCGI Reference Manual",
    "CORE", "SunCore Reference Manual",
    "4ASSY", "Sun-4 Assembly Language Reference",
    "SARCH", "<FONT SIZE=\"-1\">SPARC</FONT> Architecture Manual",
    "KR", "The C Programming Language",
    nullptr, nullptr,
};
// clang-format on
2976
2977 static const char *lookup_abbrev(const char *c)
2978 {
2979     int i = 0;
2980
2981     if (!c)
2982         return "";
2983     while (abbrev_list[i] && qstrcmp(c, abbrev_list[i]))
2984         i = i + 2;
2985     if (abbrev_list[i])
2986         return abbrev_list[i + 1];
2987     else
2988         return c;
2989 }
2990
2991 //---------------------------------------------------------------------
2992
// Section table: consecutive (section, title) pairs. Which set of sections is
// compiled in depends on the platform. The list of known sections ends at the
// first null section key; the title stored with that key is the default title.
// clang-format off
static const char *const section_list[] = {
#ifdef Q_OS_SOLARIS
    // for Solaris
    "1", "User Commands",
    "1B", "SunOS/BSD Compatibility Package Commands",
    "1b", "SunOS/BSD Compatibility Package Commands",
    "1C", "Communication Commands ",
    "1c", "Communication Commands",
    "1F", "FMLI Commands ",
    "1f", "FMLI Commands",
    "1G", "Graphics and CAD Commands ",
    "1g", "Graphics and CAD Commands ",
    "1M", "Maintenance Commands",
    "1m", "Maintenance Commands",
    "1S", "SunOS Specific Commands",
    "1s", "SunOS Specific Commands",
    "2", "System Calls",
    "3", "C Library Functions",
    "3B", "SunOS/BSD Compatibility Library Functions",
    "3b", "SunOS/BSD Compatibility Library Functions",
    "3C", "C Library Functions",
    "3c", "C Library Functions",
    "3E", "C Library Functions",
    "3e", "C Library Functions",
    "3F", "Fortran Library Routines",
    "3f", "Fortran Library Routines",
    "3G", "C Library Functions",
    "3g", "C Library Functions",
    "3I", "Wide Character Functions",
    "3i", "Wide Character Functions",
    "3K", "Kernel VM Library Functions",
    "3k", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3l", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3m", "Mathematical Library",
    "3N", "Network Functions",
    "3n", "Network Functions",
    "3R", "Realtime Library",
    "3r", "Realtime Library",
    "3S", "Standard I/O Functions",
    "3s", "Standard I/O Functions",
    "3T", "Threads Library",
    "3t", "Threads Library",
    "3W", "C Library Functions",
    "3w", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "3x", "Miscellaneous Library Functions",
    "4", "File Formats",
    "4B", "SunOS/BSD Compatibility Package File Formats",
    "4b", "SunOS/BSD Compatibility Package File Formats",
    "5", "Headers, Tables, and Macros",
    "6", "Games and Demos",
    "7", "Special Files",
    "7B", "SunOS/BSD Compatibility Special Files",
    "7b", "SunOS/BSD Compatibility Special Files",
    "8", "Maintenance Procedures",
    "8C", "Maintenance Procedures",
    "8c", "Maintenance Procedures",
    "8S", "Maintenance Procedures",
    "8s", "Maintenance Procedures",
    "9", "DDI and DKI",
    "9E", "DDI and DKI Driver Entry Points",
    "9e", "DDI and DKI Driver Entry Points",
    "9F", "DDI and DKI Kernel Functions",
    "9f", "DDI and DKI Kernel Functions",
    "9S", "DDI and DKI Data Structures",
    "9s", "DDI and DKI Data Structures",
    "L", "Local Commands",
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
    "1", "General Commands",
    "2", "System Calls",
    "3", "Library Functions",
    "4", "Kernel Interfaces",
    "5", "File Formats",
    "6", "Games",
    "7", "Miscellaneous Information",
    "8", "System Manager's Manuals",
    "9", "Kernel Developer's Manuals",
#else
    // Other OS
    "1", "User Commands ",
    "1C", "User Commands",
    "1G", "User Commands",
    "1S", "User Commands",
    "1V", "User Commands ",
    "2", "System Calls",
    "2V", "System Calls",
    "3", "C Library Functions",
    "3C", "Compatibility Functions",
    "3F", "Fortran Library Routines",
    "3K", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3N", "Network Functions",
    "3R", "RPC Services Library",
    "3S", "Standard I/O Functions",
    "3V", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "4", "Devices and Network Interfaces",
    "4F", "Protocol Families",
    "4I", "Devices and Network Interfaces",
    "4M", "Devices and Network Interfaces",
    "4N", "Devices and Network Interfaces",
    "4P", "Protocols",
    "4S", "Devices and Network Interfaces",
    "4V", "Devices and Network Interfaces",
    "5", "File Formats",
    "5V", "File Formats",
    "6", "Games and Demos",
    "7", "Environments, Tables, and Troff Macros",
    "7V", "Environments, Tables, and Troff Macros",
    "8", "Maintenance Commands",
    "8C", "Maintenance Commands",
    "8S", "Maintenance Commands",
    "8V", "Maintenance Commands",
    "L", "Local Commands",
#endif
    // The defaults
    nullptr, "Misc. Reference Manual Pages",
    nullptr, nullptr,
};
// clang-format on
3227
3228 static const char *section_name(char *c)
3229 {
3230     int i = 0;
3231
3232     if (!c)
3233         return "";
3234     while (section_list[i] && qstrcmp(c, section_list[i]))
3235         i = i + 2;
3236     if (section_list[i + 1])
3237         return section_list[i + 1];
3238     else
3239         return c;
3240 }
3241
3242 static char *skip_till_newline(char *c)
3243 {
3244     int lvl = 0;
3245
3246     while (*c && (*c != '\n' || lvl > 0)) {
3247         if (*c == '\\') {
3248             c++;
3249             if (*c == '}')
3250                 lvl--;
3251             else if (*c == '{')
3252                 lvl++;
3253             else if (*c == '\0')
3254                 break;
3255         }
3256         c++;
3257     }
3258     if (*c)
3259         c++;
3260     if (lvl < 0 && newline_for_fun) {
3261         newline_for_fun = newline_for_fun + lvl;
3262         if (newline_for_fun < 0)
3263             newline_for_fun = 0;
3264     }
3265     return c;
3266 }
3267
3268 //---------------------------------------------------------------------
3269
3270 static bool s_whileloop = false;
3271
3272 /// Processing the .while request
3273 static void request_while(char *&c, int j, bool mdoc)
3274 {
3275     // ### TODO: .continue
3276     qCDebug(KIO_MAN_LOG) << "Entering .while";
3277     c += j;
3278     char *newline = skip_till_newline(c);
3279     const char oldchar = *newline;
3280     *newline = 0;
3281     // We store the full .while stuff into a QByteArray as if it would be a macro
3282     const QByteArray macro = c;
3283     qCDebug(KIO_MAN_LOG) << "'Macro' of .while" << BYTEARRAY(macro);
3284     // Prepare for continuing after .while loop end
3285     *newline = oldchar;
3286     c = newline;
3287     // Process -while loop
3288     const bool oldwhileloop = s_whileloop;
3289     s_whileloop = true;
3290     int result = true; // It must be an int due to the call to scan_expression
3291     break_the_while_loop = false;
3292     while (result && !break_the_while_loop) {
3293         // Unlike for a normal macro, we have the condition at start, so we do not need to prepend extra bytes
3294         char *liveloop = qstrdup(macro.data());
3295         qCDebug(KIO_MAN_LOG) << "Scanning .while condition";
3296         qCDebug(KIO_MAN_LOG) << "Loop macro " << liveloop;
3297         char *end_expression = scan_expression(liveloop, &result);
3298         qCDebug(KIO_MAN_LOG) << "After " << end_expression;
3299         if (result) {
3300             qCDebug(KIO_MAN_LOG) << "New .while iteration";
3301             // The condition is true, so call the .while's content
3302             char *help = end_expression + 1;
3303             while (*help && (*help == ' ' || *help == '\t'))
3304                 ++help;
3305             if (!*help) {
3306                 // We have a problem, so stop .while
3307                 result = false;
3308                 break;
3309             }
3310             if (mdoc)
3311                 scan_troff_mandoc(help, false, nullptr);
3312             else
3313                 scan_troff(help, false, nullptr);
3314         }
3315         delete[] liveloop;
3316     }
3317     break_the_while_loop = false;
3318
3319     //
3320     s_whileloop = oldwhileloop;
3321     qCDebug(KIO_MAN_LOG) << "Ending .while";
3322 }
3323
3324 //---------------------------------------------------------------------
3325 // Processing mixed fonts requests like .BI
3326
3327 static void request_mixed_fonts(char *&c, int j, const char *font1, const char *font2, const bool mode, const bool inFMode)
3328 {
3329     c += j;
3330     if (*c == '\n')
3331         c++;
3332
3333     QList<QByteArray> args;
3334     getArguments(c, args);
3335
3336     for (int i = 0; i < args.count(); i++) {
3337         if (mode || inFMode) {
3338             out_html(" ");
3339             curpos++;
3340         }
3341         out_html(set_font((i & 1) ? font2 : font1));
3342         scan_troff(args[i].data(), 1, nullptr);
3343     }
3344     out_html(set_font("R"));
3345     if (mode) {
3346         out_html(" ]");
3347         curpos++;
3348     }
3349     out_html(NEWLINE);
3350     if (!fillout)
3351         curpos = 0;
3352     else
3353         curpos++;
3354 }
3355
3356 //---------------------------------------------------------------------
3357
// If/else can be nested, so a single value (formerly: static int ifelseval=0;)
// is not enough; the values are kept on a stack instead.
// NOTE(review): presumably one entry per open if/else level - confirm against the code using this stack.
static QStack<int> s_ifelseval;
3362
3363 //---------------------------------------------------------------------
3364
3365 // Process a (mdoc) request involving quotes
3366 static char *process_quote(char *c, int j, const char *open, const char *close)
3367 {
3368     trans_char(c, '"', '\a');
3369     c += j;
3370     if (*c == '\n')
3371         c++; // ### TODO: why? Quote requests cannot be empty!
3372     out_html(open);
3373     c = scan_troff_mandoc(c, 1, nullptr);
3374     out_html(close);
3375     out_html(NEWLINE);
3376     if (fillout)
3377         curpos++;
3378     else
3379         curpos = 0;
3380     return c;
3381 }
3382
3383 //---------------------------------------------------------------------
/**
 * Is the char \p ch a punctuation in sense of mdoc(7)
 *
 * Only the characters . , ; : ( ) [ ] count as punctuation.
 */

static bool is_mdoc_punctuation(const char ch)
{
    switch (ch) {
    case '.':
    case ',':
    case ';':
    case ':':
    case '(':
    case ')':
    case '[':
    case ']':
        return true;
    default:
        return false;
    }
}
3397
3398 //---------------------------------------------------------------------
/**
 * Can the char \p c be part of an identifier
 * \note For groff, an identifier can consist of nearly all ASCII printable non-white-space characters
 * See info:/groff/Identifiers
 *
 * Accepted are the characters from '!' to '~', except the backslash.
 */

static bool is_identifier_char(const char c)
{
    // ### TODO: the backslash should be treated as escape instead!
    const bool printableNonBlank = (c >= '!') && (c <= '~');
    return printableNonBlank && c != '\\';
}
3415
3416 //---------------------------------------------------------------------
3417
3418 static QByteArray scan_identifier(char *&c)
3419 {
3420     char *h = c; // help pointer
3421     // ### TODO Groff seems to eat nearly everything as identifier name (info:/groff/Identifiers)
3422     while (*h && *h != '\a' && *h != '\n' && is_identifier_char(*h))
3423         ++h;
3424     const char tempchar = *h;
3425     *h = 0;
3426     const QByteArray name = c;
3427     *h = tempchar;
3428     if (name.isEmpty()) {
3429         qCDebug(KIO_MAN_LOG) << "EXCEPTION: identifier empty!";
3430     }
3431     c = h;
3432     return name;
3433 }
3434
3435 //---------------------------------------------------------------------
3436
3437 static char *scan_request(char *c)
3438 {
3439     // mdoc(7) stuff
3440     static bool mandoc_synopsis = false; /* True if we are in the synopsis section */
3441     static bool mandoc_command = false; /* True if this is mdoc(7) page */
3442     static int mandoc_bd_options; /* Only copes with non-nested Bd's */
3443     static int function_argument = 0; // Number of function argument (.Fo, .Fa, .Fc)
3444
3445     int i = 0;
3446     bool mode = false;
3447     char *h = nullptr;
3448     char *sl;
3449     QList<QByteArray> args;
3450
3451     while (*c == ' ' || *c == '\t')
3452         c++; // Spaces or tabs allowed between control character and request
3453     if (c[0] == '\n')
3454         return c + 1;
3455     if (c[0] == escapesym) {
3456         /* some pages use .\" .\$1 .\} */
3457         /* .\$1 is too difficult/stuppid */
3458         if (c[1] == '$') {
3459             qCDebug(KIO_MAN_LOG) << "Found .\\$";
3460             c = skip_till_newline(c); // ### TODO
3461         } else {
3462             // the result of the escape expansion must be parsed again
3463             c++;
3464             QByteArray cstr;
3465             c = scan_escape_direct(c, cstr);
3466             for (; *c && (*c != '\n'); c++)
3467                 cstr += *c;
3468             if (cstr.length())
3469                 scan_request(cstr.data());
3470         }
3471     } else {
3472         int nlen = 0;
3473         QByteArray macroName;
3474         while (c[nlen] && (c[nlen] != ' ') && (c[nlen] != '\t') && (c[nlen] != '\n') && (c[nlen] != escapesym)) {
3475             macroName += c[nlen];
3476             nlen++;
3477         }
3478         int j = nlen;
3479         while (c[j] == ' ' || c[j] == '\t')
3480             j++;
3481         /* search macro database of self-defined macros */
3482         QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(macroName);
3483
3484         // ### HACK: e.g. nmap, smb.conf redefine SH, SS to increase the font, etc. for non-TTY output
3485         // Ignore those to make the HTML result look better
3486         if ((macroName != "SH") && (macroName != "SS") && it != s_stringDefinitionMap.constEnd()) {
3487             qCDebug(KIO_MAN_LOG) << "CALLING MACRO: " << BYTEARRAY(macroName);
3488             const QByteArray oldDollarZero = s_dollarZero; // Previous value of $0
3489             s_dollarZero = macroName;
3490
3491             c += j;
3492             getArguments(c, args);
3493             for (i = 0; i < args.count(); i++) {
3494                 char *h = nullptr;
3495
3496                 if (mandoc_command)
3497                     scan_troff_mandoc(args[i].data(), 1, &h);
3498                 else
3499                     scan_troff(args[i].data(), 1, &h);
3500
3501                 args[i] = h;
3502                 delete[] h;
3503             }
3504
3505             if (!(*it).m_output.isEmpty()) {
3506                 // qCDebug(KIO_MAN_LOG) << "Macro content is: "<< BYTEARRAY( (*it).m_output );
3507                 const unsigned int length = (*it).m_output.length();
3508                 char *work = new char[length + 2];
3509                 work[0] = '\n'; // The macro must start after an end of line to allow a request on first line
3510                 qstrncpy(work + 1, (*it).m_output.data(), length + 1);
3511                 const QList<QByteArray> oldArgumentList(s_argumentList);
3512                 s_argumentList.clear();
3513                 for (i = 0; i < args.count(); i++)
3514                     s_argumentList.push_back(args[i]);
3515
3516                 const int onff = newline_for_fun;
3517                 if (mandoc_command)
3518                     scan_troff_mandoc(work + 1, 0, nullptr);
3519                 else
3520                     scan_troff(work + 1, 0, nullptr);
3521                 delete[] work;
3522                 newline_for_fun = onff;
3523                 s_argumentList = oldArgumentList;
3524             }
3525             s_dollarZero = oldDollarZero;
3526             qCDebug(KIO_MAN_LOG) << "ENDING MACRO: " << BYTEARRAY(macroName);
3527         } else {
3528             qCDebug(KIO_MAN_LOG) << "REQUEST: " << BYTEARRAY(macroName);
3529             switch (RequestNum request = RequestHash::getRequest(macroName, macroName.length())) {
3530             case REQ_ab: // groff(7) "ABort"
3531             {
3532                 h = c + j;
3533                 while (*h && *h != '\n')
3534                     h++;
3535                 *h = '\0';
3536                 if (scaninbuff && buffpos) {
3537                     buffer[buffpos] = '\0';
3538                     qCDebug(KIO_MAN_LOG) << "ABORT: " << buffer;
3539                 }
3540                 // ### TODO find a way to display it to the user
3541                 qCDebug(KIO_MAN_LOG) << "Aborting: .ab " << (c + j);
3542                 return nullptr;
3543                 break;
3544             }
3545             case REQ_An: // mdoc(7) "Author Name"
3546             {
3547                 c += j;
3548                 c = scan_troff_mandoc(c, 1, nullptr);
3549                 break;
3550             }
3551             case REQ_di: // groff(7) "end current DIversion"
3552             {
                     // ".di NAME" collects the text up to the next line starting with ".di",
                     // formats it with scan_troff() and stores the result as string NAME in
                     // s_stringDefinitionMap. A request with nothing after it only steps over
                     // its own newline.
3553                 qCDebug(KIO_MAN_LOG) << "Start .di";
3554                 c += j;
3555                 if (*c == '\n') {
3556                     ++c;
3557                     break;
3558                 }
3559                 const QByteArray name(scan_identifier(c));
3560                 while (*c && *c != '\n')
3561                     c++;
3562                 if (*c)
                         c++; // step over the newline, but never past the terminating NUL
3563                 h = c;
                     // Advance line by line until a line starting with ".di" or the end of input
3564                 while (*c && qstrncmp(c, ".di", 3))
3565                     while (*c && *c++ != '\n')
3566                         ;
                     // The diverted text is cut off by overwriting the '.' of the closing
                     // request. Remember whether there really was a closing request: testing
                     // *c after the overwrite is always false, so the '.' was never restored
                     // and everything after the diversion got lost.
                     const bool foundEnd = (*c != '\0');
3567                 *c = '\0';
3568                 char *result = nullptr;
3569                 scan_troff(h, 0, &result);
3570                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
3571                 if (it == s_stringDefinitionMap.end()) {
3572                     StringDefinition def;
3573                     def.m_length = 0;
3574                     def.m_output = result;
3575                     s_stringDefinitionMap.insert(name, def);
3576                 } else {
3577                     (*it).m_length = 0;
3578                     (*it).m_output = result;
3579                 }
3580                 delete[] result;
                     // Put the '.' back so that the closing request line can be skipped below
3581                 if (foundEnd)
3582                     *c = '.';
3583                 c = skip_till_newline(c);
3584                 qCDebug(KIO_MAN_LOG) << "end .di";
3585                 break;
3586             }
3587             case REQ_ds: // groff(7) "Define String variable"
3588                 mode = true;
3589                 Q_FALLTHROUGH();
3590             case REQ_as: // groff (7) "Append String variable"
3591             {
                     // Shared body of .ds and .as. `mode` is true when entered through
                     // REQ_ds; for .as it keeps the value it had before the switch
                     // (presumably false -- confirm at its declaration).
3592                 qCDebug(KIO_MAN_LOG) << "start .ds/.as";
                     // curpos is borrowed to measure the scanned string, so save it here
3593                 int oldcurpos = curpos;
3594                 c += j;
3595                 const QByteArray name(scan_identifier(c));
3596                 if (name.isEmpty())
3597                     break;
3598                 // an initial " is removed to allow leading space
3599                 while (*c && isspace(*c))
3600                     c++;
3601                 if (*c == '"')
3602                     c++;
3603
3604                 single_escape = true;
3605                 curpos = 0;
3606                 char *result = nullptr;
3607                 c = scan_troff(c, 1, &result);
                     // After the scan, curpos is stored as the m_length of the definition
3608                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
3609                 if (it == s_stringDefinitionMap.end()) {
                         // Unknown name: .ds and .as both create a new definition
3610                     StringDefinition def;
3611                     def.m_length = curpos;
3612                     def.m_output = result;
3613                     s_stringDefinitionMap.insert(name, def);
3614                 } else {
3615                     if (mode) { // .ds Defining String
3616                         (*it).m_length = curpos;
3617                         (*it).m_output = result;
3618                     } else { // .as Appending String
3619                         (*it).m_length += curpos;
3620                         (*it).m_output += result;
3621                     }
3622                 }
3623                 delete[] result;
3624                 single_escape = false;
3625                 curpos = oldcurpos;
3626                 qCDebug(KIO_MAN_LOG) << "end .ds/.as";
3627                 break;
3628             }
3629             case REQ_br: // groff(7) "line BReak"
3630             {
3631                 if (still_dd)
3632                     out_html("<DD>"); // ### VERIFY (does not look like generating good HTML)
3633                 else
3634                     out_html("<BR>\n");
3635                 curpos = 0;
3636                 c = c + j;
3637                 if (c[0] == escapesym)
3638                     c = scan_escape(c + 1);
3639                 c = skip_till_newline(c);
3640                 break;
3641             }
3642             case REQ_c2: // groff(7) "reset non-break Control character" (2 means non-break)
3643             {
3644                 c = c + j;
3645                 if (*c != '\n')
3646                     nobreaksym = *c;
3647                 else
3648                     nobreaksym = '\'';
3649                 c = skip_till_newline(c);
3650                 break;
3651             }
3652             case REQ_cc: // groff(7) "reset Control Character"
3653             {
3654                 c = c + j;
3655                 if (*c != '\n')
3656                     controlsym = *c;
3657                 else
3658                     controlsym = '.';
3659                 c = skip_till_newline(c);
3660                 break;
3661             }
3662             case REQ_ce: // groff (7) "CEnter"
3663             {
                     // ".ce [N]" centers the next N input lines; without an argument N is 1.
3664                 c = c + j;
3665                 if (*c == '\n')
3666                     i = 1;
3667                 else {
3668                     i = 0;
3669                     while ('0' <= *c && *c <= '9') {
3670                         i = i * 10 + *c - '0';
3671                         c++;
3672                     }
3673                 }
3674                 c = skip_till_newline(c);
3675                 /* center next i lines */
3676                 if (i > 0) {
3677                     out_html("<CENTER>\n");
3678                     while (i && *c) {
3679                         char *line = nullptr;
3680                         c = scan_troff(c, 1, &line);
                             // A line whose output starts with "<BR>" is not counted
3681                         if (line && qstrncmp(line, "<BR>", 4)) {
3682                             out_html(line);
3683                             out_html("<BR>\n");
3685                             i--;
3686                         }
                             // Free the buffer on every iteration; it used to leak whenever
                             // the line was not emitted. delete[] on nullptr is a no-op.
                             delete[] line;
3687                     }
3688                     out_html("</CENTER>\n");
3689                     curpos = 0;
3690                 }
3691                 break;
3692             }
3693             case REQ_ec: // groff(7) "reset Escape Character"
3694             {
3695                 c = c + j;
3696                 if (*c != '\n')
3697                     escapesym = *c;
3698                 else
3699                     escapesym = '\\';
3700                 break;
3701             }
3702             case REQ_eo: // groff(7) "turn Escape character Off"
3703             {
3704                 escapesym = '\0';
3705                 c = skip_till_newline(c);
3706                 break;
3707             }
3708             case REQ_ex: // groff(7) "EXit"
3709             {
3710                 return nullptr;
3711                 break;
3712             }
3713             case REQ_fc: // groff(7) "set Field and pad Character"
3714             {
3715                 c = c + j;
3716                 if (*c == '\n')
3717                     fieldsym = padsym = '\0';
3718                 else {
3719                     fieldsym = c[0];
3720                     padsym = c[1];
3721                 }
3722                 c = skip_till_newline(c);
3723                 break;
3724             }
3725             case REQ_fi: // groff(7) "FIll"
3726             {
3727                 if (!fillout) {
3728                     out_html(set_font("R"));
3729                     out_html(change_to_size('0'));
3730                     out_html("</PRE>\n");
3731                 }
3732                 curpos = 0;
3733                 fillout = 1;
3734                 c = skip_till_newline(c);
3735                 break;
3736             }
3737             case REQ_ft: // groff(7) "FonT"
3738             {
3739                 c += j;
3740                 h = skip_till_newline(c);
3741                 const char oldChar = *h;
3742                 *h = 0;
3743                 const QByteArray name = c;
3744                 // ### TODO: name might contain a variable
3745                 if (name.isEmpty())
3746                     out_html(set_font("P")); // Previous font
3747                 else
3748                     out_html(set_font(name));
3749                 *h = oldChar;
3750                 c = h;
3751                 break;
3752             }
3753             case REQ_el: // groff(7) "ELse"
3754             {
                     // REQ_ie pushes the negated condition, so a non-zero value here means
                     // that the else part has to be processed. An .el without a matching
                     // .ie (malformed page) finds the stack empty; popping it would be
                     // invalid, so treat it as "condition not met" and skip the line, as
                     // groff does for an unbalanced .el.
                     // Assumes s_ifelseval is a Qt container (QStack) providing isEmpty().
3755                 const int ifelseval = s_ifelseval.isEmpty() ? 0 : s_ifelseval.pop();
3756                 /* .el anything : else part of if else */
3757                 if (ifelseval) {
3758                     c = c + j;
3759                     c[-1] = '\n';
3760                     c = scan_troff(c, 1, nullptr);
3761                 } else
3762                     c = skip_till_newline(c + j);
3763                 break;
3764             }
3765             case REQ_ie: // groff(7) "If with Else"
3766             /* .ie c anything : then part of if else */
3767             // fallthrough
3768             case REQ_if: // groff(7) "IF"
3769             {
3770                 /* .if c anything
3771                  * .if !c anything
3772                  * .if N anything
3773                  * .if !N anything
3774                  * .if 'string1'string2' anything
3775                  * .if !'string1'string2' anything
3776                  */
3777                 c = c + j;
3778                 c = scan_expression(c, &i);
3779                 if (request == REQ_ie) {
3780                     int ifelseval = !i;
3781                     s_ifelseval.push(ifelseval);
3782                 }
3783                 if (i) {
3784                     *c = '\n';
3785                     c++;
3786                     c = scan_troff(c, 1, nullptr);
3787                 } else
3788                     c = skip_till_newline(c);
3789                 break;
3790             }
3791             case REQ_ig: // groff(7) "IGnore"
3792             {
                     // Skips all input up to and including the line that ends the ignored
                     // part: "..\n" by default, or '.' followed by the rest of the request
                     // line when an argument is given.
3793                 const char *endwith = "..\n";
3794                 i = 3;
3795                 c = c + j;
3796                 if (*c != '\n' && *c != '\\') {
3797                     /* Not newline or comment */
                         // Build the end marker in place: the character in front of the
                         // argument becomes '.', i counts the '.' plus the rest of the line
3798                     endwith = c - 1;
3799                     i = 1;
3800                     c[-1] = '.';
3801                     while (*c && *c != '\n')
3802                         c++, i++;
3803                 }
                     // Never step past the terminating NUL (request on an unterminated last line)
3804                 if (*c)
                         c++;
                     // Skip whole lines until one starts with the end marker
3805                 while (*c && qstrncmp(c, endwith, i))
3806                     while (*c && *c++ != '\n')
3807                         ;
                     // Skip the end marker line itself
3808                 while (*c && *c++ != '\n')
3809                     ;
3810                 break;
3811             }
3812             case REQ_nf: // groff(7) "No Filling"
3813             {
3814                 if (fillout) {
3815                     out_html(set_font("R"));
3816                     out_html(change_to_size('0'));
3817                     out_html("<PRE>\n");
3818                 }
3819                 curpos = 0;
3820                 fillout = 0;
3821                 c = skip_till_newline(c);
3822                 break;
3823             }
3824             case REQ_ps: // groff(7) "previous Point Size"
3825             {
3826                 c += j;
3827                 getArguments(c, args);
3828                 if (args.count() == 0)
3829                     out_html(change_to_size('0'));
3830                 else {
3831                     char *h = args[0].data();
3832                     int sign = 0;
3833                     i = 0;
3834                     if (*h == '-') {
3835                         sign = -1;
3836                         h++;
3837                     } else if (*h == '+') {
3838                         sign = 1;
3839                         h++;
3840                     }
3841                     scan_expression(h, &i);
3842                     if (sign == 0) {
3843                         sign = 1;
3844                         if (i > 5)
3845                             i = i - 10;
3846                     }
3847                     out_html(change_to_size(sign * i));
3848                 }
3849                 break;
3850             }
3851             case REQ_sp: // groff(7) "SKip one line"
3852             {
3853                 c += j;
3854                 if (fillout)
3855                     out_html("<br><br>");
3856                 else
3857                     out_html(NEWLINE);
3858                 curpos = 0;
3859                 c = skip_till_newline(c);
3860                 break;
3861             }
3862             case REQ_so: // groff(7) "Include SOurce file"
3863             {
                     // Reads the file named by the request through read_man_page() and
                     // scans its content in place of the request line.
3864                 char *buf = nullptr;
3865                 char *name = nullptr;
3866                 curpos = 0;
3867                 c = c + j;
3868                 if (*c == '/')
3869                     h = c;
3870                 else {
                         // Not an absolute path: overwrite the three characters in front
                         // of the argument with "../"
3871                     h = c - 3;
3872                     h[0] = '.';
3873                     h[1] = '.';
3874                     h[2] = '/';
3875                 }
                     // Terminate the argument at the end of the line, without running past
                     // the end of the buffer when the last line has no newline
3876                 while (*c && *c != '\n')
3877                     c++;
                     const bool hadNewline = (*c == '\n');
3878                 *c = '\0';
3879                 scan_troff(h, 1, &name);
                     if (!name) {
                         // No output buffer was produced, so there is no file name to use
                         qCDebug(KIO_MAN_LOG) << "Unable to expand file name of .so request";
                         if (hadNewline)
                             *c++ = '\n';
                         break;
                     }
                     // Only look at name[3] when the string is long enough to have it
3880                 if (strlen(name) > 3 && name[3] == '/')
3881                     h = name + 3;
3882                 else
3883                     h = name;
3884
3885                 // The format of the argument to .so varies among man pages.
3886                 // Some of them, e.g. pam.8, use "PAM.8".  Others, e.g. telinit.8,
3887                 // use "man8/init.8".  So they are not always true relative paths,
3888                 // although the man(1) command seems to handle them with no problem.
3889                 //
3890                 // The code above starting "h = c - 3" attempts to turn the argument
3891                 // into a relative path, but that is not correct in the case of pam.8
3892                 // as above.  So this removes the "../" prefix again if there is
3893                 // no other slash following it.
3894                 char *firstSlash = strchr(h, '/');
3895                 if (firstSlash != nullptr) {
3896                     char *nextSlash = strchr(firstSlash + 1, '/');
3897                     if (nextSlash == nullptr)
3898                         h = firstSlash + 1;
3899                 }
3900
3901                 /* this works alright, except for section 3 */
3902                 buf = read_man_page(h);
3903                 if (!buf) {
3904                     qCDebug(KIO_MAN_LOG) << "Unable to open or read file: .so " << (h);
3905                     out_html(
3906                         "<BLOCKQUOTE>"
3907                         "man2html: unable to open or read file.\n");
3908                     out_html(h);
3909                     out_html("</BLOCKQUOTE>\n");
3910                 } else
3911                     scan_troff(buf + 1, 0, nullptr);
3912                 delete[] buf;
3913                 delete[] name;
3914
                     // Restore the newline only if one was replaced above; otherwise the
                     // buffer terminator would be overwritten
                     if (hadNewline)
3915                     *c++ = '\n';
3916                 break;
3917             }
3918             case REQ_ta: // groff(7) "set TAbulators"
3919             {
                     // Parses the list of tab stops on the request line into tabstops[] and
                     // records their number in maxtstop. A stop written with a leading
                     // '+' or '-' is relative to the previous stop.
                     // REQ_DT fills tabstops[0..19], so the table holds at least 20
                     // entries; never write beyond that.
                     const int maxTabStops = 20;
3920                 c = c + j;
3921                 j = 0;
3922                 while (*c && *c != '\n' && j < maxTabStops) {
3923                     sl = scan_expression(c, &tabstops[j]);
3924                     if (j > 0 && (*c == '-' || *c == '+'))
3925                         tabstops[j] += tabstops[j - 1];
                         // Nothing consumed: stop instead of looping forever
                         if (sl == c)
                             break;
3926                     c = sl;
3927                     while (*c == ' ' || *c == '\t')
3928                         c++;
3929                     j++;
3930                 }
                     // Ignore whatever is left on the line (surplus or unparsable stops);
                     // like before, c is left on the newline
                     while (*c && *c != '\n')
                         c++;
3931                 maxtstop = j;
3932                 curpos = 0;
3933                 break;
3934             }
3935             case REQ_ti: // groff(7) "Temporary Indent"
3936             {
3937                 /*while (itemdepth || dl_set[itemdepth]) {
3938                     out_html("</DL>\n");
3939                     if (dl_set[itemdepth]) dl_set[itemdepth]=0;
3940                     else itemdepth--;
3941                 }*/
3942                 out_html("<BR>\n");
3943                 c = c + j;
3944                 c = scan_expression(c, &j);
3945                 for (i = 0; i < j; i++)
3946                     out_html("&nbsp;");
3947                 curpos = j;
3948                 c = skip_till_newline(c);
3949                 break;
3950             }
3951             case REQ_tm: // groff(7) "TerMinal" ### TODO: what are useful uses for it
3952             {
3953                 c += j;
3954                 getArguments(c, args);
3955                 if (args.count())
3956                     qCDebug(KIO_MAN_LOG) << ".tm " << args[0];
3957                 break;
3958             }
3959             case REQ_B: // man(7) "Bold"
3960                 mode = true;
3961                 Q_FALLTHROUGH();
3962             case REQ_I: // man(7) "Italic"
3963             {
3964                 /* parse one line in a certain font */
3965                 c += j;
3966                 getArguments(c, args);
3967
3968                 out_html(set_font(mode ? "B" : "I"));
3969
3970                 for (int i = 0; i < args.count(); i++) {
3971                     scan_troff(args[i].data(), 1, nullptr);
3972                     out_html(" ");
3973                 }
3974
3975                 out_html(set_font("R"));
3976
3977                 if (fillout)
3978                     curpos++;
3979                 else {
3980                     out_html(NEWLINE);
3981                     curpos = 0;
3982                 }
3983                 break;
3984             }
3985             case REQ_Fd: // mdoc(7) "Function Definition"
3986             {
3987                 // Normal text must be printed in bold, punctuation in regular font
3988                 c += j;
3989                 if (*c == '\n')
3990                     c++;
3991                 getArguments(c, args);
3992
3993                 for (i = 0; i < args.count(); i++) {
3994                     // ### FIXME In theory, only a single punctuation character is recognized as punctuation
3995                     if (is_mdoc_punctuation(args[i][0]))
3996                         out_html(set_font("R"));
3997                     else
3998                         out_html(set_font("B"));
3999                     scan_troff(args[i].data(), 1, nullptr);
4000                     out_html(" ");
4001                 }
4002                 // In the mdoc synopsis, there are automatical line breaks (### TODO: before or after?)
4003                 if (mandoc_synopsis)
4004                     out_html("<br>");
4005
4006                 out_html(set_font("R"));
4007                 out_html(NEWLINE);
4008                 if (!fillout)
4009                     curpos = 0;
4010                 else
4011                     curpos++;
4012                 break;
4013             }
4014             case REQ_Fn: // mdoc(7)  for "Function calls"
4015             {
4016                 // brackets and commas have to be inserted automatically
4017                 c += j;
4018                 if (*c == '\n')
4019                     c++;
4020                 getArguments(c, args);
4021                 if (args.count()) {
4022                     for (i = 0; i < args.count(); i++) {
4023                         if (i)
4024                             out_html(set_font("I"));
4025                         else
4026                             out_html(set_font("B"));
4027                         scan_troff(args[i].data(), 1, nullptr);
4028                         out_html(set_font("R"));
4029                         if (i == 0) {
4030                             out_html(" (");
4031                         } else if (i < args.count() - 1)
4032                             out_html(", ");
4033                     }
4034                     out_html(")");
4035                 }
4036                 out_html(set_font("R"));
4037                 if (mandoc_synopsis)
4038                     out_html("<br>");
4039                 out_html(NEWLINE);
4040                 if (!fillout)
4041                     curpos = 0;
4042                 else
4043                     curpos++;
4044                 break;
4045             }
4046             case REQ_Fo: // mdoc(7) "Function definition Opening"
4047             {
4048                 char *font[2] = {(char *)"B", (char *)"R"};
4049                 c += j;
4050                 if (*c == '\n')
4051                     c++;
4052                 char *eol = strchr(c, '\n');
4053                 char *semicolon = strchr(c, ';');
4054                 if ((semicolon != nullptr) && (semicolon < eol))
4055                     *semicolon = ' ';
4056
4057                 getArguments(c, args);
4058                 // Normally a .Fo has only one parameter
4059                 for (i = 0; i < args.count(); i++) {
4060                     out_html(set_font(font[i & 1]));
4061                     scan_troff(args[i].data(), 1, nullptr);
4062                     if (i == 0) {
4063                         out_html(" (");
4064                     }
4065                     // ### TODO What should happen if there is more than one argument
4066                     // else if (i<args.count()-1) out_html(", ");
4067                 }
4068                 function_argument = 1; // Must be > 0
4069                 out_html(set_font("R"));
4070                 out_html(NEWLINE);
4071                 if (!fillout)
4072                     curpos = 0;
4073                 else
4074                     curpos++;
4075                 break;
4076             }
4077             case REQ_Fc: // mdoc(7) "Function definition Close"
4078             {
                     // Emits the closing bracket of a function definition and resets
                     // function_argument.
4079                 // .Fc has no parameter
4080                 c += j;
4081                 c = skip_till_newline(c);
4082                 char *font[2] = {(char *)"B", (char *)"R"};
                     // NOTE(review): `i` is not assigned anywhere in this case arm, so the
                     // font chosen here depends on a value left over from earlier code --
                     // confirm whether a fixed font was intended.
4083                 out_html(set_font(font[i & 1]));
4084                 out_html(")");
4085                 out_html(set_font("R"));
4086                 if (mandoc_synopsis)
4087                     out_html("<br>");
4088                 out_html(NEWLINE);
4089                 if (!fillout)
4090                     curpos = 0;
4091                 else
4092                     curpos++;
4093                 function_argument = 0; // Reset the count variable
4094                 break;
4095             }
4096             case REQ_Fa: // mdoc(7) "Function definition argument"
4097             {
                     // Emits one function argument. function_argument is 0 without a
                     // preceding .Fo, 1 right after .Fo (set there) and above 1 once an
                     // argument was emitted; only in the last case a ", " separator is
                     // written first.
4098                 char *font[2] = {(char *)"B", (char *)"R"};
4099                 c += j;
4100                 if (*c == '\n')
4101                     c++;
4102                 getArguments(c, args);
                     // NOTE(review): `i` is read here before the loop below assigns it, so
                     // the font depends on a value left over from earlier code -- confirm
                     // the intended font.
4103                 out_html(set_font(font[i & 1]));
4104                 // function_argument==0 means that we had no .Fo  before, e.g. in mdoc.samples(7)
4105                 if (function_argument > 1) {
4106                     out_html(", ");
4107                     curpos += 2;
4108                     function_argument++;
4109                 } else if (function_argument == 1) {
4110                     // We are only at the first parameter
4111                     function_argument++;
4112                 }
4113                 for (i = 0; i < args.count(); i++)
4114                     scan_troff(args[i].data(), 1, nullptr);
4115
4116                 out_html(set_font("R"));
4117                 if (!fillout)
4118                     curpos = 0;
4119                 else
4120                     curpos++;
4121                 break;
4122             }
4123
4124             case REQ_OP: /* groff manpages use this construction */
4125             {
4126                 /* .OP a b : [ <B>a</B> <I>b</I> ] */
4127                 out_html(set_font("R"));
4128                 out_html("[");
4129                 curpos++;
4130                 request_mixed_fonts(c, j, "B", "I", true, false);
4131                 break;
4132             }
4133             case REQ_Ft: // perhaps "Function return type"
4134             {
4135                 request_mixed_fonts(c, j, "B", "I", false, true);
4136                 break;
4137             }
4138             case REQ_BR: {
4139                 request_mixed_fonts(c, j, "B", "R", false, false);
4140                 break;
4141             }
4142             case REQ_BI: {
4143                 request_mixed_fonts(c, j, "B", "I", false, false);
4144                 break;
4145             }
4146             case REQ_IB: {
4147                 request_mixed_fonts(c, j, "I", "B", false, false);
4148                 break;
4149             }
4150             case REQ_IR: {
4151                 request_mixed_fonts(c, j, "I", "R", false, false);
4152                 break;
4153             }
4154             case REQ_RB: {
4155                 request_mixed_fonts(c, j, "R", "B", false, false);
4156                 break;
4157             }
4158             case REQ_RI: {
4159                 request_mixed_fonts(c, j, "R", "I", false, false);
4160                 break;
4161             }
4162             case REQ_DT: // man(7) "Default Tabulators"
4163             {
4164                 for (j = 0; j < 20; j++)
4165                     tabstops[j] = (j + 1) * 8;
4166                 maxtstop = 20;
4167                 c = skip_till_newline(c);
4168                 break;
4169             }
4170             case REQ_IP: // man(7) "Ident Paragraph"
4171             {
4172                 c += j;
4173                 getArguments(c, args);
4174
4175                 if (!dl_set[itemdepth]) {
4176                     out_html("<DL>\n");
4177                     dl_set[itemdepth] = 1;
4178                 }
4179                 out_html("<DT>");
4180
4181                 if (args.count())
4182                     scan_troff(args[0].data(), 1, nullptr);
4183
4184                 out_html("</DT>\n<DD>");
4185                 listItemStack.push("DD");
4186                 curpos = 0;
4187                 break;
4188             }
4189             case REQ_TP: // man(7) "hanging Tag Paragraph"
4190             {
4191                 if (!dl_set[itemdepth]) {
4192                     out_html("<DL>\n");
4193                     dl_set[itemdepth] = 1;
4194                 }
4195                 out_html(set_font("R"));
4196                 out_html("<DT>");
4197                 c = skip_till_newline(c);
4198                 /* somewhere a definition ends with '.TP' */
4199                 if (!*c)
4200                     still_dd = true;
4201                 else {
4202                     // HACK for proc(5)
4203                     while (c[0] == '.' && c[1] == '\\' && c[2] == '\"') {
4204                         // We have a comment, so skip the line
4205                         c = skip_till_newline(c);
4206                     }
4207                     c = scan_troff(c, 1, nullptr);
4208                     out_html("<DD>");
4209                     listItemStack.push("DD");
4210                 }
4211                 curpos = 0;
4212                 break;
4213             }
4214             case REQ_IX: // Indexing term (printed on standard error)
4215             {
4216                 c = skip_till_newline(c); // ignore
4217                 break;
4218             }
4219             case REQ_P: // man(7) "Paragraph"
4220             case REQ_LP: // man(7) "Paragraph"
4221             case REQ_PP: // man(7) "Paragraph; reset Prevailing indent"
4222             {
4223                 if (dl_set[itemdepth]) {
4224                     out_html("</DL>\n");
4225                     dl_set[itemdepth] = 0;
4226                 } else if (fillout)
4227                     out_html("<br>");
4228
4229                 if (fillout)
4230                     out_html("<br>\n");
4231                 else
4232                     out_html(NEWLINE);
4233
4234                 curpos = 0;
4235                 c = skip_till_newline(c);
4236                 break;
4237             }
4238             case REQ_HP: // man(7) "Hanging indent Paragraph"
4239             {
4240                 if (!dl_set[itemdepth]) {
4241                     out_html("<DL>");
4242                     dl_set[itemdepth] = 1;
4243                 }
4244                 out_html("<DT>\n");
4245                 still_dd = true;
4246                 c = skip_till_newline(c);
4247                 curpos = 0;
4248                 break;
4249             }
4250             case REQ_PD: // man(7) "Paragraph Distance"
4251             {
4252                 c = skip_till_newline(c);
4253                 break;
4254             }
                 // NOTE(review): in mdoc(7) .Rs/.Re delimit a bibliographic reference
                 // block; here .Rs is handled exactly like man(7) .RS -- confirm that this
                 // is intended.
4255             case REQ_Rs: // mdoc(7) "Relative margin Start"
4256             case REQ_RS: // man(7) "Relative margin Start"
4257             {
                     // Opens one more nesting level, rendered as a <DL> with an empty <DT>.
4258                 c += j;
4259                 getArguments(c, args);
                     // j defaults to 1; a first argument, if present, is evaluated into j
4260                 j = 1;
4261                 if (args.count() > 0)
4262                     scan_expression(args[0].data(), &j);
4263                 if (j >= 0) {
                         // NOTE(review): itemdepth is incremented without a visible upper
                         // bound before it indexes dl_set -- check it against the size of
                         // dl_set for deeply nested pages.
4264                     itemdepth++;
4265                     dl_set[itemdepth] = 0;
4266                     out_html("<DL><DT></DT><DD>");
4267                     listItemStack.push("DD");
4268                     curpos = 0;
4269                 }
4270                 break;
4271             }
4272             case REQ_Re: // mdoc(7) "Relative margin End"
4273             case REQ_RE: // man(7) "Relative margin End"
4274             {
4275                 if (itemdepth > 0) {
4276                     if (dl_set[itemdepth])
4277                         out_html("</DL>");
4278                     out_html("</DL>\n");
4279                     itemdepth--;
4280                 }
4281                 c = skip_till_newline(c);
4282                 curpos = 0;
4283                 break;
4284             }
4285             case REQ_SB: // man(7) "Small; Bold"
4286             {
4287                 out_html(set_font("B"));
4288                 out_html("<small>");
4289                 c = scan_troff(c + j, 1, nullptr);
4290                 out_html("</small>");
4291                 out_html(set_font("R"));
4292                 break;
4293             }
4294             case REQ_SM: // man(7) "SMall"
4295             {
4296                 c = c + j;
4297                 if (*c == '\n')
4298                     c++;
4299                 out_html("<small>");
4300                 c = scan_troff(c, 1, nullptr);
4301                 out_html("</small>");
4302                 break;
4303             }
                 // The four heading requests share one body. The labels fall through
                 // from top to bottom and leave two flags behind:
                 //  - mode: true for the sub section requests (.Ss, .SS) -> <H3>,
                 //    otherwise <H2> (assumes mode is false on entry to the switch --
                 //    confirm at its declaration)
                 //  - mandoc_command: selects scan_troff_mandoc() for the heading text
4304             case REQ_Ss: // mdoc(7) "Sub Section"
4305                 mandoc_command = 1;
4306                 Q_FALLTHROUGH();
4307             case REQ_SS: // man(7) "Sub Section"
4308                 mode = true;
4309                 Q_FALLTHROUGH();
4310             case REQ_Sh: // mdoc(7) "Sub Header"
4311                 /* hack for fallthru from above */
                     // Entered directly (.Sh) mode is not set here, so mandoc_command becomes
                     // true; coming from .Ss/.SS (mode == true) mandoc_command is kept as is
4312                 mandoc_command = !mode || mandoc_command;
4313                 Q_FALLTHROUGH();
4314             case REQ_SH: // man(7) "Sub Header"
4315             {
4316                 c = c + j;
4317                 if (*c == '\n')
4318                     c++;
                     // Close every definition list that is still open, level by level
4319                 while (itemdepth || dl_set[itemdepth]) {
4320                     out_html("</DL>\n");
4321                     if (dl_set[itemdepth])
4322                         dl_set[itemdepth] = 0;
4323                     else if (itemdepth > 0)
4324                         itemdepth--;
4325                 }
4326                 out_html(set_font("R"));
4327                 out_html(change_to_size(0));
                     // A heading ends a no-fill (<PRE>) region
4328                 if (!fillout) {
4329                     fillout = 1;
4330                     out_html("</PRE>");
4331                 }
4332                 trans_char(c, '"', '\a');
                     // Close the <div> opened by the previous heading, if any
4333                 if (in_div) {
4334                     out_html("</div>\n");
4335                     in_div = 0;
4336                 }
4337                 if (mode)
4338                     out_html("\n<H3>");
4339                 else
4340                     out_html("\n<H2>");
                     // Remember whether the heading text starts with "SYNOPSIS"
4341                 mandoc_synopsis = qstrncmp(c, "SYNOPSIS", 8) == 0;
4342                 c = mandoc_command ? scan_troff_mandoc(c, 1, nullptr) : scan_troff(c, 1, nullptr);
4343                 if (mode)
4344                     out_html("</H3>\n");
4345                 else
4346                     out_html("</H2>\n");
4347
                     // The content following the heading is wrapped in a <div>
4348                 out_html("<div>\n");
4349                 in_div = 1;
4350                 curpos = 0;
4351                 break;
4352             }
4353             case REQ_Sx: // mdoc(7)
4354             {
4355                 // reference to a section header
4356                 out_html(set_font("B"));
4357                 trans_char(c, '"', '\a');
4358                 c = c + j;
4359                 if (*c == '\n')
4360                     c++;
4361                 c = scan_troff(c, 1, nullptr);
4362                 out_html(set_font("R"));
4363                 out_html(NEWLINE);
4364                 if (fillout)
4365                     curpos++;
4366                 else
4367                     curpos = 0;
4368                 break;
4369             }
4370             case REQ_St: // groff_mdoc
4371             {
4372                 c += j;
4373                 getArguments(c, args);
4374                 if (args.count()) {
4375                     bool found = false;
4376                     for (const StandardName &standardName : STANDARD_NAMES) {
4377                         if (args[0] == standardName.abbrev) {
4378                             found = true;
4379                             out_html(standardName.formalName);
4380                             break;
4381                         }
4382                     }
4383                     if (!found) // an unknown standard - print the abbreviation
4384                         out_html(args[0]);
4385                 }
4386                 break;
4387             }
            case REQ_TS: // Table Start tbl(1)
            {
                // The whole table is delegated to scan_table(); parsing resumes
                // at the position it returns.
                c = scan_table(c);
                break;
            }
4393             case REQ_Dt: /* mdoc(7) */
4394                 mandoc_command = true;
4395                 Q_FALLTHROUGH();
4396             case REQ_TH: // man(7) "Title Header"
4397             {
4398                 if (!output_possible) {
4399                     c += j;
4400                     getArguments(c, args);
4401                     output_possible = true;
4402                     out_html(DOCTYPE "<HTML>\n<HEAD>\n");
4403                     out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n");
4404                     out_html("<TITLE>");
4405                     if (args.count()) {
4406                         // work around the problem that in a title no HTML tags are allowed
4407                         // but args[0] can have formatting escapes, e.g. to switch a font
4408                         // which results in a HTML tag added to the output
4409                         char *result = nullptr;
4410                         scan_troff(args[0].data(), 0, &result);
4411                         char *p = result;
4412                         QByteArray title;
4413                         while (*p) {
4414                             if (*p == '<') // tag begin -> skip whole tag
4415                             {
4416                                 for (p++; *p && (*p != '>'); p++)
4417                                     ;
4418                                 if (*p)
4419                                     p++;
4420                             }
4421                             if (*p)
4422                                 title += *p++;
4423                         }
4424                         ignore_links = true;
4425                         title += '\n'; // needed so that out_html flushes buffer and ignore_links works
4426                         out_html(title);
4427                         ignore_links = false;
4428                         delete[] result;
4429                     }
4430                     out_html(" Manpage</TITLE>\n");
4431
4432                     // KDE defaults.
4433                     out_html("<link rel=\"stylesheet\" href=\"help:/kdoctools5-common/kde-default.css\"");
4434                     out_html(" type=\"text/css\">\n");
4435
4436                     // Output our custom stylesheet.
4437                     out_html("<link rel=\"stylesheet\" href=\"");
4438                     out_html(cssFile);
4439                     out_html("\" type=\"text/css\">\n");
4440
4441                     // Some elements need background images, but this
4442                     // could not be included in the stylesheet,
4443                     // include it now.
4444                     out_html(
4445                         "<style type=\"text/css\">\n#header_top { "
4446                         "background-image: url(\"help:/kdoctools5-common/top.jpg\"); }\n\n"
4447                         "#header_top div { "
4448                         "background-image: url(\"help:/kdoctools5-common/top-left.jpg\"); }\n\n"
4449                         "#header_top div div { "
4450                         "background-image: url(\"help:/kdoctools5-common/top-right.jpg\"); }\n\n"
4451                         "</style>\n\n");
4452
4453                     out_html("<meta name=\"ROFF_Type\" content=\"");
4454                     if (mandoc_command)
4455                         out_html("mdoc");
4456                     else
4457                         out_html("man");
4458                     out_html("\">\n");
4459
4460                     out_html("</HEAD>\n\n");
4461                     out_html("<BODY>\n\n");
4462
4463                     out_html("<div id=\"header\"><div id=\"header_top\">\n");
4464                     out_html("<div><div>\n");
4465                     out_html("<img src=\"help:/kdoctools5-common/top-kde.jpg\" alt=\"top-kde\"> ");
4466                     if (args.count())
4467                         scan_troff(args[0].data(), 0, nullptr);
4468                     out_html(" Manual Page");
4469                     out_html("</div></div></div></div>\n");
4470
4471                     out_html("<div style=\"margin-left: 5em; margin-right: 5em;\">\n");
4472                     out_html("<h1>");
4473                     if (args.count())
4474                         scan_troff(args[0].data(), 0, nullptr);
4475                     out_html("</h1>\n");
4476                     if (args.count() > 1) {
4477                         out_html("Section: ");
4478                         if (!mandoc_command && (args.count() > 4))
4479                             scan_troff(args[4].data(), 0, nullptr);
4480                         else
4481                             out_html(section_name(args[1].data()));
4482                         out_html(" (");
4483                         scan_troff(args[1].data(), 0, nullptr);
4484                         out_html(")\n");
4485                     } else {
4486                         out_html("Section not specified");
4487                     }
4488                 } else {
4489                     qCWarning(KIO_MAN_LOG) << ".TH found but output not possible";
4490                     c = skip_till_newline(c);
4491                 }
4492                 curpos = 0;
4493                 break;
4494             }
4495             case REQ_TX: // mdoc(7)
4496             {
4497                 c += j;
4498                 getArguments(c, args);
4499                 out_html(set_font("I"));
4500                 const char *c2 = lookup_abbrev(args[0]);
4501                 curpos += qstrlen(c2);
4502                 out_html(c2);
4503                 out_html(set_font("R"));
4504                 if (args.count() > 1)
4505                     out_html(args[1]);
4506                 break;
4507             }
4508             case REQ_rm: // groff(7) "ReMove"
4509                 /* .rm xx : Remove request, macro or string */
4510                 mode = true;
4511                 Q_FALLTHROUGH();
4512             case REQ_rn: // groff(7) "ReName"
4513                 /* .rn xx yy : Rename request, macro or string xx to yy */
4514                 {
4515                     qCDebug(KIO_MAN_LOG) << "start .rm/.rn";
4516                     c += j;
4517                     const QByteArray name(scan_identifier(c));
4518                     if (name.isEmpty()) {
4519                         qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename";
4520                         break;
4521                     }
4522                     QByteArray name2;
4523                     if (!mode) {
4524                         while (*c && isspace(*c) && *c != '\n')
4525                             ++c;
4526                         name2 = scan_identifier(c);
4527                         if (name2.isEmpty()) {
4528                             qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to rename";
4529                             break;
4530                         }
4531                     }
4532                     c = skip_till_newline(c);
4533                     QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
4534                     if (it == s_stringDefinitionMap.end()) {
4535                         qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to rename or remove: " << BYTEARRAY(name);
4536                     } else {
4537                         if (mode) {
4538                             // .rm ReMove
4539                             s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4540                         } else {
4541                             // .rn ReName
4542                             StringDefinition def = (*it);
4543                             s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4544                             s_stringDefinitionMap.insert(name2, def);
4545                         }
4546                     }
4547                     qCDebug(KIO_MAN_LOG) << "end .rm/.rn";
4548                     break;
4549                 }
            case REQ_nx:
            case REQ_in: // groff(7) "INdent"
            {
                /* .in +-N : Indent */
                // Both requests are ignored: the rest of the line is skipped and
                // nothing is written to the output.
                c = skip_till_newline(c);
                break;
            }
4557             case REQ_nr: // groff(7) "Number Register"
4558             {
4559                 qCDebug(KIO_MAN_LOG) << "start .nr";
4560                 c += j;
4561                 const QByteArray name(scan_identifier(c));
4562                 if (name.isEmpty()) {
4563                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty name for register variable";
4564                     break;
4565                 }
4566                 while (*c && (*c == ' ' || *c == '\t'))
4567                     c++;
4568                 int sign = 0;
4569                 if (*c && (*c == '+' || *c == '-')) {
4570                     if (*c == '+')
4571                         sign = 1;
4572                     else if (*c == '-')
4573                         sign = -1;
4574                 }
4575                 int value = 0;
4576                 int increment = 0;
4577                 c = scan_expression(c, &value);
4578                 if (*c && *c != '\n') {
4579                     while (*c && (*c == ' ' || *c == '\t'))
4580                         c++;
4581                     c = scan_expression(c, &increment);
4582                 }
4583                 c = skip_till_newline(c);
4584                 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
4585                 if (it == s_numberDefinitionMap.end()) {
4586                     if (sign < 1)
4587                         value = -value;
4588                     NumberDefinition def(value, increment);
4589                     s_numberDefinitionMap.insert(name, def);
4590                 } else {
4591                     if (sign > 0)
4592                         (*it).m_value += value;
4593                     else if (sign < 0)
4594                         (*it).m_value += -value;
4595                     else
4596                         (*it).m_value = value;
4597                     (*it).m_increment = increment;
4598                 }
4599                 qCDebug(KIO_MAN_LOG) << "end .nr";
4600                 break;
4601             }
            case REQ_am: // groff(7) "Append Macro"
                /* .am xx yy : append to a macro. */
                /* define or handle as .ig yy */
                // mode distinguishes .am (true) from .de/.de1 in the shared body below.
                mode = true;
                Q_FALLTHROUGH();
            case REQ_de: // groff(7) "DEfine macro"
            case REQ_de1: // groff(7) "DEfine macro"
            {
                /* .de xx yy : define or redefine macro xx; end at .yy (..) */
                /* define or handle as .ig yy */
                // The macro body is stored as text in s_stringDefinitionMap under the
                // macro name; a request without any argument is ignored.
                qCDebug(KIO_MAN_LOG) << "Start .am/.de";
                c += j;
                getArguments(c, args);
                if (args.count() == 0)
                    break;

                const QByteArray name(args[0]);

                // The body ends at a line starting with ".." or, when a second
                // argument is given, with "." followed by that argument.
                QByteArray endmacro;
                if (args.count() == 1)
                    endmacro = "..";
                else
                    endmacro = "." + args[1]; // krazy:exclude=doublequote_chars

                // sl marks the start of the body; c moves forward line by line until
                // the end marker or the end of the input is reached.
                sl = c;
                while (*c && qstrncmp(c, endmacro, endmacro.length()))
                    c = skip_till_newline(c);

                // Copy [sl, c) into the macro text, turning each "\\" pair into a
                // single backslash.
                QByteArray macro;
                while (sl != c) {
                    if (sl[0] == '\\' && sl[1] == '\\') {
                        macro += '\\';
                        sl++;
                    } else
                        macro += *sl;
                    sl++;
                }

                QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
                if (it == s_stringDefinitionMap.end()) {
                    // Unknown name: both .am and .de create a new definition.
                    StringDefinition def;
                    def.m_length = 0;
                    def.m_output = macro;
                    s_stringDefinitionMap.insert(name, def);
                } else if (mode) {
                    // .am Append Macro
                    (*it).m_length = 0; // It could be formerly a string
                    // Make sure the appended text starts on a line of its own.
                    if (!(*it).m_output.endsWith('\n'))
                        (*it).m_output += '\n';
                    (*it).m_output += macro;
                } else {
                    // .de DEfine macro
                    (*it).m_length = 0; // It could be formerly a string
                    (*it).m_output = macro;
                }
                // Skip the line c stopped at (the end marker, when one was found).
                c = skip_till_newline(c);
                qCDebug(KIO_MAN_LOG) << "End .am/.de";
                break;
            }
4661             case REQ_Bl: // mdoc(7) "Begin List"
4662             {
4663                 QByteArray list_options;
4664                 char *nl = strchr(c, '\n');
4665                 c = c + j;
4666                 if (dl_set[itemdepth]) {
4667                     /* These things can nest. */
4668                     itemdepth++;
4669                 }
4670                 if (nl) {
4671                     /* Parse list options */
4672                     list_options = QByteArray(c, nl - c);
4673                 }
4674                 if (list_options.contains("-bullet")) {
4675                     /* HTML Unnumbered List */
4676                     dl_set[itemdepth] = BL_BULLET_LIST;
4677                     out_html("<UL>\n");
4678                 } else if (list_options.contains("-enum")) {
4679                     /* HTML Ordered List */
4680                     dl_set[itemdepth] = BL_ENUM_LIST;
4681                     out_html("<OL>\n");
4682                 } else {
4683                     /* HTML Descriptive List */
4684                     dl_set[itemdepth] = BL_DESC_LIST;
4685                     out_html("<DL>\n");
4686                 }
4687                 curpos = 0;
4688                 c = skip_till_newline(c);
4689                 break;
4690             }
4691             case REQ_El: // mdoc(7) "End List"
4692             {
4693                 checkListStack();
4694                 c = c + j;
4695                 if (dl_set[itemdepth] & BL_DESC_LIST)
4696                     out_html("</DL>\n");
4697                 else if (dl_set[itemdepth] & BL_BULLET_LIST)
4698                     out_html("</UL>\n");
4699                 else if (dl_set[itemdepth] & BL_ENUM_LIST)
4700                     out_html("</OL>\n");
4701                 dl_set[itemdepth] = 0;
4702                 if (itemdepth > 0)
4703                     itemdepth--;
4704                 if (!fillout)
4705                     out_html(NEWLINE);
4706
4707                 curpos = 0;
4708                 c = skip_till_newline(c);
4709                 break;
4710             }
4711             case REQ_It: // mdoc(7) "list ITem"
4712             {
4713                 checkListStack();
4714                 c = c + j;
4715                 // if (qstrncmp(c, "Xo", 2) == 0 && isspace(*(c + 2)))
4716                 // c = skip_till_newline(c);
4717                 if (dl_set[itemdepth] & BL_DESC_LIST) {
4718                     out_html("<DT>");
4719                     out_html(set_font("B"));
4720                     if (*c == '\n') {
4721                         /* Don't allow embedded comms after a newline */
4722                         c++;
4723                         c = scan_troff(c, 1, nullptr);
4724                     } else {
4725                         /* Do allow embedded comms on the same line. */
4726                         c = scan_troff_mandoc(c, 1, nullptr);
4727                     }
4728                     out_html(set_font("R"));
4729                     out_html("</DT>");
4730                     out_html(NEWLINE);
4731                     out_html("<DD>");
4732                     listItemStack.push("DD");
4733                 } else if (dl_set[itemdepth] & (BL_BULLET_LIST | BL_ENUM_LIST)) {
4734                     out_html("<LI>");
4735                     listItemStack.push("LI");
4736                     c = scan_troff_mandoc(c, 1, nullptr);
4737                     out_html(NEWLINE);
4738                 }
4739                 if (fillout)
4740                     curpos++;
4741                 else
4742                     curpos = 0;
4743                 break;
4744             }
            // These mdoc requests are not implemented yet: the request line is
            // skipped and nothing is written to the output.
            case REQ_Bk: /* mdoc(7) */
            case REQ_Ek: /* mdoc(7) */
            case REQ_Dd: /* mdoc(7) */
            case REQ_Os: // mdoc(7) "Operating System"
            case REQ_Sm: // mdoc(7) space mode
                c = skip_till_newline(c); // TODO
                break;
4752             case REQ_Bt: // mdoc(7) "Beta Test"
4753             {
4754                 // trans_char(c, '"', '\a');
4755                 // c = c + j;
4756                 out_html(" is currently in beta test.");
4757                 if (fillout)
4758                     curpos++;
4759                 else
4760                     curpos = 0;
4761                 break;
4762             }
4763             case REQ_At: /* mdoc(7) */
4764             case REQ_Fx: /* mdoc(7) */
4765             case REQ_Nx: /* mdoc(7) */
4766             case REQ_Ox: /* mdoc(7) */
4767             case REQ_Bx: /* mdoc(7) */
4768             case REQ_Ux: /* mdoc(7) */
4769             case REQ_Dx: /* mdoc(7) */
4770             {
4771                 bool parsable = true;
4772                 trans_char(c, '"', '\a');
4773                 c = c + j;
4774                 if (*c == '\n')
4775                     c++;
4776                 if (request == REQ_At) {
4777                     out_html("AT&amp;T UNIX ");
4778                     parsable = false;
4779                 } else if (request == REQ_Fx) {
4780                     out_html("FreeBSD ");
4781                     parsable = false;
4782                 } else if (request == REQ_Nx)
4783                     out_html("NetBSD ");
4784                 else if (request == REQ_Ox)
4785                     out_html("OpenBSD ");
4786                 else if (request == REQ_Bx)
4787                     out_html("BSD ");
4788                 else if (request == REQ_Ux)
4789                     out_html("UNIX ");
4790                 else if (request == REQ_Dx)
4791                     out_html("DragonFly ");
4792                 if (parsable)
4793                     c = scan_troff_mandoc(c, 1, nullptr);
4794                 else
4795                     c = scan_troff(c, 1, nullptr);
4796                 if (fillout)
4797                     curpos++;
4798                 else
4799                     curpos = 0;
4800                 break;
4801             }
4802             case REQ_Dl: /* mdoc(7) */
4803             {
4804                 c = c + j;
4805                 out_html(NEWLINE);
4806                 out_html("<BLOCKQUOTE>");
4807                 if (*c == '\n')
4808                     c++;
4809                 c = scan_troff_mandoc(c, 1, nullptr);
4810                 out_html("</BLOCKQUOTE>");
4811                 if (fillout)
4812                     curpos++;
4813                 else
4814                     curpos = 0;
4815                 break;
4816             }
4817             case REQ_Bd: /* mdoc(7) */
4818             { /* Seems like a kind of example/literal mode */
4819                 QByteArray bd_options;
4820                 char *nl = strchr(c, '\n');
4821                 c = c + j;
4822                 if (nl)
4823                     bd_options = QByteArray(c, nl - c);
4824                 out_html(NEWLINE);
4825                 mandoc_bd_options = 0; /* Remember options for terminating Bl */
4826                 if (bd_options.contains("-offset indent")) {
4827                     mandoc_bd_options |= BD_INDENT;
4828                     out_html("<BLOCKQUOTE>\n");
4829                 }
4830                 if (bd_options.contains("-literal") || bd_options.contains("-unfilled")) {
4831                     if (fillout) {
4832                         mandoc_bd_options |= BD_LITERAL;
4833                         out_html(set_font("R"));
4834                         out_html(change_to_size('0'));
4835                         out_html("<PRE>\n");
4836                     }
4837                     curpos = 0;
4838                     fillout = 0;
4839                 }
4840                 c = skip_till_newline(c);
4841                 break;
4842             }
4843             case REQ_Ed: /* mdoc(7) */
4844             {
4845                 if (mandoc_bd_options & BD_LITERAL) {
4846                     if (!fillout) {
4847                         out_html(set_font("R"));
4848                         out_html(change_to_size('0'));
4849                         out_html("</PRE>\n");
4850                     }
4851                 }
4852                 if (mandoc_bd_options & BD_INDENT)
4853                     out_html("</BLOCKQUOTE>\n");
4854                 curpos = 0;
4855                 fillout = 1;
4856                 c = skip_till_newline(c);
4857                 break;
4858             }
4859             case REQ_Be: /* mdoc(7) */
4860             {
4861                 c = c + j;
4862                 if (fillout)
4863                     out_html("<br><br>");
4864                 else {
4865                     out_html(NEWLINE);
4866                 }
4867                 curpos = 0;
4868                 c = skip_till_newline(c);
4869                 break;
4870             }
4871             case REQ_Xr: /* mdoc(7) */ // ### FIXME: it should issue a <a href="man:somewhere(x)"> directly
4872             {
4873                 /* Translate xyz 1 to xyz(1)
4874                  * Allow for multiple spaces.  Allow the section to be missing.
4875                  */
4876                 char buff[NULL_TERMINATED(MED_STR_MAX)];
4877                 char *bufptr;
4878                 trans_char(c, '"', '\a');
4879                 bufptr = buff;
4880                 c = c + j;
4881                 if (*c == '\n')
4882                     c++; /* Skip spaces */
4883                 while (isspace(*c) && *c != '\n')
4884                     c++;
4885                 while (isalnum(*c) || *c == '.' || *c == ':' || *c == '_' || *c == '-') {
4886                     /* Copy the xyz part */
4887                     *bufptr = *c;
4888                     bufptr++;
4889                     if (bufptr >= buff + MED_STR_MAX)
4890                         break;
4891                     c++;
4892                 }
4893                 while (isspace(*c) && *c != '\n')
4894                     c++; /* Skip spaces */
4895                 if (isdigit(*c)) {
4896                     /* Convert the number if there is one */
4897                     *bufptr = '(';
4898                     bufptr++;
4899                     if (bufptr < buff + MED_STR_MAX) {
4900                         while (isalnum(*c)) {
4901                             *bufptr = *c;
4902                             bufptr++;
4903                             if (bufptr >= buff + MED_STR_MAX)
4904                                 break;
4905                             c++;
4906                         }
4907                         if (bufptr < buff + MED_STR_MAX) {
4908                             *bufptr = ')';
4909                             bufptr++;
4910                         }
4911                     }
4912                 }
4913                 while (*c != '\n') {
4914                     /* Copy the remainder */
4915                     if (!isspace(*c)) {
4916                         *bufptr = *c;
4917                         bufptr++;
4918                         if (bufptr >= buff + MED_STR_MAX)
4919                             break;
4920                     }
4921                     c++;
4922                 }
4923                 *bufptr = '\n';
4924                 bufptr[1] = 0;
4925                 scan_troff_mandoc(buff, 1, nullptr);
4926                 out_html(NEWLINE);
4927                 if (fillout)
4928                     curpos++;
4929                 else
4930                     curpos = 0;
4931                 break;
4932             }
4933             case REQ_Fl: // mdoc(7) "FLags"
4934             {
4935                 // trans_char(c, '"', '\a');
4936                 c += j;
4937                 QList<char *> argPointers;
4938                 getArguments(c, args, &argPointers);
4939                 out_html(set_font("B"));
4940                 out_html("-");
4941                 if (args.count() == 0) {
4942                     /*out_html("-");*/ // stdin or stdout
4943                 } else {
4944                     if (argPointers.count())
4945                         scan_troff_mandoc(argPointers[0], 1, nullptr);
4946                     /*
4947                     for (i = 0; i < args.count(); ++i)
4948                     {
4949                       if (ispunct(args[i][0]) && args[i][0] != '-')
4950                       {
4951                         scan_troff_mandoc(argPointers[i], 1, NULL);
4952                       }
4953                       else
4954                       {
4955                         if (i > 0)
4956                           out_html(" "); // Put a space between flags
4957                         out_html("-");
4958                         scan_troff_mandoc(argPointers[i], 1, NULL);
4959                       }
4960                     }
4961                     */
4962                 }
4963                 out_html(set_font("R"));
4964                 out_html(NEWLINE);
4965                 if (fillout)
4966                     curpos++;
4967                 else
4968                     curpos = 0;
4969                 break;
4970             }
4971             case REQ_Pa: /* mdoc(7) */
4972             case REQ_Pf: /* mdoc(7) */
4973             {
4974                 trans_char(c, '"', '\a');
4975                 c = c + j;
4976                 if (*c == '\n')
4977                     c++;
4978                 c = scan_troff_mandoc(c, 1, nullptr);
4979                 out_html(NEWLINE);
4980                 if (fillout)
4981                     curpos++;
4982                 else
4983                     curpos = 0;
4984                 break;
4985             }
4986             case REQ_Pp: /* mdoc(7) */
4987             {
4988                 if (fillout)
4989                     out_html("<br><br>\n");
4990                 else {
4991                     out_html(NEWLINE);
4992                 }
4993                 curpos = 0;
4994                 c = skip_till_newline(c);
4995                 break;
4996             }
            // Enclosure macros: all of them delegate to process_quote() and differ
            // only in the opening and closing text passed to it (HTML entities
            // where the character is special in HTML).
            case REQ_Aq: // mdoc(7) "Angle bracket Quote"
                c = process_quote(c, j, "&lt;", "&gt;");
                break;
            case REQ_Bq: // mdoc(7) "Bracket Quote"
                c = process_quote(c, j, "[", "]");
                break;
            case REQ_Dq: // mdoc(7) "Double Quote"
                c = process_quote(c, j, "&ldquo;", "&rdquo;");
                break;
            case REQ_Pq: // mdoc(7) "Parenthesis Quote"
                c = process_quote(c, j, "(", ")");
                break;
            case REQ_Qq: // mdoc(7) "straight double Quote"
                c = process_quote(c, j, "&quot;", "&quot;");
                break;
            case REQ_Sq: // mdoc(7) "Single Quote"
                c = process_quote(c, j, "&lsquo;", "&rsquo;");
                break;
5015             case REQ_Op: /* mdoc(7) */
5016             {
5017                 trans_char(c, '"', '\a');
5018                 c += j;
5019                 if (*c == '\n')
5020                     c++;
5021                 out_html(set_font("R"));
5022                 out_html("[");
5023                 c = scan_troff_mandoc(c, 1, nullptr);
5024                 out_html(set_font("R"));
5025                 out_html("]");
5026                 out_html(NEWLINE);
5027                 if (fillout)
5028                     curpos++;
5029                 else
5030                     curpos = 0;
5031                 break;
5032             }
5033             case REQ_Oo: /* mdoc(7) */
5034             {
5035                 trans_char(c, '"', '\a');
5036                 c += j;
5037                 if (*c == '\n')
5038                     c++;
5039                 out_html(set_font("R"));
5040                 out_html("[");
5041                 c = scan_troff_mandoc(c, 1, nullptr);
5042                 if (fillout)
5043                     curpos++;
5044                 else
5045                     curpos = 0;
5046                 break;
5047             }
5048             case REQ_Oc: /* mdoc(7) */
5049             {
5050                 trans_char(c, '"', '\a');
5051                 c += j;
5052                 out_html(set_font("R"));
5053                 out_html("]");
5054                 c = scan_troff_mandoc(c, 1, nullptr);
5055                 if (fillout)
5056                     curpos++;
5057                 else
5058                     curpos = 0;
5059                 break;
5060             }
5061             case REQ_Ql: /* mdoc(7) */
5062             {
5063                 /* Single quote first word in the line */
5064                 char *sp;
5065                 trans_char(c, '"', '\a');
5066                 c = c + j;
5067                 if (*c == '\n')
5068                     c++;
5069                 sp = c;
5070                 do {
5071                     /* Find first whitespace after the
5072                      * first word that isn't a mandoc macro
5073                      */
5074                     while (*sp && isspace(*sp))
5075                         sp++;
5076                     while (*sp && !isspace(*sp))
5077                         sp++;
5078                 } while (*sp && isupper(*(sp - 2)) && islower(*(sp - 1)));
5079
5080                 /* Use a newline to mark the end of text to
5081                  * be quoted
5082                  */
5083                 if (*sp)
5084                     *sp = '\n';
5085                 out_html("`"); /* Quote the text */
5086                 c = scan_troff_mandoc(c, 1, nullptr);
5087                 out_html("'");
5088                 out_html(NEWLINE);
5089                 if (fillout)
5090                     curpos++;
5091                 else
5092                     curpos = 0;
5093                 break;
5094             }
5095             case REQ_Ar: /* mdoc(7) */
5096             {
5097                 /* parse one line in italics */
5098                 out_html(set_font("I"));
5099                 c += j;
5100                 QList<char *> argPointers;
5101                 getArguments(c, args, &argPointers);
5102                 if (args.count() == 0) {
5103                     // An empty Ar means "file ..."
5104                     out_html("file ...");
5105                 } else {
5106                     if (argPointers.count())
5107                         c = scan_troff_mandoc(argPointers[0], 1, nullptr);
5108                 }
5109
5110                 out_html(set_font("R"));
5111                 out_html(NEWLINE);
5112                 if (fillout)
5113                     curpos++;
5114                 else
5115                     curpos = 0;
5116                 break;
5117             }
5118             case REQ_Em: /* mdoc(7) */
5119             {
5120                 out_html("<em>");
5121                 trans_char(c, '"', '\a');
5122                 c += j;
5123                 if (*c == '\n')
5124                     c++;
5125                 c = scan_troff_mandoc(c, 1, nullptr);
5126                 out_html("</em>");
5127                 out_html(NEWLINE);
5128                 if (fillout)
5129                     curpos++;
5130                 else
5131                     curpos = 0;
5132                 break;
5133             }
5134             case REQ_Ad: /* mdoc(7) */
5135             case REQ_Va: /* mdoc(7) */
5136             case REQ_Xo: /* mdoc(7) */
5137             case REQ_Xc: /* mdoc(7) */
5138             {
5139                 /* parse one line in italics */
5140                 out_html(set_font("I"));
5141                 trans_char(c, '"', '\a');
5142                 c = c + j;
5143                 if (*c == '\n')
5144                     c++;
5145                 c = scan_troff_mandoc(c, 1, nullptr);
5146                 out_html(set_font("R"));
5147                 out_html(NEWLINE);
5148                 if (fillout)
5149                     curpos++;
5150                 else
5151                     curpos = 0;
5152                 break;
5153             }
5154             case REQ_Nd: /* mdoc(7) */
5155             {
5156                 trans_char(c, '"', '\a');
5157                 c = c + j;
5158                 if (*c == '\n')
5159                     c++;
5160                 out_html(" - ");
5161                 c = scan_troff_mandoc(c, 1, nullptr);
5162                 out_html(NEWLINE);
5163                 if (fillout)
5164                     curpos++;
5165                 else
5166                     curpos = 0;
5167                 break;
5168             }
5169             case REQ_Nm: // mdoc(7) "Name Macro"
5170             {
5171                 c += j;
5172                 QList<char *> argPointers;
5173                 getArguments(c, args, &argPointers);
5174
5175                 if (mandoc_name.isEmpty() && args.count())
5176                     mandoc_name = args[0];
5177
5178                 if (mandoc_synopsis) {
5179                     /* Break lines only in the Synopsis.
5180                      * The Synopsis section seems to be treated
5181                      * as a special case - Bummer!
5182                      * Do not insert a break before the very first Nm in this section
5183                      */
5184
5185                     if (mandoc_name_count)
5186                         out_html("<BR>");
5187
5188                     mandoc_name_count++;
5189                 }
5190
5191                 out_html(set_font("B"));
5192
5193                 // only show name if
5194                 // .Nm (first not-null-length defined name)
5195                 // .Nm name
5196                 // do not show
5197                 // .Nm ""
5198                 if (args.count() == 0)
5199                     scan_troff(mandoc_name.data(), 0, nullptr);
5200                 else {
5201                     if (argPointers.count())
5202                         c = scan_troff_mandoc(argPointers[0], 1, nullptr);
5203                 }
5204
5205                 out_html(set_font("R"));
5206
5207                 if (fillout)
5208                     curpos++;
5209                 else
5210                     curpos = 0;
5211                 break;
5212             }
5213             case REQ_Cd: /* mdoc(7) */
5214             case REQ_Cm: /* mdoc(7) */
5215             case REQ_Ic: /* mdoc(7) */
5216             case REQ_Ms: /* mdoc(7) */
5217             case REQ_Or: /* mdoc(7) */
5218             case REQ_Sy: /* mdoc(7) */
5219             {
5220                 /* parse one line in bold */
5221                 out_html(set_font("B"));
5222                 trans_char(c, '"', '\a');
5223                 c = c + j;
5224                 if (*c == '\n')
5225                     c++;
5226                 c = scan_troff_mandoc(c, 1, nullptr);
5227                 out_html(set_font("R"));
5228                 out_html(NEWLINE);
5229                 if (fillout)
5230                     curpos++;
5231                 else
5232                     curpos = 0;
5233                 break;
5234             }
5235             case REQ_Ta: /* mdoc(7) */
5236             {
5237                 // ### FIXME: this is a simplification
5238                 // for a list item element in a ".Bl -tag -width indent" type list
5239                 // man:mdoc says: "indent == Six constant width spaces"
5240                 out_html("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
5241                 c = c + j;
5242                 if (*c == '\n')
5243                     c++;
5244                 break;
5245             }
5246             // ### FIXME: punctuation is handled badly!
5247             case REQ_Dv: /* mdoc(7) */
5248             case REQ_Ev: /* mdoc(7) */
5249             case REQ_Fr: /* mdoc(7) */
5250             case REQ_Li: /* mdoc(7) */
5251             case REQ_nN: /* mdoc(7) */
5252             {
5253                 trans_char(c, '"', '\a');
5254                 c += j;
5255                 if (*c == '\n')
5256                     c++;
5257                 out_html(set_font("B"));
5258                 c = scan_troff_mandoc(c, 1, nullptr);
5259                 out_html(set_font("R"));
5260                 out_html(NEWLINE);
5261                 if (fillout)
5262                     curpos++;
5263                 else
5264                     curpos = 0;
5265                 break;
5266             }
5267             case REQ_Tn: /* mdoc(7) Trade Names ... prints its arguments in a smaller font */
5268             {
5269                 trans_char(c, '"', '\a');
5270                 c += j;
5271                 if (*c == '\n')
5272                     c++;
5273                 out_html("<small>");
5274                 c = scan_troff_mandoc(c, 1, NULL);
5275                 out_html("</small>");
5276                 if (fillout)
5277                     curpos++;
5278                 else
5279                     curpos = 0;
5280                 break;
5281             }
5282             case REQ_Ns: /* mdoc(7) No-Space Macro */
5283             {
5284                 c += j;
5285                 while (*c && isspace(*c) && (*c != '\n'))
5286                     c++;
5287                 Q_FALLTHROUGH(); // (The '.Ns' macro always invokes the '.No' macro...)
5288             }
5289             case REQ_No: /* mdoc(7) Normal Text Macro */
5290             {
5291                 if (request == REQ_No) // not fallen through from REQ_Ns
5292                 {
5293                     trans_char(c, '"', '\a');
5294                     c += j;
5295                     if (*c == '\n')
5296                         c++;
5297                 }
5298                 out_html("<span style=\"font-style:normal\">");
5299                 c = scan_troff_mandoc(c, 1, NULL);
5300                 out_html("</span>");
5301                 out_html(NEWLINE);
5302                 if (fillout)
5303                     curpos++;
5304                 else
5305                     curpos = 0;
5306                 break;
5307             }
5308             case REQ_perc_A: /* mdoc(7) biblio stuff */
5309             case REQ_perc_D:
5310             case REQ_perc_N:
5311             case REQ_perc_O:
5312             case REQ_perc_P:
5313             case REQ_perc_Q:
5314             case REQ_perc_V: {
5315                 c = c + j;
5316                 if (*c == '\n')
5317                     c++;
5318                 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */
5319                 if (fillout)
5320                     curpos++;
5321                 else
5322                     curpos = 0;
5323                 break;
5324             }
5325             case REQ_perc_B:
5326             case REQ_perc_J:
5327             case REQ_perc_R:
5328             case REQ_perc_T: {
5329                 c = c + j;
5330                 out_html(set_font("I"));
5331                 if (*c == '\n')
5332                     c++;
5333                 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */
5334                 out_html(set_font("R"));
5335                 if (fillout)
5336                     curpos++;
5337                 else
5338                     curpos = 0;
5339                 break;
5340             }
5341             case REQ_URL: // man(7) ".URL url link trailer"
5342             {
5343                 c += j;
5344
5345                 getArguments(c, args);
5346                 ignore_links = true;
5347                 out_html("<a href=\"");
5348
5349                 if (args.count() > 0)
5350                     scan_troff(args[0].data(), 0, nullptr);
5351
5352                 out_html("\">");
5353                 if (args.count() > 1)
5354                     scan_troff(args[1].data(), 0, nullptr);
5355
5356                 out_html("</a>\n"); // trailing newline important to make ignore_links work
5357                 ignore_links = false;
5358
5359                 if (args.count() > 2)
5360                     scan_troff(args[2].data(), 1, nullptr);
5361
5362                 break;
5363             }
5364             case REQ_tr: // translate   TODO
5365             {
5366                 c = skip_till_newline(c);
5367                 break;
5368             }
5369             case REQ_nroff: // groff(7)  "NROFF mode"
5370                 mode = true;
5371                 Q_FALLTHROUGH();
5372             case REQ_troff: // groff(7) "TROFF mode"
5373             {
5374                 s_nroff = mode;
5375                 c += j;
5376                 c = skip_till_newline(c);
5377                 break;
5378             }
5379             case REQ_als: // groff(7) "ALias String"
5380             {
5381                 /*
5382                  * Note an alias is supposed to be something like a hard link
5383                  * However to make it simplier, we only copy the string.
5384                  */
5385                 // Be careful: unlike .rn, the destination is first, origin is second
5386                 qCDebug(KIO_MAN_LOG) << "start .als";
5387                 c += j;
5388                 const QByteArray name(scan_identifier(c));
5389                 if (name.isEmpty()) {
5390                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to alias";
5391                     break;
5392                 }
5393                 while (*c && isspace(*c) && *c != '\n')
5394                     ++c;
5395                 const QByteArray name2(scan_identifier(c));
5396                 if (name2.isEmpty()) {
5397                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to alias";
5398                     break;
5399                 }
5400                 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name);
5401                 c = skip_till_newline(c);
5402                 if (name == name2) {
5403                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination string to alias: " << BYTEARRAY(name);
5404                     break;
5405                 }
5406                 // Second parameter is origin (unlike in .rn)
5407                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name2);
5408                 if (it == s_stringDefinitionMap.end()) {
5409                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias of " << BYTEARRAY(name2);
5410                 } else {
5411                     StringDefinition def = (*it);
5412                     s_stringDefinitionMap.insert(name, def);
5413                 }
5414                 qCDebug(KIO_MAN_LOG) << "end .als";
5415                 break;
5416             }
5417             case REQ_rr: // groff(7) "Remove number Register"
5418             {
5419                 qCDebug(KIO_MAN_LOG) << "start .rr";
5420                 c += j;
5421                 const QByteArray name(scan_identifier(c));
5422                 if (name.isEmpty()) {
5423                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename: ";
5424                     break;
5425                 }
5426                 c = skip_till_newline(c);
5427                 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
5428                 if (it == s_numberDefinitionMap.end()) {
5429                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: trying to remove inexistant number register: ";
5430                 } else {
5431                     s_numberDefinitionMap.remove(name);
5432                 }
5433                 qCDebug(KIO_MAN_LOG) << "end .rr";
5434                 break;
5435             }
5436             case REQ_rnn: // groff(7) "ReName Number register"
5437             {
5438                 qCDebug(KIO_MAN_LOG) << "start .rnn";
5439                 c += j;
5440                 const QByteArray name(scan_identifier(c));
5441                 if (name.isEmpty()) {
5442                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin to remove/rename number register";
5443                     break;
5444                 }
5445                 while (*c && isspace(*c) && *c != '\n')
5446                     ++c;
5447                 const QByteArray name2(scan_identifier(c));
5448                 if (name2.isEmpty()) {
5449                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination to rename number register";
5450                     break;
5451                 }
5452                 c = skip_till_newline(c);
5453                 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
5454                 if (it == s_numberDefinitionMap.end()) {
5455                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find number register to rename" << BYTEARRAY(name);
5456                 } else {
5457                     NumberDefinition def = (*it);
5458                     s_numberDefinitionMap.remove(name); // ### QT4: removeAll
5459                     s_numberDefinitionMap.insert(name2, def);
5460                 }
5461                 qCDebug(KIO_MAN_LOG) << "end .rnn";
5462                 break;
5463             }
5464             case REQ_aln: // groff(7) "ALias Number Register"
5465             {
5466                 /*
5467                  * Note an alias is supposed to be something like a hard link
5468                  * However to make it simplier, we only copy the string.
5469                  */
5470                 // Be careful: unlike .rnn, the destination is first, origin is second
5471                 qCDebug(KIO_MAN_LOG) << "start .aln";
5472                 c += j;
5473                 const QByteArray name(scan_identifier(c));
5474                 if (name.isEmpty()) {
5475                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination number register to alias";
5476                     break;
5477                 }
5478                 while (*c && isspace(*c) && *c != '\n')
5479                     ++c;
5480                 const QByteArray name2(scan_identifier(c));
5481                 if (name2.isEmpty()) {
5482                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin number register to alias";
5483                     break;
5484                 }
5485                 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name);
5486                 c = skip_till_newline(c);
5487                 if (name == name2) {
5488                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination number register to alias: " << BYTEARRAY(name);
5489                     break;
5490                 }
5491                 // Second parameter is origin (unlike in .rnn)
5492                 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name2);
5493                 if (it == s_numberDefinitionMap.end()) {
5494                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias: " << BYTEARRAY(name2);
5495                 } else {
5496                     NumberDefinition def = (*it);
5497                     s_numberDefinitionMap.insert(name, def);
5498                 }
5499                 qCDebug(KIO_MAN_LOG) << "end .aln";
5500                 break;
5501             }
5502             case REQ_shift: // groff(7) "SHIFT parameter"
5503             {
5504                 c += j;
5505                 h = c;
5506                 while (*h && *h != '\n' && isdigit(*h))
5507                     ++h;
5508                 const char tempchar = *h;
5509                 *h = 0;
5510                 const QByteArray number(c);
5511                 *h = tempchar;
5512                 c = skip_till_newline(h);
5513                 unsigned int result = 1; // Numbers of shifts to do
5514                 if (!number.isEmpty()) {
5515                     bool ok = false;
5516                     result = number.toUInt(&ok);
5517                     if (!ok || result < 1)
5518                         result = 1;
5519                 }
5520                 for (unsigned int num = 0; num < result; ++num) {
5521                     if (!s_argumentList.isEmpty())
5522                         s_argumentList.pop_front();
5523                 }
5524                 break;
5525             }
5526             case REQ_while: // groff(7) "WHILE loop"
5527             {
5528                 request_while(c, j, mandoc_command);
5529                 break;
5530             }
5531             case REQ_break: // groff(7) Break out of a while loop.
5532             {
5533                 c += j;
5534                 break_the_while_loop = true;
5535                 break;
5536             }
5537             case REQ_do: // groff(7) "DO command"
5538             {
5539                 // ### HACK: we just replace do by a \n and a .
5540                 *c = '\n';
5541                 c++;
5542                 *c = '.';
5543                 // The . will be treated as next character
5544                 break;
5545             }
5546             case REQ_nop: // groff(7) nop
5547             {
5548                 c += j;
5549                 break;
5550             }
5551             default: {
5552                 if (mandoc_command && ((isupper(*c) && islower(*(c + 1))) || (islower(*c) && isupper(*(c + 1))))) {
5553                     /* Let through any mdoc(7) commands that haven't
5554                      * been delt with.
5555                      * I don't want to miss anything out of the text.
5556                      */
5557                     char buf[4] = {c[0], c[1], ' ', 0};
5558                     out_html(buf); /* Print the command (it might just be text). */
5559                     c = c + j;
5560                     trans_char(c, '"', '\a');
5561                     if (*c == '\n')
5562                         c++;
5563                     out_html(set_font("R"));
5564                     c = scan_troff(c, 1, nullptr);
5565                     out_html(NEWLINE);
5566                     if (fillout)
5567                         curpos++;
5568                     else
5569                         curpos = 0;
5570                 } else
5571                     c = skip_till_newline(c);
5572                 break;
5573             }
5574             }
5575         }
5576     }
5577     if (fillout) {
5578         out_html(NEWLINE);
5579         curpos++;
5580     }
5581     return c;
5582 }
5583
5584 //---------------------------------------------------------------------
5585
5586 static int contained_tab = 0;
5587 static bool mandoc_line = false; // Signals whether to look for embedded mandoc commands.
5588
5589 static char *scan_troff(char *c, bool san, char **result)
5590 { /* san : stop at newline */
5591     QByteArray intbuff;
5592     intbuff.reserve(MED_STR_MAX);
5593 #define FLUSHIBP                                                                                                                                               \
5594     {                                                                                                                                                          \
5595         out_html(intbuff);                                                                                                                                     \
5596         intbuff.clear();                                                                                                                                       \
5597     }
5598     char *exbuffer;
5599     int exbuffpos, exbuffmax, exnewline_for_fun;
5600     bool exscaninbuff;
5601     int usenbsp = 0;
5602
5603     exbuffer = buffer;
5604     exbuffpos = buffpos;
5605     exbuffmax = buffmax;
5606     exnewline_for_fun = newline_for_fun;
5607     exscaninbuff = scaninbuff;
5608     newline_for_fun = 0;
5609     if (result) {
5610         if (*result) {
5611             buffer = *result;
5612             buffpos = qstrlen(buffer);
5613             buffmax = buffpos;
5614         } else {
5615             buffer = new char[LARGE_STR_MAX + 1];
5616             buffpos = 0;
5617             buffmax = LARGE_STR_MAX;
5618         }
5619         scaninbuff = true;
5620     }
5621     char *h = c; // ### FIXME below are too many tests that may go before the position of c
5622     /* start scanning */
5623
5624     while (h && *h && (!san || newline_for_fun || (*h != '\n')) && !break_the_while_loop) {
5625         if (*h == escapesym) {
5626             h++;
5627             FLUSHIBP;
5628             // ###HACK: I think after escape expansion, the line should be reparsed
5629             // (this seems to be what troff does), but it would double-escape
5630             // HTML chars, e.g. the first escape produces "<span...", the second
5631             // would change that to &lt;span...
5632             // Therefore work around some man pages (e.g. nmap, smb.conf),
5633             // which have \." at beginning of
5634             // line (probably just typos), but troff would skip these
5635             if ((h[-2] == '\n') && (*h == '.')) // when line starts with \. ignore line
5636             {
5637                 while (*h && (*h != '\n'))
5638                     h++;
5639                 continue; // avoid h++ at the end
5640             } else {
5641                 h = scan_escape(h);
5642             }
5643         } else if (*h == controlsym && h[-1] == '\n') {
5644             h++;
5645             FLUSHIBP;
5646             h = scan_request(h);
5647             if (h && san && h[-1] == '\n')
5648                 h--;
5649         } else if (mandoc_line && ((*(h - 1)) && (isspace(*(h - 1)) || (*(h - 1)) == '\n')) && *(h) && isupper(*(h)) && *(h + 1) && islower(*(h + 1))
5650                    && *(h + 2) && isspace(*(h + 2))) {
5651             // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2"
5652             FLUSHIBP;
5653             h = scan_request(h);
5654             if (san && h[-1] == '\n') {
5655                 h--;
5656                 break;
5657             }
5658         } else if (*h == nobreaksym && h[-1] == '\n') {
5659             h++;
5660             FLUSHIBP;
5661             h = scan_request(h);
5662             if (san && h[-1] == '\n')
5663                 h--;
5664         } else {
5665             if (still_dd && isalnum(*h) && h[-1] == '\n') {
5666                 /* sometimes a .HP request is not followed by a .br request */
5667                 FLUSHIBP;
5668                 out_html("<DD>");
5669                 curpos = 0;
5670                 still_dd = false;
5671             }
5672             switch (*h) {
5673             case '&': {
5674                 intbuff += "&amp;";
5675                 curpos++;
5676                 break;
5677             }
5678             case '<': {
5679                 intbuff += "&lt;";
5680                 curpos++;
5681                 break;
5682             }
5683             case '>': {
5684                 intbuff += "&gt;";
5685                 curpos++;
5686                 break;
5687             }
5688             case '"': {
5689                 intbuff += "&quot;";
5690                 curpos++;
5691                 break;
5692             }
5693             case '\n': {
5694                 if (h != c && h[-1] == '\n' && fillout) {
5695                     intbuff += "<p>";
5696                 }
5697                 if (contained_tab && fillout) {
5698                     intbuff += "<br>";
5699                 }
5700                 contained_tab = 0;
5701                 curpos = 0;
5702                 usenbsp = 0;
5703                 intbuff += '\n';
5704                 FLUSHIBP;
5705                 break;
5706             }
5707             case '\t': {
5708                 int curtab = 0;
5709                 contained_tab = 1;
5710                 FLUSHIBP;
5711                 /* like a typewriter, not like TeX */
5712                 tabstops[19] = curpos + 1;
5713                 while (curtab < maxtstop && tabstops[curtab] <= curpos)
5714                     curtab++;
5715                 if (curtab < maxtstop) {
5716                     if (!fillout) {
5717                         while (curpos < tabstops[curtab]) {
5718                             intbuff += ' ';
5719                             if (intbuff.length() > MED_STR_MAX) {
5720                                 FLUSHIBP;
5721                             }
5722                             curpos++;
5723                         }
5724                     } else {
5725                         out_html("<TT>");
5726                         while (curpos < tabstops[curtab]) {
5727                             out_html("&nbsp;");
5728                             curpos++;
5729                         }
5730                         out_html("</TT>");
5731                     }
5732                 }
5733                 break;
5734             }
5735             default: {
5736                 if (*h == ' ' && (h[-1] == '\n' || usenbsp)) {
5737                     FLUSHIBP;
5738                     if (!usenbsp && fillout) {
5739                         out_html("<BR>");
5740                         curpos = 0;
5741                     }
5742                     usenbsp = fillout;
5743                     if (usenbsp)
5744                         out_html("&nbsp;");
5745                     else
5746                         intbuff += ' ';
5747                 } else if (*h > 31 && *h < 127)
5748                     intbuff += *h;
5749                 else if (((unsigned char)(*h)) > 127) {
5750                     intbuff += *h;
5751                 }
5752                 curpos++;
5753                 break;
5754             }
5755             }
5756             if (intbuff.length() > MED_STR_MAX)
5757                 FLUSHIBP;
5758             h++;
5759         }
5760     }
5761     FLUSHIBP;
5762     if (buffer)
5763         buffer[buffpos] = '\0';
5764     if (san && h && *h)
5765         h++;
5766     newline_for_fun = exnewline_for_fun;
5767     if (result) {
5768         *result = buffer;
5769         buffer = exbuffer;
5770         buffpos = exbuffpos;
5771         buffmax = exbuffmax;
5772         scaninbuff = exscaninbuff;
5773     }
5774
5775     return h;
5776 }
5777
5778 //---------------------------------------------------------------------
5779
5780 static char *scan_troff_mandoc(char *c, bool san, char **result)
5781 {
5782     char *ret;
5783     char *end = c;
5784     bool oldval = mandoc_line;
5785     mandoc_line = true;
5786     while (*end && *end != '\n') {
5787         end++;
5788     }
5789
5790     if (end > c + 2 && ispunct(*(end - 1)) && isspace(*(end - 2)) && *(end - 2) != '\n') {
5791         /* Don't format lonely punctuation E.g. in "xyz ," format
5792          * the xyz and then append the comma removing the space.
5793          */
5794         *(end - 2) = '\n';
5795         ret = scan_troff(c, san, result);
5796         *end = 0;
5797         out_html(end - 1); // output the punct char
5798         *end = '\n';
5799         ret = end;
5800     } else {
5801         ret = scan_troff(c, san, result);
5802     }
5803     mandoc_line = oldval;
5804     return ret;
5805 }
5806
5807 //---------------------------------------------------------------------
5808 // Entry point
5809
5810 void scan_man_page(const char *man_page)
5811 {
5812     if (!man_page)
5813         return;
5814
5815     qCDebug(KIO_MAN_LOG) << "Start scanning man page";
5816
5817     // ### Do more init
5818     // Unlike man2html, we actually call this several times, hence the need to
5819     // properly cleanup all those static vars
5820     s_ifelseval.clear();
5821
5822     s_characterDefinitionMap.clear();
5823     InitCharacterDefinitions();
5824
5825     s_stringDefinitionMap.clear();
5826     InitStringDefinitions();
5827
5828     s_numberDefinitionMap.clear();
5829     InitNumberDefinitions();
5830
5831     s_argumentList.clear();
5832     listItemStack.clear();
5833
5834     in_div = 0;
5835
5836     s_dollarZero = ""; // No macro called yet!
5837     mandoc_name = "";
5838
5839     output_possible = false;
5840     int strLength = qstrlen(man_page);
5841     char *buf = new char[strLength + 2];
5842     qstrcpy(buf + 1, man_page);
5843     buf[0] = '\n';
5844
5845     qCDebug(KIO_MAN_LOG) << "Parse man page";
5846
5847     scan_troff(buf + 1, 0, nullptr);
5848
5849     qCDebug(KIO_MAN_LOG) << "Man page parsed!";
5850
5851     while (itemdepth || dl_set[itemdepth]) {
5852         checkListStack();
5853         out_html("</DL>\n");
5854         if (dl_set[itemdepth])
5855             dl_set[itemdepth] = 0;
5856         else if (itemdepth > 0)
5857             itemdepth--;
5858     }
5859
5860     out_html(set_font("R"));
5861     out_html(change_to_size(0));
5862     if (!fillout) {
5863         fillout = 1;
5864         out_html("</PRE>");
5865     }
5866     out_html(NEWLINE);
5867
5868     if (in_div) {
5869         output_real("</div><div style=\"margin-left: 2cm\">\n");
5870         in_div = 0;
5871     }
5872
5873     if (output_possible) {
5874         // The output is buggy wrt to how divs are handled.  Fixing it would
5875         // require closing divs before other block-level elements are output,
5876         // and I do not feel like going to find them all.
5877         output_real("</div></div></div></div>\n");
5878
5879         output_real("<div id=\"footer\"><div id=\"footer_text\">\n");
5880 #ifdef SIMPLE_MAN2HTML
5881         output_real("Generated by kio_man");
5882 #else
5883         output_real("Generated by kio_man version ");
5884         output_real(QString(KDE_VERSION_STRING).toHtmlEscaped().toLocal8Bit());
5885 #endif
5886         output_real("</div></div>\n\n");
5887
5888         output_real("</BODY>\n</HTML>\n");
5889     }
5890     delete[] buf;
5891
5892     // Release memory
5893     s_characterDefinitionMap.clear();
5894     s_stringDefinitionMap.clear();
5895     s_numberDefinitionMap.clear();
5896     s_argumentList.clear();
5897
5898     // reinit static variables for reuse
5899     delete[] buffer;
5900     buffer = nullptr;
5901
5902     escapesym = '\\';
5903     nobreaksym = '\'';
5904     controlsym = '.';
5905     fieldsym = 0;
5906     padsym = 0;
5907
5908     buffpos = 0;
5909     buffmax = 0;
5910     scaninbuff = false;
5911     itemdepth = 0;
5912     for (int i = 0; i < 20; i++)
5913         dl_set[i] = 0;
5914     still_dd = false;
5915     for (int i = 0; i < 12; i++)
5916         tabstops[i] = (i + 1) * 8;
5917     maxtstop = 12;
5918     curpos = 0;
5919
5920     mandoc_name_count = 0;
5921 }
5922
5923 //---------------------------------------------------------------------
5924
5925 char *manPageToUtf8(const QByteArray &input, const QByteArray &dirName)
5926 {
5927     // as we do not know in which encoding the man source is, try to automatically
5928     // detect it and always return it as UTF-8
5929
5930     QByteArray encoding;
5931
5932     // some pages contain "coding:" information. See "man manconv"
5933     // (but I find pages which do not exactly obey the format described in manconv, e.g.
5934     // the control char is either "." or "'")
5935     // Therefore use a QRegularExpression
5936     const QRegularExpression regex("[\\.']\\\\\"[^$]*coding:\\s*(\\S*)\\s", QRegularExpression::CaseInsensitiveOption);
5937     QRegularExpressionMatch rmatch;
5938     if (QString::fromLatin1(input).indexOf(regex, 0, &rmatch) == 0) {
5939         encoding = rmatch.captured(1).toLatin1();
5940
5941         qCDebug(KIO_MAN_LOG) << "found embedded encoding" << encoding;
5942     } else {
5943         // check according to the dirName the man page is in
5944
5945         // if the dirName contains a ".", the encoding follows, e.g. "de.UTF-8"
5946         int dot = dirName.indexOf('.');
5947         if (dot != -1) {
5948             encoding = dirName.mid(dot + 1);
5949         } else {
5950             /* wanted to use KEncodingProber ... however it fails and gives very unreliable
5951                results ... telling me often UTF-8 encoded pages are EUC-JP or gb18030 ...
5952                In fact all man pages here on openSuse are encoded in UTF-8
5953
5954             KEncodingProber encodingProber;
5955             encodingProber.feed(input);
5956
5957             qCDebug(KIO_MAN_LOG) << "auto-detect encoding; guess=" << encodingProber.encoding()
5958                          << "confidence=" << encodingProber.confidence();
5959
5960             encoding = encodingProber.encoding();
5961             */
5962
5963             // the original bug report #141340
5964             // mentioned the env var MAN_ICONV_INPUT_CHARSET ... let's check if it is set
5965             // This seems not be a std. man-db env var, but I find several traces of it on the web
5966             encoding = qgetenv("MAN_ICONV_INPUT_CHARSET");
5967
5968             if (encoding.isEmpty())
5969                 encoding = "UTF-8";
5970         }
5971     }
5972
5973     QTextCodec *codec = 0;
5974
5975     if (!encoding.isEmpty())
5976         codec = QTextCodec::codecForName(encoding);
5977
5978     if (!codec) // fallback encoding
5979         codec = QTextCodec::codecForName("ISO-8859-1");
5980
5981     qCDebug(KIO_MAN_LOG) << "using the encoding" << codec->name() << "for file in dir" << dirName;
5982
5983     QString out = codec->toUnicode(input);
5984     QByteArray array = out.toUtf8();
5985
5986     // TODO get rid of this double allocation and scan a QByteArray
5987     const int len = array.size();
5988     char *buf = new char[len + 4];
5989     memmove(buf + 1, array.data(), len);
5990     buf[0] = buf[len + 1] = '\n'; // Start and end with an end of line
5991     buf[len + 2] = buf[len + 3] = '\0'; // Two NUL characters at end
5992
5993     return buf;
5994 }
5995
5996 //---------------------------------------------------------------------
5997
5998 #ifdef SIMPLE_MAN2HTML
/**
 * Output sink of the standalone (SIMPLE_MAN2HTML) build: writes the given
 * C string to standard output.
 *
 * @param insert NUL-terminated text to write
 */
void output_real(const char *insert)
{
    std::ostream &sink = std::cout;
    sink << insert;
}
6003
6004 char *read_man_page(const char *filename)
6005 {
6006     KCompressionDevice fd(QFile::decodeName(filename));
6007     if (!fd.open(QIODevice::ReadOnly)) {
6008         std::cerr << "read_man_page: can not open " << filename << std::endl;
6009         return nullptr;
6010     }
6011
6012     QDir dir(QFileInfo(QFile::decodeName(filename)).dir());
6013     dir.cdUp();
6014     char *data = manPageToUtf8(fd.readAll(), QFile::encodeName(dir.dirName()));
6015
6016     return data;
6017 }
6018
6019 //--------------------------------------------------------------------------------
6020
6021 #ifndef KIO_MAN_TEST
6022 int main(int argc, char **argv)
6023 {
6024     if (argc < 2) {
6025         std::cerr << "call: " << argv[0] << " <filename>\n";
6026         return 1;
6027     }
6028     if (chdir(argv[1])) {
6029         char *buf = read_man_page(argv[1]);
6030         if (buf) {
6031             scan_man_page(buf);
6032             delete[] buf;
6033         }
6034     } else {
6035         DIR *dir = opendir(".");
6036         struct dirent *ent;
6037         while ((ent = readdir(dir)) != nullptr) {
6038             std::cerr << "converting " << ent->d_name << std::endl;
6039             char *buf = read_man_page(ent->d_name);
6040             if (buf) {
6041                 scan_man_page(buf);
6042                 delete[] buf;
6043             }
6044         }
6045         closedir(dir);
6046     }
6047     return 0;
6048 }
6049 #endif
6050
6051 #endif
6052
6053 // kate: indent-mode cstyle; space-indent on; indent-width 2; replace-tabs on;