File indexing completed on 2024-04-28 04:57:51
0001 /* 0002 This file is part of the KDE libraries 0003 0004 SPDX-FileCopyrightText: 2000 Stephan Kulow <coolo@kde.org> 0005 SPDX-FileCopyrightText: 2005 Nicolas GOUTTE <goutte@kde.org> 0006 SPDX-FileCopyrightText: 2011 Martin Koller <kollix@aon.at> 0007 0008 ... and others (see SVN history) 0009 */ 0010 0011 // Start of verbatim comment 0012 0013 /* 0014 ** This program was written by Richard Verhoeven (NL:5482ZX35) 0015 ** at the Eindhoven University of Technology. Email: rcb5@win.tue.nl 0016 ** 0017 ** Permission is granted to distribute, modify and use this program as long 0018 ** as this comment is not removed or changed. 0019 */ 0020 0021 // End of verbatim comment 0022 0023 /* 0024 * man2html-linux-1.0/1.1 0025 * This version modified for Redhat/Caldera linux - March 1996. 0026 * Michael Hamilton <michael@actrix.gen.nz>. 0027 * 0028 * man2html-linux-1.2 0029 * Added support for BSD mandoc pages - I didn't have any documentation 0030 * on the mandoc macros, so I may have missed some. 0031 * Michael Hamilton <michael@actrix.gen.nz>. 0032 * 0033 * vh-man2html-1.3 0034 * Renamed to avoid confusion (V for Verhoeven, H for Hamilton). 0035 * 0036 * vh-man2html-1.4 0037 * Now uses /etc/man.config 0038 * Added support for compressed pages. 0039 * Added "length-safe" string operations for client input parameters. 0040 * More secure, -M secured, and client input string lengths checked. 0041 * 0042 */ 0043 0044 /* 0045 ** If you want to use this program for your WWW server, adjust the line 0046 ** which defines the CGIBASE or compile it with the -DCGIBASE='"..."' option. 0047 ** 0048 ** You have to adjust the built-in manpath to your local system. Note that 0049 ** every directory should start and end with the '/' and that the first 0050 ** directory should be "/" to allow a full path as an argument. 0051 ** 0052 ** The program first check if PATH_INFO contains some information. 0053 ** If it does (t.i. man2html/some/thing is used), the program will look 0054 ** for a manpage called PATH_INFO in the manpath. 0055 ** 0056 ** Otherwise the manpath is searched for the specified command line argument, 0057 ** where the following options can be used: 0058 ** 0059 ** name name of manpage (csh, printf, xv, troff) 0060 ** section the section (1 2 3 4 5 6 7 8 9 n l 1v ...) 0061 ** -M path an extra directory to look for manpages (replaces "/") 0062 ** 0063 ** If man2html finds multiple manpages that satisfy the options, an index 0064 ** is displayed and the user can make a choice. If only one page is 0065 ** found, that page will be displayed. 0066 ** 0067 ** man2html will add links to the converted manpages. The function add_links 0068 ** is used for that. At the moment it will add links as follows, where 0069 ** indicates what should match to start with: 0070 ** ^^^ 0071 ** Recognition Item Link 0072 ** ---------------------------------------------------------- 0073 ** name(*) Manpage ../man?/name.* 0074 ** ^ 0075 ** name@hostname Email address mailto:name@hostname 0076 ** ^ 0077 ** method://string URL method://string 0078 ** ^^^ 0079 ** www.host.name WWW server http://www.host.name 0080 ** ^^^^ 0081 ** ftp.host.name FTP server ftp://ftp.host.name 0082 ** ^^^^ 0083 ** <file.h> Include file file:/usr/include/file.h 0084 ** ^^^ 0085 ** 0086 ** Since man2html does not check if manpages, hosts or email addresses exist, 0087 ** some links might not work. For manpages, some extra checks are performed 0088 ** to make sure not every () pair creates a link. Also out of date pages 0089 ** might point to incorrect places. 0090 ** 0091 ** The program will not allow users to get system specific files, such as 0092 ** /etc/passwd. It will check that "man" is part of the specified file and 0093 ** that "/../" isn't. Even if someone manages to get such file, man2html will 0094 ** handle it like a manpage and will usually not produce any output (or crash). 0095 ** 0096 ** If you find any bugs when normal manpages are converted, please report 0097 ** them to me (rcb5@win.tue.nl) after you have checked that man(1) can handle 0098 ** the manpage correct. 0099 ** 0100 ** Known bugs and missing features: 0101 ** 0102 ** * Equations are not converted at all. 0103 ** * Tables are converted but some features are not possible in html. 0104 ** * The tabbing environment is converted by counting characters and adding 0105 ** spaces. This might go wrong (outside <PRE>) 0106 ** * Some manpages rely on the fact that troff/nroff is used to convert 0107 ** them and use features which are not described in the man manpages. 0108 ** (definitions, calculations, conditionals, requests). I can't guarantee 0109 ** that all these features work on all manpages. (I didn't have the 0110 ** time to look through all the available manpages.) 0111 */ 0112 0113 #include "man2html.h" 0114 #include "kio_man_debug.h" 0115 #include "request_hash.h" 0116 0117 #include <config-runtime.h> 0118 0119 #include <ctype.h> 0120 0121 #include <string.h> 0122 #include <unistd.h> 0123 0124 #include <stdio.h> 0125 0126 #include <QByteArray> 0127 #include <QDateTime> 0128 #include <QDebug> 0129 #include <QMap> 0130 #include <QRegularExpression> 0131 #include <QStack> 0132 #include <QString> 0133 #include <QTextCodec> 0134 0135 #ifdef SIMPLE_MAN2HTML 0136 #include <KCompressionDevice> 0137 #include <QDir> 0138 #include <QFile> 0139 #include <QFileInfo> 0140 #include <dirent.h> 0141 #include <iostream> 0142 #include <stdlib.h> 0143 #include <sys/stat.h> 0144 #define BYTEARRAY(x) x.constData() 0145 #else 0146 #include <KLocalizedString> 0147 #define BYTEARRAY(x) x 0148 #endif 0149 0150 #define NULL_TERMINATED(n) ((n) + 1) 0151 0152 #define HUGE_STR_MAX 10000 0153 #define LARGE_STR_MAX 2000 0154 #define MED_STR_MAX 500 0155 0156 #define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" 0157 0158 /* mdoc(7) Bl/El lists to HTML list types */ 0159 #define BL_DESC_LIST 1 0160 #define BL_BULLET_LIST 2 0161 #define BL_ENUM_LIST 4 0162 0163 /* mdoc(7) Bd/Ed example(?) blocks */ 0164 #define BD_LITERAL 1 0165 #define BD_INDENT 2 0166 0167 static int s_nroff = 1; // NROFF mode by default 0168 0169 static QByteArray mandoc_name; // Nm can store the first used name 0170 0171 static int mandoc_name_count = 0; /* Don't break on the first Nm */ 0172 0173 /* below this you should not change anything unless you know a lot 0174 ** about this program or about troff. 0175 */ 0176 0177 /// Structure for character definitions 0178 struct CSTRDEF { 0179 int nr, slen; 0180 const char *st; 0181 }; 0182 0183 const char NEWLINE[2] = "\n"; 0184 0185 /** 0186 * Class for defining strings and macros 0187 */ 0188 class StringDefinition 0189 { 0190 public: 0191 StringDefinition(void) 0192 : m_length(0) 0193 { 0194 } 0195 StringDefinition(int len, const char *cstr) 0196 : m_length(len) 0197 , m_output(cstr) 0198 { 0199 } 0200 0201 public: 0202 int m_length; ///< Length of output text 0203 QByteArray m_output; ///< Defined string 0204 }; 0205 0206 /** 0207 * Class for defining number registers 0208 * \note Not for internal read-only registers 0209 */ 0210 class NumberDefinition 0211 { 0212 public: 0213 NumberDefinition(void) 0214 : m_value(0) 0215 , m_increment(0) 0216 { 0217 } 0218 NumberDefinition(int value) 0219 : m_value(value) 0220 , m_increment(0) 0221 { 0222 } 0223 NumberDefinition(int value, int incr) 0224 : m_value(value) 0225 , m_increment(incr) 0226 { 0227 } 0228 0229 public: 0230 int m_value; ///< value of number register 0231 int m_increment; ///< Increment of number register 0232 // ### TODO: display form (.af) 0233 }; 0234 0235 /** 0236 * Map of character definitions 0237 */ 0238 static QMap<QByteArray, StringDefinition> s_characterDefinitionMap; 0239 0240 /** 0241 * Map of string variable and macro definitions 0242 * \note String variables and macros are the same thing! 0243 */ 0244 static QMap<QByteArray, StringDefinition> s_stringDefinitionMap; 0245 0246 /** 0247 * Map of number registers 0248 * \note Intern number registers (starting with a dot are not handled here) 0249 */ 0250 static QMap<QByteArray, NumberDefinition> s_numberDefinitionMap; 0251 0252 static void fill_old_character_definitions(void); 0253 0254 /** 0255 * Initialize character variables 0256 */ 0257 static void InitCharacterDefinitions(void) 0258 { 0259 fill_old_character_definitions(); 0260 // ### HACK: as we are converting to HTML too early, define characters with HTML references 0261 s_characterDefinitionMap.insert("<-", StringDefinition(1, "←")); // <- 0262 s_characterDefinitionMap.insert("->", StringDefinition(1, "→")); // -> 0263 s_characterDefinitionMap.insert("<>", StringDefinition(1, "↔")); // <> 0264 s_characterDefinitionMap.insert("<=", StringDefinition(1, "≤")); // <= 0265 s_characterDefinitionMap.insert(">=", StringDefinition(1, "≥")); // >= 0266 // End HACK 0267 } 0268 0269 /** 0270 * Initialize string variables 0271 */ 0272 static void InitStringDefinitions(void) 0273 { 0274 // mdoc-only, see mdoc.samples(7) 0275 s_stringDefinitionMap.insert("<=", StringDefinition(1, "≤")); 0276 s_stringDefinitionMap.insert(">=", StringDefinition(1, "≥")); 0277 s_stringDefinitionMap.insert("Rq", StringDefinition(1, "”")); 0278 s_stringDefinitionMap.insert("Lq", StringDefinition(1, "“")); 0279 s_stringDefinitionMap.insert("ua", StringDefinition(1, "&circ")); // Note this is different from \(ua 0280 s_stringDefinitionMap.insert("aa", StringDefinition(1, "´")); 0281 s_stringDefinitionMap.insert("ga", StringDefinition(1, "`")); 0282 s_stringDefinitionMap.insert("q", StringDefinition(1, """)); 0283 s_stringDefinitionMap.insert("Pi", StringDefinition(1, "π")); 0284 s_stringDefinitionMap.insert("Ne", StringDefinition(1, "≠")); 0285 s_stringDefinitionMap.insert("Le", StringDefinition(1, "≤")); 0286 s_stringDefinitionMap.insert("Ge", StringDefinition(1, "≥")); 0287 s_stringDefinitionMap.insert("Lt", StringDefinition(1, "<")); 0288 s_stringDefinitionMap.insert("Gt", StringDefinition(1, ">")); 0289 s_stringDefinitionMap.insert("Pm", StringDefinition(1, "±")); 0290 s_stringDefinitionMap.insert("If", StringDefinition(1, "∞")); 0291 s_stringDefinitionMap.insert("Na", StringDefinition(3, "NaN")); 0292 s_stringDefinitionMap.insert("Ba", StringDefinition(1, "|")); 0293 // end mdoc-only 0294 // man(7) 0295 s_stringDefinitionMap.insert("Tm", StringDefinition(1, "™")); // \*(TM 0296 s_stringDefinitionMap.insert("R", StringDefinition(1, "®")); // \*R 0297 s_stringDefinitionMap.insert("lq", StringDefinition(1, "“")); // Left angled double quote 0298 s_stringDefinitionMap.insert("rq", StringDefinition(1, "”")); // Right angled double quote 0299 // end man(7) 0300 // Missing characters from man(7): 0301 // \*S "Change to default font size" 0302 #ifndef SIMPLE_MAN2HTML 0303 // Special KDE KIO man: 0304 const QByteArray kdeversion(KDE_VERSION_STRING); 0305 s_stringDefinitionMap.insert(".KDE_VERSION_STRING", StringDefinition(kdeversion.length(), kdeversion)); 0306 #endif 0307 } 0308 0309 /** 0310 * Initialize number registers 0311 * \note Internal read-only registers are not handled here 0312 */ 0313 static void InitNumberDefinitions(void) 0314 { 0315 // As the date number registers are more for end-users, better choose local time. 0316 // Groff seems to support Gregorian dates only 0317 QDate today(QDate::currentDate()); 0318 s_numberDefinitionMap.insert("year", today.year()); // Y2K-correct year 0319 s_numberDefinitionMap.insert("yr", today.year() - 1900); // Y2K-incorrect year 0320 s_numberDefinitionMap.insert("mo", today.month()); 0321 s_numberDefinitionMap.insert("dy", today.day()); 0322 s_numberDefinitionMap.insert("dw", today.dayOfWeek()); 0323 } 0324 0325 #define V(A, B) ((A)*256 + (B)) 0326 0327 // used in expand_char, e.g. for "\(bu" 0328 // see groff_char(7) for list 0329 static const CSTRDEF standardchars[] = { 0330 {V('*', '*'), 1, "*"}, 0331 {V('*', 'A'), 1, "Α"}, 0332 {V('*', 'B'), 1, "Β"}, 0333 {V('*', 'C'), 1, "Ξ"}, 0334 {V('*', 'D'), 1, "Δ"}, 0335 {V('*', 'E'), 1, "Ε"}, 0336 {V('*', 'F'), 1, "Φ"}, 0337 {V('*', 'G'), 1, "Γ"}, 0338 {V('*', 'H'), 1, "Θ"}, 0339 {V('*', 'I'), 1, "Ι"}, 0340 {V('*', 'K'), 1, "Κ"}, 0341 {V('*', 'L'), 1, "Λ"}, 0342 {V('*', 'M'), 1, "&Mu:"}, 0343 {V('*', 'N'), 1, "Ν"}, 0344 {V('*', 'O'), 1, "Ο"}, 0345 {V('*', 'P'), 1, "Π"}, 0346 {V('*', 'Q'), 1, "Ψ"}, 0347 {V('*', 'R'), 1, "Ρ"}, 0348 {V('*', 'S'), 1, "Σ"}, 0349 {V('*', 'T'), 1, "Τ"}, 0350 {V('*', 'U'), 1, "Υ"}, 0351 {V('*', 'W'), 1, "Ω"}, 0352 {V('*', 'X'), 1, "Χ"}, 0353 {V('*', 'Y'), 1, "Η"}, 0354 {V('*', 'Z'), 1, "Ζ"}, 0355 {V('*', 'a'), 1, "α"}, 0356 {V('*', 'b'), 1, "β"}, 0357 {V('*', 'c'), 1, "ξ"}, 0358 {V('*', 'd'), 1, "δ"}, 0359 {V('*', 'e'), 1, "ε"}, 0360 {V('*', 'f'), 1, "φ"}, 0361 {V('*', 'g'), 1, "γ"}, 0362 {V('*', 'h'), 1, "θ"}, 0363 {V('*', 'i'), 1, "ι"}, 0364 {V('*', 'k'), 1, "κ"}, 0365 {V('*', 'l'), 1, "λ"}, 0366 {V('*', 'm'), 1, "μ"}, 0367 {V('*', 'n'), 1, "ν"}, 0368 {V('*', 'o'), 1, "ο"}, 0369 {V('*', 'p'), 1, "π"}, 0370 {V('*', 'q'), 1, "ψ"}, 0371 {V('*', 'r'), 1, "ρ"}, 0372 {V('*', 's'), 1, "σ"}, 0373 {V('*', 't'), 1, "τ"}, 0374 {V('*', 'u'), 1, "υ"}, 0375 {V('*', 'w'), 1, "ω"}, 0376 {V('*', 'x'), 1, "χ"}, 0377 {V('*', 'y'), 1, "η"}, 0378 {V('*', 'z'), 1, "ζ"}, 0379 {V('+', '-'), 1, "±"}, // not in groff_char(7) 0380 {V('+', 'f'), 1, "φ"}, // phi1, we use the standard phi 0381 {V('+', 'h'), 1, "θ"}, // theta1, we use the standard theta 0382 {V('+', 'p'), 1, "ω"}, // omega1, we use the standard omega 0383 {V('1', '2'), 1, "½"}, 0384 {V('1', '4'), 1, "¼"}, 0385 {V('3', '4'), 1, "¾"}, 0386 {V('F', 'i'), 1, "ffi"}, // ffi ligature 0387 {V('F', 'l'), 1, "ffl"}, // ffl ligature 0388 {V('a', 'p'), 1, "~"}, 0389 {V('b', 'r'), 1, "|"}, 0390 {V('b', 'u'), 1, "•"}, 0391 {V('b', 'v'), 1, "|"}, 0392 {V('c', 'i'), 1, "○"}, // circle 0393 {V('c', 'o'), 1, "©"}, 0394 {V('c', 't'), 1, "¢"}, 0395 {V('d', 'e'), 1, "°"}, 0396 {V('d', 'g'), 1, "†"}, 0397 {V('d', 'i'), 1, "÷"}, 0398 {V('e', 'm'), 1, "—"}, 0399 {V('e', 'n'), 1, "–"}, 0400 {V('e', 'q'), 1, "="}, 0401 {V('e', 's'), 1, "∅"}, 0402 {V('f', 'f'), 1, "�xFB00;"}, // ff ligature 0403 {V('f', 'i'), 1, "�xFB01;"}, // fi ligature 0404 {V('f', 'l'), 1, "�xFB02;"}, // fl ligature 0405 {V('f', 'm'), 1, "′"}, 0406 {V('g', 'a'), 1, "`"}, 0407 {V('h', 'y'), 1, "-"}, 0408 {V('l', 'c'), 2, "|¯"}, // ### TODO: not in groff_char(7) 0409 {V('l', 'f'), 2, "|_"}, // ### TODO: not in groff_char(7) 0410 {V('l', 'k'), 1, "<FONT SIZE=+2>{</FONT>"}, // ### TODO: not in groff_char(7) 0411 {V('m', 'i'), 1, "-"}, // ### TODO: not in groff_char(7) 0412 {V('m', 'u'), 1, "×"}, 0413 {V('n', 'o'), 1, "¬"}, 0414 {V('o', 'r'), 1, "|"}, 0415 {V('p', 'l'), 1, "+"}, 0416 {V('r', 'c'), 2, "¯|"}, // ### TODO: not in groff_char(7) 0417 {V('r', 'f'), 2, "_|"}, // ### TODO: not in groff_char(7) 0418 {V('r', 'g'), 1, "®"}, 0419 {V('r', 'k'), 1, "<FONT SIZE=+2>}</FONT>"}, // ### TODO: not in groff_char(7) 0420 {V('r', 'n'), 1, "‾"}, 0421 {V('r', 'u'), 1, "_"}, 0422 {V('s', 'c'), 1, "§"}, 0423 {V('s', 'l'), 1, "/"}, 0424 {V('s', 'q'), 2, "□"}, // WHITE SQUARE 0425 {V('t', 's'), 1, "ς"}, // FINAL SIGMA 0426 {V('u', 'l'), 1, "_"}, 0427 {V('-', 'D'), 1, "Ð"}, 0428 {V('S', 'd'), 1, "ð"}, 0429 {V('T', 'P'), 1, "Þ"}, 0430 {V('T', 'p'), 1, "þ"}, 0431 {V('A', 'E'), 1, "Æ"}, 0432 {V('a', 'e'), 1, "æ"}, 0433 {V('O', 'E'), 1, "Œ"}, 0434 {V('o', 'e'), 1, "œ"}, 0435 {V('s', 's'), 1, "ß"}, 0436 {V('\'', 'A'), 1, "Á"}, 0437 {V('\'', 'E'), 1, "É"}, 0438 {V('\'', 'I'), 1, "Í"}, 0439 {V('\'', 'O'), 1, "Ó"}, 0440 {V('\'', 'U'), 1, "Ú"}, 0441 {V('\'', 'Y'), 1, "Ý"}, 0442 {V('\'', 'a'), 1, "á"}, 0443 {V('\'', 'e'), 1, "é"}, 0444 {V('\'', 'i'), 1, "í"}, 0445 {V('\'', 'o'), 1, "ó"}, 0446 {V('\'', 'u'), 1, "ú"}, 0447 {V('\'', 'y'), 1, "ý"}, 0448 {V(':', 'A'), 1, "Ä"}, 0449 {V(':', 'E'), 1, "Ë"}, 0450 {V(':', 'I'), 1, "Ï"}, 0451 {V(':', 'O'), 1, "Ö"}, 0452 {V(':', 'U'), 1, "Ü"}, 0453 {V(':', 'a'), 1, "ä"}, 0454 {V(':', 'e'), 1, "ë"}, 0455 {V(':', 'i'), 1, "ï"}, 0456 {V(':', 'o'), 1, "ö"}, 0457 {V(':', 'u'), 1, "ü"}, 0458 {V(':', 'y'), 1, "ÿ"}, 0459 {V('^', 'A'), 1, "Â"}, 0460 {V('^', 'E'), 1, "Ê"}, 0461 {V('^', 'I'), 1, "Î"}, 0462 {V('^', 'O'), 1, "Ô"}, 0463 {V('^', 'U'), 1, "Û"}, 0464 {V('^', 'a'), 1, "â"}, 0465 {V('^', 'e'), 1, "ê"}, 0466 {V('^', 'i'), 1, "î"}, 0467 {V('^', 'o'), 1, "ô"}, 0468 {V('^', 'u'), 1, "û"}, 0469 {V('`', 'A'), 1, "À"}, 0470 {V('`', 'E'), 1, "È"}, 0471 {V('`', 'I'), 1, "Ì"}, 0472 {V('`', 'O'), 1, "Ò"}, 0473 {V('`', 'U'), 1, "Ù"}, 0474 {V('`', 'a'), 1, "à"}, 0475 {V('`', 'e'), 1, "è"}, 0476 {V('`', 'i'), 1, "ì"}, 0477 {V('`', 'o'), 1, "ò"}, 0478 {V('`', 'u'), 1, "ù"}, 0479 {V('~', 'A'), 1, "Ã"}, 0480 {V('~', 'N'), 1, "Ñ"}, 0481 {V('~', 'O'), 1, "Õ"}, 0482 {V('~', 'a'), 1, "ã"}, 0483 {V('~', 'n'), 1, "ñ"}, 0484 {V('~', 'o'), 1, "õ"}, 0485 {V(',', 'C'), 1, "Ç"}, 0486 {V(',', 'c'), 1, "ç"}, 0487 {V('/', 'L'), 1, "Ł"}, 0488 {V('/', 'l'), 1, "ł"}, 0489 {V('/', 'O'), 1, "Ø"}, 0490 {V('/', 'o'), 1, "ø"}, 0491 {V('o', 'A'), 1, "Å"}, 0492 {V('o', 'a'), 1, "å"}, 0493 {V('a', '"'), 1, "\""}, 0494 {V('a', '-'), 1, "¯"}, 0495 {V('a', '.'), 1, "."}, 0496 {V('a', '^'), 1, "ˆ"}, 0497 {V('a', 'a'), 1, "´"}, 0498 {V('a', 'b'), 1, "`"}, 0499 {V('a', 'c'), 1, "¸"}, 0500 {V('a', 'd'), 1, "¨"}, 0501 {V('a', 'h'), 1, "˂"}, // caron 0502 {V('a', 'o'), 1, "˚"}, // ring 0503 {V('a', '~'), 1, "˜"}, 0504 {V('h', 'o'), 1, "˛"}, // ogonek 0505 {V('.', 'i'), 1, "ı"}, // dot less i 0506 {V('C', 's'), 1, "¤"}, // krazy:exclude=spelling 0507 {V('D', 'o'), 1, "$"}, 0508 {V('P', 'o'), 1, "£"}, 0509 {V('Y', 'e'), 1, "¥"}, 0510 {V('F', 'n'), 1, "ƒ"}, 0511 {V('F', 'o'), 1, "«"}, 0512 {V('F', 'c'), 1, "»"}, 0513 {V('f', 'o'), 1, "‹"}, // single left guillemet 0514 {V('f', 'c'), 1, "›"}, // single right guillemet 0515 {V('r', '!'), 1, "&iecl;"}, 0516 {V('r', '?'), 1, "¿"}, 0517 {V('O', 'f'), 1, "ª"}, 0518 {V('O', 'm'), 1, "º"}, 0519 {V('p', 'c'), 1, "·"}, 0520 {V('S', '1'), 1, "¹"}, 0521 {V('S', '2'), 1, "²"}, 0522 {V('S', '3'), 1, "³"}, 0523 {V('<', '-'), 1, "←"}, 0524 {V('-', '>'), 1, "→"}, 0525 {V('<', '>'), 1, "↔"}, 0526 {V('d', 'a'), 1, "↓"}, 0527 {V('u', 'a'), 1, "↑"}, 0528 {V('l', 'A'), 1, "⇐"}, 0529 {V('r', 'A'), 1, "⇒"}, 0530 {V('h', 'A'), 1, "⇔"}, 0531 {V('d', 'A'), 1, "⇓"}, 0532 {V('u', 'A'), 1, "⇑"}, 0533 {V('b', 'a'), 1, "|"}, 0534 {V('b', 'b'), 1, "¦"}, 0535 {V('t', 'm'), 1, "™"}, 0536 {V('d', 'd'), 1, "‡"}, 0537 {V('p', 's'), 1, "¶"}, 0538 {V('%', '0'), 1, "‰"}, 0539 {V('f', '/'), 1, "⁄"}, // Fraction slash 0540 {V('s', 'd'), 1, "″"}, 0541 {V('h', 'a'), 1, "^"}, 0542 {V('t', 'i'), 1, "˜"}, 0543 {V('l', 'B'), 1, "["}, 0544 {V('r', 'B'), 1, "]"}, 0545 {V('l', 'C'), 1, "{"}, 0546 {V('r', 'C'), 1, "}"}, 0547 {V('l', 'a'), 1, "<"}, 0548 {V('r', 'a'), 1, ">"}, 0549 {V('l', 'h'), 1, "≤"}, 0550 {V('r', 'h'), 1, "≥"}, 0551 {V('B', 'q'), 1, "„"}, 0552 {V('b', 'q'), 1, "‚"}, 0553 {V('l', 'q'), 1, "“"}, 0554 {V('r', 'q'), 1, "”"}, 0555 {V('o', 'q'), 1, "‘"}, 0556 {V('c', 'q'), 1, "’"}, 0557 {V('a', 'q'), 1, "'"}, 0558 {V('d', 'q'), 1, "\""}, 0559 {V('a', 't'), 1, "@"}, 0560 {V('s', 'h'), 1, "#"}, 0561 {V('r', 's'), 1, "\\"}, 0562 {V('t', 'f'), 1, "∴"}, 0563 {V('~', '~'), 1, "≅"}, 0564 {V('~', '='), 1, "≈"}, 0565 {V('!', '='), 1, "≠"}, 0566 {V('<', '='), 1, "≤"}, 0567 {V('=', '='), 1, "≡"}, 0568 {V('=', '~'), 1, "≅"}, // ### TODO: verify 0569 {V('>', '='), 1, "≥"}, 0570 {V('A', 'N'), 1, "∧"}, 0571 {V('O', 'R'), 1, "∨"}, 0572 {V('t', 'e'), 1, "∃"}, 0573 {V('f', 'a'), 1, "∀"}, 0574 {V('A', 'h'), 1, "ℵ"}, 0575 {V('I', 'm'), 1, "ℑ"}, 0576 {V('R', 'e'), 1, "ℜ"}, 0577 {V('i', 'f'), 1, "∞"}, 0578 {V('m', 'd'), 1, "⋅"}, 0579 {V('m', 'o'), 1, "∆"}, // element ### TODO verify 0580 {V('n', 'm'), 1, "∉"}, 0581 {V('p', 't'), 1, "∝"}, 0582 {V('p', 'p'), 1, "⊥"}, 0583 {V('s', 'b'), 1, "⊂"}, 0584 {V('s', 'p'), 1, "⊃"}, 0585 {V('i', 'b'), 1, "⊆"}, 0586 {V('i', 'p'), 1, "⊇"}, 0587 {V('i', 's'), 1, "∫"}, 0588 {V('s', 'r'), 1, "√"}, 0589 {V('p', 'd'), 1, "∂"}, 0590 {V('c', '*'), 1, "⊗"}, 0591 {V('c', '+'), 1, "⊕"}, 0592 {V('c', 'a'), 1, "∩"}, 0593 {V('c', 'u'), 1, "∪"}, 0594 {V('g', 'r'), 1, "V"}, // gradient ### TODO Where in Unicode? 0595 {V('C', 'R'), 1, "↵"}, 0596 {V('s', 't'), 2, "-)"}, // "such that" ### TODO Where in Unicode? 0597 {V('/', '_'), 1, "∠"}, 0598 {V('w', 'p'), 1, "℘"}, 0599 {V('l', 'z'), 1, "◊"}, 0600 {V('a', 'n'), 1, "-"}, // "horizontal arrow extension" ### TODO Where in Unicode? 0601 }; 0602 0603 // long form for abbreviated standard names (.St macro) 0604 struct StandardName { 0605 const char *abbrev; 0606 const char *formalName; 0607 }; 0608 0609 static const StandardName STANDARD_NAMES[] = {{"-ansiC", "ANSI X3.159-1989 ('ANSI C89')"}, 0610 {"-ansiC-89", "ANSI X3.159-1989 ('ANSI C89')"}, 0611 {"-isoC", "ISO/IEC 9899:1990 ('ISO C90')"}, 0612 {"-isoC-90", "ISO/IEC 9899:1990 ('ISO C90')"}, 0613 {"-isoC-99", "ISO/IEC 9899:1999 ('ISO C99')"}, 0614 {"-isoC-2011", "ISO/IEC 9899:2011 ('ISO C11')"}, 0615 {"-iso9945-1-90", "ISO/IEC 9945-1:1990 ('POSIX.1')"}, 0616 {"-iso9945-1-96", "ISO/IEC 9945-1:1996 ('POSIX.1')"}, 0617 {"-p1003.1", "IEEE Std 1003.1 ('POSIX.1')"}, 0618 {"-p1003.1-88", "IEEE Std 1003.1-1988 ('POSIX.1')"}, 0619 {"-p1003.1-90", "ISO/IEC 9945-1:1990 ('POSIX.1')"}, 0620 {"-p1003.1-96", "ISO/IEC 9945-1:1996 ('POSIX.1')"}, 0621 {"-p1003.1b-93", "IEEE Std 1003.1b-1993 ('POSIX.1')"}, 0622 {"-p1003.1c-95", "IEEE Std 1003.1c-1995 ('POSIX.1')"}, 0623 {"-p1003.1g-2000", "IEEE Std 1003.1g-2000 ('POSIX.1')"}, 0624 {"-p1003.1i-95", "IEEE Std 1003.1i-1995 ('POSIX.1')"}, 0625 {"-p1003.1-2001", "IEEE Std 1003.1-2001 ('POSIX.1')"}, 0626 {"-p1003.1-2004", "IEEE Std 1003.1-2004 ('POSIX.1')"}, 0627 {"-p1003.1-2008", "IEEE Std 1003.1-2008 ('POSIX.1')"}, 0628 {"-iso9945-2-93", "ISO/IEC 9945-2:1993 ('POSIX.2')"}, 0629 {"-p1003.2", "IEEE Std 1003.2 ('POSIX.2')"}, 0630 {"-p1003.2-92", "IEEE Std 1003.2-1992 ('POSIX.2')"}, 0631 {"-p1003.2a-92", "IEEE Std 1003.2a-1992 ('POSIX.2')"}, 0632 {"-susv2", "Version 2 of the Single UNIX Specification ('SUSv2')"}, 0633 {"-susv3", "Version 3 of the Single UNIX Specification ('SUSv3')"}, 0634 {"-svid4", "System V Interface Definition, Fourth Edition ('SVID4')"}, 0635 {"-xbd5", "X/Open Base Definitions Issue 5 ('XBD5')"}, 0636 {"-xcu5", "X/Open Commands and Utilities Issue 5 ('XCU5')"}, 0637 {"-xcurses4.2", "X/Open Curses Issue 4, Version 2 ('XCURSES4.2')"}, 0638 {"-xns5", "X/Open Networking Services Issue 5 ('XNS5')"}, 0639 {"-xns5.2", "X/Open Networking Services Issue 5.2 ('XNS5.2')"}, 0640 {"-xpg3", "X/Open Portability Guide Issue 3 ('XPG3')"}, 0641 {"-xpg4", "X/Open Portability Guide Issue 4 ('XPG4')"}, 0642 {"-xpg4.2", "X/Open Portability Guide Issue 4, Version 2 ('XPG4.2')"}, 0643 {"-xsh5", "X/Open System Interfaces and Headers Issue 5 ('XSH5')"}, 0644 {"-ieee754", "IEEE Std 754-1985"}, 0645 {"-iso8802-3", "ISO/IEC 8802-3:1989"}}; 0646 0647 /* default: print code */ 0648 0649 /* static char eqndelimopen=0, eqndelimclose=0; */ 0650 static char escapesym = '\\', nobreaksym = '\'', controlsym = '.', fieldsym = 0, padsym = 0; 0651 0652 static char *buffer = nullptr; 0653 static int buffpos = 0, buffmax = 0; 0654 static bool scaninbuff = false; 0655 static int itemdepth = 0; 0656 static int in_div = 0; 0657 static int dl_set[20] = {0}; 0658 static QStack<QByteArray> listItemStack; 0659 static bool still_dd = 0; 0660 static int tabstops[20] = {8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96}; 0661 static int maxtstop = 12; 0662 static int curpos = 0; 0663 static bool break_the_while_loop = false; 0664 0665 static char *scan_troff(char *c, bool san, char **result); 0666 static char *scan_troff_mandoc(char *c, bool san, char **result); 0667 static int getNumberRegisterValue(const QByteArray &name, int sign = 0); 0668 0669 static QList<QByteArray> s_argumentList; 0670 0671 static QByteArray cssFile; 0672 0673 static QByteArray s_dollarZero; // Value of $0 0674 0675 void setCssFile(const QByteArray &_cssFile) 0676 { 0677 cssFile = _cssFile; 0678 } 0679 0680 static void fill_old_character_definitions(void) 0681 { 0682 for (const CSTRDEF &standardchar : standardchars) { 0683 const int nr = standardchar.nr; 0684 const char temp[3] = {char(nr / 256), char(nr % 256), 0}; 0685 QByteArray name(temp); 0686 s_characterDefinitionMap.insert(name, StringDefinition(standardchar.slen, standardchar.st)); 0687 } 0688 } 0689 0690 static char outbuffer[NULL_TERMINATED(HUGE_STR_MAX)]; 0691 static int no_newline_output = 0; 0692 static int newline_for_fun = 0; 0693 static bool output_possible = false; 0694 0695 static const char *const includedirs[] = {"/usr/include", 0696 "/usr/include/sys", 0697 "/usr/local/include", 0698 "/opt/local/include", 0699 "/usr/ccs", 0700 "/usr/X11R6/include", 0701 "/usr/openwin/include", 0702 "/usr/include/g++", 0703 nullptr}; 0704 0705 static bool ignore_links = false; 0706 0707 static void add_links(char *c) 0708 { 0709 /* 0710 ** Add the links to the output. 0711 ** At the moment the following are recognized: 0712 ** 0713 ** name(*) -> ../man?/name.* 0714 ** method://string -> method://string 0715 ** www.host.name -> http://www.host.name 0716 ** ftp.host.name -> ftp://ftp.host.name 0717 ** name@host -> mailto:name@host 0718 ** <name.h> -> file:/usr/include/name.h (guess) 0719 ** 0720 ** Other possible links to add in the future: 0721 ** 0722 ** /dir/dir/file -> file:/dir/dir/file 0723 */ 0724 0725 if (ignore_links) { 0726 output_real(c); 0727 return; 0728 } 0729 0730 int i, j, nr; 0731 char *f, *g, *h; 0732 const int numtests = 6; // Nmber of tests 0733 char *idtest[numtests]; // url, mailto, www, ftp, manpage, C header file 0734 bool ok; 0735 /* search for (section) */ 0736 nr = 0; 0737 idtest[0] = strstr(c + 1, "://"); 0738 idtest[1] = strchr(c + 1, '@'); 0739 idtest[2] = strstr(c, "www."); 0740 idtest[3] = strstr(c, "ftp."); 0741 idtest[4] = strchr(c + 1, '('); 0742 idtest[5] = strstr(c + 1, ".h>"); 0743 for (i = 0; i < numtests; ++i) 0744 nr += (idtest[i] != nullptr); 0745 while (nr) { 0746 j = -1; 0747 for (i = 0; i < numtests; i++) 0748 if (idtest[i] && (j < 0 || idtest[i] < idtest[j])) 0749 j = i; 0750 switch (j) { 0751 case 5: /* <name.h> */ 0752 { 0753 f = idtest[5]; 0754 h = f + 2; 0755 g = f; 0756 while (g > c && g[-1] != ';') 0757 g--; 0758 bool wrote_include = false; 0759 0760 if (g != c) { 0761 QByteArray dir; 0762 QByteArray file(g, h - g); 0763 file = file.trimmed(); 0764 for (int index = 0; includedirs[index]; index++) { 0765 QByteArray str(includedirs[index]); 0766 str.append('/'); 0767 str.append(file); 0768 if (!access(str.data(), R_OK)) { 0769 dir = includedirs[index]; 0770 break; 0771 } 0772 } 0773 if (!dir.isEmpty()) { 0774 char t; 0775 t = *g; 0776 *g = 0; 0777 output_real(c); 0778 *g = t; 0779 *h = 0; 0780 0781 QByteArray str; 0782 str.append("<A HREF=\"file:"); 0783 str.append(dir.data()); 0784 str.append("/"); 0785 str.append(file.data()); 0786 str.append("\">"); 0787 str.append(file.data()); 0788 str.append("</A>>"); 0789 0790 output_real(str.data()); 0791 c = f + 6; 0792 wrote_include = true; 0793 } 0794 } 0795 0796 if (!wrote_include) { 0797 f[5] = 0; 0798 output_real(c); 0799 f[5] = ';'; 0800 c = f + 5; 0801 } 0802 } break; 0803 case 4: /* manpage */ 0804 f = idtest[j]; 0805 /* check section */ 0806 g = strchr(f, ')'); 0807 // The character before f must be alphanumeric, the end of a HTML tag or the end of a 0808 if (g != nullptr && f > c && (g - f) < 12 && (isalnum(f[-1]) || f[-1] == '>' || (f[-1] == ';')) && (isdigit(f[1]) || (f[1] == 'n')) && f[1] != '0' 0809 && ((g - f) <= 2 || isalpha(f[2]))) { 0810 ok = true; 0811 h = f + 2; 0812 while (h < g) { 0813 if (!isalnum(*h++)) { 0814 ok = false; 0815 break; 0816 } 0817 } 0818 } else 0819 ok = false; 0820 0821 h = f - 1; 0822 if (ok) { 0823 // Skip 0824 qCDebug(KIO_MAN_LOG) << "BEFORE SECTION:" << *h; 0825 if ((h > c + 5) && (!memcmp(h - 5, " ", 6))) { 0826 h -= 6; 0827 qCDebug(KIO_MAN_LOG) << "Skip "; 0828 } else if ((h > (c + 6)) && (!memcmp(h - 6, " ", 7))) //   narrow space 0829 { 0830 h -= 7; 0831 } else if (*h == ';') { 0832 // Not a non-breaking space, so probably not ok 0833 ok = false; 0834 } 0835 } 0836 0837 if (ok) { 0838 /* this might be a link */ 0839 /* skip html makeup */ 0840 while (h > c && *h == '>') { 0841 while (h != c && *h != '<') 0842 h--; 0843 if (h != c) 0844 h--; 0845 } 0846 if (isalnum(*h)) { 0847 char t, sec, *e; 0848 QByteArray fstr(f); 0849 e = h + 1; 0850 sec = f[1]; 0851 const int index = fstr.indexOf(')', 2); 0852 QByteArray subsec; 0853 if (index != -1) 0854 subsec = fstr.mid(2, index - 2); 0855 else // No closing ')' found, take first character as subsection. 0856 subsec = fstr.mid(2, 1); 0857 while (h > c && (isalnum(h[-1]) || h[-1] == '_' || h[-1] == ':' || h[-1] == '-' || h[-1] == '.')) 0858 h--; 0859 t = *h; 0860 *h = '\0'; 0861 output_real(c); 0862 *h = t; 0863 t = *e; 0864 *e = '\0'; 0865 QByteArray str("<a href=\"man:/"); 0866 str += h; 0867 str += '('; 0868 str += char(sec); 0869 if (!subsec.isEmpty()) 0870 str += subsec.toLower(); 0871 str += ")\">"; 0872 str += h; 0873 str += "</a>"; 0874 output_real(str.data()); 0875 *e = t; 0876 c = e; 0877 } 0878 } 0879 *f = '\0'; 0880 output_real(c); 0881 *f = '('; 0882 idtest[4] = f - 1; 0883 c = f; 0884 break; /* manpage */ 0885 case 3: /* ftp */ 0886 case 2: /* www */ 0887 g = f = idtest[j]; 0888 while (*g && (isalnum(*g) || *g == '_' || *g == '-' || *g == '+' || *g == '.' || *g == '/')) 0889 g++; 0890 if (g[-1] == '.') 0891 g--; 0892 if (g - f > 4) { 0893 char t; 0894 t = *f; 0895 *f = '\0'; 0896 output_real(c); 0897 *f = t; 0898 t = *g; 0899 *g = '\0'; 0900 QByteArray str; 0901 str.append("<A HREF=\""); 0902 str.append(j == 3 ? "ftp" : "http"); 0903 str.append("://"); 0904 str.append(f); 0905 str.append("\">"); 0906 str.append(f); 0907 str.append("</A>"); 0908 output_real(str.data()); 0909 *g = t; 0910 c = g; 0911 } else { 0912 f[3] = '\0'; 0913 output_real(c); 0914 c = f + 3; 0915 f[3] = '.'; 0916 } 0917 break; 0918 case 1: /* mailto */ 0919 g = f = idtest[1]; 0920 while (g > c && (isalnum(g[-1]) || g[-1] == '_' || g[-1] == '-' || g[-1] == '+' || g[-1] == '.' || g[-1] == '%')) 0921 g--; 0922 if (g - 7 >= c && g[-1] == ':') { 0923 // We have perhaps an email address starting with mailto: 0924 if (!qstrncmp("mailto:", g - 7, 7)) 0925 g -= 7; 0926 } 0927 h = f + 1; 0928 while (*h && (isalnum(*h) || *h == '_' || *h == '-' || *h == '+' || *h == '.')) 0929 h++; 0930 if (*h == '.') 0931 h--; 0932 if (h - f > 4 && f - g > 1) { 0933 char t; 0934 t = *g; 0935 *g = '\0'; 0936 output_real(c); 0937 *g = t; 0938 t = *h; 0939 *h = '\0'; 0940 QByteArray str; 0941 str.append("<A HREF=\"mailto:"); 0942 str.append(g); 0943 str.append("\">"); 0944 str.append(g); 0945 str.append("</A>"); 0946 output_real(str.data()); 0947 *h = t; 0948 c = h; 0949 } else { 0950 *f = '\0'; 0951 output_real(c); 0952 *f = '@'; 0953 idtest[1] = c; 0954 c = f; 0955 } 0956 break; 0957 case 0: /* url */ 0958 g = f = idtest[0]; // ://foo... 0959 0960 // backup before :// to get protocol 0961 while (g > c && isalpha(g[-1]) && islower(g[-1])) 0962 g--; 0963 h = f + 3; // start past :// 0964 // determine length of path and part of query it looks like... 0965 while (*h && !isspace(*h) && *h != '<' && *h != '>' && *h != '"' && *h != '&') 0966 h++; 0967 // if protocol length 3-6 characters and path has any length at all... 0968 // more tests added because this code breaks stylesheet links that use 0969 // the correct file:/// stuff. 0970 if (f - g > 2 && f - g < 7 && h - f > 3 && (strstr(c, "http://") != nullptr || strstr(c, "ftp://") != nullptr)) { 0971 char t; 0972 t = *g; 0973 *g = '\0'; 0974 output_real(c); 0975 *g = t; 0976 t = *h; 0977 *h = '\0'; 0978 QByteArray str; 0979 str.append("<A HREF=\""); 0980 str.append(g); 0981 str.append("\">"); 0982 str.append(g); 0983 str.append("</A>"); 0984 output_real(str.data()); 0985 *h = t; 0986 c = h; 0987 } else { 0988 f[1] = '\0'; 0989 output_real(c); 0990 f[1] = '/'; 0991 c = f + 1; 0992 } 0993 break; 0994 default: 0995 break; 0996 } 0997 nr = 0; 0998 if (idtest[0] && idtest[0] <= c) 0999 idtest[0] = strstr(c + 1, "://"); 1000 if (idtest[1] && idtest[1] <= c) 1001 idtest[1] = strchr(c + 1, '@'); 1002 if (idtest[2] && idtest[2] < c) 1003 idtest[2] = strstr(c, "www."); 1004 if (idtest[3] && idtest[3] < c) 1005 idtest[3] = strstr(c, "ftp."); 1006 if (idtest[4] && idtest[4] <= c) 1007 idtest[4] = strchr(c + 1, '('); 1008 if (idtest[5] && idtest[5] <= c) 1009 idtest[5] = strstr(c + 1, ".h>"); 1010 for (i = 0; i < numtests; i++) 1011 nr += (idtest[i] != nullptr); 1012 } 1013 output_real(c); 1014 } 1015 1016 //--------------------------------------------------------------------- 1017 1018 static QByteArray current_font; 1019 static int current_size = 0; 1020 1021 /* 1022 "fillout" is the mode of text output: 1023 1 = fill mode (line breaks happen when the browser wants them. Normal HTML text) 1024 0 = no-fill mode (preformatted text (<pre>..</pre>). 1025 Input lines are output as-is, retaining line breaks and ignoring the current line length. 1026 */ 1027 static int fillout = 1; 1028 1029 //--------------------------------------------------------------------- 1030 1031 static void out_html(const char *c) 1032 { 1033 if (!c || !*c) 1034 return; 1035 1036 // Added, probably due to the const? 1037 char *c2 = qstrdup(c); 1038 char *c3 = c2; 1039 1040 static int obp = 0; 1041 1042 if (no_newline_output) { 1043 int i = 0; 1044 no_newline_output = 1; 1045 while (c2[i]) { 1046 if (!no_newline_output) 1047 c2[i - 1] = c2[i]; 1048 if (c2[i] == '\n') 1049 no_newline_output = 0; 1050 i++; 1051 } 1052 if (!no_newline_output) 1053 c2[i - 1] = 0; 1054 } 1055 if (scaninbuff) { 1056 while (*c2) { 1057 if (buffpos >= buffmax) { 1058 char *h = new char[buffmax * 2]; 1059 1060 memcpy(h, buffer, buffmax); 1061 delete[] buffer; 1062 buffer = h; 1063 buffmax = buffmax * 2; 1064 } 1065 buffer[buffpos++] = *c2++; 1066 } 1067 } else if (output_possible) { 1068 while (*c2) { 1069 outbuffer[obp++] = *c2; 1070 if (*c2 == '\n' || obp >= HUGE_STR_MAX) { 1071 outbuffer[obp] = '\0'; 1072 add_links(outbuffer); 1073 obp = 0; 1074 } 1075 c2++; 1076 } 1077 } 1078 delete[] c3; 1079 } 1080 1081 //--------------------------------------------------------------------- 1082 1083 void checkListStack() // see if we need to end a previously begun list item 1084 { 1085 if (!listItemStack.isEmpty() && (listItemStack.size() == itemdepth)) { 1086 out_html("</"); 1087 out_html(listItemStack.pop()); 1088 out_html(">"); 1089 } 1090 } 1091 1092 //--------------------------------------------------------------------- 1093 1094 static QByteArray set_font(const QByteArray &name) 1095 { 1096 // Every font but R (Regular) creates <span> elements 1097 QByteArray markup; 1098 if ((current_font != "R") && (current_font != "P") && !current_font.isEmpty()) 1099 markup += "</span>"; 1100 const uint len = name.length(); 1101 bool fontok = true; 1102 if (len == 1) { 1103 const char lead = name[0]; 1104 switch (lead) { 1105 case 'P': // ### TODO: this seems to mean "precedent font" 1106 case 'R': 1107 break; // regular, do nothing 1108 case 'I': 1109 markup += "<span style=\"font-style:italic\">"; 1110 break; 1111 case 'B': 1112 markup += "<span style=\"font-weight:bold\">"; 1113 break; 1114 case 'L': 1115 markup += "<span style=\"font-family:monospace\">"; 1116 break; // ### What's L? 1117 default: 1118 fontok = false; 1119 } 1120 } else if (len == 2) { 1121 if (name == "BI") 1122 markup += "<span style=\"font-style:italic;font-weight:bold\">"; 1123 // Courier 1124 else if (name == "CR") 1125 markup += "<span style=\"font-family:monospace\">"; 1126 else if (name == "CW") // CW is used by pod2man(1) (part of perldoc(1)) 1127 markup += "<span style=\"font-family:monospace\">"; 1128 else if (name == "CI") 1129 markup += "<span style=\"font-family:monospace;font-style:italic\">"; 1130 else if (name == "CB") 1131 markup += "<span style=\"font-family:monospace;font-weight:bold\">"; 1132 // Times 1133 else if (name == "TR") 1134 markup += "<span style=\"font-family:serif\">"; 1135 else if (name == "TI") 1136 markup += "<span style=\"font-family:serif;font-style:italic\">"; 1137 else if (name == "TB") 1138 markup += "<span style=\"font-family:serif;font-weight:bold\">"; 1139 // Helvetica 1140 else if (name == "HR") 1141 markup += "<span style=\"font-family:sans-serif\">"; 1142 else if (name == "HI") 1143 markup += "<span style=\"font-family:sans-serif;font-style:italic\">"; 1144 else if (name == "HB") 1145 markup += "<span style=\"font-family:sans-serif;font-weight:bold\">"; 1146 else 1147 fontok = false; 1148 } else if (len == 3) { 1149 if (name == "CBI") 1150 markup += "<span style=\"font-family:monospace;font-style:italic;font-weight:bold\">"; 1151 else if (name == "TBI") 1152 markup += "<span style=\"font-family:serif;font-style:italic;font-weight:bold\">"; 1153 else if (name == "HBI") 1154 markup += "<span style=\"font-family:sans-serif;font-style:italic;font-weight:bold\">"; 1155 else 1156 fontok = false; 1157 } else 1158 fontok = false; 1159 1160 if (fontok) 1161 current_font = name; 1162 else 1163 current_font = "R"; // Still nothing, then it is 'R' (Regular) // krazy:exclude=doublequote_chars 1164 return markup; 1165 } 1166 1167 //--------------------------------------------------------------------- 1168 1169 static QByteArray change_to_size(int nr) 1170 { 1171 switch (nr) { 1172 case '0': 1173 case '1': 1174 case '2': 1175 case '3': 1176 case '4': 1177 case '5': 1178 case '6': 1179 case '7': 1180 case '8': 1181 case '9': 1182 nr = nr - '0'; 1183 break; 1184 case '\0': 1185 break; 1186 default: 1187 nr = current_size + nr; 1188 if (nr > 9) 1189 nr = 9; 1190 if (nr < -9) 1191 nr = -9; 1192 break; 1193 } 1194 if (nr == current_size) 1195 return ""; 1196 const QByteArray font(current_font); 1197 QByteArray markup; 1198 markup = set_font("R"); 1199 if (current_size) 1200 markup += "</span>"; 1201 current_size = nr; 1202 if (nr) { 1203 int percent = 100 + nr * 1; 1204 markup += "<span style=\"font-size:"; 1205 markup += QByteArray::number(percent); 1206 markup += "%\">"; 1207 } 1208 markup += set_font(font); 1209 return markup; 1210 } 1211 1212 //--------------------------------------------------------------------- 1213 1214 /* static int asint=0; */ 1215 static int intresult = 0; 1216 1217 static bool skip_escape = false; 1218 static bool single_escape = false; 1219 1220 static char *scan_escape_direct(char *c, QByteArray &cstr); 1221 1222 /** 1223 * scan a named character 1224 * param c position 1225 */ 1226 static QByteArray scan_named_character(char *&c) 1227 { 1228 QByteArray name; 1229 if (*c == '(') { 1230 // \*(ab Name of two characters 1231 if (c[1] == escapesym) { 1232 QByteArray cstr; 1233 c = scan_escape_direct(c + 2, cstr); 1234 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the 1235 // variable are to be used. 1236 name = cstr; 1237 } else { 1238 name += c[1]; 1239 name += c[2]; 1240 c += 3; 1241 } 1242 } else if (*c == '[') { 1243 // \*[long_name] Long name 1244 // Named character groff(7) 1245 // We must find the ] to get a name 1246 c++; 1247 while (*c && *c != ']' && *c != '\n') { 1248 if (*c == escapesym) { 1249 QByteArray cstr; 1250 c = scan_escape_direct(c + 1, cstr); 1251 const int result = cstr.indexOf(']'); 1252 if (result == -1) 1253 name += cstr; 1254 else { 1255 // Note: we drop the characters after the ] 1256 name += cstr.left(result); 1257 } 1258 } else { 1259 name += *c; 1260 c++; 1261 } 1262 } 1263 if (!*c || *c == '\n') { 1264 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse character name: " << BYTEARRAY(name); 1265 return ""; 1266 } 1267 c++; 1268 } else if (*c == 'C' || c[1] == '\'') { 1269 // \C'name' 1270 c += 2; 1271 while (*c && *c != '\'' && *c != '\n') { 1272 if (*c == escapesym) { 1273 QByteArray cstr; 1274 c = scan_escape_direct(c + 1, cstr); 1275 const int result = cstr.indexOf('\''); 1276 if (result == -1) 1277 name += cstr; 1278 else { 1279 // Note: we drop the characters after the ] 1280 name += cstr.left(result); 1281 } 1282 } else { 1283 name += *c; 1284 c++; 1285 } 1286 } 1287 if (!*c || *c == '\n') { 1288 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse (\\C mode) character name: " << BYTEARRAY(name); 1289 return ""; 1290 } 1291 c++; 1292 } 1293 // Note: characters with a one character length name do not exist, as they would collide with other escapes 1294 1295 // Now we have the name, let us find it between the string names 1296 QMap<QByteArray, StringDefinition>::const_iterator it = s_characterDefinitionMap.constFind(name); 1297 if (it == s_characterDefinitionMap.constEnd()) { 1298 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find character with name: " << BYTEARRAY(name); 1299 // No output, as an undefined string is empty by default 1300 return ""; 1301 } else { 1302 qCDebug(KIO_MAN_LOG) << "Character with name: \"" << BYTEARRAY(name) << "\" => " << BYTEARRAY((*it).m_output); 1303 return (*it).m_output; 1304 } 1305 } 1306 1307 //--------------------------------------------------------------------- 1308 1309 static QByteArray scan_named_string(char *&c) 1310 { 1311 QByteArray name; 1312 if (*c == '(') { 1313 // \*(ab Name of two characters 1314 if (c[1] == escapesym) { 1315 QByteArray cstr; 1316 c = scan_escape_direct(c + 2, cstr); 1317 qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr); 1318 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the 1319 // variable are to be used. 1320 name = cstr; 1321 } else { 1322 name += c[1]; 1323 name += c[2]; 1324 c += 3; 1325 } 1326 } else if (*c == '[') { 1327 // \*[long_name] Long name 1328 // Named character groff(7) 1329 // We must find the ] to get a name 1330 c++; 1331 while (*c && *c != ']' && *c != '\n') { 1332 if (*c == escapesym) { 1333 QByteArray cstr; 1334 c = scan_escape_direct(c + 1, cstr); 1335 const int result = cstr.indexOf(']'); 1336 if (result == -1) 1337 name += cstr; 1338 else { 1339 // Note: we drop the characters after the ] 1340 name += cstr.left(result); 1341 } 1342 } else { 1343 name += *c; 1344 c++; 1345 } 1346 } 1347 if (!*c || *c == '\n') { 1348 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse string name: " << BYTEARRAY(name); 1349 return ""; 1350 } 1351 c++; 1352 } else { 1353 // \*a Name of one character 1354 name += *c; 1355 c++; 1356 } 1357 // Now we have the name, let us find it between the string names 1358 QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(name); 1359 if (it == s_stringDefinitionMap.constEnd()) { 1360 // try a number register: 1361 return QByteArray::number(getNumberRegisterValue(name)); 1362 1363 // qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string with name: " << BYTEARRAY(name); 1364 // No output, as an undefined string is empty by default 1365 // return ""; 1366 } else { 1367 qCDebug(KIO_MAN_LOG) << "String with name: '" << BYTEARRAY(name) << "' => >>>" << BYTEARRAY((*it).m_output) << "<<<"; 1368 return (*it).m_output; 1369 } 1370 } 1371 1372 //--------------------------------------------------------------------- 1373 1374 static QByteArray scan_dollar_parameter(char *&c) 1375 { 1376 int argno = 0; // No dollar argument number yet! 1377 if (*c == '0') { 1378 // qCDebug(KIO_MAN_LOG) << "$0"; 1379 c++; 1380 return s_dollarZero; 1381 } else if (*c >= '1' && *c <= '9') { 1382 // qCDebug(KIO_MAN_LOG) << "$ direct"; 1383 argno = (*c - '0'); 1384 c++; 1385 } else if (*c == '(') { 1386 // qCDebug(KIO_MAN_LOG) << "$("; 1387 if (c[1] && c[2] && c[1] >= '0' && c[1] <= '9' && c[2] >= '0' && c[2] <= '9') { 1388 argno = (c[1] - '0') * 10 + (c[2] - '0'); 1389 c += 3; 1390 } else { 1391 if (!c[1]) 1392 c++; 1393 else if (!c[2]) 1394 c += 2; 1395 else 1396 c += 3; 1397 return ""; 1398 } 1399 } else if (*c == '[') { 1400 // qCDebug(KIO_MAN_LOG) << "$["; 1401 argno = 0; 1402 c++; 1403 while (*c && *c >= '0' && *c <= '9' && *c != ']') { 1404 argno *= 10; 1405 argno += (*c - '0'); 1406 c++; 1407 } 1408 if (*c != ']') { 1409 return ""; 1410 } 1411 c++; 1412 } else if ((*c == '*') || (*c == '@')) { 1413 const bool quote = (*c == '@'); 1414 QList<QByteArray>::const_iterator it = s_argumentList.constBegin(); 1415 QByteArray param; 1416 bool space = false; 1417 for (; it != s_argumentList.constEnd(); ++it) { 1418 if (space) 1419 param += ' '; 1420 if (quote) 1421 param += '\"'; // Not as HTML, as it could be used by macros ! 1422 param += (*it); 1423 if (quote) 1424 param += '\"'; // Not as HTML, as it could be used by macros! 1425 space = true; 1426 } 1427 c++; 1428 return param; 1429 } else { 1430 qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown parameter $" << *c; 1431 return ""; 1432 } 1433 // qCDebug(KIO_MAN_LOG) << "ARG $" << argno; 1434 if (!s_argumentList.isEmpty() && argno > 0) { 1435 // qCDebug(KIO_MAN_LOG) << "ARG $" << argno << " OK!"; 1436 argno--; 1437 if (argno >= s_argumentList.size()) { 1438 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find parameter $" << (argno + 1); 1439 return ""; 1440 } 1441 1442 return s_argumentList[argno]; 1443 } 1444 return ""; 1445 } 1446 1447 //--------------------------------------------------------------------- 1448 /// return the value of read-only number registers 1449 1450 static int read_only_number_register(const QByteArray &name) 1451 { 1452 // Internal read-only variables 1453 if (name == ".$") { 1454 qCDebug(KIO_MAN_LOG) << "\\n[.$] == " << s_argumentList.size(); 1455 return s_argumentList.size(); 1456 } else if (name == ".g") 1457 return 0; // We are not groff(1) 1458 else if (name == ".s") 1459 return current_size; 1460 #if 0 1461 // ### TODO: map the fonts to a number 1462 else if (name == ".f") 1463 return current_font; 1464 #endif 1465 else if (name == ".P") 1466 return 0; // We are not printing 1467 else if (name == ".A") 1468 return s_nroff; 1469 #ifndef SIMPLE_MAN2HTML 1470 // Special KDE KIO man: 1471 const QString version_string(KDE_VERSION_STRING); 1472 const int version_major = version_string.section('.', 0, 0).toInt(); 1473 const int version_minor = version_string.section('.', 1, 1).toInt(); 1474 const int version_patch = version_string.section('.', 2, 2).toInt(); 1475 if (name == ".KDE_VERSION_MAJOR") 1476 return version_major; 1477 else if (name == ".KDE_VERSION_MINOR") 1478 return version_minor; 1479 else if (name == ".KDE_VERSION_RELEASE") 1480 return version_patch; 1481 else if (name == ".KDE_VERSION") 1482 return (version_major << 16) | (version_minor << 8) | version_patch; 1483 #endif 1484 else if (name == ".T") 1485 return 0; // Set to 1 in nroff, if -T option used; always 0 in troff. 1486 1487 // ### TODO: groff defines many more read-only number registers 1488 qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown read-only number register: " << BYTEARRAY(name); 1489 1490 return 0; // Undefined variable 1491 } 1492 1493 //--------------------------------------------------------------------- 1494 1495 static int getNumberRegisterValue(const QByteArray &name, int sign) 1496 { 1497 if (name[0] == '.') { 1498 return read_only_number_register(name); 1499 } else { 1500 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name); 1501 if (it == s_numberDefinitionMap.end()) { 1502 return 0; // Undefined variable 1503 } else { 1504 (*it).m_value += sign * (*it).m_increment; 1505 return (*it).m_value; 1506 } 1507 } 1508 } 1509 1510 //--------------------------------------------------------------------- 1511 /// get the value of a number register and auto-increment if asked 1512 1513 static int scan_number_register(char *&c) 1514 { 1515 int sign = 0; // Sign for auto-increment (if any) 1516 switch (*c) { 1517 case '+': 1518 sign = 1; 1519 c++; 1520 break; 1521 case '-': 1522 sign = -1; 1523 c++; 1524 break; 1525 default: 1526 break; 1527 } 1528 QByteArray name; 1529 if (*c == '[') { 1530 c++; 1531 if (*c == '+') { 1532 sign = 1; 1533 c++; 1534 } else if (*c == '-') { 1535 sign = -1; 1536 c++; 1537 } 1538 while (*c && *c != ']' && *c != '\n') { 1539 // ### TODO: a \*[string] could be inside and should be processed 1540 name += *c; 1541 c++; 1542 } 1543 if (!*c || *c == '\n') { 1544 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse number register name: " << BYTEARRAY(name); 1545 return 0; 1546 } 1547 c++; 1548 } else if (*c == '(') { 1549 c++; 1550 if (*c == '+') { 1551 sign = 1; 1552 c++; 1553 } else if (*c == '-') { 1554 sign = -1; 1555 c++; 1556 } 1557 name += c[0]; 1558 name += c[1]; 1559 c += 2; 1560 } else { 1561 name += *c; 1562 c++; 1563 } 1564 1565 return getNumberRegisterValue(name, sign); 1566 } 1567 1568 //--------------------------------------------------------------------- 1569 // scan a name from the following 1570 // x ... return x (one char) 1571 // (xx ... return xx (two chars) 1572 // [xxx] ... return xxx (any chars) 1573 // after scanning, c points to the terminating char (0, \n or ]) 1574 1575 static QByteArray scan_name(char *&c) 1576 { 1577 QByteArray name; 1578 if (*c == '(') { 1579 int i = 0; 1580 for (c++; *c && (*c != '\n') && (i < 2); c++, i++) 1581 name += *c; 1582 } else if (*c == '[') { 1583 for (c++; *c && (*c != ']') && (*c != '\n'); c++) 1584 name += *c; 1585 } else 1586 name += *c; 1587 1588 return name; 1589 } 1590 1591 //--------------------------------------------------------------------- 1592 /// get and set font 1593 1594 static QByteArray scan_named_font(char *&c) 1595 { 1596 QByteArray name; 1597 if (*c == '(') { 1598 // \f(ab Name of two characters 1599 if (c[1] == escapesym) { 1600 QByteArray cstr; 1601 c = scan_escape_direct(c + 2, cstr); 1602 qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr); 1603 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the 1604 // variable are to be used. 1605 name = cstr; 1606 } else { 1607 name += c[1]; 1608 name += c[2]; 1609 c += 3; 1610 } 1611 } else if (*c == '[') { 1612 // \f[long_name] Long name 1613 // We must find the ] to get a name 1614 c++; 1615 while (*c && *c != ']' && *c != '\n') { 1616 if (*c == escapesym) { 1617 QByteArray cstr; 1618 c = scan_escape_direct(c + 1, cstr); 1619 const int result = cstr.indexOf(']'); 1620 if (result == -1) 1621 name += cstr; 1622 else { 1623 // Note: we drop the characters after the ] 1624 name += cstr.left(result); 1625 } 1626 } else { 1627 name += *c; 1628 c++; 1629 } 1630 } 1631 if (!*c || *c == '\n') { 1632 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse font name: " << BYTEARRAY(name); 1633 return ""; 1634 } 1635 c++; 1636 } else if (*c) // \f alone makes c point at 0-byte 1637 { 1638 // \fa Font name with one character or one digit 1639 // ### HACK do *not* use: name = *c; or name would be empty 1640 name += *c; 1641 c++; 1642 } 1643 // qCDebug(KIO_MAN_LOG) << "FONT NAME: " << BYTEARRAY( name ); 1644 // Now we have the name, let us find the font 1645 bool ok = false; 1646 const unsigned int number = name.toUInt(&ok); 1647 if (ok) { 1648 if (number < 5) { 1649 const char *const fonts[] = {"R", "I", "B", "BI", "CR"}; // Regular, Italic, Bold, Bold Italic, Courier regular 1650 name = fonts[number]; 1651 } else { 1652 qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has too big number: " << BYTEARRAY(name) << " => " << number; 1653 name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars 1654 } 1655 } else if (name.isEmpty()) { 1656 qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has no name => using R"; 1657 name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars 1658 } 1659 if (!skip_escape) 1660 return set_font(name); 1661 else 1662 return ""; 1663 } 1664 1665 //--------------------------------------------------------------------- 1666 1667 static QByteArray scan_number_code(char *&c) 1668 { 1669 QByteArray number; 1670 if (*c != '\'') 1671 return ""; 1672 c++; // Go past the opening single quote 1673 while (*c && (*c != '\n') && (*c != '\'')) { 1674 number += *c; 1675 c++; 1676 } 1677 bool ok = false; 1678 unsigned int result = number.toUInt(&ok); 1679 if ((result < ' ') || (result > 65535)) 1680 return ""; 1681 else if (result == '\t') { 1682 curpos += 8; 1683 curpos &= 0xfff8; 1684 return "\t"; 1685 } 1686 number.setNum(result); 1687 number.prepend("&#"); 1688 number.append(";"); 1689 curpos++; 1690 c++; // Go past the closing single quote 1691 return number; 1692 } 1693 1694 //--------------------------------------------------------------------- 1695 // ### TODO known missing escapes from groff(7): 1696 // ### TODO \R 1697 1698 static char *scan_escape_direct(char *c, QByteArray &cstr) 1699 { 1700 bool exoutputp; 1701 bool exskipescape; 1702 int i, j; 1703 bool cplusplus = true; // Should the c++ call be executed at the end of the function 1704 1705 cstr.clear(); 1706 intresult = 0; 1707 switch (*c) { 1708 case 'e': 1709 cstr += escapesym; 1710 curpos++; 1711 break; 1712 case '0': // space of digit width 1713 cstr = " "; // Unicode FIGURE SPACE 1714 curpos++; 1715 break; 1716 case '~': // non-breakable-space (resizeable!) 1717 case ' ': 1718 cstr = " "; 1719 curpos++; 1720 break; 1721 case '|': // half-non-breakable-space 1722 case '^': // quarter-non-breakable-space 1723 cstr = " "; // Unicode NARROW NO-BREAK SPACE 1724 curpos++; 1725 break; 1726 case ':': 1727 break; // ignore optional line break 1728 case ',': 1729 break; // left italic correction, always a zero motion 1730 case '/': 1731 cstr = " "; // Unicode THIN SPACE 1732 curpos++; 1733 break; // italic correction, i.e. a small piece of horizontal motion 1734 case '"': // comment. skip rest of line 1735 for (c++; *c && (*c != '\n'); c++) 1736 ; 1737 cplusplus = false; 1738 break; 1739 // ### TODO \# like \" but does not ignore the end of line (groff(7)) 1740 case '$': { 1741 c++; 1742 cstr = scan_dollar_parameter(c); 1743 cplusplus = false; 1744 break; 1745 } 1746 case 'z': { 1747 c++; 1748 if (*c == '\\') { 1749 c = scan_escape_direct(c + 1, cstr); 1750 c--; 1751 } else 1752 cstr = QByteArray(c, 1); 1753 break; 1754 } 1755 case 'k': { 1756 // Store the current horizontal position in the _input_ line in 1757 // number register with name POSITION 1758 c++; 1759 cstr = scan_name(c); 1760 cstr.clear(); // TODO not implemented; discard it 1761 break; 1762 } 1763 case '!': 1764 case '%': 1765 case 'a': 1766 case 'd': 1767 case 'r': 1768 case 'u': 1769 case '\n': 1770 case '&': // Non-printing, zero width character 1771 case ')': // Transparent non-printing zero width character 1772 break; 1773 case '(': 1774 case '[': 1775 case 'C': { 1776 // Do not go forward as scan_named_character needs the leading symbol 1777 cstr = scan_named_character(c); 1778 cplusplus = false; 1779 break; 1780 } 1781 case '*': { 1782 c++; 1783 cstr = scan_named_string(c); 1784 cplusplus = false; 1785 break; 1786 } 1787 case 'f': { 1788 c++; 1789 cstr = scan_named_font(c); 1790 cplusplus = false; 1791 break; 1792 } 1793 case 'F': // font family 1794 { 1795 c++; 1796 cstr = scan_name(c); 1797 1798 if (cstr == "C") 1799 cstr = set_font("CR"); 1800 else if (cstr == "T") 1801 cstr = set_font("TR"); 1802 else if (cstr == "H") 1803 cstr = set_font("HR"); 1804 else 1805 cstr = set_font(cstr); 1806 1807 break; 1808 } 1809 case 'm': // color 1810 { 1811 c++; 1812 cstr = scan_name(c); 1813 1814 if (cstr.isEmpty()) 1815 cstr = "</span>"; 1816 else 1817 cstr = "<span style='color:" + cstr + "'>"; 1818 1819 break; 1820 } 1821 case 's': // ### FIXME: many forms are missing 1822 c++; 1823 j = 0; 1824 i = 0; 1825 if (*c == '-') { 1826 j = -1; 1827 c++; 1828 } else if (*c == '+') { 1829 j = 1; 1830 c++; 1831 } 1832 if (*c == '0') 1833 c++; 1834 else if (*c == '\\') { 1835 c++; 1836 c = scan_escape_direct(c, cstr); 1837 i = intresult; 1838 if (!j) 1839 j = 1; 1840 } else 1841 while (isdigit(*c) && (!i || (!j && i < 4))) 1842 i = i * 10 + (*c++) - '0'; 1843 if (!j) { 1844 j = 1; 1845 if (i) 1846 i = i - 10; 1847 } 1848 if (!skip_escape) 1849 cstr = change_to_size(i * j); 1850 c--; 1851 break; 1852 case 'n': { 1853 c++; 1854 intresult = scan_number_register(c); 1855 cplusplus = false; 1856 break; 1857 } 1858 case 'w': 1859 c++; 1860 i = *c; 1861 c++; 1862 exoutputp = output_possible; 1863 exskipescape = skip_escape; 1864 output_possible = false; 1865 skip_escape = true; 1866 j = 0; 1867 while (*c != i) { 1868 j++; 1869 if (*c == escapesym) 1870 c = scan_escape_direct(c + 1, cstr); 1871 else 1872 c++; 1873 } 1874 output_possible = exoutputp; 1875 skip_escape = exskipescape; 1876 intresult = j; 1877 break; 1878 case 'l': 1879 cstr = "<HR>"; 1880 curpos = 0; 1881 case 'b': 1882 case 'v': 1883 case 'x': 1884 case 'o': 1885 case 'L': 1886 case 'h': 1887 c++; 1888 i = *c; 1889 c++; 1890 exoutputp = output_possible; 1891 exskipescape = skip_escape; 1892 output_possible = 0; 1893 skip_escape = true; 1894 while (*c != i) 1895 if (*c == escapesym) 1896 c = scan_escape_direct(c + 1, cstr); 1897 else 1898 c++; 1899 output_possible = exoutputp; 1900 skip_escape = exskipescape; 1901 break; 1902 case 'c': 1903 no_newline_output = 1; 1904 break; 1905 case '{': 1906 newline_for_fun++; 1907 break; // Start conditional block 1908 case '}': 1909 if (newline_for_fun) 1910 newline_for_fun--; 1911 break; // End conditional block 1912 case 'p': 1913 cstr = "<BR>\n"; 1914 curpos = 0; 1915 break; 1916 case 't': 1917 cstr = "\t"; 1918 curpos = (curpos + 8) & 0xfff8; 1919 break; 1920 case '<': 1921 cstr = "<"; 1922 curpos++; 1923 break; 1924 case '>': 1925 cstr = ">"; 1926 curpos++; 1927 break; 1928 case '\\': { 1929 if (single_escape) 1930 c--; 1931 else 1932 cstr = "\\"; 1933 break; 1934 } 1935 case 'N': { 1936 c++; 1937 cstr = scan_number_code(c); 1938 cplusplus = false; 1939 break; 1940 } 1941 case '\'': 1942 cstr = "´"; 1943 curpos++; 1944 break; // groff(7) ### TODO verify 1945 case '`': 1946 cstr = "`"; // krazy:exclude=doublequote_chars 1947 curpos++; 1948 break; // groff(7) 1949 case '-': 1950 cstr = "-"; // krazy:exclude=doublequote_chars 1951 curpos++; 1952 break; // groff(7) 1953 case '.': 1954 cstr = "."; // krazy:exclude=doublequote_chars 1955 curpos++; 1956 break; // groff(7) 1957 default: 1958 cstr = QByteArray(c, 1); 1959 curpos++; 1960 break; 1961 } 1962 if (cplusplus && *c) 1963 c++; 1964 return c; 1965 } 1966 1967 //--------------------------------------------------------------------- 1968 1969 static char *scan_escape(char *c) 1970 { 1971 QByteArray cstr; 1972 char *result = scan_escape_direct(c, cstr); 1973 if (!skip_escape) 1974 out_html(cstr); 1975 return result; 1976 } 1977 1978 //--------------------------------------------------------------------- 1979 1980 class TABLEROW; 1981 1982 class TABLEITEM 1983 { 1984 public: 1985 TABLEITEM(TABLEROW *row); 1986 ~TABLEITEM() 1987 { 1988 delete[] contents; 1989 } 1990 void setContents(const char *_contents) 1991 { 1992 delete[] contents; 1993 contents = qstrdup(_contents); 1994 } 1995 const char *getContents() const 1996 { 1997 return contents; 1998 } 1999 2000 void init() 2001 { 2002 delete[] contents; 2003 contents = nullptr; 2004 size = 0; 2005 align = 0; 2006 valign = 0; 2007 colspan = 1; 2008 rowspan = 1; 2009 font = 0; 2010 vleft = 0; 2011 vright = 0; 2012 space = 0; 2013 width = 0; 2014 } 2015 2016 void copyLayout(const TABLEITEM *orig) 2017 { 2018 size = orig->size; 2019 align = orig->align; 2020 valign = orig->valign; 2021 colspan = orig->colspan; 2022 rowspan = orig->rowspan; 2023 font = orig->font; 2024 vleft = orig->vleft; 2025 vright = orig->vright; 2026 space = orig->space; 2027 width = orig->width; 2028 } 2029 2030 public: 2031 int size, align, valign, colspan, rowspan, font, vleft, vright, space, width; 2032 2033 private: 2034 char *contents; 2035 TABLEROW *_parent; 2036 }; 2037 2038 class TABLEROW 2039 { 2040 char *test; 2041 2042 public: 2043 TABLEROW() 2044 { 2045 test = new char; 2046 prev = nullptr; 2047 next = nullptr; 2048 } 2049 ~TABLEROW() 2050 { 2051 qDeleteAll(items); 2052 items.clear(); 2053 delete test; 2054 } 2055 int length() const 2056 { 2057 return items.count(); 2058 } 2059 bool has(int index) 2060 { 2061 return (index >= 0) && (index < (int)items.count()); 2062 } 2063 TABLEITEM &at(int index) 2064 { 2065 return *items.at(index); 2066 } 2067 2068 TABLEROW *copyLayout() const; 2069 2070 void addItem(TABLEITEM *item) 2071 { 2072 items.append(item); 2073 } 2074 TABLEROW *prev, *next; 2075 2076 private: 2077 QList<TABLEITEM *> items; 2078 }; 2079 2080 TABLEITEM::TABLEITEM(TABLEROW *row) 2081 : contents(nullptr) 2082 , _parent(row) 2083 { 2084 init(); 2085 _parent->addItem(this); 2086 } 2087 2088 TABLEROW *TABLEROW::copyLayout() const 2089 { 2090 TABLEROW *newrow = new TABLEROW(); 2091 2092 QListIterator<TABLEITEM *> it(items); 2093 while (it.hasNext()) { 2094 TABLEITEM *newitem = new TABLEITEM(newrow); 2095 newitem->copyLayout(it.next()); 2096 } 2097 return newrow; 2098 } 2099 2100 static const char *const tableopt[] = {"center", "expand", "box", "allbox", "doublebox", "tab", "linesize", "delim", nullptr}; 2101 static const int tableoptl[] = {6, 6, 3, 6, 9, 3, 8, 5, 0}; 2102 2103 static void clear_table(TABLEROW *table) 2104 { 2105 TABLEROW *tr1, *tr2; 2106 2107 tr1 = table; 2108 while (tr1->prev) 2109 tr1 = tr1->prev; 2110 while (tr1) { 2111 tr2 = tr1; 2112 tr1 = tr1->next; 2113 delete tr2; 2114 } 2115 } 2116 2117 //--------------------------------------------------------------------- 2118 2119 static char *scan_expression(char *c, int *result); 2120 2121 //--------------------------------------------------------------------- 2122 2123 static char *scan_format(char *c, TABLEROW **result, int *maxcol) 2124 { 2125 TABLEROW *layout, *currow; 2126 TABLEITEM *curfield; 2127 int i, j; 2128 if (*result) { 2129 clear_table(*result); 2130 } 2131 layout = currow = new TABLEROW(); 2132 curfield = new TABLEITEM(currow); 2133 while (*c && *c != '.') { 2134 switch (*c) { 2135 case 'C': 2136 case 'c': 2137 case 'N': 2138 case 'n': 2139 case 'R': 2140 case 'r': 2141 case 'A': 2142 case 'a': 2143 case 'L': 2144 case 'l': 2145 case 'S': 2146 case 's': 2147 case '^': 2148 case '_': 2149 if (curfield->align) 2150 curfield = new TABLEITEM(currow); 2151 curfield->align = toupper(*c); 2152 c++; 2153 break; 2154 case 'i': 2155 case 'I': 2156 case 'B': 2157 case 'b': 2158 curfield->font = toupper(*c); 2159 c++; 2160 break; 2161 case 'f': 2162 case 'F': 2163 c++; 2164 curfield->font = toupper(*c); 2165 c++; 2166 if (!isspace(*c) && *c != '.') 2167 c++; 2168 break; 2169 case 't': 2170 case 'T': 2171 curfield->valign = 't'; 2172 c++; 2173 break; 2174 case 'p': 2175 case 'P': 2176 c++; 2177 i = j = 0; 2178 if (*c == '+') { 2179 j = 1; 2180 c++; 2181 } 2182 if (*c == '-') { 2183 j = -1; 2184 c++; 2185 } 2186 while (isdigit(*c)) 2187 i = i * 10 + (*c++) - '0'; 2188 if (j) 2189 curfield->size = i * j; 2190 else 2191 curfield->size = j - 10; 2192 break; 2193 case 'v': 2194 case 'V': 2195 case 'w': 2196 case 'W': 2197 c = scan_expression(c + 2, &curfield->width); 2198 break; 2199 case '|': 2200 if (curfield->align) 2201 curfield->vleft++; 2202 else 2203 curfield->vright++; 2204 c++; 2205 break; 2206 case 'e': 2207 case 'E': 2208 c++; 2209 break; 2210 case '0': 2211 case '1': 2212 case '2': 2213 case '3': 2214 case '4': 2215 case '5': 2216 case '6': 2217 case '7': 2218 case '8': 2219 case '9': 2220 i = 0; 2221 while (isdigit(*c)) 2222 i = i * 10 + (*c++) - '0'; 2223 curfield->space = i; 2224 break; 2225 case ',': 2226 case '\n': 2227 currow->next = new TABLEROW(); 2228 currow->next->prev = currow; 2229 currow = currow->next; 2230 currow->next = nullptr; 2231 curfield = new TABLEITEM(currow); 2232 c++; 2233 break; 2234 default: 2235 c++; 2236 break; 2237 } 2238 } 2239 if (*c == '.') 2240 while (*c++ != '\n') 2241 ; 2242 *maxcol = 0; 2243 currow = layout; 2244 while (currow) { 2245 i = currow->length(); 2246 if (i > *maxcol) 2247 *maxcol = i; 2248 currow = currow->next; 2249 } 2250 *result = layout; 2251 return c; 2252 } 2253 2254 //--------------------------------------------------------------------- 2255 2256 static TABLEROW *next_row(TABLEROW *tr) 2257 { 2258 if (tr->next) { 2259 tr = tr->next; 2260 if (!tr->next) 2261 return next_row(tr); 2262 return tr; 2263 } else { 2264 tr->next = tr->copyLayout(); 2265 tr->next->prev = tr; 2266 return tr->next; 2267 } 2268 } 2269 2270 //--------------------------------------------------------------------- 2271 2272 static char itemreset[20] = "\\fR\\s0"; 2273 2274 #define FORWARDCUR \ 2275 do { \ 2276 curfield++; \ 2277 } while (currow->has(curfield) && currow->at(curfield).align == 'S'); 2278 2279 static char *scan_table(char *c) 2280 { 2281 char *h; 2282 char *g; 2283 int center = 0, expand = 0, box = 0, border = 0, linesize = 1; 2284 int i, j, maxcol = 0, finished = 0; 2285 QByteArray oldfont; 2286 int oldsize, oldfillout; 2287 char itemsep = '\t'; 2288 TABLEROW *layout = nullptr, *currow; 2289 int curfield = -1; 2290 while (*c++ != '\n') 2291 ; 2292 h = c; 2293 if (*h == '.') 2294 return c - 1; 2295 oldfont = current_font; 2296 oldsize = current_size; 2297 oldfillout = fillout; 2298 out_html(set_font("R")); 2299 out_html(change_to_size(0)); 2300 if (!fillout) { 2301 fillout = 1; 2302 out_html("</PRE>"); 2303 } 2304 while (*h && *h != '\n') 2305 h++; 2306 if (h[-1] == ';') { 2307 /* scan table options */ 2308 while (c < h) { 2309 while (isspace(*c)) 2310 c++; 2311 for (i = 0; tableopt[i] && qstrncmp(tableopt[i], c, tableoptl[i]); i++) 2312 ; 2313 c = c + tableoptl[i]; 2314 switch (i) { 2315 case 0: 2316 center = 1; 2317 break; 2318 case 1: 2319 expand = 1; 2320 break; 2321 case 2: 2322 box = 1; 2323 break; 2324 case 3: 2325 border = 1; 2326 break; 2327 case 4: 2328 box = 2; 2329 break; 2330 case 5: 2331 while (*c++ != '(') 2332 ; 2333 itemsep = *c++; 2334 break; 2335 case 6: 2336 while (*c++ != '(') 2337 ; 2338 linesize = 0; 2339 while (isdigit(*c)) 2340 linesize = linesize * 10 + (*c++) - '0'; 2341 break; 2342 case 7: 2343 while (*c != ')') 2344 c++; 2345 default: 2346 break; 2347 } 2348 c++; 2349 } 2350 c = h + 1; 2351 } 2352 /* scan layout */ 2353 c = scan_format(c, &layout, &maxcol); 2354 // currow=layout; 2355 currow = next_row(layout); 2356 curfield = 0; 2357 i = 0; 2358 while (!finished && *c) { 2359 /* search item */ 2360 h = c; 2361 if ((*c == '_' || *c == '=') && (c[1] == itemsep || c[1] == '\n')) { 2362 if (c[-1] == '\n' && c[1] == '\n') { 2363 if (currow->prev) { 2364 currow->prev->next = new TABLEROW(); 2365 currow->prev->next->next = currow; 2366 currow->prev->next->prev = currow->prev; 2367 currow->prev = currow->prev->next; 2368 } else { 2369 currow->prev = layout = new TABLEROW(); 2370 currow->prev->prev = nullptr; 2371 currow->prev->next = currow; 2372 } 2373 TABLEITEM *newitem = new TABLEITEM(currow->prev); 2374 newitem->align = *c; 2375 newitem->colspan = maxcol; 2376 curfield = 0; 2377 c = c + 2; 2378 } else { 2379 if (currow->has(curfield)) { 2380 currow->at(curfield).align = *c; 2381 FORWARDCUR; 2382 } 2383 if (c[1] == '\n') { 2384 currow = next_row(currow); 2385 curfield = 0; 2386 } 2387 c = c + 2; 2388 } 2389 } else if (*c == 'T' && c[1] == '{') { 2390 h = c + 2; 2391 c = strstr(h, "\nT}"); 2392 c++; 2393 *c = '\0'; 2394 g = nullptr; 2395 scan_troff(h, 0, &g); 2396 scan_troff(itemreset, 0, &g); 2397 *c = 'T'; 2398 c += 3; 2399 if (currow->has(curfield)) { 2400 currow->at(curfield).setContents(g); 2401 FORWARDCUR; 2402 } 2403 delete[] g; 2404 2405 if (c[-1] == '\n') { 2406 currow = next_row(currow); 2407 curfield = 0; 2408 } 2409 } else if (*c == '.' && c[1] == 'T' && c[2] == '&' && c[-1] == '\n') { 2410 TABLEROW *hr; 2411 while (*c++ != '\n') 2412 ; 2413 hr = currow; 2414 currow = currow->prev; 2415 hr->prev = nullptr; 2416 c = scan_format(c, &hr, &i); 2417 hr->prev = currow; 2418 currow->next = hr; 2419 currow = hr; 2420 next_row(currow); 2421 curfield = 0; 2422 } else if (*c == '.' && c[1] == 'T' && c[2] == 'E' && c[-1] == '\n') { 2423 finished = 1; 2424 while (*c++ != '\n') 2425 ; 2426 if (currow->prev) 2427 currow->prev->next = nullptr; 2428 currow->prev = nullptr; 2429 clear_table(currow); 2430 currow = nullptr; 2431 } else if (*c == '.' && c[-1] == '\n' && !isdigit(c[1])) { 2432 /* skip troff request inside table (usually only .sp ) */ 2433 while (*c++ != '\n') 2434 ; 2435 } else { 2436 h = c; 2437 while (*c && (*c != itemsep || c[-1] == '\\') && (*c != '\n' || c[-1] == '\\')) 2438 c++; 2439 i = 0; 2440 if (*c == itemsep) { 2441 i = 1; 2442 *c = '\n'; 2443 } 2444 if (h[0] == '\\' && h[2] == '\n' && (h[1] == '_' || h[1] == '^')) { 2445 if (currow->has(curfield)) { 2446 currow->at(curfield).align = h[1]; 2447 FORWARDCUR; 2448 } 2449 h = h + 3; 2450 } else { 2451 g = nullptr; 2452 h = scan_troff(h, 1, &g); 2453 scan_troff(itemreset, 0, &g); 2454 if (currow->has(curfield)) { 2455 currow->at(curfield).setContents(g); 2456 FORWARDCUR; 2457 } 2458 delete[] g; 2459 } 2460 if (i) 2461 *c = itemsep; 2462 c = h; 2463 if (c[-1] == '\n') { 2464 currow = next_row(currow); 2465 curfield = 0; 2466 } 2467 } 2468 } 2469 /* calculate colspan and rowspan */ 2470 currow = layout; 2471 while (currow->next) 2472 currow = currow->next; 2473 while (currow) { 2474 int ti = 0, ti1 = 0, ti2 = -1; 2475 TABLEROW *prev = currow->prev; 2476 if (!prev) 2477 break; 2478 2479 while (prev->has(ti1)) { 2480 if (currow->has(ti)) 2481 switch (currow->at(ti).align) { 2482 case 'S': 2483 if (currow->has(ti2)) { 2484 currow->at(ti2).colspan++; 2485 if (currow->at(ti2).rowspan < prev->at(ti1).rowspan) 2486 currow->at(ti2).rowspan = prev->at(ti1).rowspan; 2487 } 2488 break; 2489 case '^': 2490 if (prev->has(ti1)) 2491 prev->at(ti1).rowspan++; 2492 default: 2493 if (ti2 < 0) 2494 ti2 = ti; 2495 else { 2496 do { 2497 ti2++; 2498 } while (currow->has(ti2) && currow->at(ti2).align == 'S'); 2499 } 2500 break; 2501 } 2502 ti++; 2503 if (ti1 >= 0) 2504 ti1++; 2505 } 2506 currow = currow->prev; 2507 } 2508 /* produce html output */ 2509 if (center) 2510 out_html("<CENTER>"); 2511 if (box == 2) 2512 out_html("<TABLE BORDER><TR><TD>"); 2513 out_html("<TABLE"); 2514 if (box || border) { 2515 out_html(" BORDER"); 2516 if (!border) 2517 out_html("><TR><TD><TABLE"); 2518 if (expand) 2519 out_html(" WIDTH=\"100%\""); 2520 } 2521 out_html(">\n"); 2522 currow = layout; 2523 while (currow) { 2524 j = 0; 2525 out_html("<TR VALIGN=top>"); 2526 curfield = 0; 2527 while (currow->has(curfield)) { 2528 if (currow->at(curfield).align != 'S' && currow->at(curfield).align != '^') { 2529 out_html("<TD style='padding-right:10px; padding-left:10px;'"); 2530 switch (currow->at(curfield).align) { 2531 case 'N': 2532 currow->at(curfield).space += 4; 2533 case 'R': 2534 out_html(" ALIGN=right"); 2535 break; 2536 case 'C': 2537 out_html(" ALIGN=center"); 2538 default: 2539 break; 2540 } 2541 if (!currow->at(curfield).valign && currow->at(curfield).rowspan > 1) 2542 out_html(" VALIGN=center"); 2543 if (currow->at(curfield).colspan > 1) { 2544 out_html(" COLSPAN="); 2545 out_html(QByteArray::number(currow->at(curfield).colspan)); 2546 } 2547 if (currow->at(curfield).rowspan > 1) { 2548 out_html(" ROWSPAN="); 2549 out_html(QByteArray::number(currow->at(curfield).rowspan)); 2550 } 2551 j = j + currow->at(curfield).colspan; 2552 out_html(">"); 2553 if (currow->at(curfield).size) 2554 out_html(change_to_size(currow->at(curfield).size)); 2555 if (currow->at(curfield).font) 2556 out_html(set_font(QByteArray::number(currow->at(curfield).font))); 2557 switch (currow->at(curfield).align) { 2558 case '=': 2559 out_html("<HR><HR>"); 2560 break; 2561 case '_': 2562 out_html("<HR>"); 2563 break; 2564 default: 2565 out_html(currow->at(curfield).getContents()); 2566 break; 2567 } 2568 if (currow->at(curfield).space) 2569 for (i = 0; i < currow->at(curfield).space; i++) 2570 out_html(" "); 2571 if (currow->at(curfield).font) 2572 out_html(set_font("R")); 2573 if (currow->at(curfield).size) 2574 out_html(change_to_size(0)); 2575 if (j >= maxcol && currow->at(curfield).align > '@' && currow->at(curfield).align != '_') 2576 out_html("<BR>"); 2577 out_html("</TD>"); 2578 } 2579 curfield++; 2580 } 2581 out_html("</TR>\n"); 2582 currow = currow->next; 2583 } 2584 2585 clear_table(layout); 2586 2587 if (box && !border) 2588 out_html("</TABLE>"); 2589 out_html("</TABLE>"); 2590 if (box == 2) 2591 out_html("</TABLE>"); 2592 if (center) 2593 out_html("</CENTER>\n"); 2594 else 2595 out_html("\n"); 2596 if (!oldfillout) 2597 out_html("<PRE>"); 2598 fillout = oldfillout; 2599 out_html(change_to_size(oldsize)); 2600 out_html(set_font(oldfont)); 2601 return c; 2602 } 2603 2604 //--------------------------------------------------------------------- 2605 2606 static char *scan_expression(char *c, int *result, const unsigned int numLoop) 2607 { 2608 int value = 0, value2, sign = 1, opex = 0; 2609 char oper = 'c'; 2610 bool oldSkipEscape = skip_escape; 2611 skip_escape = true; // evaluating an expression shall not print it 2612 2613 if (*c == '!') { 2614 c = scan_expression(c + 1, &value); 2615 value = (!value); 2616 } else if (*c == 'n') { 2617 c++; 2618 value = s_nroff; 2619 } else if (*c == 't') { 2620 c++; 2621 value = 1 - s_nroff; 2622 } else if (*c == '\'' || *c == '"' || *c < ' ' || (*c == '\\' && c[1] == '(')) { 2623 /* ?string1?string2? 2624 ** test if string1 equals string2. 2625 */ 2626 char *st1 = nullptr, *st2 = nullptr, *h; 2627 char *tcmp = nullptr; 2628 char sep; 2629 sep = *c; 2630 if (sep == '\\') { 2631 tcmp = c; 2632 c = c + 3; 2633 } 2634 c++; 2635 h = c; 2636 while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4))) 2637 c++; 2638 *c = '\n'; 2639 scan_troff(h, 1, &st1); 2640 *c = sep; 2641 if (tcmp) 2642 c = c + 3; 2643 c++; 2644 h = c; 2645 while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4))) 2646 c++; 2647 *c = '\n'; 2648 scan_troff(h, 1, &st2); 2649 *c = sep; 2650 if (!st1 && !st2) 2651 value = 1; 2652 else if (!st1 || !st2) 2653 value = 0; 2654 else 2655 value = (!qstrcmp(st1, st2)); 2656 delete[] st1; 2657 delete[] st2; 2658 if (tcmp) 2659 c = c + 3; 2660 c++; 2661 } else { 2662 while (*c && (!isspace(*c) || (numLoop > 0)) && *c != ')' && opex >= 0) { 2663 opex = 0; 2664 switch (*c) { 2665 case '(': 2666 c = scan_expression(c + 1, &value2, numLoop + 1); 2667 value2 = sign * value2; 2668 opex = 1; 2669 break; 2670 case '.': 2671 case '0': 2672 case '1': 2673 case '2': 2674 case '3': 2675 case '4': 2676 case '5': 2677 case '6': 2678 case '7': 2679 case '8': 2680 case '9': { 2681 int num = 0, denum = 1; 2682 value2 = 0; 2683 while (isdigit(*c)) 2684 value2 = value2 * 10 + ((*c++) - '0'); 2685 if (*c == '.' && isdigit(c[1])) { 2686 c++; 2687 while (isdigit(*c)) { 2688 num = num * 10 + ((*c++) - '0'); 2689 denum = denum * 10; 2690 } 2691 } 2692 if (isalpha(*c)) { 2693 /* scale indicator */ 2694 switch (*c) { 2695 case 'i': /* inch -> 10pt */ 2696 value2 = value2 * 10 + (num * 10 + denum / 2) / denum; 2697 num = 0; 2698 break; 2699 default: 2700 break; 2701 } 2702 c++; 2703 } 2704 value2 = value2 + (num + denum / 2) / denum; 2705 value2 = sign * value2; 2706 opex = 1; 2707 if (*c == '.') 2708 opex = -1; 2709 2710 } break; 2711 case '\\': 2712 c = scan_escape(c + 1); 2713 value2 = intresult * sign; 2714 if (isalpha(*c)) 2715 c++; /* scale indicator */ 2716 opex = 1; 2717 break; 2718 case '-': 2719 if (oper) { 2720 sign = -1; 2721 c++; 2722 break; 2723 } 2724 Q_FALLTHROUGH(); 2725 case '>': 2726 case '<': 2727 case '+': 2728 case '/': 2729 case '*': 2730 case '%': 2731 case '&': 2732 case '=': 2733 case ':': 2734 if (c[1] == '=') 2735 oper = (*c++) + 16; 2736 else 2737 oper = *c; 2738 c++; 2739 break; 2740 default: 2741 c++; 2742 break; 2743 } 2744 if (opex > 0) { 2745 sign = 1; 2746 switch (oper) { 2747 case 'c': 2748 value = value2; 2749 break; 2750 case '-': 2751 value = value - value2; 2752 break; 2753 case '+': 2754 value = value + value2; 2755 break; 2756 case '*': 2757 value = value * value2; 2758 break; 2759 case '/': 2760 if (value2) 2761 value = value / value2; 2762 break; 2763 case '%': 2764 if (value2) 2765 value = value % value2; 2766 break; 2767 case '<': 2768 value = (value < value2); 2769 break; 2770 case '>': 2771 value = (value > value2); 2772 break; 2773 case '>' + 16: 2774 value = (value >= value2); 2775 break; 2776 case '<' + 16: 2777 value = (value <= value2); 2778 break; 2779 case '=': 2780 case '=' + 16: 2781 value = (value == value2); 2782 break; 2783 case '&': 2784 value = (value && value2); 2785 break; 2786 case ':': 2787 value = (value || value2); 2788 break; 2789 default: { 2790 qCDebug(KIO_MAN_LOG) << "Unknown operator " << char(oper); 2791 } 2792 } 2793 oper = 0; 2794 } 2795 } 2796 if (*c == ')') 2797 c++; 2798 } 2799 *result = value; 2800 2801 skip_escape = oldSkipEscape; 2802 2803 return c; 2804 } 2805 2806 //--------------------------------------------------------------------- 2807 2808 static char *scan_expression(char *c, int *result) 2809 { 2810 return scan_expression(c, result, 0); 2811 } 2812 2813 //--------------------------------------------------------------------- 2814 2815 static void trans_char(char *c, char s, char t) 2816 { 2817 char *sl = c; 2818 int slash = 0; 2819 while (*sl != '\n' || slash) { 2820 if (!slash) { 2821 if (*sl == escapesym) 2822 slash = 1; 2823 else if (*sl == s) 2824 *sl = t; 2825 } else 2826 slash = 0; 2827 sl++; 2828 } 2829 } 2830 2831 //--------------------------------------------------------------------- 2832 // parse 1 line (or a line which stretches multiple lines by \(enter) ) 2833 // return all arguments starting at \p c in \p args 2834 // returns the pointer to the next char where scanning should continue 2835 // (which is the char after the ending \n) 2836 // argPointers .. a list of pointers to the startchars of each arg pointing into the string given with c 2837 2838 void getArguments(/* const */ char *&c, QList<QByteArray> &args, QList<char *> *argPointers = nullptr) 2839 { 2840 args.clear(); 2841 if (argPointers) 2842 argPointers->clear(); 2843 2844 QByteArray arg; 2845 arg.reserve(30); // reduce num of reallocs 2846 bool inString = false; 2847 bool inArgument = false; 2848 2849 for (; *c && (*c != '\n'); c++) { 2850 if (*c == '"') { 2851 if (!inString) { 2852 inString = true; // start of quoted argument 2853 } else { 2854 // according to http://heirloom.sourceforge.net/doctools/troff.pdf chapter 7.3 2855 // two consecutive quotes inside a string is one quote char 2856 if (*(c + 1) == '"') { 2857 arg += '"'; 2858 c++; 2859 } else // end of quoted argument 2860 { 2861 args.append(arg); 2862 arg.clear(); 2863 inString = false; 2864 inArgument = false; 2865 } 2866 } 2867 } else if (*c == ' ') { 2868 if (inString) { 2869 arg += *c; 2870 if (!inArgument) // argument not yet found (leading spaces) 2871 { 2872 inArgument = true; 2873 2874 if (argPointers) 2875 argPointers->append(c); 2876 } 2877 } else if (inArgument) { 2878 // end of previous argument 2879 args.append(arg); 2880 arg.clear(); 2881 inArgument = false; 2882 } 2883 } else if ((*c == escapesym) && (*(c + 1) == ' ')) { 2884 // special handling \<SP> shall be kept as is 2885 arg += *c++; 2886 arg += *c; 2887 2888 if (!inArgument) // argument not yet found (leading spaces) 2889 { 2890 inArgument = true; 2891 2892 if (argPointers) 2893 argPointers->append(c); 2894 } 2895 } else if ((*c == escapesym) && (*(c + 1) == '\n')) { 2896 c++; 2897 } else if ((*c == escapesym) && (*(c + 1) == '"')) // start of comment; skip rest of line 2898 { 2899 if (inArgument) { 2900 // end of previous argument 2901 args.append(arg); 2902 arg.clear(); 2903 inArgument = false; 2904 } 2905 2906 // skip rest of line 2907 while (*c && (*c != '\n')) 2908 c++; 2909 break; 2910 } else if (*c != ' ') { 2911 arg += *c; 2912 if (!inArgument) // argument not yet found (leading spaces) 2913 { 2914 inArgument = true; 2915 2916 if (argPointers) 2917 argPointers->append(c); 2918 } 2919 } 2920 } 2921 2922 if (inArgument) { 2923 // end of previous argument 2924 args.append(arg); 2925 } 2926 2927 if (*c) 2928 c++; 2929 } 2930 2931 //--------------------------------------------------------------------- 2932 2933 static const char *const abbrev_list[] = {"GSBG", "Getting Started ", 2934 "SUBG", "Customizing SunOS", 2935 "SHBG", "Basic Troubleshooting", 2936 "SVBG", "SunView User's Guide", 2937 "MMBG", "Mail and Messages", 2938 "DMBG", "Doing More with SunOS", 2939 "UNBG", "Using the Network", 2940 "GDBG", "Games, Demos & Other Pursuits", 2941 "CHANGE", "SunOS 4.1 Release Manual", 2942 "INSTALL", "Installing SunOS 4.1", 2943 "ADMIN", "System and Network Administration", 2944 "SECUR", "Security Features Guide", 2945 "PROM", "PROM User's Manual", 2946 "DIAG", "Sun System Diagnostics", 2947 "SUNDIAG", "Sundiag User's Guide", 2948 "MANPAGES", "SunOS Reference Manual", 2949 "REFMAN", "SunOS Reference Manual", 2950 "SSI", "Sun System Introduction", 2951 "SSO", "System Services Overview", 2952 "TEXT", "Editing Text Files", 2953 "DOCS", "Formatting Documents", 2954 "TROFF", "Using <B>nroff</B> and <B>troff</B>", 2955 "INDEX", "Global Index", 2956 "CPG", "C Programmer's Guide", 2957 "CREF", "C Reference Manual", 2958 "ASSY", "Assembly Language Reference", 2959 "PUL", "Programming Utilities and Libraries", 2960 "DEBUG", "Debugging Tools", 2961 "NETP", "Network Programming", 2962 "DRIVER", "Writing Device Drivers", 2963 "STREAMS", "STREAMS Programming", 2964 "SBDK", "SBus Developer's Kit", 2965 "WDDS", "Writing Device Drivers for the SBus", 2966 "FPOINT", "Floating-Point Programmer's Guide", 2967 "SVPG", "SunView 1 Programmer's Guide", 2968 "SVSPG", "SunView 1 System Programmer's Guide", 2969 "PIXRCT", "Pixrect Reference Manual", 2970 "CGI", "SunCGI Reference Manual", 2971 "CORE", "SunCore Reference Manual", 2972 "4ASSY", "Sun-4 Assembly Language Reference", 2973 "SARCH", "<FONT SIZE=\"-1\">SPARC</FONT> Architecture Manual", 2974 "KR", "The C Programming Language", 2975 nullptr, nullptr}; 2976 2977 static const char *lookup_abbrev(const char *c) 2978 { 2979 int i = 0; 2980 2981 if (!c) 2982 return ""; 2983 while (abbrev_list[i] && qstrcmp(c, abbrev_list[i])) 2984 i = i + 2; 2985 if (abbrev_list[i]) 2986 return abbrev_list[i + 1]; 2987 else 2988 return c; 2989 } 2990 2991 //--------------------------------------------------------------------- 2992 2993 static const char *const section_list[] = { 2994 #ifdef Q_OS_SOLARIS 2995 // for Solaris 2996 "1", 2997 "User Commands", 2998 "1B", 2999 "SunOS/BSD Compatibility Package Commands", 3000 "1b", 3001 "SunOS/BSD Compatibility Package Commands", 3002 "1C", 3003 "Communication Commands ", 3004 "1c", 3005 "Communication Commands", 3006 "1F", 3007 "FMLI Commands ", 3008 "1f", 3009 "FMLI Commands", 3010 "1G", 3011 "Graphics and CAD Commands ", 3012 "1g", 3013 "Graphics and CAD Commands ", 3014 "1M", 3015 "Maintenance Commands", 3016 "1m", 3017 "Maintenance Commands", 3018 "1S", 3019 "SunOS Specific Commands", 3020 "1s", 3021 "SunOS Specific Commands", 3022 "2", 3023 "System Calls", 3024 "3", 3025 "C Library Functions", 3026 "3B", 3027 "SunOS/BSD Compatibility Library Functions", 3028 "3b", 3029 "SunOS/BSD Compatibility Library Functions", 3030 "3C", 3031 "C Library Functions", 3032 "3c", 3033 "C Library Functions", 3034 "3E", 3035 "C Library Functions", 3036 "3e", 3037 "C Library Functions", 3038 "3F", 3039 "Fortran Library Routines", 3040 "3f", 3041 "Fortran Library Routines", 3042 "3G", 3043 "C Library Functions", 3044 "3g", 3045 "C Library Functions", 3046 "3I", 3047 "Wide Character Functions", 3048 "3i", 3049 "Wide Character Functions", 3050 "3K", 3051 "Kernel VM Library Functions", 3052 "3k", 3053 "Kernel VM Library Functions", 3054 "3L", 3055 "Lightweight Processes Library", 3056 "3l", 3057 "Lightweight Processes Library", 3058 "3M", 3059 "Mathematical Library", 3060 "3m", 3061 "Mathematical Library", 3062 "3N", 3063 "Network Functions", 3064 "3n", 3065 "Network Functions", 3066 "3R", 3067 "Realtime Library", 3068 "3r", 3069 "Realtime Library", 3070 "3S", 3071 "Standard I/O Functions", 3072 "3s", 3073 "Standard I/O Functions", 3074 "3T", 3075 "Threads Library", 3076 "3t", 3077 "Threads Library", 3078 "3W", 3079 "C Library Functions", 3080 "3w", 3081 "C Library Functions", 3082 "3X", 3083 "Miscellaneous Library Functions", 3084 "3x", 3085 "Miscellaneous Library Functions", 3086 "4", 3087 "File Formats", 3088 "4B", 3089 "SunOS/BSD Compatibility Package File Formats", 3090 "4b", 3091 "SunOS/BSD Compatibility Package File Formats", 3092 "5", 3093 "Headers, Tables, and Macros", 3094 "6", 3095 "Games and Demos", 3096 "7", 3097 "Special Files", 3098 "7B", 3099 "SunOS/BSD Compatibility Special Files", 3100 "7b", 3101 "SunOS/BSD Compatibility Special Files", 3102 "8", 3103 "Maintenance Procedures", 3104 "8C", 3105 "Maintenance Procedures", 3106 "8c", 3107 "Maintenance Procedures", 3108 "8S", 3109 "Maintenance Procedures", 3110 "8s", 3111 "Maintenance Procedures", 3112 "9", 3113 "DDI and DKI", 3114 "9E", 3115 "DDI and DKI Driver Entry Points", 3116 "9e", 3117 "DDI and DKI Driver Entry Points", 3118 "9F", 3119 "DDI and DKI Kernel Functions", 3120 "9f", 3121 "DDI and DKI Kernel Functions", 3122 "9S", 3123 "DDI and DKI Data Structures", 3124 "9s", 3125 "DDI and DKI Data Structures", 3126 "L", 3127 "Local Commands", 3128 #elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) 3129 "1", 3130 "General Commands", 3131 "2", 3132 "System Calls", 3133 "3", 3134 "Library Functions", 3135 "4", 3136 "Kernel Interfaces", 3137 "5", 3138 "File Formats", 3139 "6", 3140 "Games", 3141 "7", 3142 "Miscellaneous Information", 3143 "8", 3144 "System Manager's Manuals", 3145 "9", 3146 "Kernel Developer's Manuals", 3147 #else 3148 // Other OS 3149 "1", 3150 "User Commands ", 3151 "1C", 3152 "User Commands", 3153 "1G", 3154 "User Commands", 3155 "1S", 3156 "User Commands", 3157 "1V", 3158 "User Commands ", 3159 "2", 3160 "System Calls", 3161 "2V", 3162 "System Calls", 3163 "3", 3164 "C Library Functions", 3165 "3C", 3166 "Compatibility Functions", 3167 "3F", 3168 "Fortran Library Routines", 3169 "3K", 3170 "Kernel VM Library Functions", 3171 "3L", 3172 "Lightweight Processes Library", 3173 "3M", 3174 "Mathematical Library", 3175 "3N", 3176 "Network Functions", 3177 "3R", 3178 "RPC Services Library", 3179 "3S", 3180 "Standard I/O Functions", 3181 "3V", 3182 "C Library Functions", 3183 "3X", 3184 "Miscellaneous Library Functions", 3185 "4", 3186 "Devices and Network Interfaces", 3187 "4F", 3188 "Protocol Families", 3189 "4I", 3190 "Devices and Network Interfaces", 3191 "4M", 3192 "Devices and Network Interfaces", 3193 "4N", 3194 "Devices and Network Interfaces", 3195 "4P", 3196 "Protocols", 3197 "4S", 3198 "Devices and Network Interfaces", 3199 "4V", 3200 "Devices and Network Interfaces", 3201 "5", 3202 "File Formats", 3203 "5V", 3204 "File Formats", 3205 "6", 3206 "Games and Demos", 3207 "7", 3208 "Environments, Tables, and Troff Macros", 3209 "7V", 3210 "Environments, Tables, and Troff Macros", 3211 "8", 3212 "Maintenance Commands", 3213 "8C", 3214 "Maintenance Commands", 3215 "8S", 3216 "Maintenance Commands", 3217 "8V", 3218 "Maintenance Commands", 3219 "L", 3220 "Local Commands", 3221 #endif 3222 // The defaults 3223 nullptr, 3224 "Misc. Reference Manual Pages", 3225 nullptr, 3226 nullptr}; 3227 3228 static const char *section_name(char *c) 3229 { 3230 int i = 0; 3231 3232 if (!c) 3233 return ""; 3234 while (section_list[i] && qstrcmp(c, section_list[i])) 3235 i = i + 2; 3236 if (section_list[i + 1]) 3237 return section_list[i + 1]; 3238 else 3239 return c; 3240 } 3241 3242 static char *skip_till_newline(char *c) 3243 { 3244 int lvl = 0; 3245 3246 while (*c && (*c != '\n' || lvl > 0)) { 3247 if (*c == '\\') { 3248 c++; 3249 if (*c == '}') 3250 lvl--; 3251 else if (*c == '{') 3252 lvl++; 3253 else if (*c == '\0') 3254 break; 3255 } 3256 c++; 3257 } 3258 if (*c) 3259 c++; 3260 if (lvl < 0 && newline_for_fun) { 3261 newline_for_fun = newline_for_fun + lvl; 3262 if (newline_for_fun < 0) 3263 newline_for_fun = 0; 3264 } 3265 return c; 3266 } 3267 3268 //--------------------------------------------------------------------- 3269 3270 static bool s_whileloop = false; 3271 3272 /// Processing the .while request 3273 static void request_while(char *&c, int j, bool mdoc) 3274 { 3275 // ### TODO: .continue 3276 qCDebug(KIO_MAN_LOG) << "Entering .while"; 3277 c += j; 3278 char *newline = skip_till_newline(c); 3279 const char oldchar = *newline; 3280 *newline = 0; 3281 // We store the full .while stuff into a QByteArray as if it would be a macro 3282 const QByteArray macro = c; 3283 qCDebug(KIO_MAN_LOG) << "'Macro' of .while" << BYTEARRAY(macro); 3284 // Prepare for continuing after .while loop end 3285 *newline = oldchar; 3286 c = newline; 3287 // Process -while loop 3288 const bool oldwhileloop = s_whileloop; 3289 s_whileloop = true; 3290 int result = true; // It must be an int due to the call to scan_expression 3291 break_the_while_loop = false; 3292 while (result && !break_the_while_loop) { 3293 // Unlike for a normal macro, we have the condition at start, so we do not need to prepend extra bytes 3294 char *liveloop = qstrdup(macro.data()); 3295 qCDebug(KIO_MAN_LOG) << "Scanning .while condition"; 3296 qCDebug(KIO_MAN_LOG) << "Loop macro " << liveloop; 3297 char *end_expression = scan_expression(liveloop, &result); 3298 qCDebug(KIO_MAN_LOG) << "After " << end_expression; 3299 if (result) { 3300 qCDebug(KIO_MAN_LOG) << "New .while iteration"; 3301 // The condition is true, so call the .while's content 3302 char *help = end_expression + 1; 3303 while (*help && (*help == ' ' || *help == '\t')) 3304 ++help; 3305 if (!*help) { 3306 // We have a problem, so stop .while 3307 result = false; 3308 break; 3309 } 3310 if (mdoc) 3311 scan_troff_mandoc(help, false, nullptr); 3312 else 3313 scan_troff(help, false, nullptr); 3314 } 3315 delete[] liveloop; 3316 } 3317 break_the_while_loop = false; 3318 3319 // 3320 s_whileloop = oldwhileloop; 3321 qCDebug(KIO_MAN_LOG) << "Ending .while"; 3322 } 3323 3324 //--------------------------------------------------------------------- 3325 // Processing mixed fonts requests like .BI 3326 3327 static void request_mixed_fonts(char *&c, int j, const char *font1, const char *font2, const bool mode, const bool inFMode) 3328 { 3329 c += j; 3330 if (*c == '\n') 3331 c++; 3332 3333 QList<QByteArray> args; 3334 getArguments(c, args); 3335 3336 for (int i = 0; i < args.count(); i++) { 3337 if (mode || inFMode) { 3338 out_html(" "); 3339 curpos++; 3340 } 3341 out_html(set_font((i & 1) ? font2 : font1)); 3342 scan_troff(args[i].data(), 1, nullptr); 3343 } 3344 out_html(set_font("R")); 3345 if (mode) { 3346 out_html(" ]"); 3347 curpos++; 3348 } 3349 out_html(NEWLINE); 3350 if (!fillout) 3351 curpos = 0; 3352 else 3353 curpos++; 3354 } 3355 3356 //--------------------------------------------------------------------- 3357 3358 // &%(#@ c programs !!! 3359 // static int ifelseval=0; 3360 // If/else can be nested! 3361 static QStack<int> s_ifelseval; 3362 3363 //--------------------------------------------------------------------- 3364 3365 // Process a (mdoc) request involving quotes 3366 static char *process_quote(char *c, int j, const char *open, const char *close) 3367 { 3368 trans_char(c, '"', '\a'); 3369 c += j; 3370 if (*c == '\n') 3371 c++; // ### TODO: why? Quote requests cannot be empty! 3372 out_html(open); 3373 c = scan_troff_mandoc(c, 1, nullptr); 3374 out_html(close); 3375 out_html(NEWLINE); 3376 if (fillout) 3377 curpos++; 3378 else 3379 curpos = 0; 3380 return c; 3381 } 3382 3383 //--------------------------------------------------------------------- 3384 /** 3385 * Is the char \p ch a punctuation in sense of mdoc(7) 3386 */ 3387 3388 static bool is_mdoc_punctuation(const char ch) 3389 { 3390 if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) 3391 return false; 3392 else if (ch == '.' || ch == ',' || ch == ';' || ch == ':' || ch == '(' || ch == ')' || ch == '[' || ch == ']') 3393 return true; 3394 else 3395 return false; 3396 } 3397 3398 //--------------------------------------------------------------------- 3399 /** 3400 * Can the char \p c be part of an identifier 3401 * \note For groff, an identifier can consist of nearly all ASCII printable non-white-space characters 3402 * See info:/groff/Identifiers 3403 */ 3404 3405 static bool is_identifier_char(const char c) 3406 { 3407 if (c >= '!' && c <= '[') // Include digits and upper case 3408 return true; 3409 else if (c >= ']' && c <= '~') // Include lower case 3410 return true; 3411 else if (c == '\\') 3412 return false; // ### TODO: it should be treated as escape instead! 3413 return false; 3414 } 3415 3416 //--------------------------------------------------------------------- 3417 3418 static QByteArray scan_identifier(char *&c) 3419 { 3420 char *h = c; // help pointer 3421 // ### TODO Groff seems to eat nearly everything as identifier name (info:/groff/Identifiers) 3422 while (*h && *h != '\a' && *h != '\n' && is_identifier_char(*h)) 3423 ++h; 3424 const char tempchar = *h; 3425 *h = 0; 3426 const QByteArray name = c; 3427 *h = tempchar; 3428 if (name.isEmpty()) { 3429 qCDebug(KIO_MAN_LOG) << "EXCEPTION: identifier empty!"; 3430 } 3431 c = h; 3432 return name; 3433 } 3434 3435 //--------------------------------------------------------------------- 3436 3437 static char *scan_request(char *c) 3438 { 3439 // mdoc(7) stuff 3440 static bool mandoc_synopsis = false; /* True if we are in the synopsis section */ 3441 static bool mandoc_command = false; /* True if this is mdoc(7) page */ 3442 static int mandoc_bd_options; /* Only copes with non-nested Bd's */ 3443 static int function_argument = 0; // Number of function argument (.Fo, .Fa, .Fc) 3444 3445 int i = 0; 3446 bool mode = false; 3447 char *h = nullptr; 3448 char *sl; 3449 QList<QByteArray> args; 3450 3451 while (*c == ' ' || *c == '\t') 3452 c++; // Spaces or tabs allowed between control character and request 3453 if (c[0] == '\n') 3454 return c + 1; 3455 if (c[0] == escapesym) { 3456 /* some pages use .\" .\$1 .\} */ 3457 /* .\$1 is too difficult/stuppid */ 3458 if (c[1] == '$') { 3459 qCDebug(KIO_MAN_LOG) << "Found .\\$"; 3460 c = skip_till_newline(c); // ### TODO 3461 } else { 3462 // the result of the escape expansion must be parsed again 3463 c++; 3464 QByteArray cstr; 3465 c = scan_escape_direct(c, cstr); 3466 for (; *c && (*c != '\n'); c++) 3467 cstr += *c; 3468 if (cstr.length()) 3469 scan_request(cstr.data()); 3470 } 3471 } else { 3472 int nlen = 0; 3473 QByteArray macroName; 3474 while (c[nlen] && (c[nlen] != ' ') && (c[nlen] != '\t') && (c[nlen] != '\n') && (c[nlen] != escapesym)) { 3475 macroName += c[nlen]; 3476 nlen++; 3477 } 3478 int j = nlen; 3479 while (c[j] == ' ' || c[j] == '\t') 3480 j++; 3481 /* search macro database of self-defined macros */ 3482 QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(macroName); 3483 3484 // ### HACK: e.g. nmap, smb.conf redefine SH, SS to increase the font, etc. for non-TTY output 3485 // Ignore those to make the HTML result look better 3486 if ((macroName != "SH") && (macroName != "SS") && it != s_stringDefinitionMap.constEnd()) { 3487 qCDebug(KIO_MAN_LOG) << "CALLING MACRO: " << BYTEARRAY(macroName); 3488 const QByteArray oldDollarZero = s_dollarZero; // Previous value of $0 3489 s_dollarZero = macroName; 3490 3491 c += j; 3492 getArguments(c, args); 3493 for (i = 0; i < args.count(); i++) { 3494 char *h = nullptr; 3495 3496 if (mandoc_command) 3497 scan_troff_mandoc(args[i].data(), 1, &h); 3498 else 3499 scan_troff(args[i].data(), 1, &h); 3500 3501 args[i] = h; 3502 delete[] h; 3503 } 3504 3505 if (!(*it).m_output.isEmpty()) { 3506 // qCDebug(KIO_MAN_LOG) << "Macro content is: "<< BYTEARRAY( (*it).m_output ); 3507 const unsigned int length = (*it).m_output.length(); 3508 char *work = new char[length + 2]; 3509 work[0] = '\n'; // The macro must start after an end of line to allow a request on first line 3510 qstrncpy(work + 1, (*it).m_output.data(), length + 1); 3511 const QList<QByteArray> oldArgumentList(s_argumentList); 3512 s_argumentList.clear(); 3513 for (i = 0; i < args.count(); i++) 3514 s_argumentList.push_back(args[i]); 3515 3516 const int onff = newline_for_fun; 3517 if (mandoc_command) 3518 scan_troff_mandoc(work + 1, 0, nullptr); 3519 else 3520 scan_troff(work + 1, 0, nullptr); 3521 delete[] work; 3522 newline_for_fun = onff; 3523 s_argumentList = oldArgumentList; 3524 } 3525 s_dollarZero = oldDollarZero; 3526 qCDebug(KIO_MAN_LOG) << "ENDING MACRO: " << BYTEARRAY(macroName); 3527 } else { 3528 qCDebug(KIO_MAN_LOG) << "REQUEST: " << BYTEARRAY(macroName); 3529 switch (RequestNum request = RequestHash::getRequest(macroName, macroName.length())) { 3530 case REQ_ab: // groff(7) "ABort" 3531 { 3532 h = c + j; 3533 while (*h && *h != '\n') 3534 h++; 3535 *h = '\0'; 3536 if (scaninbuff && buffpos) { 3537 buffer[buffpos] = '\0'; 3538 qCDebug(KIO_MAN_LOG) << "ABORT: " << buffer; 3539 } 3540 // ### TODO find a way to display it to the user 3541 qCDebug(KIO_MAN_LOG) << "Aborting: .ab " << (c + j); 3542 return nullptr; 3543 break; 3544 } 3545 case REQ_An: // mdoc(7) "Author Name" 3546 { 3547 c += j; 3548 c = scan_troff_mandoc(c, 1, nullptr); 3549 break; 3550 } 3551 case REQ_di: // groff(7) "end current DIversion" 3552 { 3553 qCDebug(KIO_MAN_LOG) << "Start .di"; 3554 c += j; 3555 if (*c == '\n') { 3556 ++c; 3557 break; 3558 } 3559 const QByteArray name(scan_identifier(c)); 3560 while (*c && *c != '\n') 3561 c++; 3562 c++; 3563 h = c; 3564 while (*c && qstrncmp(c, ".di", 3)) 3565 while (*c && *c++ != '\n') 3566 ; 3567 *c = '\0'; 3568 char *result = nullptr; 3569 scan_troff(h, 0, &result); 3570 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name); 3571 if (it == s_stringDefinitionMap.end()) { 3572 StringDefinition def; 3573 def.m_length = 0; 3574 def.m_output = result; 3575 s_stringDefinitionMap.insert(name, def); 3576 } else { 3577 (*it).m_length = 0; 3578 (*it).m_output = result; 3579 } 3580 delete[] result; 3581 if (*c) 3582 *c = '.'; 3583 c = skip_till_newline(c); 3584 qCDebug(KIO_MAN_LOG) << "end .di"; 3585 break; 3586 } 3587 case REQ_ds: // groff(7) "Define String variable" 3588 mode = true; 3589 Q_FALLTHROUGH(); 3590 case REQ_as: // groff (7) "Append String variable" 3591 { 3592 qCDebug(KIO_MAN_LOG) << "start .ds/.as"; 3593 int oldcurpos = curpos; 3594 c += j; 3595 const QByteArray name(scan_identifier(c)); 3596 if (name.isEmpty()) 3597 break; 3598 // an initial " is removed to allow leading space 3599 while (*c && isspace(*c)) 3600 c++; 3601 if (*c == '"') 3602 c++; 3603 3604 single_escape = true; 3605 curpos = 0; 3606 char *result = nullptr; 3607 c = scan_troff(c, 1, &result); 3608 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name); 3609 if (it == s_stringDefinitionMap.end()) { 3610 StringDefinition def; 3611 def.m_length = curpos; 3612 def.m_output = result; 3613 s_stringDefinitionMap.insert(name, def); 3614 } else { 3615 if (mode) { // .ds Defining String 3616 (*it).m_length = curpos; 3617 (*it).m_output = result; 3618 } else { // .as Appending String 3619 (*it).m_length += curpos; 3620 (*it).m_output += result; 3621 } 3622 } 3623 delete[] result; 3624 single_escape = false; 3625 curpos = oldcurpos; 3626 qCDebug(KIO_MAN_LOG) << "end .ds/.as"; 3627 break; 3628 } 3629 case REQ_br: // groff(7) "line BReak" 3630 { 3631 if (still_dd) 3632 out_html("<DD>"); // ### VERIFY (does not look like generating good HTML) 3633 else 3634 out_html("<BR>\n"); 3635 curpos = 0; 3636 c = c + j; 3637 if (c[0] == escapesym) 3638 c = scan_escape(c + 1); 3639 c = skip_till_newline(c); 3640 break; 3641 } 3642 case REQ_c2: // groff(7) "reset non-break Control character" (2 means non-break) 3643 { 3644 c = c + j; 3645 if (*c != '\n') 3646 nobreaksym = *c; 3647 else 3648 nobreaksym = '\''; 3649 c = skip_till_newline(c); 3650 break; 3651 } 3652 case REQ_cc: // groff(7) "reset Control Character" 3653 { 3654 c = c + j; 3655 if (*c != '\n') 3656 controlsym = *c; 3657 else 3658 controlsym = '.'; 3659 c = skip_till_newline(c); 3660 break; 3661 } 3662 case REQ_ce: // groff (7) "CEnter" 3663 { 3664 c = c + j; 3665 if (*c == '\n') 3666 i = 1; 3667 else { 3668 i = 0; 3669 while ('0' <= *c && *c <= '9') { 3670 i = i * 10 + *c - '0'; 3671 c++; 3672 } 3673 } 3674 c = skip_till_newline(c); 3675 /* center next i lines */ 3676 if (i > 0) { 3677 out_html("<CENTER>\n"); 3678 while (i && *c) { 3679 char *line = nullptr; 3680 c = scan_troff(c, 1, &line); 3681 if (line && qstrncmp(line, "<BR>", 4)) { 3682 out_html(line); 3683 out_html("<BR>\n"); 3684 delete[] line; // ### FIXME: memory leak! 3685 i--; 3686 } 3687 } 3688 out_html("</CENTER>\n"); 3689 curpos = 0; 3690 } 3691 break; 3692 } 3693 case REQ_ec: // groff(7) "reset Escape Character" 3694 { 3695 c = c + j; 3696 if (*c != '\n') 3697 escapesym = *c; 3698 else 3699 escapesym = '\\'; 3700 break; 3701 } 3702 case REQ_eo: // groff(7) "turn Escape character Off" 3703 { 3704 escapesym = '\0'; 3705 c = skip_till_newline(c); 3706 break; 3707 } 3708 case REQ_ex: // groff(7) "EXit" 3709 { 3710 return nullptr; 3711 break; 3712 } 3713 case REQ_fc: // groff(7) "set Field and pad Character" 3714 { 3715 c = c + j; 3716 if (*c == '\n') 3717 fieldsym = padsym = '\0'; 3718 else { 3719 fieldsym = c[0]; 3720 padsym = c[1]; 3721 } 3722 c = skip_till_newline(c); 3723 break; 3724 } 3725 case REQ_fi: // groff(7) "FIll" 3726 { 3727 if (!fillout) { 3728 out_html(set_font("R")); 3729 out_html(change_to_size('0')); 3730 out_html("</PRE>\n"); 3731 } 3732 curpos = 0; 3733 fillout = 1; 3734 c = skip_till_newline(c); 3735 break; 3736 } 3737 case REQ_ft: // groff(7) "FonT" 3738 { 3739 c += j; 3740 h = skip_till_newline(c); 3741 const char oldChar = *h; 3742 *h = 0; 3743 const QByteArray name = c; 3744 // ### TODO: name might contain a variable 3745 if (name.isEmpty()) 3746 out_html(set_font("P")); // Previous font 3747 else 3748 out_html(set_font(name)); 3749 *h = oldChar; 3750 c = h; 3751 break; 3752 } 3753 case REQ_el: // groff(7) "ELse" 3754 { 3755 int ifelseval = s_ifelseval.pop(); 3756 /* .el anything : else part of if else */ 3757 if (ifelseval) { 3758 c = c + j; 3759 c[-1] = '\n'; 3760 c = scan_troff(c, 1, nullptr); 3761 } else 3762 c = skip_till_newline(c + j); 3763 break; 3764 } 3765 case REQ_ie: // groff(7) "If with Else" 3766 /* .ie c anything : then part of if else */ 3767 // fallthrough 3768 case REQ_if: // groff(7) "IF" 3769 { 3770 /* .if c anything 3771 * .if !c anything 3772 * .if N anything 3773 * .if !N anything 3774 * .if 'string1'string2' anything 3775 * .if !'string1'string2' anything 3776 */ 3777 c = c + j; 3778 c = scan_expression(c, &i); 3779 if (request == REQ_ie) { 3780 int ifelseval = !i; 3781 s_ifelseval.push(ifelseval); 3782 } 3783 if (i) { 3784 *c = '\n'; 3785 c++; 3786 c = scan_troff(c, 1, nullptr); 3787 } else 3788 c = skip_till_newline(c); 3789 break; 3790 } 3791 case REQ_ig: // groff(7) "IGnore" 3792 { 3793 const char *endwith = "..\n"; 3794 i = 3; 3795 c = c + j; 3796 if (*c != '\n' && *c != '\\') { 3797 /* Not newline or comment */ 3798 endwith = c - 1; 3799 i = 1; 3800 c[-1] = '.'; 3801 while (*c && *c != '\n') 3802 c++, i++; 3803 } 3804 c++; 3805 while (*c && qstrncmp(c, endwith, i)) 3806 while (*c++ != '\n') 3807 ; 3808 while (*c && *c++ != '\n') 3809 ; 3810 break; 3811 } 3812 case REQ_nf: // groff(7) "No Filling" 3813 { 3814 if (fillout) { 3815 out_html(set_font("R")); 3816 out_html(change_to_size('0')); 3817 out_html("<PRE>\n"); 3818 } 3819 curpos = 0; 3820 fillout = 0; 3821 c = skip_till_newline(c); 3822 break; 3823 } 3824 case REQ_ps: // groff(7) "previous Point Size" 3825 { 3826 c += j; 3827 getArguments(c, args); 3828 if (args.count() == 0) 3829 out_html(change_to_size('0')); 3830 else { 3831 char *h = args[0].data(); 3832 int sign = 0; 3833 i = 0; 3834 if (*h == '-') { 3835 sign = -1; 3836 h++; 3837 } else if (*h == '+') { 3838 sign = 1; 3839 h++; 3840 } 3841 scan_expression(h, &i); 3842 if (sign == 0) { 3843 sign = 1; 3844 if (i > 5) 3845 i = i - 10; 3846 } 3847 out_html(change_to_size(sign * i)); 3848 } 3849 break; 3850 } 3851 case REQ_sp: // groff(7) "SKip one line" 3852 { 3853 c += j; 3854 if (fillout) 3855 out_html("<br><br>"); 3856 else 3857 out_html(NEWLINE); 3858 curpos = 0; 3859 c = skip_till_newline(c); 3860 break; 3861 } 3862 case REQ_so: // groff(7) "Include SOurce file" 3863 { 3864 char *buf; 3865 char *name = nullptr; 3866 curpos = 0; 3867 c = c + j; 3868 if (*c == '/') 3869 h = c; 3870 else { 3871 h = c - 3; 3872 h[0] = '.'; 3873 h[1] = '.'; 3874 h[2] = '/'; 3875 } 3876 while (*c != '\n') 3877 c++; 3878 *c = '\0'; 3879 scan_troff(h, 1, &name); 3880 if (name[3] == '/') 3881 h = name + 3; 3882 else 3883 h = name; 3884 3885 // The format of the argument to .so varies among man pages. 3886 // Some of them, e.g. pam.8, use "PAM.8". Others, e.g. telinit.8, 3887 // use "man8/init.8". So they are not always true relative paths, 3888 // although the man(1) command seems to handle them with no problem. 3889 // 3890 // The code above starting "h = c - 3" attempts to turn the argument 3891 // into a relative path, but that is not correct in the case of pam.8 3892 // as above. So this removes the "../" prefix again if there is 3893 // no other slash following it. 3894 char *firstSlash = strchr(h, '/'); 3895 if (firstSlash != 0) { 3896 char *nextSlash = strchr(firstSlash + 1, '/'); 3897 if (nextSlash == 0) 3898 h = firstSlash + 1; 3899 } 3900 3901 /* this works alright, except for section 3 */ 3902 buf = read_man_page(h); 3903 if (!buf) { 3904 qCDebug(KIO_MAN_LOG) << "Unable to open or read file: .so " << (h); 3905 out_html( 3906 "<BLOCKQUOTE>" 3907 "man2html: unable to open or read file.\n"); 3908 out_html(h); 3909 out_html("</BLOCKQUOTE>\n"); 3910 } else 3911 scan_troff(buf + 1, 0, nullptr); 3912 delete[] buf; 3913 delete[] name; 3914 3915 *c++ = '\n'; 3916 break; 3917 } 3918 case REQ_ta: // gorff(7) "set TAbulators" 3919 { 3920 c = c + j; 3921 j = 0; 3922 while (*c != '\n') { 3923 sl = scan_expression(c, &tabstops[j]); 3924 if (j > 0 && (*c == '-' || *c == '+')) 3925 tabstops[j] += tabstops[j - 1]; 3926 c = sl; 3927 while (*c == ' ' || *c == '\t') 3928 c++; 3929 j++; 3930 } 3931 maxtstop = j; 3932 curpos = 0; 3933 break; 3934 } 3935 case REQ_ti: // groff(7) "Temporary Indent" 3936 { 3937 /*while (itemdepth || dl_set[itemdepth]) { 3938 out_html("</DL>\n"); 3939 if (dl_set[itemdepth]) dl_set[itemdepth]=0; 3940 else itemdepth--; 3941 }*/ 3942 out_html("<BR>\n"); 3943 c = c + j; 3944 c = scan_expression(c, &j); 3945 for (i = 0; i < j; i++) 3946 out_html(" "); 3947 curpos = j; 3948 c = skip_till_newline(c); 3949 break; 3950 } 3951 case REQ_tm: // groff(7) "TerMinal" ### TODO: what are useful uses for it 3952 { 3953 c += j; 3954 getArguments(c, args); 3955 if (args.count()) 3956 qCDebug(KIO_MAN_LOG) << ".tm " << args[0]; 3957 break; 3958 } 3959 case REQ_B: // man(7) "Bold" 3960 mode = true; 3961 Q_FALLTHROUGH(); 3962 case REQ_I: // man(7) "Italic" 3963 { 3964 /* parse one line in a certain font */ 3965 c += j; 3966 getArguments(c, args); 3967 3968 out_html(set_font(mode ? "B" : "I")); 3969 3970 for (int i = 0; i < args.count(); i++) { 3971 scan_troff(args[i].data(), 1, nullptr); 3972 out_html(" "); 3973 } 3974 3975 out_html(set_font("R")); 3976 3977 if (fillout) 3978 curpos++; 3979 else { 3980 out_html(NEWLINE); 3981 curpos = 0; 3982 } 3983 break; 3984 } 3985 case REQ_Fd: // mdoc(7) "Function Definition" 3986 { 3987 // Normal text must be printed in bold, punctuation in regular font 3988 c += j; 3989 if (*c == '\n') 3990 c++; 3991 getArguments(c, args); 3992 3993 for (i = 0; i < args.count(); i++) { 3994 // ### FIXME In theory, only a single punctuation character is recognized as punctuation 3995 if (is_mdoc_punctuation(args[i][0])) 3996 out_html(set_font("R")); 3997 else 3998 out_html(set_font("B")); 3999 scan_troff(args[i].data(), 1, nullptr); 4000 out_html(" "); 4001 } 4002 // In the mdoc synopsis, there are automatical line breaks (### TODO: before or after?) 4003 if (mandoc_synopsis) 4004 out_html("<br>"); 4005 4006 out_html(set_font("R")); 4007 out_html(NEWLINE); 4008 if (!fillout) 4009 curpos = 0; 4010 else 4011 curpos++; 4012 break; 4013 } 4014 case REQ_Fn: // mdoc(7) for "Function calls" 4015 { 4016 // brackets and commas have to be inserted automatically 4017 c += j; 4018 if (*c == '\n') 4019 c++; 4020 getArguments(c, args); 4021 if (args.count()) { 4022 for (i = 0; i < args.count(); i++) { 4023 if (i) 4024 out_html(set_font("I")); 4025 else 4026 out_html(set_font("B")); 4027 scan_troff(args[i].data(), 1, nullptr); 4028 out_html(set_font("R")); 4029 if (i == 0) { 4030 out_html(" ("); 4031 } else if (i < args.count() - 1) 4032 out_html(", "); 4033 } 4034 out_html(")"); 4035 } 4036 out_html(set_font("R")); 4037 if (mandoc_synopsis) 4038 out_html("<br>"); 4039 out_html(NEWLINE); 4040 if (!fillout) 4041 curpos = 0; 4042 else 4043 curpos++; 4044 break; 4045 } 4046 case REQ_Fo: // mdoc(7) "Function definition Opening" 4047 { 4048 char *font[2] = {(char *)"B", (char *)"R"}; 4049 c += j; 4050 if (*c == '\n') 4051 c++; 4052 char *eol = strchr(c, '\n'); 4053 char *semicolon = strchr(c, ';'); 4054 if ((semicolon != nullptr) && (semicolon < eol)) 4055 *semicolon = ' '; 4056 4057 getArguments(c, args); 4058 // Normally a .Fo has only one parameter 4059 for (i = 0; i < args.count(); i++) { 4060 out_html(set_font(font[i & 1])); 4061 scan_troff(args[i].data(), 1, nullptr); 4062 if (i == 0) { 4063 out_html(" ("); 4064 } 4065 // ### TODO What should happen if there is more than one argument 4066 // else if (i<args.count()-1) out_html(", "); 4067 } 4068 function_argument = 1; // Must be > 0 4069 out_html(set_font("R")); 4070 out_html(NEWLINE); 4071 if (!fillout) 4072 curpos = 0; 4073 else 4074 curpos++; 4075 break; 4076 } 4077 case REQ_Fc: // mdoc(7) "Function definition Close" 4078 { 4079 // .Fc has no parameter 4080 c += j; 4081 c = skip_till_newline(c); 4082 char *font[2] = {(char *)"B", (char *)"R"}; 4083 out_html(set_font(font[i & 1])); 4084 out_html(")"); 4085 out_html(set_font("R")); 4086 if (mandoc_synopsis) 4087 out_html("<br>"); 4088 out_html(NEWLINE); 4089 if (!fillout) 4090 curpos = 0; 4091 else 4092 curpos++; 4093 function_argument = 0; // Reset the count variable 4094 break; 4095 } 4096 case REQ_Fa: // mdoc(7) "Function definition argument" 4097 { 4098 char *font[2] = {(char *)"B", (char *)"R"}; 4099 c += j; 4100 if (*c == '\n') 4101 c++; 4102 getArguments(c, args); 4103 out_html(set_font(font[i & 1])); 4104 // function_argument==0 means that we had no .Fo before, e.g. in mdoc.samples(7) 4105 if (function_argument > 1) { 4106 out_html(", "); 4107 curpos += 2; 4108 function_argument++; 4109 } else if (function_argument == 1) { 4110 // We are only at the first parameter 4111 function_argument++; 4112 } 4113 for (i = 0; i < args.count(); i++) 4114 scan_troff(args[i].data(), 1, nullptr); 4115 4116 out_html(set_font("R")); 4117 if (!fillout) 4118 curpos = 0; 4119 else 4120 curpos++; 4121 break; 4122 } 4123 4124 case REQ_OP: /* groff manpages use this construction */ 4125 { 4126 /* .OP a b : [ <B>a</B> <I>b</I> ] */ 4127 out_html(set_font("R")); 4128 out_html("["); 4129 curpos++; 4130 request_mixed_fonts(c, j, "B", "I", true, false); 4131 break; 4132 } 4133 case REQ_Ft: // perhaps "Function return type" 4134 { 4135 request_mixed_fonts(c, j, "B", "I", false, true); 4136 break; 4137 } 4138 case REQ_BR: { 4139 request_mixed_fonts(c, j, "B", "R", false, false); 4140 break; 4141 } 4142 case REQ_BI: { 4143 request_mixed_fonts(c, j, "B", "I", false, false); 4144 break; 4145 } 4146 case REQ_IB: { 4147 request_mixed_fonts(c, j, "I", "B", false, false); 4148 break; 4149 } 4150 case REQ_IR: { 4151 request_mixed_fonts(c, j, "I", "R", false, false); 4152 break; 4153 } 4154 case REQ_RB: { 4155 request_mixed_fonts(c, j, "R", "B", false, false); 4156 break; 4157 } 4158 case REQ_RI: { 4159 request_mixed_fonts(c, j, "R", "I", false, false); 4160 break; 4161 } 4162 case REQ_DT: // man(7) "Default Tabulators" 4163 { 4164 for (j = 0; j < 20; j++) 4165 tabstops[j] = (j + 1) * 8; 4166 maxtstop = 20; 4167 c = skip_till_newline(c); 4168 break; 4169 } 4170 case REQ_IP: // man(7) "Ident Paragraph" 4171 { 4172 c += j; 4173 getArguments(c, args); 4174 4175 if (!dl_set[itemdepth]) { 4176 out_html("<DL>\n"); 4177 dl_set[itemdepth] = 1; 4178 } 4179 out_html("<DT>"); 4180 4181 if (args.count()) 4182 scan_troff(args[0].data(), 1, nullptr); 4183 4184 out_html("</DT>\n<DD>"); 4185 listItemStack.push("DD"); 4186 curpos = 0; 4187 break; 4188 } 4189 case REQ_TP: // man(7) "hanging Tag Paragraph" 4190 { 4191 if (!dl_set[itemdepth]) { 4192 out_html("<DL>\n"); 4193 dl_set[itemdepth] = 1; 4194 } 4195 out_html(set_font("R")); 4196 out_html("<DT>"); 4197 c = skip_till_newline(c); 4198 /* somewhere a definition ends with '.TP' */ 4199 if (!*c) 4200 still_dd = true; 4201 else { 4202 // HACK for proc(5) 4203 while (c[0] == '.' && c[1] == '\\' && c[2] == '\"') { 4204 // We have a comment, so skip the line 4205 c = skip_till_newline(c); 4206 } 4207 c = scan_troff(c, 1, nullptr); 4208 out_html("<DD>"); 4209 listItemStack.push("DD"); 4210 } 4211 curpos = 0; 4212 break; 4213 } 4214 case REQ_IX: // Indexing term (printed on standard error) 4215 { 4216 c = skip_till_newline(c); // ignore 4217 break; 4218 } 4219 case REQ_P: // man(7) "Paragraph" 4220 case REQ_LP: // man(7) "Paragraph" 4221 case REQ_PP: // man(7) "Paragraph; reset Prevailing indent" 4222 { 4223 if (dl_set[itemdepth]) { 4224 out_html("</DL>\n"); 4225 dl_set[itemdepth] = 0; 4226 } else if (fillout) 4227 out_html("<br>"); 4228 4229 if (fillout) 4230 out_html("<br>\n"); 4231 else 4232 out_html(NEWLINE); 4233 4234 curpos = 0; 4235 c = skip_till_newline(c); 4236 break; 4237 } 4238 case REQ_HP: // man(7) "Hanging indent Paragraph" 4239 { 4240 if (!dl_set[itemdepth]) { 4241 out_html("<DL>"); 4242 dl_set[itemdepth] = 1; 4243 } 4244 out_html("<DT>\n"); 4245 still_dd = true; 4246 c = skip_till_newline(c); 4247 curpos = 0; 4248 break; 4249 } 4250 case REQ_PD: // man(7) "Paragraph Distance" 4251 { 4252 c = skip_till_newline(c); 4253 break; 4254 } 4255 case REQ_Rs: // mdoc(7) "Relative margin Start" 4256 case REQ_RS: // man(7) "Relative margin Start" 4257 { 4258 c += j; 4259 getArguments(c, args); 4260 j = 1; 4261 if (args.count() > 0) 4262 scan_expression(args[0].data(), &j); 4263 if (j >= 0) { 4264 itemdepth++; 4265 dl_set[itemdepth] = 0; 4266 out_html("<DL><DT></DT><DD>"); 4267 listItemStack.push("DD"); 4268 curpos = 0; 4269 } 4270 break; 4271 } 4272 case REQ_Re: // mdoc(7) "Relative margin End" 4273 case REQ_RE: // man(7) "Relative margin End" 4274 { 4275 if (itemdepth > 0) { 4276 if (dl_set[itemdepth]) 4277 out_html("</DL>"); 4278 out_html("</DL>\n"); 4279 itemdepth--; 4280 } 4281 c = skip_till_newline(c); 4282 curpos = 0; 4283 break; 4284 } 4285 case REQ_SB: // man(7) "Small; Bold" 4286 { 4287 out_html(set_font("B")); 4288 out_html("<small>"); 4289 c = scan_troff(c + j, 1, nullptr); 4290 out_html("</small>"); 4291 out_html(set_font("R")); 4292 break; 4293 } 4294 case REQ_SM: // man(7) "SMall" 4295 { 4296 c = c + j; 4297 if (*c == '\n') 4298 c++; 4299 out_html("<small>"); 4300 c = scan_troff(c, 1, nullptr); 4301 out_html("</small>"); 4302 break; 4303 } 4304 case REQ_Ss: // mdoc(7) "Sub Section" 4305 mandoc_command = 1; 4306 Q_FALLTHROUGH(); 4307 case REQ_SS: // mdoc(7) "Sub Section" 4308 mode = true; 4309 Q_FALLTHROUGH(); 4310 case REQ_Sh: // mdoc(7) "Sub Header" 4311 /* hack for fallthru from above */ 4312 mandoc_command = !mode || mandoc_command; 4313 Q_FALLTHROUGH(); 4314 case REQ_SH: // man(7) "Sub Header" 4315 { 4316 c = c + j; 4317 if (*c == '\n') 4318 c++; 4319 while (itemdepth || dl_set[itemdepth]) { 4320 out_html("</DL>\n"); 4321 if (dl_set[itemdepth]) 4322 dl_set[itemdepth] = 0; 4323 else if (itemdepth > 0) 4324 itemdepth--; 4325 } 4326 out_html(set_font("R")); 4327 out_html(change_to_size(0)); 4328 if (!fillout) { 4329 fillout = 1; 4330 out_html("</PRE>"); 4331 } 4332 trans_char(c, '"', '\a'); 4333 if (in_div) { 4334 out_html("</div>\n"); 4335 in_div = 0; 4336 } 4337 if (mode) 4338 out_html("\n<H3>"); 4339 else 4340 out_html("\n<H2>"); 4341 mandoc_synopsis = qstrncmp(c, "SYNOPSIS", 8) == 0; 4342 c = mandoc_command ? scan_troff_mandoc(c, 1, nullptr) : scan_troff(c, 1, nullptr); 4343 if (mode) 4344 out_html("</H3>\n"); 4345 else 4346 out_html("</H2>\n"); 4347 4348 out_html("<div>\n"); 4349 in_div = 1; 4350 curpos = 0; 4351 break; 4352 } 4353 case REQ_Sx: // mdoc(7) 4354 { 4355 // reference to a section header 4356 out_html(set_font("B")); 4357 trans_char(c, '"', '\a'); 4358 c = c + j; 4359 if (*c == '\n') 4360 c++; 4361 c = scan_troff(c, 1, nullptr); 4362 out_html(set_font("R")); 4363 out_html(NEWLINE); 4364 if (fillout) 4365 curpos++; 4366 else 4367 curpos = 0; 4368 break; 4369 } 4370 case REQ_St: // groff_mdoc 4371 { 4372 c += j; 4373 getArguments(c, args); 4374 if (args.count()) { 4375 bool found = false; 4376 for (const StandardName &standardName : STANDARD_NAMES) { 4377 if (args[0] == standardName.abbrev) { 4378 found = true; 4379 out_html(standardName.formalName); 4380 break; 4381 } 4382 } 4383 if (!found) // an unknown standard - print the abbreviation 4384 out_html(args[0]); 4385 } 4386 break; 4387 } 4388 case REQ_TS: // Table Start tbl(1) 4389 { 4390 c = scan_table(c); 4391 break; 4392 } 4393 case REQ_Dt: /* mdoc(7) */ 4394 mandoc_command = true; 4395 Q_FALLTHROUGH(); 4396 case REQ_TH: // man(7) "Title Header" 4397 { 4398 if (!output_possible) { 4399 c += j; 4400 getArguments(c, args); 4401 output_possible = true; 4402 out_html(DOCTYPE "<HTML>\n<HEAD>\n"); 4403 out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n"); 4404 out_html("<TITLE>"); 4405 if (args.count()) { 4406 // work around the problem that in a title no HTML tags are allowed 4407 // but args[0] can have formatting escapes, e.g. to switch a font 4408 // which results in a HTML tag added to the output 4409 char *result = nullptr; 4410 scan_troff(args[0].data(), 0, &result); 4411 char *p = result; 4412 QByteArray title; 4413 while (*p) { 4414 if (*p == '<') // tag begin -> skip whole tag 4415 { 4416 for (p++; *p && (*p != '>'); p++) 4417 ; 4418 if (*p) 4419 p++; 4420 } 4421 if (*p) 4422 title += *p++; 4423 } 4424 ignore_links = true; 4425 title += '\n'; // needed so that out_html flushes buffer and ignore_links works 4426 out_html(title); 4427 ignore_links = false; 4428 delete[] result; 4429 } 4430 out_html(" Manpage</TITLE>\n"); 4431 4432 // KDE defaults. 4433 out_html("<link rel=\"stylesheet\" href=\"help:/kdoctools5-common/kde-default.css\""); 4434 out_html(" type=\"text/css\">\n"); 4435 4436 // Output our custom stylesheet. 4437 out_html("<link rel=\"stylesheet\" href=\""); 4438 out_html(cssFile); 4439 out_html("\" type=\"text/css\">\n"); 4440 4441 // Some elements need background images, but this 4442 // could not be included in the stylesheet, 4443 // include it now. 4444 out_html( 4445 "<style type=\"text/css\">\n#header_top { " 4446 "background-image: url(\"help:/kdoctools5-common/top.jpg\"); }\n\n" 4447 "#header_top div { " 4448 "background-image: url(\"help:/kdoctools5-common/top-left.jpg\"); }\n\n" 4449 "#header_top div div { " 4450 "background-image: url(\"help:/kdoctools5-common/top-right.jpg\"); }\n\n" 4451 "</style>\n\n"); 4452 4453 out_html("<meta name=\"ROFF_Type\" content=\""); 4454 if (mandoc_command) 4455 out_html("mdoc"); 4456 else 4457 out_html("man"); 4458 out_html("\">\n"); 4459 4460 out_html("</HEAD>\n\n"); 4461 out_html("<BODY>\n\n"); 4462 4463 out_html("<div id=\"header\"><div id=\"header_top\">\n"); 4464 out_html("<div><div>\n"); 4465 out_html("<img src=\"help:/kdoctools5-common/top-kde.jpg\" alt=\"top-kde\"> "); 4466 if (args.count()) 4467 scan_troff(args[0].data(), 0, nullptr); 4468 out_html(" Manual Page"); 4469 out_html("</div></div></div></div>\n"); 4470 4471 out_html("<div style=\"margin-left: 5em; margin-right: 5em;\">\n"); 4472 out_html("<h1>"); 4473 if (args.count()) 4474 scan_troff(args[0].data(), 0, nullptr); 4475 out_html("</h1>\n"); 4476 if (args.count() > 1) { 4477 out_html("Section: "); 4478 if (!mandoc_command && (args.count() > 4)) 4479 scan_troff(args[4].data(), 0, nullptr); 4480 else 4481 out_html(section_name(args[1].data())); 4482 out_html(" ("); 4483 scan_troff(args[1].data(), 0, nullptr); 4484 out_html(")\n"); 4485 } else { 4486 out_html("Section not specified"); 4487 } 4488 } else { 4489 qCWarning(KIO_MAN_LOG) << ".TH found but output not possible"; 4490 c = skip_till_newline(c); 4491 } 4492 curpos = 0; 4493 break; 4494 } 4495 case REQ_TX: // mdoc(7) 4496 { 4497 c += j; 4498 getArguments(c, args); 4499 out_html(set_font("I")); 4500 const char *c2 = lookup_abbrev(args[0]); 4501 curpos += qstrlen(c2); 4502 out_html(c2); 4503 out_html(set_font("R")); 4504 if (args.count() > 1) 4505 out_html(args[1]); 4506 break; 4507 } 4508 case REQ_rm: // groff(7) "ReMove" 4509 /* .rm xx : Remove request, macro or string */ 4510 mode = true; 4511 Q_FALLTHROUGH(); 4512 case REQ_rn: // groff(7) "ReName" 4513 /* .rn xx yy : Rename request, macro or string xx to yy */ 4514 { 4515 qCDebug(KIO_MAN_LOG) << "start .rm/.rn"; 4516 c += j; 4517 const QByteArray name(scan_identifier(c)); 4518 if (name.isEmpty()) { 4519 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename"; 4520 break; 4521 } 4522 QByteArray name2; 4523 if (!mode) { 4524 while (*c && isspace(*c) && *c != '\n') 4525 ++c; 4526 name2 = scan_identifier(c); 4527 if (name2.isEmpty()) { 4528 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to rename"; 4529 break; 4530 } 4531 } 4532 c = skip_till_newline(c); 4533 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name); 4534 if (it == s_stringDefinitionMap.end()) { 4535 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to rename or remove: " << BYTEARRAY(name); 4536 } else { 4537 if (mode) { 4538 // .rm ReMove 4539 s_stringDefinitionMap.remove(name); // ### QT4: removeAll 4540 } else { 4541 // .rn ReName 4542 StringDefinition def = (*it); 4543 s_stringDefinitionMap.remove(name); // ### QT4: removeAll 4544 s_stringDefinitionMap.insert(name2, def); 4545 } 4546 } 4547 qCDebug(KIO_MAN_LOG) << "end .rm/.rn"; 4548 break; 4549 } 4550 case REQ_nx: 4551 case REQ_in: // groff(7) "INdent" 4552 { 4553 /* .in +-N : Indent */ 4554 c = skip_till_newline(c); 4555 break; 4556 } 4557 case REQ_nr: // groff(7) "Number Register" 4558 { 4559 qCDebug(KIO_MAN_LOG) << "start .nr"; 4560 c += j; 4561 const QByteArray name(scan_identifier(c)); 4562 if (name.isEmpty()) { 4563 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty name for register variable"; 4564 break; 4565 } 4566 while (*c && (*c == ' ' || *c == '\t')) 4567 c++; 4568 int sign = 0; 4569 if (*c && (*c == '+' || *c == '-')) { 4570 if (*c == '+') 4571 sign = 1; 4572 else if (*c == '-') 4573 sign = -1; 4574 } 4575 int value = 0; 4576 int increment = 0; 4577 c = scan_expression(c, &value); 4578 if (*c && *c != '\n') { 4579 while (*c && (*c == ' ' || *c == '\t')) 4580 c++; 4581 c = scan_expression(c, &increment); 4582 } 4583 c = skip_till_newline(c); 4584 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name); 4585 if (it == s_numberDefinitionMap.end()) { 4586 if (sign < 1) 4587 value = -value; 4588 NumberDefinition def(value, increment); 4589 s_numberDefinitionMap.insert(name, def); 4590 } else { 4591 if (sign > 0) 4592 (*it).m_value += value; 4593 else if (sign < 0) 4594 (*it).m_value += -value; 4595 else 4596 (*it).m_value = value; 4597 (*it).m_increment = increment; 4598 } 4599 qCDebug(KIO_MAN_LOG) << "end .nr"; 4600 break; 4601 } 4602 case REQ_am: // groff(7) "Append Macro" 4603 /* .am xx yy : append to a macro. */ 4604 /* define or handle as .ig yy */ 4605 mode = true; 4606 Q_FALLTHROUGH(); 4607 case REQ_de: // groff(7) "DEfine macro" 4608 case REQ_de1: // groff(7) "DEfine macro" 4609 { 4610 /* .de xx yy : define or redefine macro xx; end at .yy (..) */ 4611 /* define or handle as .ig yy */ 4612 qCDebug(KIO_MAN_LOG) << "Start .am/.de"; 4613 c += j; 4614 getArguments(c, args); 4615 if (args.count() == 0) 4616 break; 4617 4618 const QByteArray name(args[0]); 4619 4620 QByteArray endmacro; 4621 if (args.count() == 1) 4622 endmacro = ".."; 4623 else 4624 endmacro = "." + args[1]; // krazy:exclude=doublequote_chars 4625 4626 sl = c; 4627 while (*c && qstrncmp(c, endmacro, endmacro.length())) 4628 c = skip_till_newline(c); 4629 4630 QByteArray macro; 4631 while (sl != c) { 4632 if (sl[0] == '\\' && sl[1] == '\\') { 4633 macro += '\\'; 4634 sl++; 4635 } else 4636 macro += *sl; 4637 sl++; 4638 } 4639 4640 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name); 4641 if (it == s_stringDefinitionMap.end()) { 4642 StringDefinition def; 4643 def.m_length = 0; 4644 def.m_output = macro; 4645 s_stringDefinitionMap.insert(name, def); 4646 } else if (mode) { 4647 // .am Append Macro 4648 (*it).m_length = 0; // It could be formerly a string 4649 if (!(*it).m_output.endsWith('\n')) 4650 (*it).m_output += '\n'; 4651 (*it).m_output += macro; 4652 } else { 4653 // .de DEfine macro 4654 (*it).m_length = 0; // It could be formerly a string 4655 (*it).m_output = macro; 4656 } 4657 c = skip_till_newline(c); 4658 qCDebug(KIO_MAN_LOG) << "End .am/.de"; 4659 break; 4660 } 4661 case REQ_Bl: // mdoc(7) "Begin List" 4662 { 4663 QByteArray list_options; 4664 char *nl = strchr(c, '\n'); 4665 c = c + j; 4666 if (dl_set[itemdepth]) { 4667 /* These things can nest. */ 4668 itemdepth++; 4669 } 4670 if (nl) { 4671 /* Parse list options */ 4672 list_options = QByteArray(c, nl - c); 4673 } 4674 if (list_options.contains("-bullet")) { 4675 /* HTML Unnumbered List */ 4676 dl_set[itemdepth] = BL_BULLET_LIST; 4677 out_html("<UL>\n"); 4678 } else if (list_options.contains("-enum")) { 4679 /* HTML Ordered List */ 4680 dl_set[itemdepth] = BL_ENUM_LIST; 4681 out_html("<OL>\n"); 4682 } else { 4683 /* HTML Descriptive List */ 4684 dl_set[itemdepth] = BL_DESC_LIST; 4685 out_html("<DL>\n"); 4686 } 4687 curpos = 0; 4688 c = skip_till_newline(c); 4689 break; 4690 } 4691 case REQ_El: // mdoc(7) "End List" 4692 { 4693 checkListStack(); 4694 c = c + j; 4695 if (dl_set[itemdepth] & BL_DESC_LIST) 4696 out_html("</DL>\n"); 4697 else if (dl_set[itemdepth] & BL_BULLET_LIST) 4698 out_html("</UL>\n"); 4699 else if (dl_set[itemdepth] & BL_ENUM_LIST) 4700 out_html("</OL>\n"); 4701 dl_set[itemdepth] = 0; 4702 if (itemdepth > 0) 4703 itemdepth--; 4704 if (!fillout) 4705 out_html(NEWLINE); 4706 4707 curpos = 0; 4708 c = skip_till_newline(c); 4709 break; 4710 } 4711 case REQ_It: // mdoc(7) "list ITem" 4712 { 4713 checkListStack(); 4714 c = c + j; 4715 // if (qstrncmp(c, "Xo", 2) == 0 && isspace(*(c + 2))) 4716 // c = skip_till_newline(c); 4717 if (dl_set[itemdepth] & BL_DESC_LIST) { 4718 out_html("<DT>"); 4719 out_html(set_font("B")); 4720 if (*c == '\n') { 4721 /* Don't allow embedded comms after a newline */ 4722 c++; 4723 c = scan_troff(c, 1, nullptr); 4724 } else { 4725 /* Do allow embedded comms on the same line. */ 4726 c = scan_troff_mandoc(c, 1, nullptr); 4727 } 4728 out_html(set_font("R")); 4729 out_html("</DT>"); 4730 out_html(NEWLINE); 4731 out_html("<DD>"); 4732 listItemStack.push("DD"); 4733 } else if (dl_set[itemdepth] & (BL_BULLET_LIST | BL_ENUM_LIST)) { 4734 out_html("<LI>"); 4735 listItemStack.push("LI"); 4736 c = scan_troff_mandoc(c, 1, nullptr); 4737 out_html(NEWLINE); 4738 } 4739 if (fillout) 4740 curpos++; 4741 else 4742 curpos = 0; 4743 break; 4744 } 4745 case REQ_Bk: /* mdoc(7) */ 4746 case REQ_Ek: /* mdoc(7) */ 4747 case REQ_Dd: /* mdoc(7) */ 4748 case REQ_Os: // mdoc(7) "Operating System" 4749 case REQ_Sm: // mdoc(7) space mode 4750 c = skip_till_newline(c); // TODO 4751 break; 4752 case REQ_Bt: // mdoc(7) "Beta Test" 4753 { 4754 // trans_char(c, '"', '\a'); 4755 // c = c + j; 4756 out_html(" is currently in beta test."); 4757 if (fillout) 4758 curpos++; 4759 else 4760 curpos = 0; 4761 break; 4762 } 4763 case REQ_At: /* mdoc(7) */ 4764 case REQ_Fx: /* mdoc(7) */ 4765 case REQ_Nx: /* mdoc(7) */ 4766 case REQ_Ox: /* mdoc(7) */ 4767 case REQ_Bx: /* mdoc(7) */ 4768 case REQ_Ux: /* mdoc(7) */ 4769 case REQ_Dx: /* mdoc(7) */ 4770 { 4771 bool parsable = true; 4772 trans_char(c, '"', '\a'); 4773 c = c + j; 4774 if (*c == '\n') 4775 c++; 4776 if (request == REQ_At) { 4777 out_html("AT&T UNIX "); 4778 parsable = false; 4779 } else if (request == REQ_Fx) { 4780 out_html("FreeBSD "); 4781 parsable = false; 4782 } else if (request == REQ_Nx) 4783 out_html("NetBSD "); 4784 else if (request == REQ_Ox) 4785 out_html("OpenBSD "); 4786 else if (request == REQ_Bx) 4787 out_html("BSD "); 4788 else if (request == REQ_Ux) 4789 out_html("UNIX "); 4790 else if (request == REQ_Dx) 4791 out_html("DragonFly "); 4792 if (parsable) 4793 c = scan_troff_mandoc(c, 1, nullptr); 4794 else 4795 c = scan_troff(c, 1, nullptr); 4796 if (fillout) 4797 curpos++; 4798 else 4799 curpos = 0; 4800 break; 4801 } 4802 case REQ_Dl: /* mdoc(7) */ 4803 { 4804 c = c + j; 4805 out_html(NEWLINE); 4806 out_html("<BLOCKQUOTE>"); 4807 if (*c == '\n') 4808 c++; 4809 c = scan_troff_mandoc(c, 1, nullptr); 4810 out_html("</BLOCKQUOTE>"); 4811 if (fillout) 4812 curpos++; 4813 else 4814 curpos = 0; 4815 break; 4816 } 4817 case REQ_Bd: /* mdoc(7) */ 4818 { /* Seems like a kind of example/literal mode */ 4819 QByteArray bd_options; 4820 char *nl = strchr(c, '\n'); 4821 c = c + j; 4822 if (nl) 4823 bd_options = QByteArray(c, nl - c); 4824 out_html(NEWLINE); 4825 mandoc_bd_options = 0; /* Remember options for terminating Bl */ 4826 if (bd_options.contains("-offset indent")) { 4827 mandoc_bd_options |= BD_INDENT; 4828 out_html("<BLOCKQUOTE>\n"); 4829 } 4830 if (bd_options.contains("-literal") || bd_options.contains("-unfilled")) { 4831 if (fillout) { 4832 mandoc_bd_options |= BD_LITERAL; 4833 out_html(set_font("R")); 4834 out_html(change_to_size('0')); 4835 out_html("<PRE>\n"); 4836 } 4837 curpos = 0; 4838 fillout = 0; 4839 } 4840 c = skip_till_newline(c); 4841 break; 4842 } 4843 case REQ_Ed: /* mdoc(7) */ 4844 { 4845 if (mandoc_bd_options & BD_LITERAL) { 4846 if (!fillout) { 4847 out_html(set_font("R")); 4848 out_html(change_to_size('0')); 4849 out_html("</PRE>\n"); 4850 } 4851 } 4852 if (mandoc_bd_options & BD_INDENT) 4853 out_html("</BLOCKQUOTE>\n"); 4854 curpos = 0; 4855 fillout = 1; 4856 c = skip_till_newline(c); 4857 break; 4858 } 4859 case REQ_Be: /* mdoc(7) */ 4860 { 4861 c = c + j; 4862 if (fillout) 4863 out_html("<br><br>"); 4864 else { 4865 out_html(NEWLINE); 4866 } 4867 curpos = 0; 4868 c = skip_till_newline(c); 4869 break; 4870 } 4871 case REQ_Xr: /* mdoc(7) */ // ### FIXME: it should issue a <a href="man:somewhere(x)"> directly 4872 { 4873 /* Translate xyz 1 to xyz(1) 4874 * Allow for multiple spaces. Allow the section to be missing. 4875 */ 4876 char buff[NULL_TERMINATED(MED_STR_MAX)]; 4877 char *bufptr; 4878 trans_char(c, '"', '\a'); 4879 bufptr = buff; 4880 c = c + j; 4881 if (*c == '\n') 4882 c++; /* Skip spaces */ 4883 while (isspace(*c) && *c != '\n') 4884 c++; 4885 while (isalnum(*c) || *c == '.' || *c == ':' || *c == '_' || *c == '-') { 4886 /* Copy the xyz part */ 4887 *bufptr = *c; 4888 bufptr++; 4889 if (bufptr >= buff + MED_STR_MAX) 4890 break; 4891 c++; 4892 } 4893 while (isspace(*c) && *c != '\n') 4894 c++; /* Skip spaces */ 4895 if (isdigit(*c)) { 4896 /* Convert the number if there is one */ 4897 *bufptr = '('; 4898 bufptr++; 4899 if (bufptr < buff + MED_STR_MAX) { 4900 while (isalnum(*c)) { 4901 *bufptr = *c; 4902 bufptr++; 4903 if (bufptr >= buff + MED_STR_MAX) 4904 break; 4905 c++; 4906 } 4907 if (bufptr < buff + MED_STR_MAX) { 4908 *bufptr = ')'; 4909 bufptr++; 4910 } 4911 } 4912 } 4913 while (*c != '\n') { 4914 /* Copy the remainder */ 4915 if (!isspace(*c)) { 4916 *bufptr = *c; 4917 bufptr++; 4918 if (bufptr >= buff + MED_STR_MAX) 4919 break; 4920 } 4921 c++; 4922 } 4923 *bufptr = '\n'; 4924 bufptr[1] = 0; 4925 scan_troff_mandoc(buff, 1, nullptr); 4926 out_html(NEWLINE); 4927 if (fillout) 4928 curpos++; 4929 else 4930 curpos = 0; 4931 break; 4932 } 4933 case REQ_Fl: // mdoc(7) "FLags" 4934 { 4935 // trans_char(c, '"', '\a'); 4936 c += j; 4937 QList<char *> argPointers; 4938 getArguments(c, args, &argPointers); 4939 out_html(set_font("B")); 4940 out_html("-"); 4941 if (args.count() == 0) { 4942 /*out_html("-");*/ // stdin or stdout 4943 } else { 4944 if (argPointers.count()) 4945 scan_troff_mandoc(argPointers[0], 1, nullptr); 4946 /* 4947 for (i = 0; i < args.count(); ++i) 4948 { 4949 if (ispunct(args[i][0]) && args[i][0] != '-') 4950 { 4951 scan_troff_mandoc(argPointers[i], 1, NULL); 4952 } 4953 else 4954 { 4955 if (i > 0) 4956 out_html(" "); // Put a space between flags 4957 out_html("-"); 4958 scan_troff_mandoc(argPointers[i], 1, NULL); 4959 } 4960 } 4961 */ 4962 } 4963 out_html(set_font("R")); 4964 out_html(NEWLINE); 4965 if (fillout) 4966 curpos++; 4967 else 4968 curpos = 0; 4969 break; 4970 } 4971 case REQ_Pa: /* mdoc(7) */ 4972 case REQ_Pf: /* mdoc(7) */ 4973 { 4974 trans_char(c, '"', '\a'); 4975 c = c + j; 4976 if (*c == '\n') 4977 c++; 4978 c = scan_troff_mandoc(c, 1, nullptr); 4979 out_html(NEWLINE); 4980 if (fillout) 4981 curpos++; 4982 else 4983 curpos = 0; 4984 break; 4985 } 4986 case REQ_Pp: /* mdoc(7) */ 4987 { 4988 if (fillout) 4989 out_html("<br><br>\n"); 4990 else { 4991 out_html(NEWLINE); 4992 } 4993 curpos = 0; 4994 c = skip_till_newline(c); 4995 break; 4996 } 4997 case REQ_Aq: // mdoc(7) "Angle bracket Quote" 4998 c = process_quote(c, j, "<", ">"); 4999 break; 5000 case REQ_Bq: // mdoc(7) "Bracket Quote" 5001 c = process_quote(c, j, "[", "]"); 5002 break; 5003 case REQ_Dq: // mdoc(7) "Double Quote" 5004 c = process_quote(c, j, "“", "”"); 5005 break; 5006 case REQ_Pq: // mdoc(7) "Parenthese Quote" 5007 c = process_quote(c, j, "(", ")"); 5008 break; 5009 case REQ_Qq: // mdoc(7) "straight double Quote" 5010 c = process_quote(c, j, """, """); 5011 break; 5012 case REQ_Sq: // mdoc(7) "Single Quote" 5013 c = process_quote(c, j, "‘", "’"); 5014 break; 5015 case REQ_Op: /* mdoc(7) */ 5016 { 5017 trans_char(c, '"', '\a'); 5018 c += j; 5019 if (*c == '\n') 5020 c++; 5021 out_html(set_font("R")); 5022 out_html("["); 5023 c = scan_troff_mandoc(c, 1, nullptr); 5024 out_html(set_font("R")); 5025 out_html("]"); 5026 out_html(NEWLINE); 5027 if (fillout) 5028 curpos++; 5029 else 5030 curpos = 0; 5031 break; 5032 } 5033 case REQ_Oo: /* mdoc(7) */ 5034 { 5035 trans_char(c, '"', '\a'); 5036 c += j; 5037 if (*c == '\n') 5038 c++; 5039 out_html(set_font("R")); 5040 out_html("["); 5041 c = scan_troff_mandoc(c, 1, nullptr); 5042 if (fillout) 5043 curpos++; 5044 else 5045 curpos = 0; 5046 break; 5047 } 5048 case REQ_Oc: /* mdoc(7) */ 5049 { 5050 trans_char(c, '"', '\a'); 5051 c += j; 5052 out_html(set_font("R")); 5053 out_html("]"); 5054 c = scan_troff_mandoc(c, 1, nullptr); 5055 if (fillout) 5056 curpos++; 5057 else 5058 curpos = 0; 5059 break; 5060 } 5061 case REQ_Ql: /* mdoc(7) */ 5062 { 5063 /* Single quote first word in the line */ 5064 char *sp; 5065 trans_char(c, '"', '\a'); 5066 c = c + j; 5067 if (*c == '\n') 5068 c++; 5069 sp = c; 5070 do { 5071 /* Find first whitespace after the 5072 * first word that isn't a mandoc macro 5073 */ 5074 while (*sp && isspace(*sp)) 5075 sp++; 5076 while (*sp && !isspace(*sp)) 5077 sp++; 5078 } while (*sp && isupper(*(sp - 2)) && islower(*(sp - 1))); 5079 5080 /* Use a newline to mark the end of text to 5081 * be quoted 5082 */ 5083 if (*sp) 5084 *sp = '\n'; 5085 out_html("`"); /* Quote the text */ 5086 c = scan_troff_mandoc(c, 1, nullptr); 5087 out_html("'"); 5088 out_html(NEWLINE); 5089 if (fillout) 5090 curpos++; 5091 else 5092 curpos = 0; 5093 break; 5094 } 5095 case REQ_Ar: /* mdoc(7) */ 5096 { 5097 /* parse one line in italics */ 5098 out_html(set_font("I")); 5099 c += j; 5100 QList<char *> argPointers; 5101 getArguments(c, args, &argPointers); 5102 if (args.count() == 0) { 5103 // An empty Ar means "file ..." 5104 out_html("file ..."); 5105 } else { 5106 if (argPointers.count()) 5107 c = scan_troff_mandoc(argPointers[0], 1, nullptr); 5108 } 5109 5110 out_html(set_font("R")); 5111 out_html(NEWLINE); 5112 if (fillout) 5113 curpos++; 5114 else 5115 curpos = 0; 5116 break; 5117 } 5118 case REQ_Em: /* mdoc(7) */ 5119 { 5120 out_html("<em>"); 5121 trans_char(c, '"', '\a'); 5122 c += j; 5123 if (*c == '\n') 5124 c++; 5125 c = scan_troff_mandoc(c, 1, nullptr); 5126 out_html("</em>"); 5127 out_html(NEWLINE); 5128 if (fillout) 5129 curpos++; 5130 else 5131 curpos = 0; 5132 break; 5133 } 5134 case REQ_Ad: /* mdoc(7) */ 5135 case REQ_Va: /* mdoc(7) */ 5136 case REQ_Xo: /* mdoc(7) */ 5137 case REQ_Xc: /* mdoc(7) */ 5138 { 5139 /* parse one line in italics */ 5140 out_html(set_font("I")); 5141 trans_char(c, '"', '\a'); 5142 c = c + j; 5143 if (*c == '\n') 5144 c++; 5145 c = scan_troff_mandoc(c, 1, nullptr); 5146 out_html(set_font("R")); 5147 out_html(NEWLINE); 5148 if (fillout) 5149 curpos++; 5150 else 5151 curpos = 0; 5152 break; 5153 } 5154 case REQ_Nd: /* mdoc(7) */ 5155 { 5156 trans_char(c, '"', '\a'); 5157 c = c + j; 5158 if (*c == '\n') 5159 c++; 5160 out_html(" - "); 5161 c = scan_troff_mandoc(c, 1, nullptr); 5162 out_html(NEWLINE); 5163 if (fillout) 5164 curpos++; 5165 else 5166 curpos = 0; 5167 break; 5168 } 5169 case REQ_Nm: // mdoc(7) "Name Macro" 5170 { 5171 c += j; 5172 QList<char *> argPointers; 5173 getArguments(c, args, &argPointers); 5174 5175 if (mandoc_name.isEmpty() && args.count()) 5176 mandoc_name = args[0]; 5177 5178 if (mandoc_synopsis) { 5179 /* Break lines only in the Synopsis. 5180 * The Synopsis section seems to be treated 5181 * as a special case - Bummer! 5182 * Do not insert a break before the very first Nm in this section 5183 */ 5184 5185 if (mandoc_name_count) 5186 out_html("<BR>"); 5187 5188 mandoc_name_count++; 5189 } 5190 5191 out_html(set_font("B")); 5192 5193 // only show name if 5194 // .Nm (first not-null-length defined name) 5195 // .Nm name 5196 // do not show 5197 // .Nm "" 5198 if (args.count() == 0) 5199 scan_troff(mandoc_name.data(), 0, nullptr); 5200 else { 5201 if (argPointers.count()) 5202 c = scan_troff_mandoc(argPointers[0], 1, nullptr); 5203 } 5204 5205 out_html(set_font("R")); 5206 5207 if (fillout) 5208 curpos++; 5209 else 5210 curpos = 0; 5211 break; 5212 } 5213 case REQ_Cd: /* mdoc(7) */ 5214 case REQ_Cm: /* mdoc(7) */ 5215 case REQ_Ic: /* mdoc(7) */ 5216 case REQ_Ms: /* mdoc(7) */ 5217 case REQ_Or: /* mdoc(7) */ 5218 case REQ_Sy: /* mdoc(7) */ 5219 { 5220 /* parse one line in bold */ 5221 out_html(set_font("B")); 5222 trans_char(c, '"', '\a'); 5223 c = c + j; 5224 if (*c == '\n') 5225 c++; 5226 c = scan_troff_mandoc(c, 1, nullptr); 5227 out_html(set_font("R")); 5228 out_html(NEWLINE); 5229 if (fillout) 5230 curpos++; 5231 else 5232 curpos = 0; 5233 break; 5234 } 5235 case REQ_Ta: /* mdoc(7) */ 5236 { 5237 // ### FIXME: this is a simplification 5238 // for a list item element in a ".Bl -tag -width indent" type list 5239 // man:mdoc says: "indent == Six constant width spaces" 5240 out_html(" "); 5241 c = c + j; 5242 if (*c == '\n') 5243 c++; 5244 break; 5245 } 5246 // ### FIXME: punctuation is handled badly! 5247 case REQ_Dv: /* mdoc(7) */ 5248 case REQ_Ev: /* mdoc(7) */ 5249 case REQ_Fr: /* mdoc(7) */ 5250 case REQ_Li: /* mdoc(7) */ 5251 case REQ_nN: /* mdoc(7) */ 5252 { 5253 trans_char(c, '"', '\a'); 5254 c += j; 5255 if (*c == '\n') 5256 c++; 5257 out_html(set_font("B")); 5258 c = scan_troff_mandoc(c, 1, nullptr); 5259 out_html(set_font("R")); 5260 out_html(NEWLINE); 5261 if (fillout) 5262 curpos++; 5263 else 5264 curpos = 0; 5265 break; 5266 } 5267 case REQ_Tn: /* mdoc(7) Trade Names ... prints its arguments in a smaller font */ 5268 { 5269 trans_char(c, '"', '\a'); 5270 c += j; 5271 if (*c == '\n') 5272 c++; 5273 out_html("<small>"); 5274 c = scan_troff_mandoc(c, 1, NULL); 5275 out_html("</small>"); 5276 if (fillout) 5277 curpos++; 5278 else 5279 curpos = 0; 5280 break; 5281 } 5282 case REQ_Ns: /* mdoc(7) No-Space Macro */ 5283 { 5284 c += j; 5285 while (*c && isspace(*c) && (*c != '\n')) 5286 c++; 5287 Q_FALLTHROUGH(); // (The '.Ns' macro always invokes the '.No' macro...) 5288 } 5289 case REQ_No: /* mdoc(7) Normal Text Macro */ 5290 { 5291 if (request == REQ_No) // not fallen through from REQ_Ns 5292 { 5293 trans_char(c, '"', '\a'); 5294 c += j; 5295 if (*c == '\n') 5296 c++; 5297 } 5298 out_html("<span style=\"font-style:normal\">"); 5299 c = scan_troff_mandoc(c, 1, NULL); 5300 out_html("</span>"); 5301 out_html(NEWLINE); 5302 if (fillout) 5303 curpos++; 5304 else 5305 curpos = 0; 5306 break; 5307 } 5308 case REQ_perc_A: /* mdoc(7) biblio stuff */ 5309 case REQ_perc_D: 5310 case REQ_perc_N: 5311 case REQ_perc_O: 5312 case REQ_perc_P: 5313 case REQ_perc_Q: 5314 case REQ_perc_V: { 5315 c = c + j; 5316 if (*c == '\n') 5317 c++; 5318 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */ 5319 if (fillout) 5320 curpos++; 5321 else 5322 curpos = 0; 5323 break; 5324 } 5325 case REQ_perc_B: 5326 case REQ_perc_J: 5327 case REQ_perc_R: 5328 case REQ_perc_T: { 5329 c = c + j; 5330 out_html(set_font("I")); 5331 if (*c == '\n') 5332 c++; 5333 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */ 5334 out_html(set_font("R")); 5335 if (fillout) 5336 curpos++; 5337 else 5338 curpos = 0; 5339 break; 5340 } 5341 case REQ_URL: // man(7) ".URL url link trailer" 5342 { 5343 c += j; 5344 5345 getArguments(c, args); 5346 ignore_links = true; 5347 out_html("<a href=\""); 5348 5349 if (args.count() > 0) 5350 scan_troff(args[0].data(), 0, nullptr); 5351 5352 out_html("\">"); 5353 if (args.count() > 1) 5354 scan_troff(args[1].data(), 0, nullptr); 5355 5356 out_html("</a>\n"); // trailing newline important to make ignore_links work 5357 ignore_links = false; 5358 5359 if (args.count() > 2) 5360 scan_troff(args[2].data(), 1, nullptr); 5361 5362 break; 5363 } 5364 case REQ_tr: // translate TODO 5365 { 5366 c = skip_till_newline(c); 5367 break; 5368 } 5369 case REQ_nroff: // groff(7) "NROFF mode" 5370 mode = true; 5371 Q_FALLTHROUGH(); 5372 case REQ_troff: // groff(7) "TROFF mode" 5373 { 5374 s_nroff = mode; 5375 c += j; 5376 c = skip_till_newline(c); 5377 break; 5378 } 5379 case REQ_als: // groff(7) "ALias String" 5380 { 5381 /* 5382 * Note an alias is supposed to be something like a hard link 5383 * However to make it simplier, we only copy the string. 5384 */ 5385 // Be careful: unlike .rn, the destination is first, origin is second 5386 qCDebug(KIO_MAN_LOG) << "start .als"; 5387 c += j; 5388 const QByteArray name(scan_identifier(c)); 5389 if (name.isEmpty()) { 5390 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to alias"; 5391 break; 5392 } 5393 while (*c && isspace(*c) && *c != '\n') 5394 ++c; 5395 const QByteArray name2(scan_identifier(c)); 5396 if (name2.isEmpty()) { 5397 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to alias"; 5398 break; 5399 } 5400 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name); 5401 c = skip_till_newline(c); 5402 if (name == name2) { 5403 qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination string to alias: " << BYTEARRAY(name); 5404 break; 5405 } 5406 // Second parameter is origin (unlike in .rn) 5407 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name2); 5408 if (it == s_stringDefinitionMap.end()) { 5409 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias of " << BYTEARRAY(name2); 5410 } else { 5411 StringDefinition def = (*it); 5412 s_stringDefinitionMap.insert(name, def); 5413 } 5414 qCDebug(KIO_MAN_LOG) << "end .als"; 5415 break; 5416 } 5417 case REQ_rr: // groff(7) "Remove number Register" 5418 { 5419 qCDebug(KIO_MAN_LOG) << "start .rr"; 5420 c += j; 5421 const QByteArray name(scan_identifier(c)); 5422 if (name.isEmpty()) { 5423 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename: "; 5424 break; 5425 } 5426 c = skip_till_newline(c); 5427 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name); 5428 if (it == s_numberDefinitionMap.end()) { 5429 qCDebug(KIO_MAN_LOG) << "EXCEPTION: trying to remove inexistant number register: "; 5430 } else { 5431 s_numberDefinitionMap.remove(name); 5432 } 5433 qCDebug(KIO_MAN_LOG) << "end .rr"; 5434 break; 5435 } 5436 case REQ_rnn: // groff(7) "ReName Number register" 5437 { 5438 qCDebug(KIO_MAN_LOG) << "start .rnn"; 5439 c += j; 5440 const QByteArray name(scan_identifier(c)); 5441 if (name.isEmpty()) { 5442 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin to remove/rename number register"; 5443 break; 5444 } 5445 while (*c && isspace(*c) && *c != '\n') 5446 ++c; 5447 const QByteArray name2(scan_identifier(c)); 5448 if (name2.isEmpty()) { 5449 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination to rename number register"; 5450 break; 5451 } 5452 c = skip_till_newline(c); 5453 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name); 5454 if (it == s_numberDefinitionMap.end()) { 5455 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find number register to rename" << BYTEARRAY(name); 5456 } else { 5457 NumberDefinition def = (*it); 5458 s_numberDefinitionMap.remove(name); // ### QT4: removeAll 5459 s_numberDefinitionMap.insert(name2, def); 5460 } 5461 qCDebug(KIO_MAN_LOG) << "end .rnn"; 5462 break; 5463 } 5464 case REQ_aln: // groff(7) "ALias Number Register" 5465 { 5466 /* 5467 * Note an alias is supposed to be something like a hard link 5468 * However to make it simplier, we only copy the string. 5469 */ 5470 // Be careful: unlike .rnn, the destination is first, origin is second 5471 qCDebug(KIO_MAN_LOG) << "start .aln"; 5472 c += j; 5473 const QByteArray name(scan_identifier(c)); 5474 if (name.isEmpty()) { 5475 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination number register to alias"; 5476 break; 5477 } 5478 while (*c && isspace(*c) && *c != '\n') 5479 ++c; 5480 const QByteArray name2(scan_identifier(c)); 5481 if (name2.isEmpty()) { 5482 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin number register to alias"; 5483 break; 5484 } 5485 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name); 5486 c = skip_till_newline(c); 5487 if (name == name2) { 5488 qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination number register to alias: " << BYTEARRAY(name); 5489 break; 5490 } 5491 // Second parameter is origin (unlike in .rnn) 5492 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name2); 5493 if (it == s_numberDefinitionMap.end()) { 5494 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias: " << BYTEARRAY(name2); 5495 } else { 5496 NumberDefinition def = (*it); 5497 s_numberDefinitionMap.insert(name, def); 5498 } 5499 qCDebug(KIO_MAN_LOG) << "end .aln"; 5500 break; 5501 } 5502 case REQ_shift: // groff(7) "SHIFT parameter" 5503 { 5504 c += j; 5505 h = c; 5506 while (*h && *h != '\n' && isdigit(*h)) 5507 ++h; 5508 const char tempchar = *h; 5509 *h = 0; 5510 const QByteArray number(c); 5511 *h = tempchar; 5512 c = skip_till_newline(h); 5513 unsigned int result = 1; // Numbers of shifts to do 5514 if (!number.isEmpty()) { 5515 bool ok = false; 5516 result = number.toUInt(&ok); 5517 if (!ok || result < 1) 5518 result = 1; 5519 } 5520 for (unsigned int num = 0; num < result; ++num) { 5521 if (!s_argumentList.isEmpty()) 5522 s_argumentList.pop_front(); 5523 } 5524 break; 5525 } 5526 case REQ_while: // groff(7) "WHILE loop" 5527 { 5528 request_while(c, j, mandoc_command); 5529 break; 5530 } 5531 case REQ_break: // groff(7) Break out of a while loop. 5532 { 5533 c += j; 5534 break_the_while_loop = true; 5535 break; 5536 } 5537 case REQ_do: // groff(7) "DO command" 5538 { 5539 // ### HACK: we just replace do by a \n and a . 5540 *c = '\n'; 5541 c++; 5542 *c = '.'; 5543 // The . will be treated as next character 5544 break; 5545 } 5546 case REQ_nop: // groff(7) nop 5547 { 5548 c += j; 5549 break; 5550 } 5551 default: { 5552 if (mandoc_command && ((isupper(*c) && islower(*(c + 1))) || (islower(*c) && isupper(*(c + 1))))) { 5553 /* Let through any mdoc(7) commands that haven't 5554 * been delt with. 5555 * I don't want to miss anything out of the text. 5556 */ 5557 char buf[4] = {c[0], c[1], ' ', 0}; 5558 out_html(buf); /* Print the command (it might just be text). */ 5559 c = c + j; 5560 trans_char(c, '"', '\a'); 5561 if (*c == '\n') 5562 c++; 5563 out_html(set_font("R")); 5564 c = scan_troff(c, 1, nullptr); 5565 out_html(NEWLINE); 5566 if (fillout) 5567 curpos++; 5568 else 5569 curpos = 0; 5570 } else 5571 c = skip_till_newline(c); 5572 break; 5573 } 5574 } 5575 } 5576 } 5577 if (fillout) { 5578 out_html(NEWLINE); 5579 curpos++; 5580 } 5581 return c; 5582 } 5583 5584 //--------------------------------------------------------------------- 5585 5586 static int contained_tab = 0; 5587 static bool mandoc_line = false; // Signals whether to look for embedded mandoc commands. 5588 5589 static char *scan_troff(char *c, bool san, char **result) 5590 { /* san : stop at newline */ 5591 QByteArray intbuff; 5592 intbuff.reserve(MED_STR_MAX); 5593 #define FLUSHIBP \ 5594 { \ 5595 out_html(intbuff); \ 5596 intbuff.clear(); \ 5597 } 5598 char *exbuffer; 5599 int exbuffpos, exbuffmax, exnewline_for_fun; 5600 bool exscaninbuff; 5601 int usenbsp = 0; 5602 5603 exbuffer = buffer; 5604 exbuffpos = buffpos; 5605 exbuffmax = buffmax; 5606 exnewline_for_fun = newline_for_fun; 5607 exscaninbuff = scaninbuff; 5608 newline_for_fun = 0; 5609 if (result) { 5610 if (*result) { 5611 buffer = *result; 5612 buffpos = qstrlen(buffer); 5613 buffmax = buffpos; 5614 } else { 5615 buffer = new char[LARGE_STR_MAX + 1]; 5616 buffpos = 0; 5617 buffmax = LARGE_STR_MAX; 5618 } 5619 scaninbuff = true; 5620 } 5621 char *h = c; // ### FIXME below are too many tests that may go before the position of c 5622 /* start scanning */ 5623 5624 while (h && *h && (!san || newline_for_fun || (*h != '\n')) && !break_the_while_loop) { 5625 if (*h == escapesym) { 5626 h++; 5627 FLUSHIBP; 5628 // ###HACK: I think after escape expansion, the line should be reparsed 5629 // (this seems to be what troff does), but it would double-escape 5630 // HTML chars, e.g. the first escape produces "<span...", the second 5631 // would change that to <span... 5632 // Therefore work around some man pages (e.g. nmap, smb.conf), 5633 // which have \." at beginning of 5634 // line (probably just typos), but troff would skip these 5635 if ((h[-2] == '\n') && (*h == '.')) // when line starts with \. ignore line 5636 { 5637 while (*h && (*h != '\n')) 5638 h++; 5639 continue; // avoid h++ at the end 5640 } else { 5641 h = scan_escape(h); 5642 } 5643 } else if (*h == controlsym && h[-1] == '\n') { 5644 h++; 5645 FLUSHIBP; 5646 h = scan_request(h); 5647 if (h && san && h[-1] == '\n') 5648 h--; 5649 } else if (mandoc_line && ((*(h - 1)) && (isspace(*(h - 1)) || (*(h - 1)) == '\n')) && *(h) && isupper(*(h)) && *(h + 1) && islower(*(h + 1)) 5650 && *(h + 2) && isspace(*(h + 2))) { 5651 // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2" 5652 FLUSHIBP; 5653 h = scan_request(h); 5654 if (san && h[-1] == '\n') { 5655 h--; 5656 break; 5657 } 5658 } else if (*h == nobreaksym && h[-1] == '\n') { 5659 h++; 5660 FLUSHIBP; 5661 h = scan_request(h); 5662 if (san && h[-1] == '\n') 5663 h--; 5664 } else { 5665 if (still_dd && isalnum(*h) && h[-1] == '\n') { 5666 /* sometimes a .HP request is not followed by a .br request */ 5667 FLUSHIBP; 5668 out_html("<DD>"); 5669 curpos = 0; 5670 still_dd = false; 5671 } 5672 switch (*h) { 5673 case '&': { 5674 intbuff += "&"; 5675 curpos++; 5676 break; 5677 } 5678 case '<': { 5679 intbuff += "<"; 5680 curpos++; 5681 break; 5682 } 5683 case '>': { 5684 intbuff += ">"; 5685 curpos++; 5686 break; 5687 } 5688 case '"': { 5689 intbuff += """; 5690 curpos++; 5691 break; 5692 } 5693 case '\n': { 5694 if (h != c && h[-1] == '\n' && fillout) { 5695 intbuff += "<p>"; 5696 } 5697 if (contained_tab && fillout) { 5698 intbuff += "<br>"; 5699 } 5700 contained_tab = 0; 5701 curpos = 0; 5702 usenbsp = 0; 5703 intbuff += '\n'; 5704 FLUSHIBP; 5705 break; 5706 } 5707 case '\t': { 5708 int curtab = 0; 5709 contained_tab = 1; 5710 FLUSHIBP; 5711 /* like a typewriter, not like TeX */ 5712 tabstops[19] = curpos + 1; 5713 while (curtab < maxtstop && tabstops[curtab] <= curpos) 5714 curtab++; 5715 if (curtab < maxtstop) { 5716 if (!fillout) { 5717 while (curpos < tabstops[curtab]) { 5718 intbuff += ' '; 5719 if (intbuff.length() > MED_STR_MAX) { 5720 FLUSHIBP; 5721 } 5722 curpos++; 5723 } 5724 } else { 5725 out_html("<TT>"); 5726 while (curpos < tabstops[curtab]) { 5727 out_html(" "); 5728 curpos++; 5729 } 5730 out_html("</TT>"); 5731 } 5732 } 5733 break; 5734 } 5735 default: { 5736 if (*h == ' ' && (h[-1] == '\n' || usenbsp)) { 5737 FLUSHIBP; 5738 if (!usenbsp && fillout) { 5739 out_html("<BR>"); 5740 curpos = 0; 5741 } 5742 usenbsp = fillout; 5743 if (usenbsp) 5744 out_html(" "); 5745 else 5746 intbuff += ' '; 5747 } else if (*h > 31 && *h < 127) 5748 intbuff += *h; 5749 else if (((unsigned char)(*h)) > 127) { 5750 intbuff += *h; 5751 } 5752 curpos++; 5753 break; 5754 } 5755 } 5756 if (intbuff.length() > MED_STR_MAX) 5757 FLUSHIBP; 5758 h++; 5759 } 5760 } 5761 FLUSHIBP; 5762 if (buffer) 5763 buffer[buffpos] = '\0'; 5764 if (san && h && *h) 5765 h++; 5766 newline_for_fun = exnewline_for_fun; 5767 if (result) { 5768 *result = buffer; 5769 buffer = exbuffer; 5770 buffpos = exbuffpos; 5771 buffmax = exbuffmax; 5772 scaninbuff = exscaninbuff; 5773 } 5774 5775 return h; 5776 } 5777 5778 //--------------------------------------------------------------------- 5779 5780 static char *scan_troff_mandoc(char *c, bool san, char **result) 5781 { 5782 char *ret; 5783 char *end = c; 5784 bool oldval = mandoc_line; 5785 mandoc_line = true; 5786 while (*end && *end != '\n') { 5787 end++; 5788 } 5789 5790 if (end > c + 2 && ispunct(*(end - 1)) && isspace(*(end - 2)) && *(end - 2) != '\n') { 5791 /* Don't format lonely punctuation E.g. in "xyz ," format 5792 * the xyz and then append the comma removing the space. 5793 */ 5794 *(end - 2) = '\n'; 5795 ret = scan_troff(c, san, result); 5796 *end = 0; 5797 out_html(end - 1); // output the punct char 5798 *end = '\n'; 5799 ret = end; 5800 } else { 5801 ret = scan_troff(c, san, result); 5802 } 5803 mandoc_line = oldval; 5804 return ret; 5805 } 5806 5807 //--------------------------------------------------------------------- 5808 // Entry point 5809 5810 void scan_man_page(const char *man_page) 5811 { 5812 if (!man_page) 5813 return; 5814 5815 qCDebug(KIO_MAN_LOG) << "Start scanning man page"; 5816 5817 // ### Do more init 5818 // Unlike man2html, we actually call this several times, hence the need to 5819 // properly cleanup all those static vars 5820 s_ifelseval.clear(); 5821 5822 s_characterDefinitionMap.clear(); 5823 InitCharacterDefinitions(); 5824 5825 s_stringDefinitionMap.clear(); 5826 InitStringDefinitions(); 5827 5828 s_numberDefinitionMap.clear(); 5829 InitNumberDefinitions(); 5830 5831 s_argumentList.clear(); 5832 listItemStack.clear(); 5833 5834 in_div = 0; 5835 5836 s_dollarZero = ""; // No macro called yet! 5837 mandoc_name = ""; 5838 5839 output_possible = false; 5840 int strLength = qstrlen(man_page); 5841 char *buf = new char[strLength + 2]; 5842 qstrcpy(buf + 1, man_page); 5843 buf[0] = '\n'; 5844 5845 qCDebug(KIO_MAN_LOG) << "Parse man page"; 5846 5847 scan_troff(buf + 1, 0, nullptr); 5848 5849 qCDebug(KIO_MAN_LOG) << "Man page parsed!"; 5850 5851 while (itemdepth || dl_set[itemdepth]) { 5852 checkListStack(); 5853 out_html("</DL>\n"); 5854 if (dl_set[itemdepth]) 5855 dl_set[itemdepth] = 0; 5856 else if (itemdepth > 0) 5857 itemdepth--; 5858 } 5859 5860 out_html(set_font("R")); 5861 out_html(change_to_size(0)); 5862 if (!fillout) { 5863 fillout = 1; 5864 out_html("</PRE>"); 5865 } 5866 out_html(NEWLINE); 5867 5868 if (in_div) { 5869 output_real("</div><div style=\"margin-left: 2cm\">\n"); 5870 in_div = 0; 5871 } 5872 5873 if (output_possible) { 5874 // The output is buggy wrt to how divs are handled. Fixing it would 5875 // require closing divs before other block-level elements are output, 5876 // and I do not feel like going to find them all. 5877 output_real("</div></div></div></div>\n"); 5878 5879 output_real("<div id=\"footer\"><div id=\"footer_text\">\n"); 5880 #ifdef SIMPLE_MAN2HTML 5881 output_real("Generated by kio_man"); 5882 #else 5883 output_real("Generated by kio_man version "); 5884 output_real(QString(KDE_VERSION_STRING).toHtmlEscaped().toLocal8Bit()); 5885 #endif 5886 output_real("</div></div>\n\n"); 5887 5888 output_real("</BODY>\n</HTML>\n"); 5889 } 5890 delete[] buf; 5891 5892 // Release memory 5893 s_characterDefinitionMap.clear(); 5894 s_stringDefinitionMap.clear(); 5895 s_numberDefinitionMap.clear(); 5896 s_argumentList.clear(); 5897 5898 // reinit static variables for reuse 5899 delete[] buffer; 5900 buffer = nullptr; 5901 5902 escapesym = '\\'; 5903 nobreaksym = '\''; 5904 controlsym = '.'; 5905 fieldsym = 0; 5906 padsym = 0; 5907 5908 buffpos = 0; 5909 buffmax = 0; 5910 scaninbuff = false; 5911 itemdepth = 0; 5912 for (int i = 0; i < 20; i++) 5913 dl_set[i] = 0; 5914 still_dd = false; 5915 for (int i = 0; i < 12; i++) 5916 tabstops[i] = (i + 1) * 8; 5917 maxtstop = 12; 5918 curpos = 0; 5919 5920 mandoc_name_count = 0; 5921 } 5922 5923 //--------------------------------------------------------------------- 5924 5925 char *manPageToUtf8(const QByteArray &input, const QByteArray &dirName) 5926 { 5927 // as we do not know in which encoding the man source is, try to automatically 5928 // detect it and always return it as UTF-8 5929 5930 QByteArray encoding; 5931 5932 // some pages contain "coding:" information. See "man manconv" 5933 // (but I find pages which do not exactly obey the format described in manconv, e.g. 5934 // the control char is either "." or "'") 5935 // Therefore use a QRegularExpression 5936 const QRegularExpression regex("[\\.']\\\\\"[^$]*coding:\\s*(\\S*)\\s", QRegularExpression::CaseInsensitiveOption); 5937 QRegularExpressionMatch rmatch; 5938 if (QString::fromLatin1(input).indexOf(regex, 0, &rmatch) == 0) { 5939 encoding = rmatch.captured(1).toLatin1(); 5940 5941 qCDebug(KIO_MAN_LOG) << "found embedded encoding" << encoding; 5942 } else { 5943 // check according to the dirName the man page is in 5944 5945 // if the dirName contains a ".", the encoding follows, e.g. "de.UTF-8" 5946 int dot = dirName.indexOf('.'); 5947 if (dot != -1) { 5948 encoding = dirName.mid(dot + 1); 5949 } else { 5950 /* wanted to use KEncodingProber ... however it fails and gives very unreliable 5951 results ... telling me often UTF-8 encoded pages are EUC-JP or gb18030 ... 5952 In fact all man pages here on openSuse are encoded in UTF-8 5953 5954 KEncodingProber encodingProber; 5955 encodingProber.feed(input); 5956 5957 qCDebug(KIO_MAN_LOG) << "auto-detect encoding; guess=" << encodingProber.encoding() 5958 << "confidence=" << encodingProber.confidence(); 5959 5960 encoding = encodingProber.encoding(); 5961 */ 5962 5963 // the original bug report #141340 5964 // mentioned the env var MAN_ICONV_INPUT_CHARSET ... let's check if it is set 5965 // This seems not be a std. man-db env var, but I find several traces of it on the web 5966 encoding = qgetenv("MAN_ICONV_INPUT_CHARSET"); 5967 5968 if (encoding.isEmpty()) 5969 encoding = "UTF-8"; 5970 } 5971 } 5972 5973 QTextCodec *codec = 0; 5974 5975 if (!encoding.isEmpty()) 5976 codec = QTextCodec::codecForName(encoding); 5977 5978 if (!codec) // fallback encoding 5979 codec = QTextCodec::codecForName("ISO-8859-1"); 5980 5981 qCDebug(KIO_MAN_LOG) << "using the encoding" << codec->name() << "for file in dir" << dirName; 5982 5983 QString out = codec->toUnicode(input); 5984 QByteArray array = out.toUtf8(); 5985 5986 // TODO get rid of this double allocation and scan a QByteArray 5987 const int len = array.size(); 5988 char *buf = new char[len + 4]; 5989 memmove(buf + 1, array.data(), len); 5990 buf[0] = buf[len + 1] = '\n'; // Start and end with an end of line 5991 buf[len + 2] = buf[len + 3] = '\0'; // Two NUL characters at end 5992 5993 return buf; 5994 } 5995 5996 //--------------------------------------------------------------------- 5997 5998 #ifdef SIMPLE_MAN2HTML 5999 void output_real(const char *insert) 6000 { 6001 std::cout << insert; 6002 } 6003 6004 char *read_man_page(const char *filename) 6005 { 6006 KCompressionDevice fd(QFile::decodeName(filename)); 6007 if (!fd.open(QIODevice::ReadOnly)) { 6008 std::cerr << "read_man_page: can not open " << filename << std::endl; 6009 return nullptr; 6010 } 6011 6012 QDir dir(QFileInfo(QFile::decodeName(filename)).dir()); 6013 dir.cdUp(); 6014 char *data = manPageToUtf8(fd.readAll(), QFile::encodeName(dir.dirName())); 6015 6016 return data; 6017 } 6018 6019 //-------------------------------------------------------------------------------- 6020 6021 #ifndef KIO_MAN_TEST 6022 int main(int argc, char **argv) 6023 { 6024 if (argc < 2) { 6025 std::cerr << "call: " << argv[0] << " <filename>\n"; 6026 return 1; 6027 } 6028 if (chdir(argv[1])) { 6029 char *buf = read_man_page(argv[1]); 6030 if (buf) { 6031 scan_man_page(buf); 6032 delete[] buf; 6033 } 6034 } else { 6035 DIR *dir = opendir("."); 6036 struct dirent *ent; 6037 while ((ent = readdir(dir)) != nullptr) { 6038 std::cerr << "converting " << ent->d_name << std::endl; 6039 char *buf = read_man_page(ent->d_name); 6040 if (buf) { 6041 scan_man_page(buf); 6042 delete[] buf; 6043 } 6044 } 6045 closedir(dir); 6046 } 6047 return 0; 6048 } 6049 #endif 6050 6051 #endif 6052 6053 // kate: indent-mode cstyle; space-indent on; indent-width 2; replace-tabs on;