File indexing completed on 2024-05-19 05:05:32

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2022 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "encoderlatex.h"
0021 
0022 #include <QString>
0023 #include <QStack>
0024 
0025 #include "logging_io.h"
0026 
0027 inline bool isAsciiLetter(const QChar c) {
0028     static const ushort upperCaseLetterA = QLatin1Char('A').unicode();
0029     static const ushort upperCaseLetterZ = QLatin1Char('Z').unicode();
0030     static const ushort lowerCaseLetterA = QLatin1Char('a').unicode();
0031     static const ushort lowerCaseLetterZ = QLatin1Char('z').unicode();
0032     const ushort unicode = c.unicode();
0033     return (unicode >= upperCaseLetterA && unicode <= upperCaseLetterZ) || (unicode >= lowerCaseLetterA && unicode <= lowerCaseLetterZ);
0034 }
0035 
0036 inline int asciiLetterOrDigitToPos(const QChar c) {
0037     static const ushort upperCaseLetterA = QLatin1Char('A').unicode();
0038     static const ushort upperCaseLetterZ = QLatin1Char('Z').unicode();
0039     static const ushort lowerCaseLetterA = QLatin1Char('a').unicode();
0040     static const ushort lowerCaseLetterZ = QLatin1Char('z').unicode();
0041     static const ushort digit0 = QLatin1Char('0').unicode();
0042     static const ushort digit9 = QLatin1Char('9').unicode();
0043     const ushort unicode = c.unicode();
0044     if (unicode >= upperCaseLetterA && unicode <= upperCaseLetterZ) return unicode - upperCaseLetterA;
0045     else if (unicode >= lowerCaseLetterA && unicode <= lowerCaseLetterZ) return unicode + 26 - lowerCaseLetterA;
0046     else if (unicode >= digit0 && unicode <= digit9) return unicode + 52 - digit0;
0047     else return -1;
0048 }
0049 
0050 inline bool isIJ(const QChar c) {
0051     static const QChar upperCaseLetterI = QLatin1Char('I');
0052     static const QChar upperCaseLetterJ = QLatin1Char('J');
0053     static const QChar lowerCaseLetterI = QLatin1Char('i');
0054     static const QChar lowerCaseLetterJ = QLatin1Char('j');
0055     return c == upperCaseLetterI || c == upperCaseLetterJ || c == lowerCaseLetterI || c == lowerCaseLetterJ;
0056 }
0057 
/**
 * Bit flags telling in which direction(s) a mapping between a
 * LaTeX command and a Unicode value may be applied.
 */
enum EncoderLaTeXCommandDirection {
    DirectionCommandToUnicode = 1 << 0, ///< Mapping may be used in the direction from command to Unicode value
    DirectionUnicodeToCommand = 1 << 1, ///< Mapping may be used in the direction from Unicode value to command
    DirectionBoth = DirectionCommandToUnicode | DirectionUnicodeToCommand ///< Mapping may be used in either direction
};
0063 
0064 /**
0065  * General documentation on this topic:
0066  *   https://www.latex-project.org/help/documentation/encguide.pdf
0067  *   https://mirror.hmc.edu/ctan/macros/xetex/latex/xecjk/xunicode-symbols.pdf
0068  *   ftp://ftp.dante.de/tex-archive/biblio/biber/documentation/utf8-macro-map.html
0069  */
0070 
0071 /**
0072  * This structure contains information how escaped characters
0073  * such as \"a are translated to an Unicode character and back.
0074  * The structure is a table with three columns: (1) the modifier
0075  * (in the example before the quotation mark) (2) the ASCII
0076  * character ((in the example before the 'a') (3) the Unicode
0077  * character described by a hexcode.
0078  * This data structure is used both directly and indirectly via
0079  * the LookupTable structure which is initialized when the
0080  * EncoderLaTeX object is created.
0081  */
0082 static const struct EncoderLaTeXEscapedCharacter {
0083     const QChar modifier;
0084     const QChar letter;
0085     const ushort unicode;
0086     const EncoderLaTeXCommandDirection direction;
0087 }
0088 encoderLaTeXEscapedCharacters[] = {
0089     {QLatin1Char('`'), QLatin1Char('A'), 0x00C0, DirectionBoth},
0090     {QLatin1Char('\''), QLatin1Char('A'), 0x00C1, DirectionBoth},
0091     {QLatin1Char('^'), QLatin1Char('A'), 0x00C2, DirectionBoth},
0092     {QLatin1Char('~'), QLatin1Char('A'), 0x00C3, DirectionBoth},
0093     {QLatin1Char('"'), QLatin1Char('A'), 0x00C4, DirectionBoth},
0094     {QLatin1Char('r'), QLatin1Char('A'), 0x00C5, DirectionBoth},
0095     /** 0x00C6: see EncoderLaTeXCharacterCommand */
0096     {QLatin1Char('c'), QLatin1Char('C'), 0x00C7, DirectionBoth},
0097     {QLatin1Char('`'), QLatin1Char('E'), 0x00C8, DirectionBoth},
0098     {QLatin1Char('\''), QLatin1Char('E'), 0x00C9, DirectionBoth},
0099     {QLatin1Char('^'), QLatin1Char('E'), 0x00CA, DirectionBoth},
0100     {QLatin1Char('"'), QLatin1Char('E'), 0x00CB, DirectionBoth},
0101     {QLatin1Char('`'), QLatin1Char('I'), 0x00CC, DirectionBoth},
0102     {QLatin1Char('\''), QLatin1Char('I'), 0x00CD, DirectionBoth},
0103     {QLatin1Char('^'), QLatin1Char('I'), 0x00CE, DirectionBoth},
0104     {QLatin1Char('"'), QLatin1Char('I'), 0x00CF, DirectionBoth},
0105     /** 0x00D0: see EncoderLaTeXCharacterCommand */
0106     {QLatin1Char('~'), QLatin1Char('N'), 0x00D1, DirectionBoth},
0107     {QLatin1Char('`'), QLatin1Char('O'), 0x00D2, DirectionBoth},
0108     {QLatin1Char('\''), QLatin1Char('O'), 0x00D3, DirectionBoth},
0109     {QLatin1Char('^'), QLatin1Char('O'), 0x00D4, DirectionBoth},
0110     {QLatin1Char('~'), QLatin1Char('O'), 0x00D5, DirectionBoth},
0111     {QLatin1Char('"'), QLatin1Char('O'), 0x00D6, DirectionBoth},
0112     /** 0x00D7: see EncoderLaTeXCharacterCommand */
0113     /** 0x00D8: see EncoderLaTeXCharacterCommand */
0114     {QLatin1Char('`'), QLatin1Char('U'), 0x00D9, DirectionBoth},
0115     {QLatin1Char('\''), QLatin1Char('U'), 0x00DA, DirectionBoth},
0116     {QLatin1Char('^'), QLatin1Char('U'), 0x00DB, DirectionBoth},
0117     {QLatin1Char('"'), QLatin1Char('U'), 0x00DC, DirectionBoth},
0118     {QLatin1Char('\''), QLatin1Char('Y'), 0x00DD, DirectionBoth},
0119     /** 0x00DE: see EncoderLaTeXCharacterCommand */
0120     {QLatin1Char('"'), QLatin1Char('s'), 0x00DF, DirectionBoth},
0121     {QLatin1Char('`'), QLatin1Char('a'), 0x00E0, DirectionBoth},
0122     {QLatin1Char('\''), QLatin1Char('a'), 0x00E1, DirectionBoth},
0123     {QLatin1Char('^'), QLatin1Char('a'), 0x00E2, DirectionBoth},
0124     {QLatin1Char('~'), QLatin1Char('a'), 0x00E3, DirectionBoth},
0125     {QLatin1Char('"'), QLatin1Char('a'), 0x00E4, DirectionBoth},
0126     {QLatin1Char('r'), QLatin1Char('a'), 0x00E5, DirectionBoth},
0127     /** 0x00E6: see EncoderLaTeXCharacterCommand */
0128     {QLatin1Char('c'), QLatin1Char('c'), 0x00E7, DirectionBoth},
0129     {QLatin1Char('`'), QLatin1Char('e'), 0x00E8, DirectionBoth},
0130     {QLatin1Char('\''), QLatin1Char('e'), 0x00E9, DirectionBoth},
0131     {QLatin1Char('^'), QLatin1Char('e'), 0x00EA, DirectionBoth},
0132     {QLatin1Char('"'), QLatin1Char('e'), 0x00EB, DirectionBoth},
0133     {QLatin1Char('`'), QLatin1Char('i'), 0x00EC, DirectionBoth},
0134     {QLatin1Char('\''), QLatin1Char('i'), 0x00ED, DirectionBoth},
0135     {QLatin1Char('^'), QLatin1Char('i'), 0x00EE, DirectionBoth},
0136     {QLatin1Char('"'), QLatin1Char('i'), 0x00EF, DirectionBoth},
0137     /** 0x00F0: see EncoderLaTeXCharacterCommand */
0138     {QLatin1Char('~'), QLatin1Char('n'), 0x00F1, DirectionBoth},
0139     {QLatin1Char('`'), QLatin1Char('o'), 0x00F2, DirectionBoth},
0140     {QLatin1Char('\''), QLatin1Char('o'), 0x00F3, DirectionBoth},
0141     {QLatin1Char('^'), QLatin1Char('o'), 0x00F4, DirectionBoth},
0142     {QLatin1Char('~'), QLatin1Char('o'), 0x00F5, DirectionBoth},
0143     {QLatin1Char('"'), QLatin1Char('o'), 0x00F6, DirectionBoth},
0144     /** 0x00F7: see EncoderLaTeXCharacterCommand */
0145     /** 0x00F8: see EncoderLaTeXCharacterCommand */
0146     {QLatin1Char('`'), QLatin1Char('u'), 0x00F9, DirectionBoth},
0147     {QLatin1Char('\''), QLatin1Char('u'), 0x00FA, DirectionBoth},
0148     {QLatin1Char('^'), QLatin1Char('u'), 0x00FB, DirectionBoth},
0149     {QLatin1Char('"'), QLatin1Char('u'), 0x00FC, DirectionBoth},
0150     {QLatin1Char('\''), QLatin1Char('y'), 0x00FD, DirectionBoth},
0151     /** 0x00FE: see EncoderLaTeXCharacterCommand */
0152     {QLatin1Char('"'), QLatin1Char('y'), 0x00FF, DirectionBoth},
0153     {QLatin1Char('='), QLatin1Char('A'), 0x0100, DirectionBoth},
0154     {QLatin1Char('='), QLatin1Char('a'), 0x0101, DirectionBoth},
0155     {QLatin1Char('u'), QLatin1Char('A'), 0x0102, DirectionBoth},
0156     {QLatin1Char('u'), QLatin1Char('a'), 0x0103, DirectionBoth},
0157     {QLatin1Char('k'), QLatin1Char('A'), 0x0104, DirectionBoth},
0158     {QLatin1Char('k'), QLatin1Char('a'), 0x0105, DirectionBoth},
0159     {QLatin1Char('\''), QLatin1Char('C'), 0x0106, DirectionBoth},
0160     {QLatin1Char('\''), QLatin1Char('c'), 0x0107, DirectionBoth},
0161     {QLatin1Char('^'), QLatin1Char('C'), 0x0108, DirectionBoth},
0162     {QLatin1Char('^'), QLatin1Char('c'), 0x0109, DirectionBoth},
0163     {QLatin1Char('.'), QLatin1Char('C'), 0x010A, DirectionBoth},
0164     {QLatin1Char('.'), QLatin1Char('c'), 0x010B, DirectionBoth},
0165     {QLatin1Char('v'), QLatin1Char('C'), 0x010C, DirectionBoth},
0166     {QLatin1Char('v'), QLatin1Char('c'), 0x010D, DirectionBoth},
0167     {QLatin1Char('v'), QLatin1Char('D'), 0x010E, DirectionBoth},
0168     {QLatin1Char('v'), QLatin1Char('d'), 0x010F, DirectionBoth},
0169     {QLatin1Char('B'), QLatin1Char('D'), 0x0110, DirectionCommandToUnicode}, //< 'African D', command provided by package 'fc' (command seems to be the same as \M{D})
0170     {QLatin1Char('B'), QLatin1Char('d'), 0x0111, DirectionCommandToUnicode}, //< 'African d' (?), command provided by package 'fc'
0171     {QLatin1Char('='), QLatin1Char('E'), 0x0112, DirectionBoth},
0172     {QLatin1Char('='), QLatin1Char('e'), 0x0113, DirectionBoth},
0173     {QLatin1Char('u'), QLatin1Char('E'), 0x0114, DirectionBoth},
0174     {QLatin1Char('u'), QLatin1Char('e'), 0x0115, DirectionBoth},
0175     {QLatin1Char('.'), QLatin1Char('E'), 0x0116, DirectionBoth},
0176     {QLatin1Char('.'), QLatin1Char('e'), 0x0117, DirectionBoth},
0177     {QLatin1Char('k'), QLatin1Char('E'), 0x0118, DirectionBoth},
0178     {QLatin1Char('k'), QLatin1Char('e'), 0x0119, DirectionBoth},
0179     {QLatin1Char('v'), QLatin1Char('E'), 0x011A, DirectionBoth},
0180     {QLatin1Char('v'), QLatin1Char('e'), 0x011B, DirectionBoth},
0181     {QLatin1Char('^'), QLatin1Char('G'), 0x011C, DirectionBoth},
0182     {QLatin1Char('^'), QLatin1Char('g'), 0x011D, DirectionBoth},
0183     {QLatin1Char('u'), QLatin1Char('G'), 0x011E, DirectionBoth},
0184     {QLatin1Char('u'), QLatin1Char('g'), 0x011F, DirectionBoth},
0185     {QLatin1Char('.'), QLatin1Char('G'), 0x0120, DirectionBoth},
0186     {QLatin1Char('.'), QLatin1Char('g'), 0x0121, DirectionBoth},
0187     {QLatin1Char('c'), QLatin1Char('G'), 0x0122, DirectionBoth},
0188     {QLatin1Char('c'), QLatin1Char('g'), 0x0123, DirectionBoth},
0189     {QLatin1Char('^'), QLatin1Char('H'), 0x0124, DirectionBoth},
0190     {QLatin1Char('^'), QLatin1Char('h'), 0x0125, DirectionBoth},
0191     {QLatin1Char('B'), QLatin1Char('H'), 0x0126, DirectionCommandToUnicode},
0192     {QLatin1Char('B'), QLatin1Char('h'), 0x0127, DirectionCommandToUnicode},
0193     {QLatin1Char('~'), QLatin1Char('I'), 0x0128, DirectionBoth},
0194     {QLatin1Char('~'), QLatin1Char('i'), 0x0129, DirectionBoth},
0195     {QLatin1Char('='), QLatin1Char('I'), 0x012A, DirectionBoth},
0196     {QLatin1Char('='), QLatin1Char('i'), 0x012B, DirectionBoth},
0197     {QLatin1Char('u'), QLatin1Char('I'), 0x012C, DirectionBoth},
0198     {QLatin1Char('u'), QLatin1Char('i'), 0x012D, DirectionBoth},
0199     {QLatin1Char('k'), QLatin1Char('I'), 0x012E, DirectionBoth},
0200     {QLatin1Char('k'), QLatin1Char('i'), 0x012F, DirectionBoth},
0201     {QLatin1Char('.'), QLatin1Char('I'), 0x0130, DirectionBoth},
0202     /** 0x0131: see EncoderLaTeXCharacterCommand */
0203     /** 0x0132: see EncoderLaTeXCharacterCommand */
0204     /** 0x0133: see EncoderLaTeXCharacterCommand */
0205     {QLatin1Char('^'), QLatin1Char('J'), 0x012E, DirectionBoth},
0206     {QLatin1Char('^'), QLatin1Char('j'), 0x012F, DirectionBoth},
0207     {QLatin1Char('c'), QLatin1Char('K'), 0x0136, DirectionBoth},
0208     {QLatin1Char('c'), QLatin1Char('k'), 0x0137, DirectionBoth},
0209     /** 0x0138: see EncoderLaTeXCharacterCommand */
0210     {QLatin1Char('\''), QLatin1Char('L'), 0x0139, DirectionBoth},
0211     {QLatin1Char('\''), QLatin1Char('l'), 0x013A, DirectionBoth},
0212     {QLatin1Char('c'), QLatin1Char('L'), 0x013B, DirectionBoth},
0213     {QLatin1Char('c'), QLatin1Char('l'), 0x013C, DirectionBoth},
0214     {QLatin1Char('v'), QLatin1Char('L'), 0x013D, DirectionBoth},
0215     {QLatin1Char('v'), QLatin1Char('l'), 0x013E, DirectionBoth},
0216     {QLatin1Char('.'), QLatin1Char('L'), 0x013F, DirectionBoth},
0217     {QLatin1Char('.'), QLatin1Char('l'), 0x0140, DirectionBoth},
0218     {QLatin1Char('B'), QLatin1Char('L'), 0x0141, DirectionCommandToUnicode},
0219     {QLatin1Char('B'), QLatin1Char('l'), 0x0142, DirectionCommandToUnicode},
0220     {QLatin1Char('\''), QLatin1Char('N'), 0x0143, DirectionBoth},
0221     {QLatin1Char('\''), QLatin1Char('n'), 0x0144, DirectionBoth},
0222     {QLatin1Char('c'), QLatin1Char('n'), 0x0145, DirectionBoth},
0223     {QLatin1Char('c'), QLatin1Char('n'), 0x0146, DirectionBoth},
0224     {QLatin1Char('v'), QLatin1Char('N'), 0x0147, DirectionBoth},
0225     {QLatin1Char('v'), QLatin1Char('n'), 0x0148, DirectionBoth},
0226     /** 0x0149: TODO n preceded by apostrophe */
0227     {QLatin1Char('m'), QLatin1Char('N'), 0x014A, DirectionCommandToUnicode},
0228     {QLatin1Char('m'), QLatin1Char('n'), 0x014B, DirectionCommandToUnicode},
0229     {QLatin1Char('='), QLatin1Char('O'), 0x014C, DirectionBoth},
0230     {QLatin1Char('='), QLatin1Char('o'), 0x014D, DirectionBoth},
0231     {QLatin1Char('u'), QLatin1Char('O'), 0x014E, DirectionBoth},
0232     {QLatin1Char('u'), QLatin1Char('o'), 0x014F, DirectionBoth},
0233     {QLatin1Char('H'), QLatin1Char('O'), 0x0150, DirectionBoth},
0234     {QLatin1Char('H'), QLatin1Char('o'), 0x0151, DirectionBoth},
0235     /** 0x0152: see EncoderLaTeXCharacterCommand */
0236     /** 0x0153: see EncoderLaTeXCharacterCommand */
0237     {QLatin1Char('\''), QLatin1Char('R'), 0x0154, DirectionBoth},
0238     {QLatin1Char('\''), QLatin1Char('r'), 0x0155, DirectionBoth},
0239     {QLatin1Char('c'), QLatin1Char('R'), 0x0156, DirectionBoth},
0240     {QLatin1Char('c'), QLatin1Char('r'), 0x0157, DirectionBoth},
0241     {QLatin1Char('v'), QLatin1Char('R'), 0x0158, DirectionBoth},
0242     {QLatin1Char('v'), QLatin1Char('r'), 0x0159, DirectionBoth},
0243     {QLatin1Char('\''), QLatin1Char('S'), 0x015A, DirectionBoth},
0244     {QLatin1Char('\''), QLatin1Char('s'), 0x015B, DirectionBoth},
0245     {QLatin1Char('^'), QLatin1Char('S'), 0x015C, DirectionBoth},
0246     {QLatin1Char('^'), QLatin1Char('s'), 0x015D, DirectionBoth},
0247     {QLatin1Char('c'), QLatin1Char('S'), 0x015E, DirectionBoth},
0248     {QLatin1Char('c'), QLatin1Char('s'), 0x015F, DirectionBoth},
0249     {QLatin1Char('v'), QLatin1Char('S'), 0x0160, DirectionBoth},
0250     {QLatin1Char('v'), QLatin1Char('s'), 0x0161, DirectionBoth},
0251     {QLatin1Char('c'), QLatin1Char('T'), 0x0162, DirectionBoth},
0252     {QLatin1Char('c'), QLatin1Char('t'), 0x0163, DirectionBoth},
0253     {QLatin1Char('v'), QLatin1Char('T'), 0x0164, DirectionBoth},
0254     {QLatin1Char('v'), QLatin1Char('t'), 0x0165, DirectionBoth},
0255     {QLatin1Char('B'), QLatin1Char('T'), 0x0166, DirectionCommandToUnicode},
0256     {QLatin1Char('B'), QLatin1Char('t'), 0x0167, DirectionCommandToUnicode},
0257     {QLatin1Char('~'), QLatin1Char('U'), 0x0168, DirectionBoth},
0258     {QLatin1Char('~'), QLatin1Char('u'), 0x0169, DirectionBoth},
0259     {QLatin1Char('='), QLatin1Char('U'), 0x016A, DirectionBoth},
0260     {QLatin1Char('='), QLatin1Char('u'), 0x016B, DirectionBoth},
0261     {QLatin1Char('u'), QLatin1Char('U'), 0x016C, DirectionBoth},
0262     {QLatin1Char('u'), QLatin1Char('u'), 0x016D, DirectionBoth},
0263     {QLatin1Char('r'), QLatin1Char('U'), 0x016E, DirectionBoth},
0264     {QLatin1Char('r'), QLatin1Char('u'), 0x016F, DirectionBoth},
0265     {QLatin1Char('H'), QLatin1Char('U'), 0x0170, DirectionBoth},
0266     {QLatin1Char('H'), QLatin1Char('u'), 0x0171, DirectionBoth},
0267     {QLatin1Char('k'), QLatin1Char('U'), 0x0172, DirectionBoth},
0268     {QLatin1Char('k'), QLatin1Char('u'), 0x0173, DirectionBoth},
0269     {QLatin1Char('^'), QLatin1Char('W'), 0x0174, DirectionBoth},
0270     {QLatin1Char('^'), QLatin1Char('w'), 0x0175, DirectionBoth},
0271     {QLatin1Char('^'), QLatin1Char('Y'), 0x0176, DirectionBoth},
0272     {QLatin1Char('^'), QLatin1Char('y'), 0x0177, DirectionBoth},
0273     {QLatin1Char('"'), QLatin1Char('Y'), 0x0178, DirectionBoth},
0274     {QLatin1Char('\''), QLatin1Char('Z'), 0x0179, DirectionBoth},
0275     {QLatin1Char('\''), QLatin1Char('z'), 0x017A, DirectionBoth},
0276     {QLatin1Char('.'), QLatin1Char('Z'), 0x017B, DirectionBoth},
0277     {QLatin1Char('.'), QLatin1Char('z'), 0x017C, DirectionBoth},
0278     {QLatin1Char('v'), QLatin1Char('Z'), 0x017D, DirectionBoth},
0279     {QLatin1Char('v'), QLatin1Char('z'), 0x017E, DirectionBoth},
0280     /** 0x017F: TODO long s */
0281     {QLatin1Char('B'), QLatin1Char('b'), 0x0180, DirectionCommandToUnicode},
0282     {QLatin1Char('m'), QLatin1Char('B'), 0x0181, DirectionCommandToUnicode},
0283     /** 0x0182 */
0284     /** 0x0183 */
0285     /** 0x0184 */
0286     /** 0x0185 */
0287     {QLatin1Char('m'), QLatin1Char('O'), 0x0186, DirectionCommandToUnicode},
0288     {QLatin1Char('m'), QLatin1Char('C'), 0x0187, DirectionCommandToUnicode},
0289     {QLatin1Char('m'), QLatin1Char('c'), 0x0188, DirectionCommandToUnicode},
0290     {QLatin1Char('M'), QLatin1Char('D'), 0x0189, DirectionBoth}, //< 'African D', command provided by package 'fc' (command seems to be the same as \B{D})
0291     {QLatin1Char('m'), QLatin1Char('D'), 0x018A, DirectionCommandToUnicode},
0292     /** 0x018B */
0293     /** 0x018C */
0294     /** 0x018D */
0295     {QLatin1Char('M'), QLatin1Char('E'), 0x018E, DirectionCommandToUnicode},
0296     /** 0x018F */
0297     {QLatin1Char('m'), QLatin1Char('E'), 0x0190, DirectionCommandToUnicode},
0298     {QLatin1Char('m'), QLatin1Char('F'), 0x0191, DirectionCommandToUnicode},
0299     {QLatin1Char('m'), QLatin1Char('f'), 0x0192, DirectionCommandToUnicode},
0300     /** 0x0193 */
0301     {QLatin1Char('m'), QLatin1Char('G'), 0x0194, DirectionCommandToUnicode},
0302     /** 0x0195: see EncoderLaTeXCharacterCommand */
0303     {QLatin1Char('m'), QLatin1Char('I'), 0x0196, DirectionCommandToUnicode},
0304     {QLatin1Char('B'), QLatin1Char('I'), 0x0197, DirectionCommandToUnicode},
0305     {QLatin1Char('m'), QLatin1Char('K'), 0x0198, DirectionCommandToUnicode},
0306     {QLatin1Char('m'), QLatin1Char('k'), 0x0199, DirectionCommandToUnicode},
0307     {QLatin1Char('B'), QLatin1Char('l'), 0x019A, DirectionCommandToUnicode},
0308     /** 0x019B */
0309     /** 0x019C */
0310     {QLatin1Char('m'), QLatin1Char('J'), 0x019D, DirectionCommandToUnicode},
0311     /** 0x019E */
0312     /** 0x019F */
0313     /** 0x01A0 */
0314     /** 0x01A1 */
0315     /** 0x01A2 */
0316     /** 0x01A3 */
0317     {QLatin1Char('m'), QLatin1Char('P'), 0x01A4, DirectionCommandToUnicode},
0318     {QLatin1Char('m'), QLatin1Char('p'), 0x01A5, DirectionCommandToUnicode},
0319     /** 0x01A6 */
0320     /** 0x01A7 */
0321     /** 0x01A8 */
0322     /** 0x01A9: see EncoderLaTeXCharacterCommand */
0323     /** 0x01AA */
0324     /** 0x01AB */
0325     {QLatin1Char('m'), QLatin1Char('T'), 0x01AC, DirectionCommandToUnicode},
0326     {QLatin1Char('m'), QLatin1Char('t'), 0x01AD, DirectionCommandToUnicode},
0327     {QLatin1Char('M'), QLatin1Char('T'), 0x01AE, DirectionCommandToUnicode},
0328     /** 0x01AF */
0329     /** 0x01B0 */
0330     {QLatin1Char('m'), QLatin1Char('U'), 0x01B1, DirectionCommandToUnicode},
0331     {QLatin1Char('m'), QLatin1Char('V'), 0x01B2, DirectionCommandToUnicode},
0332     {QLatin1Char('m'), QLatin1Char('Y'), 0x01B3, DirectionCommandToUnicode},
0333     {QLatin1Char('m'), QLatin1Char('y'), 0x01B4, DirectionCommandToUnicode},
0334     {QLatin1Char('B'), QLatin1Char('Z'), 0x01B5, DirectionCommandToUnicode},
0335     {QLatin1Char('B'), QLatin1Char('z'), 0x01B6, DirectionCommandToUnicode},
0336     {QLatin1Char('m'), QLatin1Char('Z'), 0x01B7, DirectionCommandToUnicode},
0337     /** 0x01B8 */
0338     /** 0x01B9 */
0339     /** 0x01BA */
0340     {QLatin1Char('B'), QLatin1Char('2'), 0x01BB, DirectionCommandToUnicode},
0341     /** 0x01BC */
0342     /** 0x01BD */
0343     /** 0x01BE */
0344     /** 0x01BF */
0345     /** 0x01C0 */
0346     /** 0x01C1 */
0347     /** 0x01C2 */
0348     /** 0x01C3 */
0349     /** 0x01C4 */
0350     /** 0x01C5 */
0351     /** 0x01C6 */
0352     /** 0x01C7 */
0353     /** 0x01C8 */
0354     /** 0x01C9 */
0355     /** 0x01CA */
0356     /** 0x01CB */
0357     /** 0x01CC */
0358     {QLatin1Char('v'), QLatin1Char('A'), 0x01CD, DirectionBoth},
0359     {QLatin1Char('v'), QLatin1Char('a'), 0x01CE, DirectionBoth},
0360     {QLatin1Char('v'), QLatin1Char('G'), 0x01E6, DirectionBoth},
0361     {QLatin1Char('v'), QLatin1Char('g'), 0x01E7, DirectionBoth},
0362     {QLatin1Char('k'), QLatin1Char('O'), 0x01EA, DirectionBoth},
0363     {QLatin1Char('k'), QLatin1Char('o'), 0x01EB, DirectionBoth},
0364     {QLatin1Char('\''), QLatin1Char('F'), 0x01F4, DirectionBoth},
0365     {QLatin1Char('\''), QLatin1Char('f'), 0x01F5, DirectionBoth},
0366     {QLatin1Char('.'), QLatin1Char('A'), 0x0226, DirectionBoth},
0367     {QLatin1Char('.'), QLatin1Char('a'), 0x0227, DirectionBoth},
0368     {QLatin1Char('c'), QLatin1Char('E'), 0x0228, DirectionBoth},
0369     {QLatin1Char('c'), QLatin1Char('e'), 0x0229, DirectionBoth},
0370     {QLatin1Char('='), QLatin1Char('Y'), 0x0232, DirectionBoth},
0371     {QLatin1Char('='), QLatin1Char('y'), 0x0233, DirectionBoth},
0372     {QLatin1Char('.'), QLatin1Char('O'), 0x022E, DirectionBoth},
0373     {QLatin1Char('.'), QLatin1Char('o'), 0x022F, DirectionBoth},
0374     {QLatin1Char('M'), QLatin1Char('d'), 0x0256, DirectionBoth}, //< 'African d', command provided by package 'fc' (may be same as \B{d} ?)
0375     {QLatin1Char('.'), QLatin1Char('B'), 0x1E02, DirectionBoth},
0376     {QLatin1Char('.'), QLatin1Char('b'), 0x1E03, DirectionBoth},
0377     {QLatin1Char('d'), QLatin1Char('B'), 0x1E04, DirectionBoth},
0378     {QLatin1Char('d'), QLatin1Char('b'), 0x1E05, DirectionBoth},
0379     {QLatin1Char('.'), QLatin1Char('D'), 0x1E0A, DirectionBoth},
0380     {QLatin1Char('.'), QLatin1Char('d'), 0x1E0B, DirectionBoth},
0381     {QLatin1Char('d'), QLatin1Char('D'), 0x1E0C, DirectionBoth},
0382     {QLatin1Char('d'), QLatin1Char('d'), 0x1E0D, DirectionBoth},
0383     {QLatin1Char('c'), QLatin1Char('D'), 0x1E10, DirectionBoth},
0384     {QLatin1Char('c'), QLatin1Char('d'), 0x1E11, DirectionBoth},
0385     {QLatin1Char('.'), QLatin1Char('E'), 0x1E1E, DirectionBoth},
0386     {QLatin1Char('.'), QLatin1Char('e'), 0x1E1F, DirectionBoth},
0387     {QLatin1Char('.'), QLatin1Char('H'), 0x1E22, DirectionBoth},
0388     {QLatin1Char('.'), QLatin1Char('h'), 0x1E23, DirectionBoth},
0389     {QLatin1Char('d'), QLatin1Char('H'), 0x1E24, DirectionBoth},
0390     {QLatin1Char('d'), QLatin1Char('h'), 0x1E25, DirectionBoth},
0391     {QLatin1Char('"'), QLatin1Char('H'), 0x1E26, DirectionBoth},
0392     {QLatin1Char('"'), QLatin1Char('h'), 0x1E27, DirectionBoth},
0393     {QLatin1Char('c'), QLatin1Char('H'), 0x1E28, DirectionBoth},
0394     {QLatin1Char('c'), QLatin1Char('h'), 0x1E29, DirectionBoth},
0395     {QLatin1Char('d'), QLatin1Char('K'), 0x1E32, DirectionBoth},
0396     {QLatin1Char('d'), QLatin1Char('k'), 0x1E33, DirectionBoth},
0397     {QLatin1Char('d'), QLatin1Char('L'), 0x1E36, DirectionBoth},
0398     {QLatin1Char('d'), QLatin1Char('l'), 0x1E37, DirectionBoth},
0399     {QLatin1Char('.'), QLatin1Char('M'), 0x1E40, DirectionBoth},
0400     {QLatin1Char('.'), QLatin1Char('m'), 0x1E41, DirectionBoth},
0401     {QLatin1Char('d'), QLatin1Char('M'), 0x1E42, DirectionBoth},
0402     {QLatin1Char('d'), QLatin1Char('m'), 0x1E43, DirectionBoth},
0403     {QLatin1Char('.'), QLatin1Char('N'), 0x1E44, DirectionBoth},
0404     {QLatin1Char('.'), QLatin1Char('n'), 0x1E45, DirectionBoth},
0405     {QLatin1Char('.'), QLatin1Char('N'), 0x1E46, DirectionBoth},
0406     {QLatin1Char('.'), QLatin1Char('n'), 0x1E47, DirectionBoth},
0407     {QLatin1Char('.'), QLatin1Char('P'), 0x1E56, DirectionBoth},
0408     {QLatin1Char('.'), QLatin1Char('p'), 0x1E57, DirectionBoth},
0409     {QLatin1Char('.'), QLatin1Char('R'), 0x1E58, DirectionBoth},
0410     {QLatin1Char('.'), QLatin1Char('r'), 0x1E59, DirectionBoth},
0411     {QLatin1Char('d'), QLatin1Char('R'), 0x1E5A, DirectionBoth},
0412     {QLatin1Char('d'), QLatin1Char('r'), 0x1E5B, DirectionBoth},
0413     {QLatin1Char('.'), QLatin1Char('S'), 0x1E60, DirectionBoth},
0414     {QLatin1Char('.'), QLatin1Char('s'), 0x1E61, DirectionBoth},
0415     {QLatin1Char('d'), QLatin1Char('S'), 0x1E62, DirectionBoth},
0416     {QLatin1Char('d'), QLatin1Char('s'), 0x1E63, DirectionBoth},
0417     {QLatin1Char('.'), QLatin1Char('T'), 0x1E6A, DirectionBoth},
0418     {QLatin1Char('.'), QLatin1Char('t'), 0x1E6B, DirectionBoth},
0419     {QLatin1Char('d'), QLatin1Char('T'), 0x1E6C, DirectionBoth},
0420     {QLatin1Char('d'), QLatin1Char('t'), 0x1E6D, DirectionBoth},
0421     {QLatin1Char('d'), QLatin1Char('V'), 0x1E7E, DirectionBoth},
0422     {QLatin1Char('d'), QLatin1Char('v'), 0x1E7F, DirectionBoth},
0423     {QLatin1Char('`'), QLatin1Char('W'), 0x1E80, DirectionBoth},
0424     {QLatin1Char('`'), QLatin1Char('w'), 0x1E81, DirectionBoth},
0425     {QLatin1Char('\''), QLatin1Char('W'), 0x1E82, DirectionBoth},
0426     {QLatin1Char('\''), QLatin1Char('w'), 0x1E83, DirectionBoth},
0427     {QLatin1Char('"'), QLatin1Char('W'), 0x1E84, DirectionBoth},
0428     {QLatin1Char('"'), QLatin1Char('w'), 0x1E85, DirectionBoth},
0429     {QLatin1Char('.'), QLatin1Char('W'), 0x1E86, DirectionBoth},
0430     {QLatin1Char('.'), QLatin1Char('w'), 0x1E87, DirectionBoth},
0431     {QLatin1Char('d'), QLatin1Char('W'), 0x1E88, DirectionBoth},
0432     {QLatin1Char('d'), QLatin1Char('w'), 0x1E88, DirectionBoth},
0433     {QLatin1Char('.'), QLatin1Char('X'), 0x1E8A, DirectionBoth},
0434     {QLatin1Char('.'), QLatin1Char('x'), 0x1E8B, DirectionBoth},
0435     {QLatin1Char('"'), QLatin1Char('X'), 0x1E8C, DirectionBoth},
0436     {QLatin1Char('"'), QLatin1Char('x'), 0x1E8D, DirectionBoth},
0437     {QLatin1Char('.'), QLatin1Char('Y'), 0x1E8E, DirectionBoth},
0438     {QLatin1Char('.'), QLatin1Char('y'), 0x1E8F, DirectionBoth},
0439     {QLatin1Char('d'), QLatin1Char('Z'), 0x1E92, DirectionBoth},
0440     {QLatin1Char('d'), QLatin1Char('z'), 0x1E93, DirectionBoth},
0441     {QLatin1Char('"'), QLatin1Char('t'), 0x1E97, DirectionBoth},
0442     {QLatin1Char('r'), QLatin1Char('w'), 0x1E98, DirectionBoth},
0443     {QLatin1Char('r'), QLatin1Char('y'), 0x1E99, DirectionBoth},
0444     {QLatin1Char('d'), QLatin1Char('A'), 0x1EA0, DirectionBoth},
0445     {QLatin1Char('d'), QLatin1Char('a'), 0x1EA1, DirectionBoth},
0446     {QLatin1Char('d'), QLatin1Char('E'), 0x1EB8, DirectionBoth},
0447     {QLatin1Char('d'), QLatin1Char('e'), 0x1EB9, DirectionBoth},
0448     {QLatin1Char('d'), QLatin1Char('I'), 0x1ECA, DirectionBoth},
0449     {QLatin1Char('d'), QLatin1Char('i'), 0x1ECB, DirectionBoth},
0450     {QLatin1Char('d'), QLatin1Char('O'), 0x1ECC, DirectionBoth},
0451     {QLatin1Char('d'), QLatin1Char('o'), 0x1ECD, DirectionBoth},
0452     {QLatin1Char('d'), QLatin1Char('U'), 0x1EE4, DirectionBoth},
0453     {QLatin1Char('d'), QLatin1Char('u'), 0x1EE5, DirectionBoth},
0454     {QLatin1Char('`'), QLatin1Char('Y'), 0x1EF2, DirectionBoth},
0455     {QLatin1Char('`'), QLatin1Char('y'), 0x1EF3, DirectionBoth},
0456     {QLatin1Char('d'), QLatin1Char('Y'), 0x1EF4, DirectionBoth},
0457     {QLatin1Char('d'), QLatin1Char('y'), 0x1EF5, DirectionBoth},
0458     {QLatin1Char('r'), QLatin1Char('q'), 0x2019, DirectionCommandToUnicode} ///< tricky: this is \rq
0459 };
0460 
0461 
0462 /**
0463  * This structure contains information on the usage of dotless i
0464  * and dotless j in combination with accent-like modifiers.
0465  * Combinations such as \"{\i} are translated to an Unicode character
0466  * and back. The structure is a table with three columns: (1) the
0467  * modified (in the example before the quotation mark) (2) the ASCII
0468  * character (in the example before the 'i') (3) the Unicode
0469  * character described by a hexcode.
0470  */
0471 // TODO other cases of \i and \j?
0472 static const struct DotlessIJCharacter {
0473     const QChar modifier;
0474     const QChar letter;
0475     const ushort unicode;
0476     const EncoderLaTeXCommandDirection direction;
0477 }
0478 dotlessIJCharacters[] = {
0479     {QLatin1Char('`'), QLatin1Char('i'), 0x00EC, DirectionBoth},
0480     {QLatin1Char('\''), QLatin1Char('i'), 0x00ED, DirectionBoth},
0481     {QLatin1Char('^'), QLatin1Char('i'), 0x00EE, DirectionBoth},
0482     {QLatin1Char('"'), QLatin1Char('i'), 0x00EF, DirectionBoth},
0483     {QLatin1Char('~'), QLatin1Char('i'), 0x0129, DirectionBoth},
0484     {QLatin1Char('='), QLatin1Char('i'), 0x012B, DirectionBoth},
0485     {QLatin1Char('u'), QLatin1Char('i'), 0x012D, DirectionBoth},
0486     {QLatin1Char('k'), QLatin1Char('i'), 0x012F, DirectionBoth},
0487     {QLatin1Char('^'), QLatin1Char('j'), 0x0135, DirectionBoth},
0488     {QLatin1Char('v'), QLatin1Char('i'), 0x01D0, DirectionBoth},
0489     {QLatin1Char('v'), QLatin1Char('j'), 0x01F0, DirectionBoth},
0490     {QLatin1Char('G'), QLatin1Char('i'), 0x0209, DirectionCommandToUnicode}
0491 };
0492 
0493 
0494 /**
0495  * This lookup allows to quickly find hits in the
0496  * EncoderLaTeXEscapedCharacter table. This data structure here
0497  * consists of a number of rows. Each row consists of a
0498  * modifier (like '"' or 'v') and an array of Unicode chars.
0499  * Letters 'A'..'Z','a'..'z','0'..'9' are used as index to this
0500  * array by invocing asciiLetterOrDigitToPos().
0501  * This data structure is built in the constructor.
0502  */
0503 static const int lookupTableNumModifiers = 32;
0504 static const int lookupTableNumCharacters = 26 * 2 + 10;
0505 static struct EncoderLaTeXEscapedCharacterLookupTableRow {
0506     QChar modifier;
0507     QChar unicode[lookupTableNumCharacters];
0508 } *lookupTable[lookupTableNumModifiers];
0509 
0510 
0511 /**
0512  * This data structure keeps track of math commands, which
0513  * have to be treated differently in text and math mode.
0514  * A math command like "subset of" can be used directly
0515  * in math mode, but must be enclosed in \ensuremath{...}
0516  * in text mode.
0517  */
0518 static const struct MathCommand {
0519     const QString command; ///< command's name without the leading backslash
0520     const ushort unicode; ///< Unicode character represented by the command
0521     const EncoderLaTeXCommandDirection direction; ///< direction(s) in which this mapping applies
0522 }
0523 mathCommands[] = {
0524     {QStringLiteral("pm"), 0x00B1, DirectionBoth},
0525     {QStringLiteral("mu"), 0x00B5, DirectionUnicodeToCommand}, ///< Unicode's micro symbol becomes Greek letter
0526     {QStringLiteral("times"), 0x00D7, DirectionBoth},
0527     {QStringLiteral("div"), 0x00F7, DirectionBoth},
0528     {QStringLiteral("phi"), 0x0278, DirectionBoth}, ///< see also 0x03C6 (GREEK SMALL LETTER PHI)
0529     {QStringLiteral("Alpha"), 0x0391, DirectionBoth},
0530     {QStringLiteral("Beta"), 0x0392, DirectionBoth},
0531     {QStringLiteral("Gamma"), 0x0393, DirectionBoth},
0532     {QStringLiteral("Delta"), 0x0394, DirectionBoth},
0533     {QStringLiteral("Epsilon"), 0x0395, DirectionBoth},
0534     {QStringLiteral("Zeta"), 0x0396, DirectionBoth},
0535     {QStringLiteral("Eta"), 0x0397, DirectionBoth},
0536     {QStringLiteral("Theta"), 0x0398, DirectionBoth},
0537     {QStringLiteral("Iota"), 0x0399, DirectionBoth},
0538     {QStringLiteral("Kappa"), 0x039A, DirectionBoth},
0539     {QStringLiteral("Lamda"), 0x039B, DirectionCommandToUnicode}, ///< \Lamda does not exist, this is mostly for spelling errors
0540     {QStringLiteral("Lambda"), 0x039B, DirectionBoth},
0541     {QStringLiteral("Mu"), 0x039C, DirectionBoth},
0542     {QStringLiteral("Nu"), 0x039D, DirectionBoth},
0543     {QStringLiteral("Xi"), 0x039E, DirectionBoth},
0544     {QStringLiteral("Omicron"), 0x039F, DirectionBoth},
0545     {QStringLiteral("Pi"), 0x03A0, DirectionBoth},
0546     {QStringLiteral("Rho"), 0x03A1, DirectionBoth},
0547     {QStringLiteral("Sigma"), 0x03A3, DirectionBoth},
0548     {QStringLiteral("Tau"), 0x03A4, DirectionBoth},
0549     {QStringLiteral("Upsilon"), 0x03A5, DirectionBoth},
0550     {QStringLiteral("Phi"), 0x03A6, DirectionBoth},
0551     {QStringLiteral("Chi"), 0x03A7, DirectionBoth},
0552     {QStringLiteral("Psi"), 0x03A8, DirectionBoth},
0553     {QStringLiteral("Omega"), 0x03A9, DirectionBoth},
0554     {QStringLiteral("alpha"), 0x03B1, DirectionBoth},
0555     {QStringLiteral("beta"), 0x03B2, DirectionBoth},
0556     {QStringLiteral("gamma"), 0x03B3, DirectionBoth},
0557     {QStringLiteral("delta"), 0x03B4, DirectionBoth},
0558     {QStringLiteral("varepsilon"), 0x03B5, DirectionBoth},
0559     {QStringLiteral("zeta"), 0x03B6, DirectionBoth},
0560     {QStringLiteral("eta"), 0x03B7, DirectionBoth},
0561     {QStringLiteral("theta"), 0x03B8, DirectionBoth},
0562     {QStringLiteral("iota"), 0x03B9, DirectionBoth},
0563     {QStringLiteral("kappa"), 0x03BA, DirectionBoth},
0564     {QStringLiteral("lamda"), 0x03BB, DirectionCommandToUnicode}, ///< \lamda does not exist, this is mostly for spelling errors
0565     {QStringLiteral("lambda"), 0x03BB, DirectionBoth},
0566     {QStringLiteral("mu"), 0x03BC, DirectionBoth},
0567     {QStringLiteral("nu"), 0x03BD, DirectionBoth},
0568     {QStringLiteral("xi"), 0x03BE, DirectionBoth},
0569     {QStringLiteral("omicron"), 0x03BF, DirectionBoth},
0570     {QStringLiteral("pi"), 0x03C0, DirectionBoth},
0571     {QStringLiteral("rho"), 0x03C1, DirectionBoth},
0572     {QStringLiteral("varsigma"), 0x03C2, DirectionBoth},
0573     {QStringLiteral("sigma"), 0x03C3, DirectionBoth},
0574     {QStringLiteral("tau"), 0x03C4, DirectionBoth},
0575     {QStringLiteral("upsilon"), 0x03C5, DirectionBoth},
0576     {QStringLiteral("varphi"), 0x03C6, DirectionBoth}, ///< see also 0x0278 (LATIN SMALL LETTER PHI)
0577     {QStringLiteral("chi"), 0x03C7, DirectionBoth},
0578     {QStringLiteral("psi"), 0x03C8, DirectionBoth},
0579     {QStringLiteral("omega"), 0x03C9, DirectionBoth},
0580     {QStringLiteral("vartheta"), 0x03D1, DirectionBoth},
0581     {QStringLiteral("varpi"), 0x03D6, DirectionBoth},
0582     {QStringLiteral("digamma"), 0x03DC, DirectionBoth},
0583     {QStringLiteral("varkappa"), 0x03F0, DirectionBoth},
0584     {QStringLiteral("varrho"), 0x03F1, DirectionBoth},
0585     {QStringLiteral("epsilon"), 0x03F5, DirectionBoth},
0586     {QStringLiteral("backepsilon"), 0x03F6, DirectionBoth},
0587     {QStringLiteral("aleph"), 0x05D0, DirectionBoth},
0588     {QStringLiteral("dagger"), 0x2020, DirectionBoth},
0589     {QStringLiteral("ddagger"), 0x2021, DirectionBoth},
0590     {QStringLiteral("mathbb{C}"), 0x2102, DirectionBoth},
0591     {QStringLiteral("ell"), 0x2113, DirectionBoth},
0592     {QStringLiteral("mho"), 0x2127, DirectionBoth},
0593     {QStringLiteral("beth"), 0x2136, DirectionBoth},
0594     {QStringLiteral("gimel"), 0x2137, DirectionBoth},
0595     {QStringLiteral("daleth"), 0x2138, DirectionBoth},
0596     {QStringLiteral("rightarrow"), 0x2192, DirectionBoth},
0597     {QStringLiteral("forall"), 0x2200, DirectionBoth},
0598     {QStringLiteral("complement"), 0x2201, DirectionBoth},
0599     {QStringLiteral("partial"), 0x2202, DirectionBoth},
0600     {QStringLiteral("exists"), 0x2203, DirectionBoth},
0601     {QStringLiteral("nexists"), 0x2204, DirectionBoth},
0602     {QStringLiteral("varnothing"), 0x2205, DirectionBoth},
0603     {QStringLiteral("nabla"), 0x2207, DirectionBoth},
0604     {QStringLiteral("in"), 0x2208, DirectionBoth},
0605     {QStringLiteral("notin"), 0x2209, DirectionBoth},
0606     {QStringLiteral("ni"), 0x220B, DirectionBoth},
0607     {QStringLiteral("not\\ni"), 0x220C, DirectionBoth},
0608     {QStringLiteral("asterisk"), 0x2217, DirectionCommandToUnicode},
0609     {QStringLiteral("infty"), 0x221E, DirectionBoth},
0610     {QStringLiteral("leq"), 0x2264, DirectionBoth},
0611     {QStringLiteral("geq"), 0x2265, DirectionBoth},
0612     {QStringLiteral("lneq"), 0x2268, DirectionBoth},
0613     {QStringLiteral("gneq"), 0x2269, DirectionBoth},
0614     {QStringLiteral("ll"), 0x226A, DirectionBoth},
0615     {QStringLiteral("gg"), 0x226B, DirectionBoth},
0616     {QStringLiteral("nless"), 0x226E, DirectionBoth},
0617     {QStringLiteral("ngtr"), 0x226F, DirectionBoth},
0618     {QStringLiteral("nleq"), 0x2270, DirectionBoth},
0619     {QStringLiteral("ngeq"), 0x2271, DirectionBoth},
0620     {QStringLiteral("subset"), 0x2282, DirectionBoth},
0621     {QStringLiteral("supset"), 0x2283, DirectionBoth},
0622     {QStringLiteral("subseteq"), 0x2286, DirectionBoth},
0623     {QStringLiteral("supseteq"), 0x2287, DirectionBoth},
0624     {QStringLiteral("nsubseteq"), 0x2288, DirectionBoth},
0625     {QStringLiteral("nsupseteq"), 0x2289, DirectionBoth},
0626     {QStringLiteral("subsetneq"), 0x228A, DirectionBoth},
0627     {QStringLiteral("supsetneq"), 0x228B, DirectionBoth}, ///< SUPERSET OF WITH NOT EQUAL TO; 0x228A is the subset variant
0628     {QStringLiteral("Subset"), 0x22D0, DirectionBoth},
0629     {QStringLiteral("Supset"), 0x22D1, DirectionBoth},
0630     {QStringLiteral("lll"), 0x22D8, DirectionBoth},
0631     {QStringLiteral("ggg"), 0x22D9, DirectionBoth},
0632     {QStringLiteral("top"), 0x22A4, DirectionBoth},
0633     {QStringLiteral("bot"), 0x22A5, DirectionBoth},
0634 };
0635 
0636 
0637 /**
0638  * This data structure holds commands representing a single
0639  * character. For example, it maps \AA to A with a ring (Nordic
0640  * letter) and back. The structure is a table with three columns:
0641  * (1) the command's name without a backslash (in the example
0642  * before the 'AA'), (2) the Unicode character described by a
0643  * hexcode, (3) the direction(s) in which the mapping applies.
0644  */
0645 static const struct EncoderLaTeXCharacterCommand {
0646     const QString command; ///< command's name without the leading backslash
0647     const ushort unicode; ///< Unicode character represented by the command
0648     const EncoderLaTeXCommandDirection direction; ///< direction(s) in which this mapping applies
0649 }
0650 encoderLaTeXCharacterCommands[] = {
0651     {QStringLiteral("textexclamdown"), 0x00A1, DirectionCommandToUnicode},
0652     {QStringLiteral("textcent"), 0x00A2, DirectionBoth},
0653     {QStringLiteral("pounds"), 0x00A3, DirectionBoth},
0654     {QStringLiteral("textsterling"), 0x00A3, DirectionBoth},
0655     /** 0x00A4 */
0656     {QStringLiteral("textyen"), 0x00A5, DirectionBoth},
0657     {QStringLiteral("textbrokenbar"), 0x00A6, DirectionBoth},
0658     {QStringLiteral("S"), 0x00A7, DirectionBoth},
0659     {QStringLiteral("textsection"), 0x00A7, DirectionBoth},
0660     /** 0x00A8 */
0661     {QStringLiteral("copyright"), 0x00A9, DirectionBoth},
0662     {QStringLiteral("textcopyright"), 0x00A9, DirectionBoth},
0663     {QStringLiteral("textordfeminine"), 0x00AA, DirectionBoth},
0664     {QStringLiteral("guillemotleft"), 0x00AB, DirectionCommandToUnicode},
0665     {QStringLiteral("textflqq"), 0x00AB, DirectionCommandToUnicode},
0666     {QStringLiteral("flqq"), 0x00AB, DirectionBoth},
0667     /** 0x00AC */
0668     /** 0x00AD */
0669     {QStringLiteral("textregistered"), 0x00AE, DirectionBoth},
0670     /** 0x00AF */
0671     {QStringLiteral("textdegree"), 0x00B0, DirectionBoth},
0672     {QStringLiteral("textpm"), 0x00B1, DirectionBoth},
0673     {QStringLiteral("textplusminus"), 0x00B1, DirectionCommandToUnicode},
0674     /** 0x00B2 */
0675     /** 0x00B3 */
0676     /** 0x00B4 */
0677     // Notes about Unicode U+00B5 ('micro sign'):
0678     // - Derived from the Greek 'mu' but used as a SI prefix meaning 'one millionth'
0679     // - Unicode differs between this symbol and a 'real' Greek 'mu' which has position U+03BC
0680     // - There are more lower case 'mu' in Unicode for mathematics (bold, italics, sans-serif, ...)
0681     //   at position U+1D6CD and later; those are not supported at all by KBibTeX
0682     {QStringLiteral("textmu"), 0x00B5, DirectionUnicodeToCommand},
0683     {QStringLiteral("textparagraph"), 0x00B6, DirectionBoth},
0684     {QStringLiteral("textpilcrow"), 0x00B6, DirectionBoth},
0685     {QStringLiteral("textperiodcentered"), 0x00B7, DirectionCommandToUnicode},
0686     {QStringLiteral("textcdot"), 0x00B7, DirectionBoth},
0687     {QStringLiteral("textcentereddot"), 0x00B7, DirectionCommandToUnicode},
0688     /** 0x00B8 */
0689     /** 0x00B9 */
0690     {QStringLiteral("textordmasculine"), 0x00BA, DirectionBoth},
0691     {QStringLiteral("guillemotright"), 0x00BB, DirectionCommandToUnicode},
0692     {QStringLiteral("textfrqq"), 0x00BB, DirectionCommandToUnicode},
0693     {QStringLiteral("frqq"), 0x00BB, DirectionBoth},
0694     {QStringLiteral("textonequarter"), 0x00BC, DirectionBoth},
0695     {QStringLiteral("textonehalf"), 0x00BD, DirectionBoth},
0696     {QStringLiteral("textthreequarters"), 0x00BE, DirectionBoth},
0697     {QStringLiteral("textquestiondown"), 0x00BF, DirectionCommandToUnicode}, // TODO /// recommended to write  ?`  instead of  \textquestiondown
0698     {QStringLiteral("AA"), 0x00C5, DirectionBoth},
0699     {QStringLiteral("AE"), 0x00C6, DirectionBoth},
0700     {QStringLiteral("DH"), 0x00D0, DirectionBoth},
0701     {QStringLiteral("texttimes"), 0x00D7, DirectionBoth},
0702     {QStringLiteral("textmultiply"), 0x00D7, DirectionCommandToUnicode},
0703     {QStringLiteral("O"), 0x00D8, DirectionBoth},
0704     {QStringLiteral("TH"), 0x00DE, DirectionBoth},
0705     {QStringLiteral("Thorn"), 0x00DE, DirectionCommandToUnicode},
0706     {QStringLiteral("textThorn"), 0x00DE, DirectionCommandToUnicode},
0707     {QStringLiteral("ss"), 0x00DF, DirectionBoth},
0708     {QStringLiteral("aa"), 0x00E5, DirectionBoth},
0709     {QStringLiteral("ae"), 0x00E6, DirectionBoth},
0710     {QStringLiteral("dh"), 0x00F0, DirectionBoth},
0711     {QStringLiteral("textdiv"), 0x00F7, DirectionBoth},
0712     {QStringLiteral("textdivide"), 0x00F7, DirectionCommandToUnicode},
0713     {QStringLiteral("o"), 0x00F8, DirectionBoth},
0714     {QStringLiteral("th"), 0x00FE, DirectionBoth},
0715     {QStringLiteral("textthorn"), 0x00FE, DirectionCommandToUnicode},
0716     {QStringLiteral("textthornvari"), 0x00FE, DirectionCommandToUnicode},
0717     {QStringLiteral("textthornvarii"), 0x00FE, DirectionCommandToUnicode},
0718     {QStringLiteral("textthornvariii"), 0x00FE, DirectionCommandToUnicode},
0719     {QStringLiteral("textthornvariv"), 0x00FE, DirectionCommandToUnicode},
0720     {QStringLiteral("Aogonek"), 0x0104, DirectionCommandToUnicode},
0721     {QStringLiteral("aogonek"), 0x0105, DirectionCommandToUnicode},
0722     {QStringLiteral("DJ"), 0x0110, DirectionBoth},
0723     {QStringLiteral("dj"), 0x0111, DirectionBoth},
0724     {QStringLiteral("textcrd"), 0x0111, DirectionCommandToUnicode},
0725     {QStringLiteral("textHslash"), 0x0126, DirectionCommandToUnicode},
0726     {QStringLiteral("textHbar"), 0x0126, DirectionCommandToUnicode},
0727     {QStringLiteral("textcrh"), 0x0127, DirectionCommandToUnicode},
0728     {QStringLiteral("texthbar"), 0x0127, DirectionCommandToUnicode},
0729     {QStringLiteral("i"), 0x0131, DirectionBoth},
0730     {QStringLiteral("IJ"), 0x0132, DirectionBoth},
0731     {QStringLiteral("ij"), 0x0133, DirectionBoth},
0732     {QStringLiteral("textkra"), 0x0138, DirectionCommandToUnicode},
0733     {QStringLiteral("Lcaron"), 0x013D, DirectionCommandToUnicode},
0734     {QStringLiteral("lcaron"), 0x013E, DirectionCommandToUnicode},
0735     {QStringLiteral("L"), 0x0141, DirectionBoth},
0736     {QStringLiteral("Lstroke"), 0x0141, DirectionCommandToUnicode},
0737     {QStringLiteral("l"), 0x0142, DirectionBoth},
0738     {QStringLiteral("lstroke"), 0x0142, DirectionCommandToUnicode},
0739     {QStringLiteral("textbarl"), 0x0142, DirectionCommandToUnicode},
0740     {QStringLiteral("NG"), 0x014A, DirectionBoth},
0741     {QStringLiteral("ng"), 0x014B, DirectionBoth},
0742     {QStringLiteral("OE"), 0x0152, DirectionBoth},
0743     {QStringLiteral("oe"), 0x0153, DirectionBoth},
0744     {QStringLiteral("Racute"), 0x0154, DirectionCommandToUnicode},
0745     {QStringLiteral("racute"), 0x0155, DirectionCommandToUnicode},
0746     {QStringLiteral("Sacute"), 0x015A, DirectionCommandToUnicode},
0747     {QStringLiteral("sacute"), 0x015B, DirectionCommandToUnicode},
0748     {QStringLiteral("Scedilla"), 0x015E, DirectionCommandToUnicode},
0749     {QStringLiteral("scedilla"), 0x015F, DirectionCommandToUnicode},
0750     {QStringLiteral("Scaron"), 0x0160, DirectionCommandToUnicode},
0751     {QStringLiteral("scaron"), 0x0161, DirectionCommandToUnicode},
0752     {QStringLiteral("Tcaron"), 0x0164, DirectionCommandToUnicode},
0753     {QStringLiteral("tcaron"), 0x0165, DirectionCommandToUnicode},
0754     {QStringLiteral("textTstroke"), 0x0166, DirectionCommandToUnicode},
0755     {QStringLiteral("textTbar"), 0x0166, DirectionCommandToUnicode},
0756     {QStringLiteral("textTslash"), 0x0166, DirectionCommandToUnicode},
0757     {QStringLiteral("texttstroke"), 0x0167, DirectionCommandToUnicode},
0758     {QStringLiteral("texttbar"), 0x0167, DirectionCommandToUnicode},
0759     {QStringLiteral("texttslash"), 0x0167, DirectionCommandToUnicode},
0760     {QStringLiteral("Zdotaccent"), 0x017B, DirectionCommandToUnicode},
0761     {QStringLiteral("zdotaccent"), 0x017C, DirectionCommandToUnicode},
0762     {QStringLiteral("Zcaron"), 0x017D, DirectionCommandToUnicode},
0763     {QStringLiteral("zcaron"), 0x017E, DirectionCommandToUnicode},
0764     {QStringLiteral("textlongs"), 0x017F, DirectionCommandToUnicode},
0765     {QStringLiteral("textcrb"), 0x0180, DirectionCommandToUnicode},
0766     {QStringLiteral("textBhook"), 0x0181, DirectionCommandToUnicode},
0767     {QStringLiteral("texthausaB"), 0x0181, DirectionCommandToUnicode},
0768     {QStringLiteral("textOopen"), 0x0186, DirectionCommandToUnicode},
0769     {QStringLiteral("textChook"), 0x0187, DirectionCommandToUnicode},
0770     {QStringLiteral("textchook"), 0x0188, DirectionCommandToUnicode},
0771     {QStringLiteral("texthtc"), 0x0188, DirectionCommandToUnicode},
0772     {QStringLiteral("textDafrican"), 0x0189, DirectionCommandToUnicode},
0773     {QStringLiteral("textDhook"), 0x018A, DirectionCommandToUnicode},
0774     {QStringLiteral("texthausaD"), 0x018A, DirectionCommandToUnicode},
0775     {QStringLiteral("textEreversed"), 0x018E, DirectionCommandToUnicode},
0776     {QStringLiteral("textrevE"), 0x018E, DirectionCommandToUnicode},
0777     {QStringLiteral("textEopen"), 0x0190, DirectionCommandToUnicode},
0778     {QStringLiteral("textFhook"), 0x0191, DirectionCommandToUnicode},
0779     {QStringLiteral("textflorin"), 0x0192, DirectionBoth},
0780     {QStringLiteral("textgamma"), 0x0194, DirectionCommandToUnicode},
0781     {QStringLiteral("textGammaafrican"), 0x0194, DirectionCommandToUnicode},
0782     {QStringLiteral("hv"), 0x0195, DirectionCommandToUnicode},
0783     {QStringLiteral("texthvlig"), 0x0195, DirectionCommandToUnicode},
0784     {QStringLiteral("textIotaafrican"), 0x0196, DirectionCommandToUnicode},
0785     {QStringLiteral("textKhook"), 0x0198, DirectionCommandToUnicode},
0786     {QStringLiteral("texthausaK"), 0x0198, DirectionCommandToUnicode},
0787     {QStringLiteral("texthtk"), 0x0199, DirectionCommandToUnicode},
0788     {QStringLiteral("textkhook"), 0x0199, DirectionCommandToUnicode},
0789     {QStringLiteral("textbarl"), 0x019A, DirectionCommandToUnicode}, ///< NOTE(review): 'textbarl' is also listed for 0x0142; if the lookup takes the first match, this row is never reached -- confirm
0790     {QStringLiteral("textcrlambda"), 0x019B, DirectionCommandToUnicode},
0791     {QStringLiteral("textNhookleft"), 0x019D, DirectionCommandToUnicode},
0792     {QStringLiteral("textnrleg"), 0x019E, DirectionCommandToUnicode},
0793     {QStringLiteral("textPUnrleg"), 0x019E, DirectionCommandToUnicode},
0794     {QStringLiteral("Ohorn"), 0x01A0, DirectionCommandToUnicode},
0795     {QStringLiteral("ohorn"), 0x01A1, DirectionCommandToUnicode},
0796     {QStringLiteral("textPhook"), 0x01A4, DirectionCommandToUnicode},
0797     {QStringLiteral("texthtp"), 0x01A5, DirectionCommandToUnicode},
0798     {QStringLiteral("textphook"), 0x01A5, DirectionCommandToUnicode},
0799     {QStringLiteral("ESH"), 0x01A9, DirectionCommandToUnicode},
0800     {QStringLiteral("textEsh"), 0x01A9, DirectionCommandToUnicode},
0801     {QStringLiteral("textlooptoprevsh"), 0x01AA, DirectionCommandToUnicode},
0802     {QStringLiteral("textlhtlongi"), 0x01AA, DirectionCommandToUnicode},
0803     {QStringLiteral("textlhookt"), 0x01AB, DirectionCommandToUnicode},
0804     {QStringLiteral("textThook"), 0x01AC, DirectionCommandToUnicode},
0805     {QStringLiteral("textthook"), 0x01AD, DirectionCommandToUnicode},
0806     {QStringLiteral("texthtt"), 0x01AD, DirectionCommandToUnicode},
0807     {QStringLiteral("textTretroflexhook"), 0x01AE, DirectionCommandToUnicode},
0808     {QStringLiteral("Uhorn"), 0x01AF, DirectionCommandToUnicode},
0809     {QStringLiteral("uhorn"), 0x01B0, DirectionCommandToUnicode},
0810     {QStringLiteral("textupsilon"), 0x01B1, DirectionCommandToUnicode},
0811     {QStringLiteral("textVhook"), 0x01B2, DirectionCommandToUnicode},
0812     {QStringLiteral("textYhook"), 0x01B3, DirectionCommandToUnicode},
0813     {QStringLiteral("textvhook"), 0x01B4, DirectionCommandToUnicode},
0814     {QStringLiteral("Zbar"), 0x01B5, DirectionCommandToUnicode},
0815     {QStringLiteral("zbar"), 0x01B6, DirectionCommandToUnicode},
0816     {QStringLiteral("EZH"), 0x01B7, DirectionCommandToUnicode},
0817     {QStringLiteral("textEzh"), 0x01B7, DirectionCommandToUnicode},
0818     {QStringLiteral("LJ"), 0x01C7, DirectionCommandToUnicode},
0819     {QStringLiteral("Lj"), 0x01C8, DirectionCommandToUnicode},
0820     {QStringLiteral("lj"), 0x01C9, DirectionCommandToUnicode},
0821     {QStringLiteral("NJ"), 0x01CA, DirectionCommandToUnicode},
0822     {QStringLiteral("Nj"), 0x01CB, DirectionCommandToUnicode},
0823     {QStringLiteral("nj"), 0x01CC, DirectionCommandToUnicode},
0824     {QStringLiteral("DZ"), 0x01F1, DirectionCommandToUnicode},
0825     {QStringLiteral("Dz"), 0x01F2, DirectionCommandToUnicode},
0826     {QStringLiteral("dz"), 0x01F3, DirectionCommandToUnicode},
0827     {QStringLiteral("HV"), 0x01F6, DirectionCommandToUnicode},
0828     {QStringLiteral("j"), 0x0237, DirectionBoth},
0829     // Notes about Unicode U+03BC ('Greek small letter mu'):
0830     // - Unicode differs between this symbol and a 'micro' (SI-prefix) which has position U+00B5
0831     // - There are more lower case 'mu' in Unicode for mathematics (bold, italics, sans-serif, ...)
0832     //   at position U+1D6CD and later; those are not supported at all by KBibTeX
0833     // - LaTeX package 'textcomp' provides command '\textmu' but no other Greek letters
0834     // - LaTeX package 'textgreek' provides commands for all Greek letters (e.g. '\textpi') but
0835     //   to avoid conflicts with 'textcomp', the command for 'mu' is '\textmugreek'
0836     {QStringLiteral("textmugreek"), 0x03BC, DirectionCommandToUnicode},
0837     {QStringLiteral("textmu"), 0x03BC, DirectionBoth},
0838     {QStringLiteral("ldots"), 0x2026, DirectionBoth},
0839     {QStringLiteral("grqq"), 0x201C, DirectionCommandToUnicode},
0840     {QStringLiteral("textquotedblleft"), 0x201C, DirectionCommandToUnicode},
0841     {QStringLiteral("rqq"), 0x201D, DirectionCommandToUnicode},
0842     {QStringLiteral("textquotedblright"), 0x201D, DirectionCommandToUnicode},
0843     {QStringLiteral("glqq"), 0x201E, DirectionCommandToUnicode},
0844     {QStringLiteral("SS"), 0x1E9E, DirectionBoth},
0845     {QStringLiteral("textendash"), 0x2013, DirectionCommandToUnicode},
0846     {QStringLiteral("textemdash"), 0x2014, DirectionCommandToUnicode},
0847     {QStringLiteral("textquoteleft"), 0x2018, DirectionCommandToUnicode},
0848     {QStringLiteral("lq"), 0x2018, DirectionBoth},
0849     {QStringLiteral("textquoteright"), 0x2019, DirectionCommandToUnicode},
0850     {QStringLiteral("rq"), 0x2019, DirectionBoth}, ///< tricky one: 'r' is a valid modifier
0851     {QStringLiteral("quotesinglbase"), 0x201A, DirectionBoth},
0852     {QStringLiteral("quotedblbase"), 0x201E, DirectionBoth},
0853     {QStringLiteral("textbullet"), 0x2022, DirectionBoth},
0854     {QStringLiteral("guilsinglleft"), 0x2039, DirectionBoth},
0855     {QStringLiteral("guilsinglright"), 0x203A, DirectionBoth},
0856     {QStringLiteral("textcelsius"), 0x2103, DirectionBoth},
0857     {QStringLiteral("textleftarrow"), 0x2190, DirectionBoth},
0858     {QStringLiteral("textuparrow"), 0x2191, DirectionBoth},
0859     {QStringLiteral("textrightarrow"), 0x2192, DirectionBoth},
0860     {QStringLiteral("textdownarrow"), 0x2193, DirectionBoth}
0861 };
0862 
0863 const QChar EncoderLaTeX::encoderLaTeXProtectedSymbols[] = {QLatin1Char('#'), QLatin1Char('&'), QLatin1Char('%')}; ///< NOTE(review): presumably symbols that must be escaped in any LaTeX output; usage is outside this excerpt -- confirm
0864 
0865 const QChar EncoderLaTeX::encoderLaTeXProtectedTextOnlySymbols[] = {QLatin1Char('_')}; ///< NOTE(review): presumably escaped in text mode only, as '_' has a meaning in math mode -- confirm
0866 
0867 
0868 /**
0869  * This data structure holds LaTeX symbol sequences (without
0870  * any backslash) that represent a single Unicode character.
0871  * For example, it maps --- to an 'em dash' and back.
0872  * The structure is a table with three columns: (1) the symbol
0873  * sequence (in the example before the '---'), (2) the Unicode
0874  * character described by a hexcode, (3) the mapping's direction(s).
0875  */
0876 static const struct EncoderLaTeXSymbolSequence {
0877     const QString latex; ///< plain symbol sequence as written in LaTeX
0878     const ushort unicode; ///< Unicode character represented by the sequence
0879     const EncoderLaTeXCommandDirection direction; ///< direction(s) in which this mapping applies
0880 }
0881 encoderLaTeXSymbolSequences[] = {
0882     {QStringLiteral("!`"), 0x00A1, DirectionBoth},
0883     {QStringLiteral("\"<"), 0x00AB, DirectionBoth},
0884     {QStringLiteral("\">"), 0x00BB, DirectionBoth},
0885     {QStringLiteral("?`"), 0x00BF, DirectionBoth},
0886     {QStringLiteral("---"), 0x2014, DirectionBoth}, ///< --- must come before --
0887     {QStringLiteral("--"), 0x2013, DirectionBoth},
0888     {QStringLiteral("``"), 0x201C, DirectionBoth},
0889     {QStringLiteral("''"), 0x201D, DirectionBoth},
0890     {QStringLiteral("ff"), 0xFB00, DirectionUnicodeToCommand},
0891     {QStringLiteral("fi"), 0xFB01, DirectionUnicodeToCommand},
0892     {QStringLiteral("fl"), 0xFB02, DirectionUnicodeToCommand},
0893     {QStringLiteral("ffi"), 0xFB03, DirectionUnicodeToCommand},
0894     {QStringLiteral("ffl"), 0xFB04, DirectionUnicodeToCommand},
0895     {QStringLiteral("st"), 0xFB05, DirectionUnicodeToCommand}, ///< ligature of long s and t, not of f and t
0896     {QStringLiteral("st"), 0xFB06, DirectionUnicodeToCommand}
0897 };
0898 
0899 
0900 EncoderLaTeX::EncoderLaTeX()
0901         : Encoder()
0902 {
0903     /// Initialize lookup table with NULL pointers
0904     for (int i = 0; i < lookupTableNumModifiers; ++i) lookupTable[i] = nullptr;
0905 
0906     int lookupTableCount = 0; ///< number of rows (distinct modifiers) created so far
0907     /// Go through all table rows of encoderLaTeXEscapedCharacters
0908     for (const EncoderLaTeXEscapedCharacter &encoderLaTeXEscapedCharacter : encoderLaTeXEscapedCharacters) {
0909         /// Search the rows created so far for this row's modifier;
0910         /// j ends at the matching row's index, or at -1 if there is none
0911         int j = lookupTableCount - 1;
0912         while (j >= 0 && lookupTable[j]->modifier != encoderLaTeXEscapedCharacter.modifier) --j;
0913 
0914         if (j < 0) {
0915             /// This row's modifier appeared for the first time and needs a row of its own.
0916             /// The lookup table has a fixed number of slots, so never write beyond its end.
0917             if (lookupTableCount >= lookupTableNumModifiers) {
0918                 qCWarning(LOG_KBIBTEX_IO) << "Lookup table is full, cannot handle modifier " << encoderLaTeXEscapedCharacter.modifier;
0919                 continue;
0920             }
0921             lookupTable[lookupTableCount] = new EncoderLaTeXEscapedCharacterLookupTableRow;
0922             lookupTable[lookupTableCount]->modifier = encoderLaTeXEscapedCharacter.modifier;
0923             /// If no special character is known for a letter+modifier
0924             /// combination, fall back using the ASCII character only
0925             for (ushort k = 0; k < 26; ++k) {
0926                 lookupTable[lookupTableCount]->unicode[k] = QChar(QLatin1Char('A').unicode() + k);
0927                 lookupTable[lookupTableCount]->unicode[k + 26] = QChar(QLatin1Char('a').unicode() + k);
0928             }
0929             for (ushort k = 0; k < 10; ++k)
0930                 lookupTable[lookupTableCount]->unicode[k + 52] = QChar(QLatin1Char('0').unicode() + k);
0931             j = lookupTableCount++; ///< the new row is the one to fill below
0932         }
0933 
0934         /// Add the letter as of the current row in encoderLaTeXEscapedCharacters
0935         /// into Unicode char array in the current modifier's row in the lookup table.
0936         const int pos = asciiLetterOrDigitToPos(encoderLaTeXEscapedCharacter.letter);
0937         if (pos >= 0)
0938             lookupTable[j]->unicode[pos] = QChar(encoderLaTeXEscapedCharacter.unicode);
0939         else
0940             qCWarning(LOG_KBIBTEX_IO) << "Cannot handle letter " << encoderLaTeXEscapedCharacter.letter;
0941     }
0942 }
0943 
0944 EncoderLaTeX::~EncoderLaTeX()
0945 {
0946     /// Clean-up memory and reset every slot, so that the file-static table keeps no dangling pointers
0947     for (int i = lookupTableNumModifiers - 1; i >= 0; --i) {
0948         delete lookupTable[i]; lookupTable[i] = nullptr; ///< deleting a nullptr is a no-op, hence no check
0949     }
0950 }
0951 
0952 QString EncoderLaTeX::decode(const QString &input) const
0953 {
0954     const int len = input.length();
0955     QString output;
0956     output.reserve(((len >> 10) + 2) << 10); // reserving multiples of 1024 Bytes
0957     enum MathMode {
0958         MathModeNone = 0, MathModeDollar, MathModeEnsureMath
0959     };
0960     QStack<MathMode> currentMathMode;
0961 #define currentMathModeTop()  (currentMathMode.empty()?MathModeNone:currentMathMode.top())
0962     int openCurlyBracketCounterEnsureMath = 0;
0963     QStack<int> popEnsureMathAtOpenCurlyBacketCounter;
0964     int cachedAsciiLetterOrDigitToPos = -1;
0965 
0966     /// Go through input char by char
0967     for (int i = 0; i < len; ++i) {
0968         /**
0969          * Repeatedly check if input data contains a verbatim command
0970          * like \url{...}, copy it to output, and update i to point
0971          * to the next character after the verbatim command.
0972          */
0973         while (testAndCopyVerbatimCommands(input, i, output));
0974         if (i >= len) break;
0975 
0976         /// Fetch current input char
0977         const QChar c = input[i];
0978 
0979         if (c == QLatin1Char('{')) {
0980             /// First case: An opening curly bracket,
0981             /// which is harmless (see else case), unless ...
0982             if (i < len - 3 && input[i + 1] == QLatin1Char('\\')) {
0983                 /// ... it continues with a backslash
0984 
0985                 /// Next, check if there follows a modifier after the backslash
0986                 /// For example an quotation mark as used in {\"a}
0987                 const int lookupTablePos = modifierInLookupTable(input[i + 2]);
0988 
0989                 /// Check for spaces between modifier and character, for example
0990                 /// like {\H o}
0991                 int skipSpaces = 0;
0992                 while (i + 3 + skipSpaces < len && input[i + 3 + skipSpaces] == QLatin1Char(' ') && skipSpaces < 16) ++skipSpaces;
0993 
0994                 bool found = false;
0995                 if (lookupTablePos >= 0 && (skipSpaces > 0 || !input[i + 2].isLetter()) && i + skipSpaces < len - 4 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 3 + skipSpaces])) >= 0 && input[i + 4 + skipSpaces] == QLatin1Char('}')) {
0996                     /// If we found a modifier which is followed by
0997                     /// a letter followed by a closing curly bracket,
0998                     /// we are looking at something like {\"A}
0999                     /// Use lookup table to see what Unicode char this
1000                     /// represents
1001                     const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos];
1002                     if (unicodeLetter.unicode() >= 127) {
1003                         output.append(unicodeLetter);
1004                         /// Step over those additional characters
1005                         i += 4 + skipSpaces;
1006                         found = true;
1007                     }
1008                     /// Don't print any warnings yet, as this if-case may got triggered by e.g. \mu
1009                     /// ('m' is a potential modifier, yet \mu should be recognized as Greek letter later)
1010                 } else if (lookupTablePos >= 0 && i + skipSpaces < len - 5 && input[i + 3 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 4 + skipSpaces]) && input[i + 5 + skipSpaces] == QLatin1Char('}')) {
1011                     /// This is the case for {\'\i} or alike.
1012                     for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters)
1013                         if ((dotlessIJCharacter.direction & DirectionCommandToUnicode) && dotlessIJCharacter.letter == input[i + 4 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 2]) {
1014                             output.append(QChar(dotlessIJCharacter.unicode));
1015                             found = true;
1016                             break;
1017                         }
1018                     if (!found) {
1019                         /// This combination of modifier and letter is not known,
1020                         /// so try to preserve it
1021                         output.append(QStringView{input}.mid(i, 6 + skipSpaces));
1022                         qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 2] << "BACKSLASH" << input[i + 4 + skipSpaces];
1023                     }
1024                     /// Step over those additional characters
1025                     i += 5 + skipSpaces;
1026                     found = true;
1027                 } else if (lookupTablePos >= 0 && i + skipSpaces < len - 6 && input[i + 3 + skipSpaces] == QLatin1Char('{') && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 4 + skipSpaces])) >= 0 && input[i + 5 + skipSpaces] == QLatin1Char('}') && input[i + 6 + skipSpaces] == QLatin1Char('}')) {
1028                     /// If we found a modifier which is followed by
1029                     /// an opening curly bracket followed by a letter
1030                     /// followed by two closing curly brackets,
1031                     /// we are looking at something like {\"{A}}
1032                     /// Use lookup table to see what Unicode char this
1033                     /// represents
1034                     const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos];
1035                     if (unicodeLetter.unicode() < 127) {
1036                         /// This combination of modifier and letter is not known,
1037                         /// so try to preserve it
1038                         output.append(QStringView{input}.mid(i, 7 + skipSpaces));
1039                         qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10);
1040                         qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 7 + skipSpaces);
1041                     } else
1042                         output.append(unicodeLetter);
1043                     /// Step over those additional characters
1044                     i += 6 + skipSpaces;
1045                     found = true;
1046                 } else if (lookupTablePos >= 0 && i + skipSpaces < len - 7 && input[i + 3 + skipSpaces] == QLatin1Char('{') && input[i + 4 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 5 + skipSpaces]) && input[i + 6 + skipSpaces] == QLatin1Char('}') && input[i + 7 + skipSpaces] == QLatin1Char('}')) {
1047                     /// This is the case for {\'{\i}} or alike.
1048                     for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters)
1049                         if ((dotlessIJCharacter.direction & DirectionCommandToUnicode) && dotlessIJCharacter.letter == input[i + 5 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 2]) {
1050                             output.append(QChar(dotlessIJCharacter.unicode));
1051                             found = true;
1052                             break;
1053                         }
1054                     if (!found) {
1055                         /// This combination of modifier and letter is not known,
1056                         /// so try to preserve it
1057                         output.append(QStringView{input}.mid(i, 8 + skipSpaces));
1058                         qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10);
1059                         qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 2] << "BACKSLASH {" << input[i + 5 + skipSpaces] << "}";
1060                     }
1061                     /// Step over those additional characters
1062                     i += 7 + skipSpaces;
1063                     found = true;
1064                 }
1065 
1066                 if (!found) {
1067                     /// Now, either some two-letter command like {\AA} or {\mu} is left
1068                     /// to check for or there is completely unsuppored command sequence,
1069                     /// but which then should be kept unmodified
1070                     const QString alpha = readAlphaCharacters(input, i + 2);
1071                     int nextPosAfterAlpha = i + 2 + alpha.size();
1072                     if (nextPosAfterAlpha < input.length() && input[nextPosAfterAlpha] == QLatin1Char('}')) {
1073                         /// We may deal with a string like {\AA} or {\mu}
1074                         /// Check which command it is, then insert corresponding Unicode character
1075                         found = false;
1076                         for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) {
1077                             if ((encoderLaTeXCharacterCommand.direction & DirectionCommandToUnicode) && encoderLaTeXCharacterCommand.command == alpha) {
1078                                 output.append(QChar(encoderLaTeXCharacterCommand.unicode));
1079                                 found = true;
1080                                 break;
1081                             }
1082                         }
1083 
1084                         /// Check if a math command has been read,
1085                         /// like \subset
1086                         /// (automatically skipped if command was found above)
1087                         if (!found)
1088                             for (const MathCommand &mathCommand : mathCommands) {
1089                                 if ((mathCommand.direction & DirectionCommandToUnicode) && mathCommand.command == alpha) {
1090                                     output.append(QChar(mathCommand.unicode));
1091                                     found = true;
1092                                     break;
1093                                 }
1094                             }
1095 
1096                         if (!found) {
1097                             /// Dealing with a string like {\noopsort}
1098                             /// (see BibTeX documentation where this gets explained)
1099                             output.append(QStringView{input}.mid(i, 3 + alpha.size()));
1100                         }
1101                         i = nextPosAfterAlpha;
1102                     } else {
1103                         /// Could be something like {\tt filename.txt}
1104                         /// Keep it as it is
1105                         output.append(c);
1106                     }
1107                 }
1108             } else {
1109                 /// Nothing special, copy input char to output
1110                 output.append(c);
1111             }
1112         } else if (c == QLatin1Char('\\') && i < len - 1) {
1113             /// Second case: A backslash as in \"o
1114 
1115             /// Sometimes such command are closed with just {},
1116             /// so remember if to check for that
1117             bool checkForExtraCurlyAtEnd = false;
1118 
1119             /// Check if there follows a modifier after the backslash
1120             /// For example an quotation mark as used in \"a
1121             const int lookupTablePos = modifierInLookupTable(input[i + 1]);
1122 
1123             /// Check for spaces between modifier and character, for example
1124             /// like \H o
1125             int skipSpaces = 0;
1126             while (i + 2 + skipSpaces < len && input[i + 2 + skipSpaces] == QLatin1Char(' ') && skipSpaces < 16) ++skipSpaces;
1127 
1128             bool found = false;
1129             if (lookupTablePos >= 0 && (skipSpaces > 0 || !input[i + 1].isLetter()) && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0) {
1130                 /// We found a special modifier which is followed by
1131                 /// a letter followed by normal text without any
1132                 /// delimiter, so we are looking at something like
1133                 /// \"u inside Kr\"uger
1134                 /// Use lookup table to see what Unicode char this
1135                 /// represents
1136                 const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos];
1137                 if (unicodeLetter.unicode() > 127) {
1138                     output.append(unicodeLetter);
1139                     /// Step over those additional characters
1140                     i += 2 + skipSpaces;
1141                     found = true;
1142                 }
1143                 /// Don't print any warnings yet, as this if-case may got triggered by e.g. \mu
1144                 /// ('m' is a potential modifier, yet \mu should be recognized as Greek letter later)
1145             } else if (lookupTablePos >= 0 && (skipSpaces > 0 || !input[i + 1].isLetter()) && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0 && (i + skipSpaces == len - 3 || input[i + 3 + skipSpaces] == QLatin1Char('}') || input[i + 3 + skipSpaces] == QLatin1Char('{') || input[i + 3 + skipSpaces] == QLatin1Char(' ') || input[i + 3 + skipSpaces] == QLatin1Char('\t') || input[i + 3 + skipSpaces] == QLatin1Char('\\') || input[i + 3 + skipSpaces] == QLatin1Char('\r') || input[i + 3 + skipSpaces] == QLatin1Char('\n'))) {
1146                 /// We found a modifier which is followed by
1147                 /// a letter followed by a command delimiter such
1148                 /// as a whitespace, so we are looking at something
1149                 /// like \"u followed by a space or another delimiter
1150                 /// Use lookup table to see what Unicode char this
1151                 /// represents
1152                 const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos];
1153                 if (unicodeLetter.unicode() >= 127) {
1154                     output.append(unicodeLetter);
1155                     /// Step over those additional characters
1156                     i += 2 + skipSpaces;
1157                     found = true;
1158 
1159                     if (input[i + 1] != QLatin1Char(' ') && input[i + 1] != QLatin1Char('\r') && input[i + 1] != QLatin1Char('\n')) {
1160                         /// If no whitespace follows, still
1161                         /// check for extra curly brackets
1162                         checkForExtraCurlyAtEnd = true;
1163                     }
1164                 }
1165                 /// Don't print any warnings yet, as this if-case may got triggered by e.g. \mu
1166                 /// ('m' is a potential modifier, yet \mu should be recognized as Greek letter later)
1167             } else if (lookupTablePos >= 0 && i + skipSpaces < len - 4 && input[i + 2 + skipSpaces] == QLatin1Char('{') && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 3 + skipSpaces])) >= 0 && input[i + 4 + skipSpaces] == QLatin1Char('}')) {
1168                 /// We found a modifier which is followed by an opening
1169                 /// curly bracket followed a letter followed by a closing
1170                 /// curly bracket, so we are looking at something
1171                 /// like \"{u}
1172                 /// Use lookup table to see what Unicode char this
1173                 /// represents
1174                 const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos];
1175                 if (unicodeLetter.unicode() < 127) {
1176                     /// This combination of modifier and letter is not known,
1177                     /// so try to preserve it
1178                     output.append(QStringView{input}.mid(i, 5 + skipSpaces));
1179                     qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10);
1180                     qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 5 + skipSpaces);
1181                 } else
1182                     output.append(unicodeLetter);
1183                 /// Step over those additional characters
1184                 i += 4 + skipSpaces;
1185                 found = true;
1186             } else if (lookupTablePos >= 0 && i + skipSpaces < len - 3 && input[i + 2 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 3 + skipSpaces])) {
1187                 /// This is the case for \'\i or alike.
1188                 for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters)
1189                     if ((dotlessIJCharacter.direction & DirectionCommandToUnicode) && dotlessIJCharacter.letter == input[i + 3 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 1]) {
1190                         output.append(QChar(dotlessIJCharacter.unicode));
1191                         found = true;
1192                         break;
1193                     }
1194                 if (!found) {
1195                     /// This combination of modifier and letter is not known,
1196                     /// so try to preserve it
1197                     output.append(QStringView{input}.mid(i, 4 + skipSpaces));
1198                     qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 1] << "BACKSLASH" << input[i + 3 + skipSpaces];
1199                 }
1200                 /// Step over those additional characters
1201                 i += 3 + skipSpaces;
1202                 found = true;
1203             } else if (lookupTablePos >= 0 && i + skipSpaces < len - 5 && input[i + 2 + skipSpaces] == QLatin1Char('{') && input[i + 3 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 4 + skipSpaces]) && input[i + 5 + skipSpaces] == QLatin1Char('}')) {
1204                 /// This is the case for \'{\i} or alike.
1205                 for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters)
1206                     if ((dotlessIJCharacter.direction & DirectionCommandToUnicode) && dotlessIJCharacter.letter == input[i + 4 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 1]) {
1207                         output.append(QChar(dotlessIJCharacter.unicode));
1208                         found = true;
1209                         break;
1210                     }
1211                 if (!found) {
1212                     /// This combination of modifier and letter is not known,
1213                     /// so try to preserve it
1214                     output.append(QStringView{input}.mid(i, 6 + skipSpaces));
1215                     qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 1] << "BACKSLASH {" << input[i + 4 + skipSpaces] << "}";
1216                 }
1217                 /// Step over those additional characters
1218                 i += 5 + skipSpaces;
1219                 found = true;
1220             }
1221 
1222             if (!found && i < len - 1) {
1223                 /// Now, the case of something like \AA is left
1224                 /// to check for
1225                 const QString alpha = readAlphaCharacters(input, i + 1);
1226                 int nextPosAfterAlpha = i + alpha.size();
1227                 if (alpha.size() >= 1 && alpha.at(0).isLetter()) {
1228                     /// We are dealing actually with a string like \AA or \o
1229                     /// Check which command it is,
1230                     /// insert corresponding Unicode character
1231                     for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) {
1232                         if ((encoderLaTeXCharacterCommand.direction & DirectionCommandToUnicode) && encoderLaTeXCharacterCommand.command == alpha) {
1233                             output.append(QChar(encoderLaTeXCharacterCommand.unicode));
1234                             found = true;
1235                             break;
1236                         }
1237                     }
1238 
1239                     /// Check if a math command has been read,
1240                     /// like \subset
1241                     /// (automatically skipped if command was found above)
1242                     if (!found)
1243                         for (const MathCommand &mathCommand : mathCommands) {
1244                             if ((mathCommand.direction & DirectionCommandToUnicode) && mathCommand.command == alpha) {
1245                                 if (currentMathModeTop() == MathModeNone)
1246                                     qCDebug(LOG_KBIBTEX_IO) << "Found math mode command" << QString(QStringLiteral("\\%1")).arg(alpha) << "outside of a math expression";
1247                                 output.append(QChar(mathCommand.unicode));
1248                                 found = true;
1249                                 break;
1250                             }
1251                         }
1252 
1253                     if (found) {
1254                         /// Now, after a command, a whitespace may follow
1255                         /// which has to get "eaten" as it acts as a command
1256                         /// delimiter
1257                         if (nextPosAfterAlpha + 1 < input.length() && (input[nextPosAfterAlpha + 1] == QLatin1Char(' ') || input[nextPosAfterAlpha + 1] == QLatin1Char('\r') || input[nextPosAfterAlpha + 1] == QLatin1Char('\n')))
1258                             ++nextPosAfterAlpha;
1259                         else {
1260                             /// If no whitespace follows, still
1261                             /// check for extra curly brackets
1262                             checkForExtraCurlyAtEnd = true;
1263                         }
1264                     } else {
1265                         /// No command found? Just copy input char to output
1266                         output.append(QStringView{input}.mid(i, 1 + alpha.size()));
1267 
1268                         if (alpha == QStringLiteral("ensuremath") && input[nextPosAfterAlpha + 1] == QLatin1Char('{')) {
1269                             currentMathMode.push(MathModeEnsureMath);
1270                             popEnsureMathAtOpenCurlyBacketCounter.push(openCurlyBracketCounterEnsureMath);
1271                             ++openCurlyBracketCounterEnsureMath;
1272                             output.append(QLatin1Char('{'));
1273                             ++nextPosAfterAlpha;
1274                         }
1275                     }
1276                     i = nextPosAfterAlpha;
1277                 } else {
1278                     /// Maybe we are dealing with a string like \& or \_
1279                     /// Check which command it is
1280                     found = false;
1281                     for (const QChar &encoderLaTeXProtectedSymbol : encoderLaTeXProtectedSymbols)
1282                         if (encoderLaTeXProtectedSymbol == input[i + 1]) {
1283                             output.append(encoderLaTeXProtectedSymbol);
1284                             found = true;
1285                             break;
1286                         }
1287 
1288                     if (!found && currentMathModeTop() == MathModeNone)
1289                         for (const QChar &encoderLaTeXProtectedTextOnlySymbol : encoderLaTeXProtectedTextOnlySymbols)
1290                             if (encoderLaTeXProtectedTextOnlySymbol == input[i + 1]) {
1291                                 output.append(encoderLaTeXProtectedTextOnlySymbol);
1292                                 found = true;
1293                                 break;
1294                             }
1295 
1296                     /// If command has been found, nothing has to be done
1297                     /// except for hopping over this backslash
1298                     if (found)
1299                         ++i;
1300                     else if (i < len - 1 && input[i + 1] == QChar(0x002c /* comma */)) {
1301                         /// Found a thin space: \,
1302                         /// Replacing Latex-like thin space with Unicode thin space
1303                         output.append(QChar(0x2009));
1304                         // found = true; ///< only necessary if more tests will follow in the future
1305                         ++i;
1306                         found = true;
1307                     } else {
1308                         /// Nothing special, copy input char to output
1309                         output.append(c);
1310                         found = true;
1311                     }
1312                 }
1313             } else if (!found) {
1314                 /// Nothing special, copy input char to output
1315                 output.append(c);
1316             }
1317 
1318             /// Finally, check if there may be extra curly brackets
1319             /// like {} and hop over them
1320             if (checkForExtraCurlyAtEnd && i < len - 2 && input[i + 1] == QLatin1Char('{') && input[i + 2] == QLatin1Char('}'))
1321                 i += 2;
1322         } else {
1323             /// So far, no opening curly bracket and no backslash
1324             /// May still be a symbol sequence like ---
1325             bool isSymbolSequence = false;
1326             /// Go through all known symbol sequnces
1327             for (const EncoderLaTeXSymbolSequence &encoderLaTeXSymbolSequence : encoderLaTeXSymbolSequences) {
1328                 /// First, check if read input character matches beginning of symbol sequence
1329                 /// and input buffer as enough characters left to potentially contain
1330                 /// symbol sequence
1331                 const int latexLen = encoderLaTeXSymbolSequence.latex.length();
1332                 if ((encoderLaTeXSymbolSequence.direction & DirectionCommandToUnicode) && encoderLaTeXSymbolSequence.latex[0] == c && i <= len - latexLen) {
1333                     /// Now actually check if symbol sequence is in input buffer
1334                     isSymbolSequence = true;
1335                     for (int p = 1; isSymbolSequence && p < latexLen; ++p)
1336                         isSymbolSequence &= encoderLaTeXSymbolSequence.latex[p] == input[i + p];
1337                     if (isSymbolSequence) {
1338                         /// Ok, found sequence: insert Unicode character in output
1339                         /// and hop over sequence in input buffer
1340                         output.append(QChar(encoderLaTeXSymbolSequence.unicode));
1341                         i += encoderLaTeXSymbolSequence.latex.length() - 1;
1342                         break;
1343                     }
1344                 }
1345             }
1346 
1347             if (!isSymbolSequence) {
1348                 /// No symbol sequence found, so just copy input to output
1349                 output.append(c);
1350 
1351                 /// Still, check if input character is a dollar sign
1352                 /// without a preceding backslash, means toggling between
1353                 /// text mode and math mode
1354                 if (c == QLatin1Char('$') && (i == 0 || input[i - 1] != QLatin1Char('\\'))) {
1355                     if (currentMathModeTop() == MathModeDollar)
1356                         currentMathMode.pop(); //< the Dollar sign that got just read closes the math mode
1357                     else
1358                         currentMathMode.push(MathModeDollar); //< the Dollar sign that got just read starts a new math mode
1359                 }
1360                 if (currentMathModeTop() == MathModeEnsureMath) {
1361                     if (c == QLatin1Char('{'))
1362                         ++openCurlyBracketCounterEnsureMath;
1363                     else if (c == QLatin1Char('}'))
1364                         --openCurlyBracketCounterEnsureMath;
1365                     if (!popEnsureMathAtOpenCurlyBacketCounter.empty() && openCurlyBracketCounterEnsureMath == popEnsureMathAtOpenCurlyBacketCounter.top()) {
1366                         popEnsureMathAtOpenCurlyBacketCounter.pop();
1367                         currentMathMode.pop();
1368                     }
1369                 }
1370             }
1371         }
1372     }
1373 
1374     output.squeeze();
1375     return output;
1376 }
1377 
1378 bool EncoderLaTeX::testAndCopyVerbatimCommands(const QString &input, int &pos, QString &output) const
1379 {
1380     int copyBytesCount = 0;
1381     int openedClosedCurlyBrackets = 0;
1382 
1383     /// check for \url
1384     if (pos < input.length() - 6 && QStringView{input}.mid(pos, 5) == QStringLiteral("\\url{")) {
1385         copyBytesCount = 5;
1386         openedClosedCurlyBrackets = 1;
1387     }
1388 
1389     if (copyBytesCount > 0) {
1390         while (openedClosedCurlyBrackets > 0 && pos + copyBytesCount < input.length()) {
1391             ++copyBytesCount;
1392             if (input[pos + copyBytesCount] == QLatin1Char('{') && input[pos + copyBytesCount - 1] != QLatin1Char('\\')) ++openedClosedCurlyBrackets;
1393             else if (input[pos + copyBytesCount] == QLatin1Char('}') && input[pos + copyBytesCount - 1] != QLatin1Char('\\')) --openedClosedCurlyBrackets;
1394         }
1395 
1396         output.append(QStringView{input}.mid(pos, copyBytesCount));
1397         pos += copyBytesCount;
1398     }
1399 
1400     return copyBytesCount > 0;
1401 }
1402 
1403 QString EncoderLaTeX::encode(const QString &ninput, const TargetEncoding targetEncoding) const
1404 {
1405     /// Perform Canonical Decomposition followed by Canonical Composition
1406     const QString input = ninput.normalized(QString::NormalizationForm_C);
1407 
1408     int len = input.length();
1409     QString output;
1410     output.reserve(((len >> 10) + 2) << 10); // reserving multiples of 1024 Bytes
1411     enum MathMode {
1412         MathModeNone = 0, MathModeDollar, MathModeEnsureMath
1413     };
1414     QStack<MathMode> currentMathMode;
1415 #define currentMathModeTop()  (currentMathMode.empty()?MathModeNone:currentMathMode.top())
1416     int openCurlyBracketCounterEnsureMath = 0;
1417     QStack<int> popEnsureMathAtOpenCurlyBacketCounter;
1418 
1419     /// Go through input char by char
1420     for (int i = 0; i < len; ++i) {
1421         /**
1422          * Repeatedly check if input data contains a verbatim command
1423          * like \url{...}, append it to output, and update i to point
1424          * to the next character after the verbatim command.
1425          */
1426         while (testAndCopyVerbatimCommands(input, i, output));
1427         if (i >= len) break;
1428 
1429         const QChar c = input[i];
1430 
1431         if (targetEncoding == TargetEncoding::ASCII && c.unicode() > 127) {
1432             /// If current char is outside ASCII boundaries ...
1433             bool found = false;
1434 
1435             if (!found && !currentMathMode.empty()) {
1436                 /// Ok, test for math commands if already in math mode
1437                 for (const MathCommand &mathCommand : mathCommands)
1438                     if ((mathCommand.direction & DirectionUnicodeToCommand) && mathCommand.unicode == c.unicode()) {
1439                         output.append(QString(QStringLiteral("\\%1")).arg(mathCommand.command));
1440                         const QChar peekAhead = i < len - 1 ? input[i + 1] : QChar();
1441                         if (peekAhead != QLatin1Char('\\') && peekAhead != QLatin1Char('}') && peekAhead != QLatin1Char('$')) {
1442                             // Between current command and following character a separator is necessary
1443                             // FIXME This peek-ahead won't do its job properly, as it is not yet known
1444                             // whether the next character will be kept as-is or rewritten to, for example, a LaTeX command
1445                             // Example: if the complete input string is '$µµ$' and the current variable 'c' comes from
1446                             // the first 'µ', it will assume that curly brackets are necessary, thus the final output
1447                             // becomes '$\mu{}\mu$ despite that '$\mu\mu$' would have been a better output.
1448                             output.append(QStringLiteral("{}"));
1449                         }
1450                         found = true;
1451                         break;
1452                     }
1453             }
1454 
1455             /// Handle special cases of i without a dot (\i)
1456             for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters)
1457                 if ((dotlessIJCharacter.direction & DirectionUnicodeToCommand) && c.unicode() == dotlessIJCharacter.unicode) {
1458                     // FIXME Find a better solution, as the curly brackets are unnecessary in some situations
1459                     // e.g. '{\'\i}{\'\i}' should better be '{\'\i\'\i}'
1460                     output.append(QString(QStringLiteral("{\\%1\\%2}")).arg(dotlessIJCharacter.modifier, dotlessIJCharacter.letter));
1461                     found = true;
1462                     break;
1463                 }
1464 
1465             if (!found) {
1466                 /// ... test if there is a symbol sequence like ---
1467                 /// to encode it
1468                 for (const EncoderLaTeXSymbolSequence &encoderLaTeXSymbolSequence : encoderLaTeXSymbolSequences)
1469                     if (encoderLaTeXSymbolSequence.unicode == c.unicode() && (encoderLaTeXSymbolSequence.direction & DirectionUnicodeToCommand)) {
1470                         for (int l = 0; l < encoderLaTeXSymbolSequence.latex.length(); ++l)
1471                             output.append(encoderLaTeXSymbolSequence.latex[l]);
1472                         found = true;
1473                         break;
1474                     }
1475             }
1476 
1477             if (!found) {
1478                 /// Ok, no symbol sequence. Let's test character
1479                 /// commands like \ss
1480                 for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands)
1481                     if (encoderLaTeXCharacterCommand.unicode == c.unicode() && (encoderLaTeXCharacterCommand.direction & DirectionUnicodeToCommand)) {
1482                         // FIXME Find a better solution, as the curly brackets are unnecessary in some situations
1483                         // e.g. '{\command}{\command}' should better be '{\command\command}'
1484                         output.append(QString(QStringLiteral("{\\%1}")).arg(encoderLaTeXCharacterCommand.command));
1485                         found = true;
1486                         break;
1487                     }
1488             }
1489 
1490             if (!found) {
1491                 /// Ok, neither a character command. Let's test
1492                 /// escaped characters with modifiers like \"a
1493                 for (const EncoderLaTeXEscapedCharacter &encoderLaTeXEscapedCharacter : encoderLaTeXEscapedCharacters)
1494                     if ((encoderLaTeXEscapedCharacter.direction & DirectionUnicodeToCommand) && encoderLaTeXEscapedCharacter.unicode == c.unicode()) {
1495                         // FIXME Find a better solution, as the curly brackets are unnecessary in some situations
1496                         // e.g. '{\"a}{\"a}' should better be '{\"a\"a}'
1497                         const QString formatString = isAsciiLetter(encoderLaTeXEscapedCharacter.modifier) ? QStringLiteral("{\\%1 %2}") : QStringLiteral("{\\%1%2}");
1498                         output.append(formatString.arg(encoderLaTeXEscapedCharacter.modifier).arg(encoderLaTeXEscapedCharacter.letter));
1499                         found = true;
1500                         break;
1501                     }
1502             }
1503 
1504             if (!found && currentMathMode.empty()) {
1505                 /// Ok, test for math commands, even if outside of a math mode, then enter math mode for this character
1506                 for (const MathCommand &mathCommand : mathCommands)
1507                     if ((mathCommand.direction & DirectionUnicodeToCommand) && mathCommand.unicode == c.unicode()) {
1508                         // FIXME Find a better solution, as the \ensuremath should span several characters
1509                         // e.g. '\ensuremath{\alpha}\ensuremath{\alpha}' should better be '\ensuremath{\alpha\alpha}'
1510                         output.append(QString(QStringLiteral("\\ensuremath{\\%1}")).arg(mathCommand.command));
1511                         found = true;
1512                         break;
1513                     }
1514             }
1515 
1516             if (!found && c.unicode() == 0x2009) {
1517                 /// Thin space
1518                 output.append(QStringLiteral("\\,"));
1519                 found = true;
1520             }
1521 
1522             if (!found) {
1523                 qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10);
1524                 qCWarning(LOG_KBIBTEX_IO) << "Don't know how to encode Unicode char" << QString(QStringLiteral("0x%1")).arg(c.unicode(), 4, 16, QLatin1Char('0'));
1525                 output.append(c);
1526             }
1527         } else if ((targetEncoding == TargetEncoding::ASCII && c.unicode() <= 127)
1528                    || targetEncoding == TargetEncoding::UTF8
1529                    /** but not  targetEncoding == TargetEncoding::RAW */) {
1530             /// Current character is normal ASCII
1531             /// and targetEncoding was set to accept only ASCII characters
1532             /// -- or -- targetEncoding was set to accept UTF-8 characters
1533 
1534             /// Still, some characters have special meaning
1535             /// in TeX and have to be preceded with a backslash
1536             bool found = false;
1537             for (const QChar &encoderLaTeXProtectedSymbol : encoderLaTeXProtectedSymbols)
1538                 if (encoderLaTeXProtectedSymbol == c) {
1539                     output.append(QLatin1Char('\\')).append(c);
1540                     found = true;
1541                     break;
1542                 }
1543 
1544             if (!found && !currentMathMode.empty()) {
1545                 /// Ok, test for math commands if already in math mode
1546                 for (const MathCommand &mathCommand : mathCommands)
1547                     if ((mathCommand.direction & DirectionUnicodeToCommand) && mathCommand.unicode == c.unicode()) {
1548                         output.append(QString(QStringLiteral("\\%1")).arg(mathCommand.command));
1549                         const QChar peekAhead = i < len - 1 ? input[i + 1] : QChar();
1550                         if (peekAhead != QLatin1Char('\\') && peekAhead != QLatin1Char('}') && peekAhead != QLatin1Char('$')) {
1551                             // Between current command and following character a separator is necessary
1552                             // FIXME This peek-ahead won't do its job properly, as it is not yet known
1553                             // whether the next character will be kept as-is or rewritten to, for example, a LaTeX command
1554                             // Example: if the complete input string is '$µµ$' and the current variable 'c' comes from
1555                             // the first 'µ', it will assume that curly brackets are necessary, thus the final output
1556                             // becomes '$\mu{}\mu$ despite that '$\mu\mu$' would have been a better output.
1557                             output.append(QStringLiteral("{}"));
1558                         }
1559                         found = true;
1560                         break;
1561                     }
1562             }
1563 
1564             if (!found && currentMathMode.empty())
1565                 for (const QChar &encoderLaTeXProtectedTextOnlySymbol : encoderLaTeXProtectedTextOnlySymbols)
1566                     if (encoderLaTeXProtectedTextOnlySymbol == c) {
1567                         output.append(QLatin1Char('\\')).append(c);
1568                         found = true;
1569                         break;
1570                     }
1571 
1572             if (!found) {
1573                 /// Well, either this is not a special character or
1574                 /// we do not know what to do with it, so just dump it into the output
1575                 output.append(c);
1576                 found = true;
1577             }
1578 
1579             /// Finally, check if input character is a dollar sign
1580             /// without a preceding backslash, means toggling between
1581             /// text mode and math mode
1582             if (c == QLatin1Char('$') && (i == 0 || input[i - 1] != QLatin1Char('\\'))) {
1583                 if (currentMathMode.empty())
1584                     currentMathMode.push(MathModeDollar);
1585                 else if (currentMathModeTop() == MathModeDollar)
1586                     currentMathMode.pop();
1587                 else if (currentMathModeTop() == MathModeEnsureMath)
1588                     currentMathMode.push(MathModeDollar);
1589             } else if (output.right(12) == QStringLiteral("\\ensuremath{")) {
1590                 currentMathMode.push(MathModeEnsureMath);
1591                 popEnsureMathAtOpenCurlyBacketCounter.push(openCurlyBracketCounterEnsureMath);
1592                 // ++openCurlyBracketCounterEnsureMath; //< not necessary as right below both
1593                 /// 'currentMathModeTop() == MathModeEnsureMath' and 'c == QLatin1Char('{')'
1594                 /// will be true
1595             }
1596             if (currentMathModeTop() == MathModeEnsureMath) {
1597                 if (c == QLatin1Char('{'))
1598                     ++openCurlyBracketCounterEnsureMath;
1599                 else if (c == QLatin1Char('}'))
1600                     --openCurlyBracketCounterEnsureMath;
1601                 if (!popEnsureMathAtOpenCurlyBacketCounter.empty() && openCurlyBracketCounterEnsureMath == popEnsureMathAtOpenCurlyBacketCounter.top()) {
1602                     popEnsureMathAtOpenCurlyBacketCounter.pop();
1603                     currentMathMode.pop();
1604                 }
1605             }
1606         }
1607     }
1608 
1609     output.squeeze();
1610     return output;
1611 }
1612 
1613 int EncoderLaTeX::modifierInLookupTable(const QChar modifier) const
1614 {
1615     for (int m = 0; m < lookupTableNumModifiers && lookupTable[m] != nullptr; ++m)
1616         if (lookupTable[m]->modifier == modifier) return m;
1617     return -1;
1618 }
1619 
1620 QString EncoderLaTeX::readAlphaCharacters(const QString &base, int startFrom) const
1621 {
1622     const int len = base.size();
1623     for (int j = startFrom; j < len; ++j) {
1624         if (!isAsciiLetter(base[j]))
1625             return base.mid(startFrom, j - startFrom);
1626     }
1627     return base.mid(startFrom);
1628 }
1629 
1630 const EncoderLaTeX &EncoderLaTeX::instance()
1631 {
1632     static const EncoderLaTeX self;
1633     return self;
1634 }