File indexing completed on 2024-05-19 05:05:32

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2019 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "encoderxml.h"
0021 
0022 #include <QStringList>
0023 #include <QList>
0024 
0025 static const struct EncoderXMLCharMapping {
0026     QChar unicode;
0027     const QString xml;
0028 }
0029 charmappingdataxml[] = {
0030     {QChar(0x0026), QStringLiteral("&amp;")},
0031     {QChar(0x0022), QStringLiteral("&quot;")},
0032     {QChar(0x003C), QStringLiteral("&lt;")},
0033     {QChar(0x003E), QStringLiteral("&gt;")}
0034 };
0035 
0036 /**
0037  * Private class to store internal variables that should not be visible
0038  * in the interface as defined in the header file.
0039  */
0040 class EncoderXML::EncoderXMLPrivate
0041 {
0042 public:
0043     static const QStringList backslashSymbols;
0044 };
0045 
0046 const QStringList EncoderXML::EncoderXMLPrivate::backslashSymbols {QStringLiteral("\\&"), QStringLiteral("\\%"), QStringLiteral("\\_")};
0047 
0048 EncoderXML::EncoderXML()
0049         : Encoder(), d(new EncoderXML::EncoderXMLPrivate)
0050 {
0051     /// nothing
0052 }
0053 
0054 EncoderXML::~EncoderXML()
0055 {
0056     delete d;
0057 }
0058 
0059 QString EncoderXML::decode(const QString &text) const
0060 {
0061     QString result = text;
0062 
0063     for (const auto &item : charmappingdataxml)
0064         result.replace(item.xml, item.unicode);
0065 
0066     /**
0067      * Find and replace all characters written as hexadecimal number
0068      */
0069     int p = -1;
0070     while ((p = result.indexOf(QStringLiteral("&#x"), p + 1)) >= 0) {
0071         int p2 = result.indexOf(QStringLiteral(";"), p + 1);
0072         if (p2 < 0 || p2 > p + 8) break;
0073         bool ok = false;
0074         int hex = QStringView{result}.mid(p + 3, p2 - p - 3).toInt(&ok, 16);
0075         if (ok && hex > 0)
0076             result.replace(result.mid(p, p2 - p + 1), QChar(hex));
0077     }
0078 
0079     /**
0080       * Find and replace all characters written as decimal number
0081       */
0082     p = -1;
0083     while ((p = result.indexOf(QStringLiteral("&#"), p + 1)) >= 0) {
0084         int p2 = result.indexOf(QStringLiteral(";"), p + 1);
0085         if (p2 < 0 || p2 > p + 8) break;
0086         bool ok = false;
0087         int dec = QStringView{result}.mid(p + 2, p2 - p - 2).toInt(&ok, 10);
0088         if (ok && dec > 0)
0089             result.replace(result.mid(p, p2 - p + 1), QChar(dec));
0090     }
0091 
0092     /// Replace special symbols with backslash-encoded variant (& --> \&)
0093     for (const QString &backslashSymbol : EncoderXMLPrivate::backslashSymbols) {
0094         int p = -1;
0095         while ((p = result.indexOf(backslashSymbol[1], p + 1)) >= 0) {
0096             if (p == 0 || result[p - 1] != QLatin1Char('\\')) {
0097                 /// replace only symbols which have no backslash on their right
0098                 result = result.left(p) + QLatin1Char('\\') + result.mid(p);
0099                 ++p;
0100             }
0101         }
0102     }
0103 
0104     return result;
0105 }
0106 
0107 QString EncoderXML::encode(const QString &text, const TargetEncoding targetEncoding) const
0108 {
0109     QString result = text;
0110 
0111     for (const auto &item : charmappingdataxml)
0112         result.replace(item.unicode, item.xml);
0113 
0114     if (targetEncoding == TargetEncoding::ASCII) {
0115         /// Replace all problematic or non-ASCII characters (code < 32 or code > 127)
0116         /// with an entity code, for example a-umlaut becomes '&#228;'.
0117         for (int i = result.length() - 1; i >= 0; --i) {
0118             const auto code = result[i].unicode();
0119             if (code < 32 || code > 127)
0120                 result = result.left(i) + QStringLiteral("&#") + QString::number(code) + QStringLiteral(";") + result.mid(i + 1);
0121         }
0122     }
0123 
0124     /// Replace backlash-encoded symbols with plain text (\& --> &)
0125     for (const QString &backslashSymbol : EncoderXMLPrivate::backslashSymbols) {
0126         result.replace(backslashSymbol, backslashSymbol[1]);
0127     }
0128 
0129     return result;
0130 }
0131 
0132 const EncoderXML &EncoderXML::instance()
0133 {
0134     static const EncoderXML self;
0135     return self;
0136 }