// File indexing completed on 2024-05-19 05:05:32
0001 /*************************************************************************** 0002 * SPDX-License-Identifier: GPL-2.0-or-later 0003 * * 0004 * SPDX-FileCopyrightText: 2004-2019 Thomas Fischer <fischer@unix-ag.uni-kl.de> 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 * This program is distributed in the hope that it will be useful, * 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0014 * GNU General Public License for more details. * 0015 * * 0016 * You should have received a copy of the GNU General Public License * 0017 * along with this program; if not, see <https://www.gnu.org/licenses/>. * 0018 ***************************************************************************/ 0019 0020 #include "encoderxml.h" 0021 0022 #include <QStringList> 0023 #include <QList> 0024 0025 static const struct EncoderXMLCharMapping { 0026 QChar unicode; 0027 const QString xml; 0028 } 0029 charmappingdataxml[] = { 0030 {QChar(0x0026), QStringLiteral("&")}, 0031 {QChar(0x0022), QStringLiteral(""")}, 0032 {QChar(0x003C), QStringLiteral("<")}, 0033 {QChar(0x003E), QStringLiteral(">")} 0034 }; 0035 0036 /** 0037 * Private class to store internal variables that should not be visible 0038 * in the interface as defined in the header file. 
0039 */ 0040 class EncoderXML::EncoderXMLPrivate 0041 { 0042 public: 0043 static const QStringList backslashSymbols; 0044 }; 0045 0046 const QStringList EncoderXML::EncoderXMLPrivate::backslashSymbols {QStringLiteral("\\&"), QStringLiteral("\\%"), QStringLiteral("\\_")}; 0047 0048 EncoderXML::EncoderXML() 0049 : Encoder(), d(new EncoderXML::EncoderXMLPrivate) 0050 { 0051 /// nothing 0052 } 0053 0054 EncoderXML::~EncoderXML() 0055 { 0056 delete d; 0057 } 0058 0059 QString EncoderXML::decode(const QString &text) const 0060 { 0061 QString result = text; 0062 0063 for (const auto &item : charmappingdataxml) 0064 result.replace(item.xml, item.unicode); 0065 0066 /** 0067 * Find and replace all characters written as hexadecimal number 0068 */ 0069 int p = -1; 0070 while ((p = result.indexOf(QStringLiteral("&#x"), p + 1)) >= 0) { 0071 int p2 = result.indexOf(QStringLiteral(";"), p + 1); 0072 if (p2 < 0 || p2 > p + 8) break; 0073 bool ok = false; 0074 int hex = QStringView{result}.mid(p + 3, p2 - p - 3).toInt(&ok, 16); 0075 if (ok && hex > 0) 0076 result.replace(result.mid(p, p2 - p + 1), QChar(hex)); 0077 } 0078 0079 /** 0080 * Find and replace all characters written as decimal number 0081 */ 0082 p = -1; 0083 while ((p = result.indexOf(QStringLiteral("&#"), p + 1)) >= 0) { 0084 int p2 = result.indexOf(QStringLiteral(";"), p + 1); 0085 if (p2 < 0 || p2 > p + 8) break; 0086 bool ok = false; 0087 int dec = QStringView{result}.mid(p + 2, p2 - p - 2).toInt(&ok, 10); 0088 if (ok && dec > 0) 0089 result.replace(result.mid(p, p2 - p + 1), QChar(dec)); 0090 } 0091 0092 /// Replace special symbols with backslash-encoded variant (& --> \&) 0093 for (const QString &backslashSymbol : EncoderXMLPrivate::backslashSymbols) { 0094 int p = -1; 0095 while ((p = result.indexOf(backslashSymbol[1], p + 1)) >= 0) { 0096 if (p == 0 || result[p - 1] != QLatin1Char('\\')) { 0097 /// replace only symbols which have no backslash on their right 0098 result = result.left(p) + QLatin1Char('\\') + 
result.mid(p); 0099 ++p; 0100 } 0101 } 0102 } 0103 0104 return result; 0105 } 0106 0107 QString EncoderXML::encode(const QString &text, const TargetEncoding targetEncoding) const 0108 { 0109 QString result = text; 0110 0111 for (const auto &item : charmappingdataxml) 0112 result.replace(item.unicode, item.xml); 0113 0114 if (targetEncoding == TargetEncoding::ASCII) { 0115 /// Replace all problematic or non-ASCII characters (code < 32 or code > 127) 0116 /// with an entity code, for example a-umlaut becomes 'ä'. 0117 for (int i = result.length() - 1; i >= 0; --i) { 0118 const auto code = result[i].unicode(); 0119 if (code < 32 || code > 127) 0120 result = result.left(i) + QStringLiteral("&#") + QString::number(code) + QStringLiteral(";") + result.mid(i + 1); 0121 } 0122 } 0123 0124 /// Replace backlash-encoded symbols with plain text (\& --> &) 0125 for (const QString &backslashSymbol : EncoderXMLPrivate::backslashSymbols) { 0126 result.replace(backslashSymbol, backslashSymbol[1]); 0127 } 0128 0129 return result; 0130 } 0131 0132 const EncoderXML &EncoderXML::instance() 0133 { 0134 static const EncoderXML self; 0135 return self; 0136 }