File indexing completed on 2024-04-28 03:53:33
0001 /* 0002 This file is part of the KContacts framework. 0003 SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "kcontacts_debug.h" 0009 #include "vcardparser_p.h" 0010 #include <KCodecs> 0011 #include <QStringDecoder> 0012 #include <QStringEncoder> 0013 #include <functional> 0014 0015 // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings 0016 class StringCache 0017 { 0018 public: 0019 QString fromLatin1(const QByteArray &value) 0020 { 0021 if (value.isEmpty()) { 0022 return QString(); 0023 } 0024 0025 auto it = m_values.constFind(value); 0026 if (it != m_values.constEnd()) { 0027 return it.value(); 0028 } 0029 0030 QString string = QString::fromLatin1(value); 0031 m_values.insert(value, string); 0032 return string; 0033 } 0034 0035 private: 0036 QHash<QByteArray, QString> m_values; 0037 }; 0038 0039 using namespace KContacts; 0040 0041 static void addEscapes(QByteArray &str, bool excludeEscapedComma) 0042 { 0043 str.replace('\\', "\\\\"); 0044 if (!excludeEscapedComma) { 0045 str.replace(',', "\\,"); 0046 } 0047 str.replace('\r', "\\r"); 0048 str.replace('\n', "\\n"); 0049 } 0050 0051 static void removeEscapes(QByteArray &str) 0052 { 0053 // It's more likely that no escape is present, so add fast path 0054 if (!str.contains('\\')) { 0055 return; 0056 } 0057 str.replace("\\n", "\n"); 0058 str.replace("\\N", "\n"); 0059 str.replace("\\r", "\r"); 0060 str.replace("\\,", ","); 0061 str.replace("\\\\", "\\"); 0062 } 0063 0064 class VCardLineParser 0065 { 0066 public: 0067 VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine) 0068 : m_cache(cache) 0069 , m_fetchAnotherLine(fetchAnotherLine) 0070 { 0071 } 0072 0073 void parseLine(const QByteArray ¤tLine, VCardLine *vCardLine); 0074 0075 private: 0076 void addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue); 0077 0078 private: 0079 StringCache &m_cache; 0080 std::function<QByteArray()> m_fetchAnotherLine; 0081 0082 VCardLine *m_vCardLine = nullptr; 0083 QByteArray m_encoding; 0084 QByteArray m_charset; 0085 }; 0086 0087 void VCardLineParser::addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue) 0088 { 0089 if (paramKey == "encoding") { 0090 m_encoding = paramValue.toLower(); 0091 } else if (paramKey == "charset") { 0092 m_charset = paramValue.toLower(); 0093 } 0094 // qDebug() << " add parameter" << paramKey << " = " << paramValue; 0095 m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue)); 0096 } 0097 0098 void VCardLineParser::parseLine(const QByteArray ¤tLine, KContacts::VCardLine *vCardLine) 0099 { 0100 // qDebug() << currentLine; 0101 m_vCardLine = vCardLine; 0102 // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong. 0103 // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234; 0104 // Therefore we need a small state machine, just the way I like it. 0105 enum State { 0106 StateInitial, 0107 StateParamKey, 0108 StateParamValue, 0109 StateQuotedValue, 0110 StateAfterParamValue, 0111 StateValue, 0112 }; 0113 State state = StateInitial; 0114 const int lineLength = currentLine.length(); 0115 const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode 0116 QByteArray paramKey; 0117 QByteArray paramValue; 0118 int start = 0; 0119 int pos = 0; 0120 for (; pos < lineLength; ++pos) { 0121 const char ch = lineData[pos]; 0122 const bool colonOrSemicolon = (ch == ';' || ch == ':'); 0123 switch (state) { 0124 case StateInitial: 0125 if (colonOrSemicolon) { 0126 const QByteArray identifier = currentLine.mid(start, pos - start); 0127 // qDebug() << " identifier" << identifier; 0128 vCardLine->setIdentifier(m_cache.fromLatin1(identifier)); 0129 start = pos + 1; 0130 } 0131 if (ch == ';') { 0132 state = StateParamKey; 0133 } else if (ch == ':') { 0134 state = StateValue; 0135 } else if (ch == '.') { 0136 vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start))); 0137 start = pos + 1; 0138 } 0139 break; 0140 case StateParamKey: 0141 if (colonOrSemicolon || ch == '=') { 0142 paramKey = currentLine.mid(start, pos - start); 0143 start = pos + 1; 0144 } 0145 if (colonOrSemicolon) { 0146 // correct the so-called 2.1 'standard' 0147 paramValue = paramKey; 0148 const QByteArray lowerKey = paramKey.toLower(); 0149 if (lowerKey == "quoted-printable" || lowerKey == "base64") { 0150 paramKey = "encoding"; 0151 } else { 0152 paramKey = "type"; 0153 } 0154 addParameter(paramKey, paramValue); 0155 } 0156 if (ch == ';') { 0157 state = StateParamKey; 0158 } else if (ch == ':') { 0159 state = StateValue; 0160 } else if (ch == '=') { 0161 state = StateParamValue; 0162 } 0163 break; 0164 case StateQuotedValue: 0165 if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) { 0166 // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec 0167 paramValue = currentLine.mid(start, pos - start); 0168 addParameter(paramKey.toLower(), paramValue); 0169 start = pos + 1; 0170 if (ch == '"') { 0171 state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char 0172 } 0173 } 0174 break; 0175 case StateParamValue: 0176 if (colonOrSemicolon || ch == ',') { 0177 paramValue = currentLine.mid(start, pos - start); 0178 addParameter(paramKey.toLower(), paramValue); 0179 start = pos + 1; 0180 } 0181 // fall-through intended 0182 Q_FALLTHROUGH(); 0183 case StateAfterParamValue: 0184 if (ch == ';') { 0185 state = StateParamKey; 0186 start = pos + 1; 0187 } else if (ch == ':') { 0188 state = StateValue; 0189 } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec. 0190 state = StateQuotedValue; 0191 start = pos + 1; 0192 } 0193 break; 0194 case StateValue: 0195 Q_UNREACHABLE(); 0196 break; 0197 } 0198 0199 if (state == StateValue) { 0200 break; 0201 } 0202 } 0203 0204 if (state != StateValue) { // invalid line, no ':' 0205 return; 0206 } 0207 0208 QByteArray value = currentLine.mid(pos + 1); 0209 removeEscapes(value); 0210 0211 QByteArray output; 0212 bool wasBase64Encoded = false; 0213 0214 if (!m_encoding.isEmpty()) { 0215 // have to decode the data 0216 if (m_encoding == "b" || m_encoding == "base64") { 0217 output = QByteArray::fromBase64(value); 0218 wasBase64Encoded = true; 0219 } else if (m_encoding == "quoted-printable") { 0220 // join any qp-folded lines 0221 while (value.endsWith('=')) { 0222 value.chop(1); // remove the '=' 0223 value.append(m_fetchAnotherLine()); 0224 } 0225 KCodecs::quotedPrintableDecode(value, output); 0226 } else if (m_encoding == "8bit") { 0227 output = value; 0228 } else { 0229 qDebug("Unknown vcard encoding type!"); 0230 } 0231 } else { 0232 output = value; 0233 } 0234 0235 if (!m_charset.isEmpty()) { 0236 // have to convert the data 0237 auto codec = QStringDecoder(m_charset.constData()); 0238 if (codec.isValid()) { 0239 vCardLine->setValue(QVariant::fromValue<QString>(codec.decode(output))); 0240 } else { 0241 vCardLine->setValue(QString::fromUtf8(output)); 0242 } 0243 } else if (wasBase64Encoded) { 0244 vCardLine->setValue(output); 0245 } else { 0246 vCardLine->setValue(QString::fromUtf8(output)); 0247 } 0248 } 0249 0250 //// 0251 0252 VCardParser::VCardParser() 0253 { 0254 } 0255 0256 VCardParser::~VCardParser() 0257 { 0258 } 0259 0260 VCard::List VCardParser::parseVCards(const QByteArray &text) 0261 { 0262 VCard currentVCard; 0263 VCard::List vCardList; 0264 QByteArray currentLine; 0265 0266 int lineStart = 0; 0267 int lineEnd = text.indexOf('\n'); 0268 0269 bool inVCard = false; 0270 0271 StringCache cache; 0272 for (; lineStart != text.size() + 1; 0273 lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) { 0274 QByteArray cur = text.mid(lineStart, lineEnd - lineStart); 0275 // remove the trailing \r, left from \r\n 0276 if (cur.endsWith('\r')) { 0277 cur.chop(1); 0278 } 0279 0280 if (cur.startsWith(' ') // 0281 || cur.startsWith('\t')) { // folded line => append to previous 0282 currentLine.append(cur.mid(1)); 0283 continue; 0284 } else { 0285 if (cur.trimmed().isEmpty()) { // empty line 0286 continue; 0287 } 0288 if (inVCard && !currentLine.isEmpty()) { // now parse the line 0289 VCardLine vCardLine; 0290 0291 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support) 0292 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray { 0293 const QByteArray ret = cur; 0294 lineStart = lineEnd + 1; 0295 lineEnd = text.indexOf('\n', lineStart); 0296 if (lineEnd != -1) { 0297 cur = text.mid(lineStart, lineEnd - lineStart); 0298 // remove the trailing \r, left from \r\n 0299 if (cur.endsWith('\r')) { 0300 cur.chop(1); 0301 } 0302 } 0303 return ret; 0304 }; 0305 0306 VCardLineParser lineParser(cache, fetchAnotherLine); 0307 0308 lineParser.parseLine(currentLine, &vCardLine); 0309 0310 currentVCard.addLine(vCardLine); 0311 } 0312 0313 // we do not save the start and end tag as vcardline 0314 if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) { 0315 inVCard = true; 0316 currentLine.clear(); 0317 currentVCard.clear(); // flush vcard 0318 continue; 0319 } 0320 0321 if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) { 0322 inVCard = false; 0323 vCardList.append(currentVCard); 0324 currentLine.clear(); 0325 currentVCard.clear(); // flush vcard 0326 continue; 0327 } 0328 0329 currentLine = cur; 0330 } 0331 } 0332 0333 return vCardList; 0334 } 0335 0336 static const int FOLD_WIDTH = 75; 0337 0338 QByteArray VCardParser::createVCards(const VCard::List &list) 0339 { 0340 QByteArray text; 0341 QByteArray textLine; 0342 QString encodingType; 0343 QStringList params; 0344 QStringList values; 0345 0346 VCardLine::List lines; 0347 0348 bool hasEncoding; 0349 0350 text.reserve(list.size() * 300); // reserve memory to be more efficient 0351 0352 // iterate over the cards 0353 for (const VCard &card : list) { 0354 text.append("BEGIN:VCARD\r\n"); 0355 0356 QStringList idents = card.identifiers(); 0357 // VERSION must be first 0358 if (idents.contains(QLatin1String("VERSION"))) { 0359 const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION"))); 0360 idents.prepend(str); 0361 } 0362 0363 for (const auto &id : std::as_const(idents)) { 0364 lines = card.lines(id); 0365 0366 // iterate over the lines 0367 for (const VCardLine &vline : std::as_const(lines)) { 0368 QVariant val = vline.value(); 0369 if (val.isValid()) { 0370 if (vline.hasGroup()) { 0371 textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1(); 0372 } else { 0373 textLine = vline.identifier().toLatin1(); 0374 } 0375 0376 params = vline.parameterList(); 0377 hasEncoding = false; 0378 if (!params.isEmpty()) { // we have parameters 0379 for (const QString ¶m : std::as_const(params)) { 0380 if (param == QLatin1String("encoding")) { 0381 hasEncoding = true; 0382 encodingType = vline.parameter(QStringLiteral("encoding")).toLower(); 0383 } 0384 0385 values = vline.parameters(param); 0386 for (const QString &str : std::as_const(values)) { 0387 textLine.append(';' + param.toLatin1().toUpper()); 0388 if (!str.isEmpty()) { 0389 textLine.append('=' + str.toLatin1()); 0390 } 0391 } 0392 } 0393 } 0394 0395 QByteArray input; 0396 QByteArray output; 0397 bool checkMultibyte = false; // avoid splitting a multibyte character 0398 0399 // handle charset 0400 const QString charset = vline.parameter(QStringLiteral("charset")); 0401 if (!charset.isEmpty()) { 0402 // have to convert the data 0403 const QString value = vline.value().toString(); 0404 auto codec = QStringEncoder(charset.toLatin1().constData()); 0405 if (codec.isValid()) { 0406 input = codec.encode(value); 0407 } else { 0408 checkMultibyte = true; 0409 input = value.toUtf8(); 0410 } 0411 } else if (vline.value().userType() == QMetaType::QByteArray) { 0412 input = vline.value().toByteArray(); 0413 } else { 0414 checkMultibyte = true; 0415 input = vline.value().toString().toUtf8(); 0416 } 0417 0418 // handle encoding 0419 if (hasEncoding) { // have to encode the data 0420 if (encodingType == QLatin1Char('b')) { 0421 checkMultibyte = false; 0422 output = input.toBase64(); 0423 } else if (encodingType == QLatin1String("quoted-printable")) { 0424 checkMultibyte = false; 0425 KCodecs::quotedPrintableEncode(input, output, false); 0426 } 0427 } else { 0428 output = input; 0429 } 0430 addEscapes(output, (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO"))); 0431 0432 if (!output.isEmpty()) { 0433 textLine.append(':' + output); 0434 0435 if (textLine.length() > FOLD_WIDTH) { // we have to fold the line 0436 if (checkMultibyte) { 0437 // RFC 6350: Multi-octet characters MUST remain contiguous. 0438 // we know that textLine contains UTF-8 encoded characters 0439 int lineLength = 0; 0440 for (int i = 0; i < textLine.length(); ++i) { 0441 if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows 0442 int sequenceLength = 2; 0443 if ((textLine[i] & 0xE0) == 0xE0) { 0444 sequenceLength = 3; 0445 } else if ((textLine[i] & 0xF0) == 0xF0) { 0446 sequenceLength = 4; 0447 } 0448 if ((lineLength + sequenceLength) > FOLD_WIDTH) { 0449 // the current line would be too long. fold it 0450 text += "\r\n " + textLine.mid(i, sequenceLength); 0451 lineLength = 1 + sequenceLength; // incl. leading space 0452 } else { 0453 text += textLine.mid(i, sequenceLength); 0454 lineLength += sequenceLength; 0455 } 0456 i += sequenceLength - 1; 0457 } else { 0458 text += textLine[i]; 0459 ++lineLength; 0460 } 0461 if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) { 0462 text += "\r\n "; 0463 lineLength = 1; // leading space 0464 } 0465 } 0466 text += "\r\n"; 0467 } else { 0468 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) { 0469 text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n"); 0470 } 0471 } 0472 } else { 0473 text.append(textLine); 0474 text.append("\r\n"); 0475 } 0476 } 0477 } 0478 } 0479 } 0480 0481 text.append("END:VCARD\r\n"); 0482 text.append("\r\n"); 0483 } 0484 0485 return text; 0486 }