File indexing completed on 2024-05-12 15:34:35
0001 /* 0002 This file is part of the KContacts framework. 0003 SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "kcontacts_debug.h" 0009 #include "vcardparser_p.h" 0010 #include <KCodecs> 0011 #include <QTextCodec> 0012 #include <functional> 0013 0014 // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings 0015 class StringCache 0016 { 0017 public: 0018 QString fromLatin1(const QByteArray &value) 0019 { 0020 if (value.isEmpty()) { 0021 return QString(); 0022 } 0023 0024 auto it = m_values.constFind(value); 0025 if (it != m_values.constEnd()) { 0026 return it.value(); 0027 } 0028 0029 QString string = QString::fromLatin1(value); 0030 m_values.insert(value, string); 0031 return string; 0032 } 0033 0034 private: 0035 QHash<QByteArray, QString> m_values; 0036 }; 0037 0038 using namespace KContacts; 0039 0040 static void addEscapes(QByteArray &str, bool excludeEscapedComma) 0041 { 0042 str.replace('\\', "\\\\"); 0043 if (!excludeEscapedComma) { 0044 str.replace(',', "\\,"); 0045 } 0046 str.replace('\r', "\\r"); 0047 str.replace('\n', "\\n"); 0048 } 0049 0050 static void removeEscapes(QByteArray &str) 0051 { 0052 // It's more likely that no escape is present, so add fast path 0053 if (!str.contains('\\')) { 0054 return; 0055 } 0056 str.replace("\\n", "\n"); 0057 str.replace("\\N", "\n"); 0058 str.replace("\\r", "\r"); 0059 str.replace("\\,", ","); 0060 str.replace("\\\\", "\\"); 0061 } 0062 0063 class VCardLineParser 0064 { 0065 public: 0066 VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine) 0067 : m_cache(cache) 0068 , m_fetchAnotherLine(fetchAnotherLine) 0069 { 0070 } 0071 0072 void parseLine(const QByteArray ¤tLine, VCardLine *vCardLine); 0073 0074 private: 0075 void addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue); 0076 0077 private: 0078 StringCache &m_cache; 0079 std::function<QByteArray()> m_fetchAnotherLine; 0080 0081 VCardLine *m_vCardLine = nullptr; 0082 QByteArray m_encoding; 0083 QByteArray m_charset; 0084 }; 0085 0086 void VCardLineParser::addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue) 0087 { 0088 if (paramKey == "encoding") { 0089 m_encoding = paramValue.toLower(); 0090 } else if (paramKey == "charset") { 0091 m_charset = paramValue.toLower(); 0092 } 0093 // qDebug() << " add parameter" << paramKey << " = " << paramValue; 0094 m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue)); 0095 } 0096 0097 void VCardLineParser::parseLine(const QByteArray ¤tLine, KContacts::VCardLine *vCardLine) 0098 { 0099 // qDebug() << currentLine; 0100 m_vCardLine = vCardLine; 0101 // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong. 0102 // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234; 0103 // Therefore we need a small state machine, just the way I like it. 0104 enum State { 0105 StateInitial, 0106 StateParamKey, 0107 StateParamValue, 0108 StateQuotedValue, 0109 StateAfterParamValue, 0110 StateValue, 0111 }; 0112 State state = StateInitial; 0113 const int lineLength = currentLine.length(); 0114 const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode 0115 QByteArray paramKey; 0116 QByteArray paramValue; 0117 int start = 0; 0118 int pos = 0; 0119 for (; pos < lineLength; ++pos) { 0120 const char ch = lineData[pos]; 0121 const bool colonOrSemicolon = (ch == ';' || ch == ':'); 0122 switch (state) { 0123 case StateInitial: 0124 if (colonOrSemicolon) { 0125 const QByteArray identifier = currentLine.mid(start, pos - start); 0126 // qDebug() << " identifier" << identifier; 0127 vCardLine->setIdentifier(m_cache.fromLatin1(identifier)); 0128 start = pos + 1; 0129 } 0130 if (ch == ';') { 0131 state = StateParamKey; 0132 } else if (ch == ':') { 0133 state = StateValue; 0134 } else if (ch == '.') { 0135 vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start))); 0136 start = pos + 1; 0137 } 0138 break; 0139 case StateParamKey: 0140 if (colonOrSemicolon || ch == '=') { 0141 paramKey = currentLine.mid(start, pos - start); 0142 start = pos + 1; 0143 } 0144 if (colonOrSemicolon) { 0145 // correct the so-called 2.1 'standard' 0146 paramValue = paramKey; 0147 const QByteArray lowerKey = paramKey.toLower(); 0148 if (lowerKey == "quoted-printable" || lowerKey == "base64") { 0149 paramKey = "encoding"; 0150 } else { 0151 paramKey = "type"; 0152 } 0153 addParameter(paramKey, paramValue); 0154 } 0155 if (ch == ';') { 0156 state = StateParamKey; 0157 } else if (ch == ':') { 0158 state = StateValue; 0159 } else if (ch == '=') { 0160 state = StateParamValue; 0161 } 0162 break; 0163 case StateQuotedValue: 0164 if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) { 0165 // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec 0166 paramValue = currentLine.mid(start, pos - start); 0167 addParameter(paramKey.toLower(), paramValue); 0168 start = pos + 1; 0169 if (ch == '"') { 0170 state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char 0171 } 0172 } 0173 break; 0174 case StateParamValue: 0175 if (colonOrSemicolon || ch == ',') { 0176 paramValue = currentLine.mid(start, pos - start); 0177 addParameter(paramKey.toLower(), paramValue); 0178 start = pos + 1; 0179 } 0180 // fall-through intended 0181 Q_FALLTHROUGH(); 0182 case StateAfterParamValue: 0183 if (ch == ';') { 0184 state = StateParamKey; 0185 start = pos + 1; 0186 } else if (ch == ':') { 0187 state = StateValue; 0188 } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec. 0189 state = StateQuotedValue; 0190 start = pos + 1; 0191 } 0192 break; 0193 case StateValue: 0194 Q_UNREACHABLE(); 0195 break; 0196 } 0197 0198 if (state == StateValue) { 0199 break; 0200 } 0201 } 0202 0203 if (state != StateValue) { // invalid line, no ':' 0204 return; 0205 } 0206 0207 QByteArray value = currentLine.mid(pos + 1); 0208 removeEscapes(value); 0209 0210 QByteArray output; 0211 bool wasBase64Encoded = false; 0212 0213 if (!m_encoding.isEmpty()) { 0214 // have to decode the data 0215 if (m_encoding == "b" || m_encoding == "base64") { 0216 output = QByteArray::fromBase64(value); 0217 wasBase64Encoded = true; 0218 } else if (m_encoding == "quoted-printable") { 0219 // join any qp-folded lines 0220 while (value.endsWith('=')) { 0221 value.chop(1); // remove the '=' 0222 value.append(m_fetchAnotherLine()); 0223 } 0224 KCodecs::quotedPrintableDecode(value, output); 0225 } else if (m_encoding == "8bit") { 0226 output = value; 0227 } else { 0228 qDebug("Unknown vcard encoding type!"); 0229 } 0230 } else { 0231 output = value; 0232 } 0233 0234 if (!m_charset.isEmpty()) { 0235 // have to convert the data 0236 QTextCodec *codec = QTextCodec::codecForName(m_charset); 0237 if (codec) { 0238 vCardLine->setValue(codec->toUnicode(output)); 0239 } else { 0240 vCardLine->setValue(QString::fromUtf8(output)); 0241 } 0242 } else if (wasBase64Encoded) { 0243 vCardLine->setValue(output); 0244 } else { 0245 vCardLine->setValue(QString::fromUtf8(output)); 0246 } 0247 } 0248 0249 //// 0250 0251 VCardParser::VCardParser() 0252 { 0253 } 0254 0255 VCardParser::~VCardParser() 0256 { 0257 } 0258 0259 VCard::List VCardParser::parseVCards(const QByteArray &text) 0260 { 0261 VCard currentVCard; 0262 VCard::List vCardList; 0263 QByteArray currentLine; 0264 0265 int lineStart = 0; 0266 int lineEnd = text.indexOf('\n'); 0267 0268 bool inVCard = false; 0269 0270 StringCache cache; 0271 for (; lineStart != text.size() + 1; 0272 lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) { 0273 QByteArray cur = text.mid(lineStart, lineEnd - lineStart); 0274 // remove the trailing \r, left from \r\n 0275 if (cur.endsWith('\r')) { 0276 cur.chop(1); 0277 } 0278 0279 if (cur.startsWith(' ') // 0280 || cur.startsWith('\t')) { // folded line => append to previous 0281 currentLine.append(cur.mid(1)); 0282 continue; 0283 } else { 0284 if (cur.trimmed().isEmpty()) { // empty line 0285 continue; 0286 } 0287 if (inVCard && !currentLine.isEmpty()) { // now parse the line 0288 VCardLine vCardLine; 0289 0290 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support) 0291 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray { 0292 const QByteArray ret = cur; 0293 lineStart = lineEnd + 1; 0294 lineEnd = text.indexOf('\n', lineStart); 0295 if (lineEnd != -1) { 0296 cur = text.mid(lineStart, lineEnd - lineStart); 0297 // remove the trailing \r, left from \r\n 0298 if (cur.endsWith('\r')) { 0299 cur.chop(1); 0300 } 0301 } 0302 return ret; 0303 }; 0304 0305 VCardLineParser lineParser(cache, fetchAnotherLine); 0306 0307 lineParser.parseLine(currentLine, &vCardLine); 0308 0309 currentVCard.addLine(vCardLine); 0310 } 0311 0312 // we do not save the start and end tag as vcardline 0313 if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) { 0314 inVCard = true; 0315 currentLine.clear(); 0316 currentVCard.clear(); // flush vcard 0317 continue; 0318 } 0319 0320 if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) { 0321 inVCard = false; 0322 vCardList.append(currentVCard); 0323 currentLine.clear(); 0324 currentVCard.clear(); // flush vcard 0325 continue; 0326 } 0327 0328 currentLine = cur; 0329 } 0330 } 0331 0332 return vCardList; 0333 } 0334 0335 static const int FOLD_WIDTH = 75; 0336 0337 QByteArray VCardParser::createVCards(const VCard::List &list) 0338 { 0339 QByteArray text; 0340 QByteArray textLine; 0341 QString encodingType; 0342 QStringList params; 0343 QStringList values; 0344 0345 VCardLine::List lines; 0346 0347 bool hasEncoding; 0348 0349 text.reserve(list.size() * 300); // reserve memory to be more efficient 0350 0351 // iterate over the cards 0352 for (const VCard &card : list) { 0353 text.append("BEGIN:VCARD\r\n"); 0354 0355 QStringList idents = card.identifiers(); 0356 // VERSION must be first 0357 if (idents.contains(QLatin1String("VERSION"))) { 0358 const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION"))); 0359 idents.prepend(str); 0360 } 0361 0362 for (const auto &id : std::as_const(idents)) { 0363 lines = card.lines(id); 0364 0365 // iterate over the lines 0366 for (const VCardLine &vline : std::as_const(lines)) { 0367 QVariant val = vline.value(); 0368 if (val.isValid()) { 0369 if (vline.hasGroup()) { 0370 textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1(); 0371 } else { 0372 textLine = vline.identifier().toLatin1(); 0373 } 0374 0375 params = vline.parameterList(); 0376 hasEncoding = false; 0377 if (!params.isEmpty()) { // we have parameters 0378 for (const QString ¶m : std::as_const(params)) { 0379 if (param == QLatin1String("encoding")) { 0380 hasEncoding = true; 0381 encodingType = vline.parameter(QStringLiteral("encoding")).toLower(); 0382 } 0383 0384 values = vline.parameters(param); 0385 for (const QString &str : std::as_const(values)) { 0386 textLine.append(';' + param.toLatin1().toUpper()); 0387 if (!str.isEmpty()) { 0388 textLine.append('=' + str.toLatin1()); 0389 } 0390 } 0391 } 0392 } 0393 0394 QByteArray input; 0395 QByteArray output; 0396 bool checkMultibyte = false; // avoid splitting a multibyte character 0397 0398 // handle charset 0399 const QString charset = vline.parameter(QStringLiteral("charset")); 0400 if (!charset.isEmpty()) { 0401 // have to convert the data 0402 const QString value = vline.value().toString(); 0403 QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1()); 0404 if (codec) { 0405 input = codec->fromUnicode(value); 0406 } else { 0407 checkMultibyte = true; 0408 input = value.toUtf8(); 0409 } 0410 } else if (vline.value().type() == QVariant::ByteArray) { 0411 input = vline.value().toByteArray(); 0412 } else { 0413 checkMultibyte = true; 0414 input = vline.value().toString().toUtf8(); 0415 } 0416 0417 // handle encoding 0418 if (hasEncoding) { // have to encode the data 0419 if (encodingType == QLatin1Char('b')) { 0420 checkMultibyte = false; 0421 output = input.toBase64(); 0422 } else if (encodingType == QLatin1String("quoted-printable")) { 0423 checkMultibyte = false; 0424 KCodecs::quotedPrintableEncode(input, output, false); 0425 } 0426 } else { 0427 output = input; 0428 } 0429 addEscapes(output, (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO"))); 0430 0431 if (!output.isEmpty()) { 0432 textLine.append(':' + output); 0433 0434 if (textLine.length() > FOLD_WIDTH) { // we have to fold the line 0435 if (checkMultibyte) { 0436 // RFC 6350: Multi-octet characters MUST remain contiguous. 0437 // we know that textLine contains UTF-8 encoded characters 0438 int lineLength = 0; 0439 for (int i = 0; i < textLine.length(); ++i) { 0440 if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows 0441 int sequenceLength = 2; 0442 if ((textLine[i] & 0xE0) == 0xE0) { 0443 sequenceLength = 3; 0444 } else if ((textLine[i] & 0xF0) == 0xF0) { 0445 sequenceLength = 4; 0446 } 0447 if ((lineLength + sequenceLength) > FOLD_WIDTH) { 0448 // the current line would be too long. fold it 0449 text += "\r\n " + textLine.mid(i, sequenceLength); 0450 lineLength = 1 + sequenceLength; // incl. leading space 0451 } else { 0452 text += textLine.mid(i, sequenceLength); 0453 lineLength += sequenceLength; 0454 } 0455 i += sequenceLength - 1; 0456 } else { 0457 text += textLine[i]; 0458 ++lineLength; 0459 } 0460 if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) { 0461 text += "\r\n "; 0462 lineLength = 1; // leading space 0463 } 0464 } 0465 text += "\r\n"; 0466 } else { 0467 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) { 0468 text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n"); 0469 } 0470 } 0471 } else { 0472 text.append(textLine); 0473 text.append("\r\n"); 0474 } 0475 } 0476 } 0477 } 0478 } 0479 0480 text.append("END:VCARD\r\n"); 0481 text.append("\r\n"); 0482 } 0483 0484 return text; 0485 }